{
  "best_metric": 0.69158399,
  "best_model_checkpoint": "/home/gaohuan03/weiyuancheng/weiyuancheng/ckpt/qwen2_5/threedataset_v4_options/v0-20250328-154206/checkpoint-2241",
  "epoch": 0.9995818349084219,
  "eval_steps": 200.0,
  "global_step": 2241,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00044604276435003206,
      "grad_norm": 14.581809043884277,
      "learning_rate": 8.849557522123894e-08,
      "loss": 1.2788121700286865,
      "memory(GiB)": 74.59,
      "step": 1,
      "token_acc": 0.5882352941176471,
      "train_speed(iter/s)": 0.009487
    },
    {
      "epoch": 0.00892085528700064,
      "grad_norm": 4.784027099609375,
      "learning_rate": 1.769911504424779e-06,
      "loss": 1.1330263238204152,
      "memory(GiB)": 74.59,
      "step": 20,
      "token_acc": 0.716116035455278,
      "train_speed(iter/s)": 0.016557
    },
    {
      "epoch": 0.01784171057400128,
      "grad_norm": 3.2026968002319336,
      "learning_rate": 3.539823008849558e-06,
      "loss": 0.8129110336303711,
      "memory(GiB)": 74.59,
      "step": 40,
      "token_acc": 0.7615552469748066,
      "train_speed(iter/s)": 0.016819
    },
    {
      "epoch": 0.026762565861001925,
      "grad_norm": 2.9704596996307373,
      "learning_rate": 5.309734513274337e-06,
      "loss": 0.7791303157806396,
      "memory(GiB)": 74.59,
      "step": 60,
      "token_acc": 0.7574870387313205,
      "train_speed(iter/s)": 0.016912
    },
    {
      "epoch": 0.03568342114800256,
      "grad_norm": 2.6082327365875244,
      "learning_rate": 7.079646017699116e-06,
      "loss": 0.7594192504882813,
      "memory(GiB)": 74.59,
      "step": 80,
      "token_acc": 0.7755320286600363,
      "train_speed(iter/s)": 0.017011
    },
    {
      "epoch": 0.044604276435003205,
      "grad_norm": 2.7924091815948486,
      "learning_rate": 8.849557522123895e-06,
      "loss": 0.7468526840209961,
      "memory(GiB)": 74.59,
      "step": 100,
      "token_acc": 0.7796342289455859,
      "train_speed(iter/s)": 0.01705
    },
    {
      "epoch": 0.05352513172200385,
      "grad_norm": 2.9287681579589844,
      "learning_rate": 9.999733013856755e-06,
      "loss": 0.7415700912475586,
      "memory(GiB)": 74.59,
      "step": 120,
      "token_acc": 0.7834681868333946,
      "train_speed(iter/s)": 0.017075
    },
    {
      "epoch": 0.062445987009004486,
      "grad_norm": 2.8752827644348145,
      "learning_rate": 9.996028390582532e-06,
      "loss": 0.7398290634155273,
      "memory(GiB)": 74.59,
      "step": 140,
      "token_acc": 0.7484848484848485,
      "train_speed(iter/s)": 0.017086
    },
    {
      "epoch": 0.07136684229600512,
      "grad_norm": 2.699077606201172,
      "learning_rate": 9.987968549940455e-06,
      "loss": 0.7392120361328125,
      "memory(GiB)": 74.59,
      "step": 160,
      "token_acc": 0.7770795435753266,
      "train_speed(iter/s)": 0.017101
    },
    {
      "epoch": 0.08028769758300577,
      "grad_norm": 3.137524127960205,
      "learning_rate": 9.975560517983058e-06,
      "loss": 0.7420564651489258,
      "memory(GiB)": 74.59,
      "step": 180,
      "token_acc": 0.7646754857379082,
      "train_speed(iter/s)": 0.017128
    },
    {
      "epoch": 0.08920855287000641,
      "grad_norm": 2.6313562393188477,
      "learning_rate": 9.958815111237401e-06,
      "loss": 0.7337265968322754,
      "memory(GiB)": 74.59,
      "step": 200,
      "token_acc": 0.7839015151515152,
      "train_speed(iter/s)": 0.017152
    },
    {
      "epoch": 0.09812940815700705,
      "grad_norm": 2.565857172012329,
      "learning_rate": 9.937746927275914e-06,
      "loss": 0.7408348083496094,
      "memory(GiB)": 74.59,
      "step": 220,
      "token_acc": 0.7705133830627469,
      "train_speed(iter/s)": 0.017151
    },
    {
      "epoch": 0.1070502634440077,
      "grad_norm": 3.0658340454101562,
      "learning_rate": 9.912374331991168e-06,
      "loss": 0.7453395843505859,
      "memory(GiB)": 74.59,
      "step": 240,
      "token_acc": 0.8014513598326359,
      "train_speed(iter/s)": 0.017149
    },
    {
      "epoch": 0.11597111873100834,
      "grad_norm": 2.4914870262145996,
      "learning_rate": 9.882719443585664e-06,
      "loss": 0.742877197265625,
      "memory(GiB)": 74.59,
      "step": 260,
      "token_acc": 0.79041248606466,
      "train_speed(iter/s)": 0.017153
    },
    {
      "epoch": 0.12489197401800897,
      "grad_norm": 2.507567882537842,
      "learning_rate": 9.848808113290609e-06,
      "loss": 0.7456643104553222,
      "memory(GiB)": 74.59,
      "step": 280,
      "token_acc": 0.774718264937274,
      "train_speed(iter/s)": 0.017161
    },
    {
      "epoch": 0.13381282930500962,
      "grad_norm": 2.3153159618377686,
      "learning_rate": 9.810669902830456e-06,
      "loss": 0.7467074871063233,
      "memory(GiB)": 74.59,
      "step": 300,
      "token_acc": 0.7635281385281385,
      "train_speed(iter/s)": 0.017162
    },
    {
      "epoch": 0.14273368459201025,
      "grad_norm": 2.468867540359497,
      "learning_rate": 9.768338058652905e-06,
      "loss": 0.7267765045166016,
      "memory(GiB)": 74.59,
      "step": 320,
      "token_acc": 0.7718188485515218,
      "train_speed(iter/s)": 0.017169
    },
    {
      "epoch": 0.1516545398790109,
      "grad_norm": 2.796816349029541,
      "learning_rate": 9.721849482946765e-06,
      "loss": 0.7359182357788085,
      "memory(GiB)": 74.59,
      "step": 340,
      "token_acc": 0.7720618137454249,
      "train_speed(iter/s)": 0.017178
    },
    {
      "epoch": 0.16057539516601155,
      "grad_norm": 2.883291721343994,
      "learning_rate": 9.671244701472999e-06,
      "loss": 0.7352351665496826,
      "memory(GiB)": 74.59,
      "step": 360,
      "token_acc": 0.7737668544120156,
      "train_speed(iter/s)": 0.017178
    },
    {
      "epoch": 0.16949625045301217,
      "grad_norm": 2.4550609588623047,
      "learning_rate": 9.616567828236964e-06,
      "loss": 0.7373301029205322,
      "memory(GiB)": 74.59,
      "step": 380,
      "token_acc": 0.7750453219831324,
      "train_speed(iter/s)": 0.017185
    },
    {
      "epoch": 0.17841710574001282,
      "grad_norm": 2.1687684059143066,
      "learning_rate": 9.55786652703264e-06,
      "loss": 0.733797550201416,
      "memory(GiB)": 74.59,
      "step": 400,
      "token_acc": 0.7664166067207893,
      "train_speed(iter/s)": 0.017194
    },
    {
      "epoch": 0.18733796102701347,
      "grad_norm": 2.539926528930664,
      "learning_rate": 9.495191969892398e-06,
      "loss": 0.7333598136901855,
      "memory(GiB)": 74.59,
      "step": 420,
      "token_acc": 0.7851207729468599,
      "train_speed(iter/s)": 0.0172
    },
    {
      "epoch": 0.1962588163140141,
      "grad_norm": 2.5285987854003906,
      "learning_rate": 9.428598792478494e-06,
      "loss": 0.7284451961517334,
      "memory(GiB)": 74.59,
      "step": 440,
      "token_acc": 0.7811644805133529,
      "train_speed(iter/s)": 0.017208
    },
    {
      "epoch": 0.20517967160101475,
      "grad_norm": 2.7813446521759033,
      "learning_rate": 9.358145046455208e-06,
      "loss": 0.7185512542724609,
      "memory(GiB)": 74.59,
      "step": 460,
      "token_acc": 0.7957014367480435,
      "train_speed(iter/s)": 0.017218
    },
    {
      "epoch": 0.2141005268880154,
      "grad_norm": 2.618666410446167,
      "learning_rate": 9.283892148883114e-06,
      "loss": 0.7193053245544434,
      "memory(GiB)": 74.59,
      "step": 480,
      "token_acc": 0.7836716874044969,
      "train_speed(iter/s)": 0.017221
    },
    {
      "epoch": 0.22302138217501602,
      "grad_norm": 2.6849210262298584,
      "learning_rate": 9.205904828679634e-06,
      "loss": 0.7396815299987793,
      "memory(GiB)": 74.59,
      "step": 500,
      "token_acc": 0.763501989766913,
      "train_speed(iter/s)": 0.017212
    },
    {
      "epoch": 0.23194223746201667,
      "grad_norm": 2.95625376701355,
      "learning_rate": 9.124251070192508e-06,
      "loss": 0.732808780670166,
      "memory(GiB)": 74.59,
      "step": 520,
      "token_acc": 0.7877953813104189,
      "train_speed(iter/s)": 0.017216
    },
    {
      "epoch": 0.24086309274901732,
      "grad_norm": 2.5588300228118896,
      "learning_rate": 9.039002053935399e-06,
      "loss": 0.7411352634429932,
      "memory(GiB)": 74.59,
      "step": 540,
      "token_acc": 0.7754504710509467,
      "train_speed(iter/s)": 0.017214
    },
    {
      "epoch": 0.24978394803601794,
      "grad_norm": 2.4380648136138916,
      "learning_rate": 8.95023209453728e-06,
      "loss": 0.7271821022033691,
      "memory(GiB)": 74.59,
      "step": 560,
      "token_acc": 0.7896144622763328,
      "train_speed(iter/s)": 0.017216
    },
    {
      "epoch": 0.2587048033230186,
      "grad_norm": 2.760838031768799,
      "learning_rate": 8.858018575959709e-06,
      "loss": 0.7257913589477539,
      "memory(GiB)": 74.59,
      "step": 580,
      "token_acc": 0.7805106382978724,
      "train_speed(iter/s)": 0.017218
    },
    {
      "epoch": 0.26762565861001925,
      "grad_norm": 2.3860268592834473,
      "learning_rate": 8.762441884038448e-06,
      "loss": 0.7130424976348877,
      "memory(GiB)": 74.59,
      "step": 600,
      "token_acc": 0.7727931769722814,
      "train_speed(iter/s)": 0.017222
    },
    {
      "epoch": 0.2765465138970199,
      "grad_norm": 2.5377020835876465,
      "learning_rate": 8.663585336408239e-06,
      "loss": 0.7241259574890136,
      "memory(GiB)": 74.59,
      "step": 620,
      "token_acc": 0.7707096409386486,
      "train_speed(iter/s)": 0.017224
    },
    {
      "epoch": 0.2854673691840205,
      "grad_norm": 2.703711748123169,
      "learning_rate": 8.561535109871837e-06,
      "loss": 0.7184478282928467,
      "memory(GiB)": 74.59,
      "step": 640,
      "token_acc": 0.7741780657547396,
      "train_speed(iter/s)": 0.017226
    },
    {
      "epoch": 0.29438822447102114,
      "grad_norm": 2.6190695762634277,
      "learning_rate": 8.45638016527659e-06,
      "loss": 0.7167672157287598,
      "memory(GiB)": 74.59,
      "step": 660,
      "token_acc": 0.7801072180739039,
      "train_speed(iter/s)": 0.017229
    },
    {
      "epoch": 0.3033090797580218,
      "grad_norm": 2.3281748294830322,
      "learning_rate": 8.348212169964076e-06,
      "loss": 0.7208720207214355,
      "memory(GiB)": 74.59,
      "step": 680,
      "token_acc": 0.7777380420205633,
      "train_speed(iter/s)": 0.017229
    },
    {
      "epoch": 0.31222993504502244,
      "grad_norm": 2.7742199897766113,
      "learning_rate": 8.237125417860392e-06,
      "loss": 0.7217154979705811,
      "memory(GiB)": 74.59,
      "step": 700,
      "token_acc": 0.7793293242279075,
      "train_speed(iter/s)": 0.017231
    },
    {
      "epoch": 0.3211507903320231,
      "grad_norm": 2.636087417602539,
      "learning_rate": 8.123216747276745e-06,
      "loss": 0.7159500122070312,
      "memory(GiB)": 74.59,
      "step": 720,
      "token_acc": 0.7801306973281495,
      "train_speed(iter/s)": 0.017233
    },
    {
      "epoch": 0.33007164561902375,
      "grad_norm": 2.3711416721343994,
      "learning_rate": 8.00658545649203e-06,
      "loss": 0.7088687896728516,
      "memory(GiB)": 74.59,
      "step": 740,
      "token_acc": 0.7677587706581618,
      "train_speed(iter/s)": 0.017233
    },
    {
      "epoch": 0.33899250090602434,
      "grad_norm": 2.356473684310913,
      "learning_rate": 7.88733321719094e-06,
      "loss": 0.7173014640808105,
      "memory(GiB)": 74.59,
      "step": 760,
      "token_acc": 0.7865473998070683,
      "train_speed(iter/s)": 0.017234
    },
    {
      "epoch": 0.347913356193025,
      "grad_norm": 2.4541633129119873,
      "learning_rate": 7.76556398583312e-06,
      "loss": 0.7031930923461914,
      "memory(GiB)": 74.59,
      "step": 780,
      "token_acc": 0.7820739488046737,
      "train_speed(iter/s)": 0.017236
    },
    {
      "epoch": 0.35683421148002564,
      "grad_norm": 2.2740061283111572,
      "learning_rate": 7.64138391303058e-06,
      "loss": 0.7068012237548829,
      "memory(GiB)": 74.59,
      "step": 800,
      "token_acc": 0.7922303473491773,
      "train_speed(iter/s)": 0.017237
    },
    {
      "epoch": 0.3657550667670263,
      "grad_norm": 2.5219359397888184,
      "learning_rate": 7.514901251012394e-06,
      "loss": 0.7092500686645508,
      "memory(GiB)": 74.59,
      "step": 820,
      "token_acc": 0.7766763303095483,
      "train_speed(iter/s)": 0.017235
    },
    {
      "epoch": 0.37467592205402694,
      "grad_norm": 2.4455716609954834,
      "learning_rate": 7.386226259257352e-06,
      "loss": 0.7173641681671142,
      "memory(GiB)": 74.59,
      "step": 840,
      "token_acc": 0.779245283018868,
      "train_speed(iter/s)": 0.017234
    },
    {
      "epoch": 0.3835967773410276,
      "grad_norm": 2.293743133544922,
      "learning_rate": 7.255471108376806e-06,
      "loss": 0.7066821575164794,
      "memory(GiB)": 74.59,
      "step": 860,
      "token_acc": 0.7943990018023014,
      "train_speed(iter/s)": 0.017238
    },
    {
      "epoch": 0.3925176326280282,
      "grad_norm": 2.494844675064087,
      "learning_rate": 7.122749782331519e-06,
      "loss": 0.7019121646881104,
      "memory(GiB)": 74.59,
      "step": 880,
      "token_acc": 0.7879621142682555,
      "train_speed(iter/s)": 0.017239
    },
    {
      "epoch": 0.40143848791502884,
      "grad_norm": 2.6380198001861572,
      "learning_rate": 6.9881779790677585e-06,
      "loss": 0.7143388748168945,
      "memory(GiB)": 74.59,
      "step": 900,
      "token_acc": 0.7860368607347736,
      "train_speed(iter/s)": 0.01724
    },
    {
      "epoch": 0.4103593432020295,
      "grad_norm": 2.755938768386841,
      "learning_rate": 6.851873009659234e-06,
      "loss": 0.6966420173645019,
      "memory(GiB)": 74.59,
      "step": 920,
      "token_acc": 0.8006147975408099,
      "train_speed(iter/s)": 0.017241
    },
    {
      "epoch": 0.41928019848903014,
      "grad_norm": 2.870432138442993,
      "learning_rate": 6.713953696042834e-06,
      "loss": 0.7180665969848633,
      "memory(GiB)": 74.59,
      "step": 940,
      "token_acc": 0.7882692908466105,
      "train_speed(iter/s)": 0.01724
    },
    {
      "epoch": 0.4282010537760308,
      "grad_norm": 2.570157051086426,
      "learning_rate": 6.5745402674372595e-06,
      "loss": 0.6956540107727051,
      "memory(GiB)": 74.59,
      "step": 960,
      "token_acc": 0.7983656792645557,
      "train_speed(iter/s)": 0.01724
    },
    {
      "epoch": 0.43712190906303144,
      "grad_norm": 2.6481871604919434,
      "learning_rate": 6.433754255534907e-06,
      "loss": 0.7040743827819824,
      "memory(GiB)": 74.59,
      "step": 980,
      "token_acc": 0.774559686888454,
      "train_speed(iter/s)": 0.017244
    },
    {
      "epoch": 0.44604276435003204,
      "grad_norm": 2.631838083267212,
      "learning_rate": 6.291718388558332e-06,
      "loss": 0.7015031814575196,
      "memory(GiB)": 74.59,
      "step": 1000,
      "token_acc": 0.7842915811088296,
      "train_speed(iter/s)": 0.017246
    },
    {
      "epoch": 0.4549636196370327,
      "grad_norm": 3.1000046730041504,
      "learning_rate": 6.148556484273636e-06,
      "loss": 0.7171984672546386,
      "memory(GiB)": 74.59,
      "step": 1020,
      "token_acc": 0.768834302006799,
      "train_speed(iter/s)": 0.017244
    },
    {
      "epoch": 0.46388447492403334,
      "grad_norm": 2.4541211128234863,
      "learning_rate": 6.004393342054082e-06,
      "loss": 0.7030771255493165,
      "memory(GiB)": 74.59,
      "step": 1040,
      "token_acc": 0.8046032357473035,
      "train_speed(iter/s)": 0.017246
    },
    {
      "epoch": 0.472805330211034,
      "grad_norm": 2.3521018028259277,
      "learning_rate": 5.859354634087999e-06,
      "loss": 0.7004799365997314,
      "memory(GiB)": 74.59,
      "step": 1060,
      "token_acc": 0.79979035639413,
      "train_speed(iter/s)": 0.017246
    },
    {
      "epoch": 0.48172618549803464,
      "grad_norm": 2.619112968444824,
      "learning_rate": 5.713566795825825e-06,
      "loss": 0.7009503364562988,
      "memory(GiB)": 74.59,
      "step": 1080,
      "token_acc": 0.7849806364707863,
      "train_speed(iter/s)": 0.017245
    },
    {
      "epoch": 0.49064704078503524,
      "grad_norm": 2.114140033721924,
      "learning_rate": 5.5671569157617735e-06,
      "loss": 0.6934898376464844,
      "memory(GiB)": 74.59,
      "step": 1100,
      "token_acc": 0.8063559714629853,
      "train_speed(iter/s)": 0.017245
    },
    {
      "epoch": 0.4995678960720359,
      "grad_norm": 2.079798460006714,
      "learning_rate": 5.420252624646238e-06,
      "loss": 0.6923507213592529,
      "memory(GiB)": 74.59,
      "step": 1120,
      "token_acc": 0.7771432922186691,
      "train_speed(iter/s)": 0.017245
    },
    {
      "epoch": 0.5084887513590366,
      "grad_norm": 2.520137310028076,
      "learning_rate": 5.2729819842254844e-06,
      "loss": 0.6845778942108154,
      "memory(GiB)": 74.59,
      "step": 1140,
      "token_acc": 0.7924451665312754,
      "train_speed(iter/s)": 0.017244
    },
    {
      "epoch": 0.5174096066460372,
      "grad_norm": 2.4338433742523193,
      "learning_rate": 5.125473375605621e-06,
      "loss": 0.6988918304443359,
      "memory(GiB)": 74.59,
      "step": 1160,
      "token_acc": 0.7706018518518518,
      "train_speed(iter/s)": 0.017244
    },
    {
      "epoch": 0.5263304619330378,
      "grad_norm": 2.465174674987793,
      "learning_rate": 4.977855387338179e-06,
      "loss": 0.6776053428649902,
      "memory(GiB)": 74.59,
      "step": 1180,
      "token_acc": 0.7994680235919972,
      "train_speed(iter/s)": 0.017244
    },
    {
      "epoch": 0.5352513172200385,
      "grad_norm": 2.223647356033325,
      "learning_rate": 4.8302567033248646e-06,
      "loss": 0.6847808837890625,
      "memory(GiB)": 74.59,
      "step": 1200,
      "token_acc": 0.7837174309020353,
      "train_speed(iter/s)": 0.017242
    },
    {
      "epoch": 0.5441721725070391,
      "grad_norm": 2.47381854057312,
      "learning_rate": 4.6828059906391645e-06,
      "loss": 0.7015344619750976,
      "memory(GiB)": 74.59,
      "step": 1220,
      "token_acc": 0.7903538536112458,
      "train_speed(iter/s)": 0.017242
    },
    {
      "epoch": 0.5530930277940398,
      "grad_norm": 2.184150218963623,
      "learning_rate": 4.53563178736265e-06,
      "loss": 0.7034894943237304,
      "memory(GiB)": 74.59,
      "step": 1240,
      "token_acc": 0.7847053630545133,
      "train_speed(iter/s)": 0.01724
    },
    {
      "epoch": 0.5620138830810404,
      "grad_norm": 2.5575106143951416,
      "learning_rate": 4.388862390533707e-06,
      "loss": 0.6856672763824463,
      "memory(GiB)": 74.59,
      "step": 1260,
      "token_acc": 0.7924824752235919,
      "train_speed(iter/s)": 0.017239
    },
    {
      "epoch": 0.570934738368041,
      "grad_norm": 2.6725082397460938,
      "learning_rate": 4.242625744306403e-06,
      "loss": 0.6775306701660156,
      "memory(GiB)": 74.59,
      "step": 1280,
      "token_acc": 0.8006240585323865,
      "train_speed(iter/s)": 0.017237
    },
    {
      "epoch": 0.5798555936550417,
      "grad_norm": 2.3536887168884277,
      "learning_rate": 4.097049328416979e-06,
      "loss": 0.6819396018981934,
      "memory(GiB)": 74.59,
      "step": 1300,
      "token_acc": 0.8047998230479982,
      "train_speed(iter/s)": 0.017236
    },
    {
      "epoch": 0.5887764489420423,
      "grad_norm": 2.3979926109313965,
      "learning_rate": 3.952260047055173e-06,
      "loss": 0.668601131439209,
      "memory(GiB)": 74.59,
      "step": 1320,
      "token_acc": 0.7856453946310883,
      "train_speed(iter/s)": 0.017236
    },
    {
      "epoch": 0.597697304229043,
      "grad_norm": 2.5855495929718018,
      "learning_rate": 3.8083841182372938e-06,
      "loss": 0.6772181034088135,
      "memory(GiB)": 74.59,
      "step": 1340,
      "token_acc": 0.7722038739623315,
      "train_speed(iter/s)": 0.017236
    },
    {
      "epoch": 0.6066181595160436,
      "grad_norm": 2.363443374633789,
      "learning_rate": 3.665546963777432e-06,
      "loss": 0.6870659351348877,
      "memory(GiB)": 74.59,
      "step": 1360,
      "token_acc": 0.7813822284908322,
      "train_speed(iter/s)": 0.017234
    },
    {
      "epoch": 0.6155390148030443,
      "grad_norm": 2.097912311553955,
      "learning_rate": 3.5238730999527683e-06,
      "loss": 0.6948359489440918,
      "memory(GiB)": 74.59,
      "step": 1380,
      "token_acc": 0.7918085319447226,
      "train_speed(iter/s)": 0.017234
    },
    {
      "epoch": 0.6244598700900449,
      "grad_norm": 2.7795698642730713,
      "learning_rate": 3.383486028958258e-06,
      "loss": 0.677157974243164,
      "memory(GiB)": 74.59,
      "step": 1400,
      "token_acc": 0.8044161035500142,
      "train_speed(iter/s)": 0.017235
    },
    {
      "epoch": 0.6333807253770455,
      "grad_norm": 2.392015218734741,
      "learning_rate": 3.244508131245336e-06,
      "loss": 0.6798694610595704,
      "memory(GiB)": 74.59,
      "step": 1420,
      "token_acc": 0.7972983228352433,
      "train_speed(iter/s)": 0.017232
    },
    {
      "epoch": 0.6423015806640462,
      "grad_norm": 2.016077756881714,
      "learning_rate": 3.107060558838479e-06,
      "loss": 0.684450912475586,
      "memory(GiB)": 74.59,
      "step": 1440,
      "token_acc": 0.7807692307692308,
      "train_speed(iter/s)": 0.017232
    },
    {
      "epoch": 0.6512224359510468,
      "grad_norm": 2.5572736263275146,
      "learning_rate": 2.971263129722637e-06,
      "loss": 0.6833638191223145,
      "memory(GiB)": 74.59,
      "step": 1460,
      "token_acc": 0.7776776940714052,
      "train_speed(iter/s)": 0.017231
    },
    {
      "epoch": 0.6601432912380475,
      "grad_norm": 2.6139795780181885,
      "learning_rate": 2.8372342233935957e-06,
      "loss": 0.6775733470916748,
      "memory(GiB)": 74.59,
      "step": 1480,
      "token_acc": 0.7752475247524753,
      "train_speed(iter/s)": 0.017232
    },
    {
      "epoch": 0.6690641465250481,
      "grad_norm": 2.252291202545166,
      "learning_rate": 2.705090677662311e-06,
      "loss": 0.679774284362793,
      "memory(GiB)": 74.59,
      "step": 1500,
      "token_acc": 0.7821614169052922,
      "train_speed(iter/s)": 0.017233
    },
    {
      "epoch": 0.6779850018120487,
      "grad_norm": 2.56968092918396,
      "learning_rate": 2.5749476868032025e-06,
      "loss": 0.6825155258178711,
      "memory(GiB)": 74.59,
      "step": 1520,
      "token_acc": 0.7915250121418164,
      "train_speed(iter/s)": 0.017235
    },
    {
      "epoch": 0.6869058570990494,
      "grad_norm": 2.648949384689331,
      "learning_rate": 2.4469187011351557e-06,
      "loss": 0.6678467273712159,
      "memory(GiB)": 74.59,
      "step": 1540,
      "token_acc": 0.7935873796353207,
      "train_speed(iter/s)": 0.017236
    },
    {
      "epoch": 0.69582671238605,
      "grad_norm": 2.342515230178833,
      "learning_rate": 2.321115328122821e-06,
      "loss": 0.7060600757598877,
      "memory(GiB)": 74.59,
      "step": 1560,
      "token_acc": 0.776742892889414,
      "train_speed(iter/s)": 0.017234
    },
    {
      "epoch": 0.7047475676730507,
      "grad_norm": 2.6851727962493896,
      "learning_rate": 2.197647235084362e-06,
      "loss": 0.6798261165618896,
      "memory(GiB)": 74.59,
      "step": 1580,
      "token_acc": 0.7924853498793519,
      "train_speed(iter/s)": 0.017234
    },
    {
      "epoch": 0.7136684229600513,
      "grad_norm": 2.3479652404785156,
      "learning_rate": 2.076622053590525e-06,
      "loss": 0.676541519165039,
      "memory(GiB)": 74.59,
      "step": 1600,
      "token_acc": 0.7941240955930717,
      "train_speed(iter/s)": 0.017235
    },
    {
      "epoch": 0.722589278247052,
      "grad_norm": 2.496194362640381,
      "learning_rate": 1.9581452856383344e-06,
      "loss": 0.6596598148345947,
      "memory(GiB)": 74.59,
      "step": 1620,
      "token_acc": 0.7876830710452106,
      "train_speed(iter/s)": 0.017235
    },
    {
      "epoch": 0.7315101335340526,
      "grad_norm": 2.547659158706665,
      "learning_rate": 1.8423202116812107e-06,
      "loss": 0.6647954940795898,
      "memory(GiB)": 74.59,
      "step": 1640,
      "token_acc": 0.8036138358286009,
      "train_speed(iter/s)": 0.017236
    },
    {
      "epoch": 0.7404309888210532,
      "grad_norm": 2.7125566005706787,
      "learning_rate": 1.7292478005956847e-06,
      "loss": 0.6651457786560059,
      "memory(GiB)": 74.59,
      "step": 1660,
      "token_acc": 0.7850253993963042,
      "train_speed(iter/s)": 0.017238
    },
    {
      "epoch": 0.7493518441080539,
      "grad_norm": 2.4441261291503906,
      "learning_rate": 1.619026621663215e-06,
      "loss": 0.6728306293487549,
      "memory(GiB)": 74.59,
      "step": 1680,
      "token_acc": 0.795984555984556,
      "train_speed(iter/s)": 0.017238
    },
    {
      "epoch": 0.7582726993950545,
      "grad_norm": 2.6311211585998535,
      "learning_rate": 1.511752758643789e-06,
      "loss": 0.6621832847595215,
      "memory(GiB)": 74.59,
      "step": 1700,
      "token_acc": 0.7816987329892069,
      "train_speed(iter/s)": 0.01724
    },
    {
      "epoch": 0.7671935546820552,
      "grad_norm": 2.279157876968384,
      "learning_rate": 1.4075197260162737e-06,
      "loss": 0.6692928314208985,
      "memory(GiB)": 74.59,
      "step": 1720,
      "token_acc": 0.7925182481751825,
      "train_speed(iter/s)": 0.01724
    },
    {
      "epoch": 0.7761144099690558,
      "grad_norm": 2.5061519145965576,
      "learning_rate": 1.3064183874584807e-06,
      "loss": 0.6805558681488038,
      "memory(GiB)": 74.59,
      "step": 1740,
      "token_acc": 0.794682422451994,
      "train_speed(iter/s)": 0.01724
    },
    {
      "epoch": 0.7850352652560564,
      "grad_norm": 2.36961030960083,
      "learning_rate": 1.2085368766380245e-06,
      "loss": 0.6658955574035644,
      "memory(GiB)": 74.59,
      "step": 1760,
      "token_acc": 0.796866150651312,
      "train_speed(iter/s)": 0.017241
    },
    {
      "epoch": 0.7939561205430571,
      "grad_norm": 2.598195791244507,
      "learning_rate": 1.113960520383059e-06,
      "loss": 0.6785523891448975,
      "memory(GiB)": 74.59,
      "step": 1780,
      "token_acc": 0.7914918528636941,
      "train_speed(iter/s)": 0.01724
    },
    {
      "epoch": 0.8028769758300577,
      "grad_norm": 2.394533634185791,
      "learning_rate": 1.0227717642998035e-06,
      "loss": 0.6734235763549805,
      "memory(GiB)": 74.59,
      "step": 1800,
      "token_acc": 0.800065364817387,
      "train_speed(iter/s)": 0.01724
    },
    {
      "epoch": 0.8117978311170584,
      "grad_norm": 2.432565212249756,
      "learning_rate": 9.350501009017493e-07,
      "loss": 0.6695761203765869,
      "memory(GiB)": 74.59,
      "step": 1820,
      "token_acc": 0.805836667451165,
      "train_speed(iter/s)": 0.017241
    },
    {
      "epoch": 0.820718686404059,
      "grad_norm": 2.1419076919555664,
      "learning_rate": 8.508720003131837e-07,
      "loss": 0.6529228210449218,
      "memory(GiB)": 74.59,
      "step": 1840,
      "token_acc": 0.8018465909090909,
      "train_speed(iter/s)": 0.017241
    },
    {
      "epoch": 0.8296395416910596,
      "grad_norm": 2.3110287189483643,
      "learning_rate": 7.703108436074358e-07,
      "loss": 0.6693912506103515,
      "memory(GiB)": 74.59,
      "step": 1860,
      "token_acc": 0.8062661260597125,
      "train_speed(iter/s)": 0.01724
    },
    {
      "epoch": 0.8385603969780603,
      "grad_norm": 2.3294360637664795,
      "learning_rate": 6.934368588379553e-07,
      "loss": 0.6734672546386719,
      "memory(GiB)": 74.59,
      "step": 1880,
      "token_acc": 0.7894939018534524,
      "train_speed(iter/s)": 0.017241
    },
    {
      "epoch": 0.8474812522650609,
      "grad_norm": 2.6479485034942627,
      "learning_rate": 6.203170598180025e-07,
      "loss": 0.6656166553497315,
      "memory(GiB)": 74.59,
      "step": 1900,
      "token_acc": 0.790343791095247,
      "train_speed(iter/s)": 0.017243
    },
    {
      "epoch": 0.8564021075520616,
      "grad_norm": 2.2192885875701904,
      "learning_rate": 5.510151877022968e-07,
      "loss": 0.6645929336547851,
      "memory(GiB)": 74.59,
      "step": 1920,
      "token_acc": 0.79801179812104,
      "train_speed(iter/s)": 0.017245
    },
    {
      "epoch": 0.8653229628390622,
      "grad_norm": 2.6121439933776855,
      "learning_rate": 4.855916554215617e-07,
      "loss": 0.6703991889953613,
      "memory(GiB)": 74.59,
      "step": 1940,
      "token_acc": 0.7830651989839119,
      "train_speed(iter/s)": 0.017244
    },
    {
      "epoch": 0.8742438181260629,
      "grad_norm": 2.3732004165649414,
      "learning_rate": 4.2410349501840975e-07,
      "loss": 0.6532537460327148,
      "memory(GiB)": 74.59,
      "step": 1960,
      "token_acc": 0.8027484143763214,
      "train_speed(iter/s)": 0.017246
    },
    {
      "epoch": 0.8831646734130635,
      "grad_norm": 2.129939556121826,
      "learning_rate": 3.666043079304571e-07,
      "loss": 0.6688889026641845,
      "memory(GiB)": 74.59,
      "step": 1980,
      "token_acc": 0.8005243088655862,
      "train_speed(iter/s)": 0.017248
    },
    {
      "epoch": 0.8920855287000641,
      "grad_norm": 2.2744195461273193,
      "learning_rate": 3.131442182640254e-07,
      "loss": 0.6637560844421386,
      "memory(GiB)": 74.59,
      "step": 2000,
      "token_acc": 0.8057661705513433,
      "train_speed(iter/s)": 0.017249
    },
    {
      "epoch": 0.9010063839870648,
      "grad_norm": 2.331909656524658,
      "learning_rate": 2.637698290991636e-07,
      "loss": 0.6683703899383545,
      "memory(GiB)": 74.59,
      "step": 2020,
      "token_acc": 0.8082558970693352,
      "train_speed(iter/s)": 0.017249
    },
    {
      "epoch": 0.9099272392740654,
      "grad_norm": 2.3515868186950684,
      "learning_rate": 2.1852418186406154e-07,
      "loss": 0.6710467338562012,
      "memory(GiB)": 74.59,
      "step": 2040,
      "token_acc": 0.7927631578947368,
      "train_speed(iter/s)": 0.01725
    },
    {
      "epoch": 0.9188480945610661,
      "grad_norm": 2.2498676776885986,
      "learning_rate": 1.774467188142992e-07,
      "loss": 0.6556891918182373,
      "memory(GiB)": 74.59,
      "step": 2060,
      "token_acc": 0.8043921873481683,
      "train_speed(iter/s)": 0.017249
    },
    {
      "epoch": 0.9277689498480667,
      "grad_norm": 2.2811636924743652,
      "learning_rate": 1.4057324864960976e-07,
      "loss": 0.6625730991363525,
      "memory(GiB)": 74.59,
      "step": 2080,
      "token_acc": 0.7793322734499205,
      "train_speed(iter/s)": 0.017249
    },
    {
      "epoch": 0.9366898051350673,
      "grad_norm": 2.5183255672454834,
      "learning_rate": 1.0793591529815295e-07,
      "loss": 0.6667513847351074,
      "memory(GiB)": 74.59,
      "step": 2100,
      "token_acc": 0.781785392245266,
      "train_speed(iter/s)": 0.017251
    },
    {
      "epoch": 0.945610660422068,
      "grad_norm": 2.268050193786621,
      "learning_rate": 7.956316989550073e-08,
      "loss": 0.6645870208740234,
      "memory(GiB)": 74.59,
      "step": 2120,
      "token_acc": 0.7865535248041775,
      "train_speed(iter/s)": 0.017251
    },
    {
      "epoch": 0.9545315157090686,
      "grad_norm": 2.458646535873413,
      "learning_rate": 5.547974598275552e-08,
      "loss": 0.6568970680236816,
      "memory(GiB)": 74.59,
      "step": 2140,
      "token_acc": 0.8039867109634552,
      "train_speed(iter/s)": 0.017252
    },
    {
      "epoch": 0.9634523709960693,
      "grad_norm": 2.647118091583252,
      "learning_rate": 3.570663794543683e-08,
      "loss": 0.6643817901611329,
      "memory(GiB)": 74.59,
      "step": 2160,
      "token_acc": 0.7905033063923586,
      "train_speed(iter/s)": 0.017252
    },
    {
      "epoch": 0.9723732262830699,
      "grad_norm": 2.5381767749786377,
      "learning_rate": 2.026108271192373e-08,
      "loss": 0.6675143241882324,
      "memory(GiB)": 74.59,
      "step": 2180,
      "token_acc": 0.8054619826756496,
      "train_speed(iter/s)": 0.017251
    },
    {
      "epoch": 0.9812940815700705,
      "grad_norm": 2.5388898849487305,
      "learning_rate": 9.156544727400574e-09,
      "loss": 0.6596790313720703,
      "memory(GiB)": 74.59,
      "step": 2200,
      "token_acc": 0.8080402010050252,
      "train_speed(iter/s)": 0.017253
    },
    {
      "epoch": 0.9902149368570712,
      "grad_norm": 2.4086954593658447,
      "learning_rate": 2.40270421641986e-09,
      "loss": 0.6641056060791015,
      "memory(GiB)": 74.59,
      "step": 2220,
      "token_acc": 0.796367217419849,
      "train_speed(iter/s)": 0.017252
    },
    {
      "epoch": 0.9991357921440718,
      "grad_norm": 2.0631699562072754,
      "learning_rate": 5.448744302971598e-12,
      "loss": 0.6517824649810791,
      "memory(GiB)": 74.59,
      "step": 2240,
      "token_acc": 0.8053242223645811,
      "train_speed(iter/s)": 0.017254
    },
    {
      "epoch": 0.9995818349084219,
      "eval_loss": 0.6916317343711853,
      "eval_runtime": 450.3834,
      "eval_samples_per_second": 6.432,
      "eval_steps_per_second": 0.806,
      "eval_token_acc": 0.3993507138176836,
      "step": 2241
    },
    {
      "epoch": 0.9995818349084219,
      "eval_loss": 0.6915839910507202,
      "eval_runtime": 441.7249,
      "eval_samples_per_second": 6.558,
      "eval_steps_per_second": 0.822,
      "step": 2241
    }
  ],
  "logging_steps": 20,
  "max_steps": 2241,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000.0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.816735874758266e+19,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
