Instructions to use khhuang/zerofec-qa2claim-t5-base with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use khhuang/zerofec-qa2claim-t5-base with Transformers:
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("khhuang/zerofec-qa2claim-t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("khhuang/zerofec-qa2claim-t5-base")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 92.6157, | |
| "best_model_checkpoint": "qa2claim-base/checkpoint-12000", | |
| "epoch": 1.5512736773350753, | |
| "global_step": 38000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 3.2255, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5.8800000000000005e-06, | |
| "loss": 3.2465, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.182e-05, | |
| "loss": 2.6401, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.782e-05, | |
| "loss": 2.2893, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.3820000000000002e-05, | |
| "loss": 2.1163, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.982e-05, | |
| "loss": 2.0047, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.997105527638191e-05, | |
| "loss": 1.9368, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.9940904522613068e-05, | |
| "loss": 1.9029, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.9910753768844223e-05, | |
| "loss": 1.8627, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.988090452261307e-05, | |
| "loss": 1.8403, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.9850753768844224e-05, | |
| "loss": 1.835, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.9820603015075376e-05, | |
| "loss": 1.8027, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.979045226130653e-05, | |
| "loss": 1.8176, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.976030150753769e-05, | |
| "loss": 1.7983, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.9730150753768845e-05, | |
| "loss": 1.778, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.97e-05, | |
| "loss": 1.7764, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.9669849246231156e-05, | |
| "loss": 1.7599, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.9639698492462314e-05, | |
| "loss": 1.7572, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.960954773869347e-05, | |
| "loss": 1.7773, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.9579396984924625e-05, | |
| "loss": 1.7301, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.954924623115578e-05, | |
| "loss": 1.7536, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_bleu": 75.15293608723576, | |
| "eval_gen_len": 16.288, | |
| "eval_loss": 1.6770071983337402, | |
| "eval_meteor": 0.8848959776023978, | |
| "eval_rouge1": 92.6205, | |
| "eval_rouge2": 86.7136, | |
| "eval_rougeL": 89.2742, | |
| "eval_rougeLsum": 89.2914, | |
| "eval_runtime": 29.1055, | |
| "eval_samples_per_second": 17.179, | |
| "eval_steps_per_second": 2.165, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.951909547738694e-05, | |
| "loss": 1.7313, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.948894472361809e-05, | |
| "loss": 1.7308, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.9458793969849246e-05, | |
| "loss": 1.7186, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.94286432160804e-05, | |
| "loss": 1.7262, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.9398492462311556e-05, | |
| "loss": 1.702, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.9368341708542715e-05, | |
| "loss": 1.7107, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.933819095477387e-05, | |
| "loss": 1.7187, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.9308040201005025e-05, | |
| "loss": 1.707, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.927788944723618e-05, | |
| "loss": 1.7019, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.924773869346734e-05, | |
| "loss": 1.6977, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.9217587939698495e-05, | |
| "loss": 1.7047, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.918743718592965e-05, | |
| "loss": 1.6943, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.9157286432160802e-05, | |
| "loss": 1.6795, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.912713567839196e-05, | |
| "loss": 1.6781, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.9096984924623116e-05, | |
| "loss": 1.7098, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.906683417085427e-05, | |
| "loss": 1.6774, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.9036683417085426e-05, | |
| "loss": 1.6911, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.9006532663316585e-05, | |
| "loss": 1.6933, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.8976683417085427e-05, | |
| "loss": 1.6742, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.8946532663316586e-05, | |
| "loss": 1.6804, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_bleu": 77.38072312145013, | |
| "eval_gen_len": 16.306, | |
| "eval_loss": 1.6234748363494873, | |
| "eval_meteor": 0.892630886417938, | |
| "eval_rouge1": 93.2399, | |
| "eval_rouge2": 88.3086, | |
| "eval_rougeL": 90.3444, | |
| "eval_rougeLsum": 90.3735, | |
| "eval_runtime": 25.4106, | |
| "eval_samples_per_second": 19.677, | |
| "eval_steps_per_second": 2.479, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.891638190954774e-05, | |
| "loss": 1.673, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8886231155778896e-05, | |
| "loss": 1.6754, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.885608040201005e-05, | |
| "loss": 1.6668, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.8825929648241207e-05, | |
| "loss": 1.6759, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.8795778894472362e-05, | |
| "loss": 1.6526, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.8765628140703517e-05, | |
| "loss": 1.6654, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.8735477386934672e-05, | |
| "loss": 1.6466, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.870532663316583e-05, | |
| "loss": 1.6789, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.8675175879396986e-05, | |
| "loss": 1.6583, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.864502512562814e-05, | |
| "loss": 1.6639, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 2.8614874371859297e-05, | |
| "loss": 1.6464, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 2.8584723618090452e-05, | |
| "loss": 1.6654, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 2.8554874371859297e-05, | |
| "loss": 1.6364, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 2.8525025125628143e-05, | |
| "loss": 1.6325, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 2.8494874371859298e-05, | |
| "loss": 1.6496, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 2.846502512562814e-05, | |
| "loss": 1.6542, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 2.8434874371859295e-05, | |
| "loss": 1.6418, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 2.8404723618090454e-05, | |
| "loss": 1.6421, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 2.8374874371859296e-05, | |
| "loss": 1.6427, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 2.8344723618090454e-05, | |
| "loss": 1.6423, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_bleu": 78.69642597526826, | |
| "eval_gen_len": 16.314, | |
| "eval_loss": 1.593881607055664, | |
| "eval_meteor": 0.8979075563574866, | |
| "eval_rouge1": 93.7314, | |
| "eval_rouge2": 89.4698, | |
| "eval_rougeL": 91.3989, | |
| "eval_rougeLsum": 91.4076, | |
| "eval_runtime": 25.6849, | |
| "eval_samples_per_second": 19.467, | |
| "eval_steps_per_second": 2.453, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 2.831457286432161e-05, | |
| "loss": 1.6491, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 2.8284422110552765e-05, | |
| "loss": 1.6533, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 2.825427135678392e-05, | |
| "loss": 1.6501, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 2.8224120603015075e-05, | |
| "loss": 1.6463, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 2.8193969849246234e-05, | |
| "loss": 1.6341, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 2.816381909547739e-05, | |
| "loss": 1.6424, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 2.813366834170854e-05, | |
| "loss": 1.6399, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 2.8103517587939697e-05, | |
| "loss": 1.6362, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 2.8073366834170855e-05, | |
| "loss": 1.6357, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 2.804321608040201e-05, | |
| "loss": 1.6284, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 2.8013065326633166e-05, | |
| "loss": 1.6333, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 2.798291457286432e-05, | |
| "loss": 1.6414, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 2.795276381909548e-05, | |
| "loss": 1.6166, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 2.7922613065326635e-05, | |
| "loss": 1.6368, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 2.789246231155779e-05, | |
| "loss": 1.6313, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 2.7862311557788945e-05, | |
| "loss": 1.6241, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 2.7832160804020104e-05, | |
| "loss": 1.619, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 2.7802010050251256e-05, | |
| "loss": 1.6339, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 2.777185929648241e-05, | |
| "loss": 1.6152, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 2.7741708542713567e-05, | |
| "loss": 1.6253, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_bleu": 80.00978050192599, | |
| "eval_gen_len": 16.226, | |
| "eval_loss": 1.5748662948608398, | |
| "eval_meteor": 0.9021016869942787, | |
| "eval_rouge1": 94.2752, | |
| "eval_rouge2": 90.7004, | |
| "eval_rougeL": 92.2246, | |
| "eval_rougeLsum": 92.2489, | |
| "eval_runtime": 25.2021, | |
| "eval_samples_per_second": 19.84, | |
| "eval_steps_per_second": 2.5, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 2.7711557788944725e-05, | |
| "loss": 1.6291, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 2.768140703517588e-05, | |
| "loss": 1.6288, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 2.7651557788944726e-05, | |
| "loss": 1.6158, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 2.762140703517588e-05, | |
| "loss": 1.6285, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 2.7591256281407036e-05, | |
| "loss": 1.6231, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 2.756110552763819e-05, | |
| "loss": 1.6237, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 2.753095477386935e-05, | |
| "loss": 1.6059, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 2.7500804020100505e-05, | |
| "loss": 1.6094, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 2.7470954773869347e-05, | |
| "loss": 1.6092, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 2.744110552763819e-05, | |
| "loss": 1.6162, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 2.7410954773869348e-05, | |
| "loss": 1.6059, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 2.7380804020100503e-05, | |
| "loss": 1.6135, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 2.735065326633166e-05, | |
| "loss": 1.6144, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 2.7320804020100504e-05, | |
| "loss": 1.6288, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 2.729065326633166e-05, | |
| "loss": 1.6098, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 2.7260804020100504e-05, | |
| "loss": 1.617, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 2.723065326633166e-05, | |
| "loss": 1.6079, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 2.7200502512562815e-05, | |
| "loss": 1.611, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 2.7170351758793974e-05, | |
| "loss": 1.5935, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.7140201005025125e-05, | |
| "loss": 1.5965, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_bleu": 80.29588431741519, | |
| "eval_gen_len": 16.244, | |
| "eval_loss": 1.5651723146438599, | |
| "eval_meteor": 0.9028810347439424, | |
| "eval_rouge1": 94.3213, | |
| "eval_rouge2": 90.869, | |
| "eval_rougeL": 92.4221, | |
| "eval_rougeLsum": 92.4429, | |
| "eval_runtime": 25.551, | |
| "eval_samples_per_second": 19.569, | |
| "eval_steps_per_second": 2.466, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.711035175879397e-05, | |
| "loss": 1.611, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.7080201005025126e-05, | |
| "loss": 1.6066, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.705005025125628e-05, | |
| "loss": 1.6028, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.7019899497487437e-05, | |
| "loss": 1.6192, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.6989748743718595e-05, | |
| "loss": 1.6114, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.695959798994975e-05, | |
| "loss": 1.6043, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.6929447236180906e-05, | |
| "loss": 1.5949, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.689929648241206e-05, | |
| "loss": 1.6017, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.6869145728643216e-05, | |
| "loss": 1.6001, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.683929648241206e-05, | |
| "loss": 1.5983, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.6809145728643213e-05, | |
| "loss": 1.5914, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.6778994974874372e-05, | |
| "loss": 1.5997, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.6748844221105527e-05, | |
| "loss": 1.5962, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6718693467336683e-05, | |
| "loss": 1.5946, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6688542713567838e-05, | |
| "loss": 1.5969, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6658391959798997e-05, | |
| "loss": 1.6132, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6628241206030152e-05, | |
| "loss": 1.5893, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6598090452261307e-05, | |
| "loss": 1.6123, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.6567939698492462e-05, | |
| "loss": 1.5975, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.653778894472362e-05, | |
| "loss": 1.5908, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_bleu": 80.65979156040467, | |
| "eval_gen_len": 16.216, | |
| "eval_loss": 1.561901330947876, | |
| "eval_meteor": 0.9045236466427484, | |
| "eval_rouge1": 94.5279, | |
| "eval_rouge2": 91.2374, | |
| "eval_rougeL": 92.5949, | |
| "eval_rougeLsum": 92.6157, | |
| "eval_runtime": 25.5485, | |
| "eval_samples_per_second": 19.571, | |
| "eval_steps_per_second": 2.466, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.6508241206030153e-05, | |
| "loss": 1.6137, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6478090452261305e-05, | |
| "loss": 2.1155, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.644793969849246e-05, | |
| "loss": 2.6991, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.641778894472362e-05, | |
| "loss": 2.8329, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.6387638190954774e-05, | |
| "loss": 2.8749, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.635748743718593e-05, | |
| "loss": 2.8538, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.6327336683417085e-05, | |
| "loss": 2.8509, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.6297185929648243e-05, | |
| "loss": 2.8037, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.62670351758794e-05, | |
| "loss": 2.8414, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.6236884422110554e-05, | |
| "loss": 2.8386, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.620673366834171e-05, | |
| "loss": 2.8152, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.6176582914572868e-05, | |
| "loss": 2.7805, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.6146432160804023e-05, | |
| "loss": 2.7591, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.6116281407035175e-05, | |
| "loss": 2.7748, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.608613065326633e-05, | |
| "loss": 2.773, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.605597989949749e-05, | |
| "loss": 2.7776, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.602613065326633e-05, | |
| "loss": 2.752, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.599597989949749e-05, | |
| "loss": 2.7383, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.5965829145728645e-05, | |
| "loss": 2.7375, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.59356783919598e-05, | |
| "loss": 2.7502, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_bleu": 76.79295767834411, | |
| "eval_gen_len": 16.252, | |
| "eval_loss": 2.6775336265563965, | |
| "eval_meteor": 0.890923129283697, | |
| "eval_rouge1": 92.9876, | |
| "eval_rouge2": 88.6491, | |
| "eval_rougeL": 91.2913, | |
| "eval_rougeLsum": 91.2855, | |
| "eval_runtime": 25.3646, | |
| "eval_samples_per_second": 19.712, | |
| "eval_steps_per_second": 2.484, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.5905527638190955e-05, | |
| "loss": 2.7346, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.5875376884422114e-05, | |
| "loss": 2.7492, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.584522613065327e-05, | |
| "loss": 2.7273, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.5815075376884424e-05, | |
| "loss": 2.7279, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.5784924623115576e-05, | |
| "loss": 2.7241, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.575477386934673e-05, | |
| "loss": 2.7457, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.572462311557789e-05, | |
| "loss": 2.7347, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.5694472361809045e-05, | |
| "loss": 2.7168, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.56643216080402e-05, | |
| "loss": 2.7086, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.5634170854271356e-05, | |
| "loss": 2.7265, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.5604020100502515e-05, | |
| "loss": 2.7228, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.557386934673367e-05, | |
| "loss": 2.7089, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.5543718592964825e-05, | |
| "loss": 2.6962, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 2.551356783919598e-05, | |
| "loss": 2.7067, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 2.548341708542714e-05, | |
| "loss": 2.7016, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 2.545326633165829e-05, | |
| "loss": 2.6746, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 2.5423115577889446e-05, | |
| "loss": 2.6954, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 2.53929648241206e-05, | |
| "loss": 2.6972, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 2.536281407035176e-05, | |
| "loss": 2.6871, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 2.5332663316582915e-05, | |
| "loss": 2.7134, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_bleu": 78.02775295125716, | |
| "eval_gen_len": 16.242, | |
| "eval_loss": 2.631150484085083, | |
| "eval_meteor": 0.8935096956153317, | |
| "eval_rouge1": 93.4121, | |
| "eval_rouge2": 89.2356, | |
| "eval_rougeL": 91.593, | |
| "eval_rougeLsum": 91.6198, | |
| "eval_runtime": 25.3474, | |
| "eval_samples_per_second": 19.726, | |
| "eval_steps_per_second": 2.485, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 2.530251256281407e-05, | |
| "loss": 2.7203, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 2.5272361809045226e-05, | |
| "loss": 2.7245, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.5242211055276385e-05, | |
| "loss": 2.7023, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.521206030150754e-05, | |
| "loss": 2.7224, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.5181909547738695e-05, | |
| "loss": 2.6802, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.515175879396985e-05, | |
| "loss": 2.6996, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.5121608040201006e-05, | |
| "loss": 2.681, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.509145728643216e-05, | |
| "loss": 2.6895, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.5061306532663316e-05, | |
| "loss": 2.698, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.503115577889447e-05, | |
| "loss": 2.6802, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.5001005025125627e-05, | |
| "loss": 2.6914, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.4970854271356785e-05, | |
| "loss": 2.7011, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.494070351758794e-05, | |
| "loss": 2.6659, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.4910552763819096e-05, | |
| "loss": 2.6905, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.488040201005025e-05, | |
| "loss": 2.6557, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.485025125628141e-05, | |
| "loss": 2.6648, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.4820100502512565e-05, | |
| "loss": 2.6954, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.478994974874372e-05, | |
| "loss": 2.682, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.4759798994974872e-05, | |
| "loss": 2.6767, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.472964824120603e-05, | |
| "loss": 2.6865, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_bleu": 78.46041201212769, | |
| "eval_gen_len": 16.228, | |
| "eval_loss": 2.584897756576538, | |
| "eval_meteor": 0.8947853545083831, | |
| "eval_rouge1": 93.4999, | |
| "eval_rouge2": 89.2925, | |
| "eval_rougeL": 91.7008, | |
| "eval_rougeLsum": 91.7289, | |
| "eval_runtime": 24.8047, | |
| "eval_samples_per_second": 20.157, | |
| "eval_steps_per_second": 2.54, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.4699497487437186e-05, | |
| "loss": 2.6607, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.466934673366834e-05, | |
| "loss": 2.6692, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 2.4639195979899497e-05, | |
| "loss": 2.6633, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 2.4609045226130655e-05, | |
| "loss": 2.6655, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.457889447236181e-05, | |
| "loss": 2.6623, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.4548743718592966e-05, | |
| "loss": 2.6679, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.451859296482412e-05, | |
| "loss": 2.674, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.448844221105528e-05, | |
| "loss": 2.6719, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.4458291457286435e-05, | |
| "loss": 2.6832, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.4428140703517587e-05, | |
| "loss": 2.6776, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.4397989949748742e-05, | |
| "loss": 2.6682, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.4367839195979898e-05, | |
| "loss": 2.6956, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.4337688442211056e-05, | |
| "loss": 2.6586, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.430753768844221e-05, | |
| "loss": 2.6666, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.4277386934673367e-05, | |
| "loss": 2.6663, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.4247236180904522e-05, | |
| "loss": 2.6638, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.421708542713568e-05, | |
| "loss": 2.6112, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.4186934673366836e-05, | |
| "loss": 1.9824, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.415708542713568e-05, | |
| "loss": 1.7259, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 2.4126934673366836e-05, | |
| "loss": 2.2838, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_bleu": 77.92153227866292, | |
| "eval_gen_len": 16.232, | |
| "eval_loss": 2.501798629760742, | |
| "eval_meteor": 0.8944690987739373, | |
| "eval_rouge1": 93.4893, | |
| "eval_rouge2": 89.2497, | |
| "eval_rougeL": 91.4623, | |
| "eval_rougeLsum": 91.51, | |
| "eval_runtime": 25.1905, | |
| "eval_samples_per_second": 19.849, | |
| "eval_steps_per_second": 2.501, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 2.409678391959799e-05, | |
| "loss": 2.6248, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 2.4066633165829144e-05, | |
| "loss": 2.6643, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 2.4036482412060302e-05, | |
| "loss": 2.642, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 2.4006331658291458e-05, | |
| "loss": 2.67, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 2.3976180904522613e-05, | |
| "loss": 2.6563, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 2.3946030150753768e-05, | |
| "loss": 2.6873, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 2.3915879396984927e-05, | |
| "loss": 2.65, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 2.3885728643216082e-05, | |
| "loss": 2.6635, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 2.3855577889447237e-05, | |
| "loss": 2.6443, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 2.3825427135678393e-05, | |
| "loss": 2.6504, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 2.379527638190955e-05, | |
| "loss": 2.6425, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.3765125628140703e-05, | |
| "loss": 2.6774, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.373497487437186e-05, | |
| "loss": 2.6575, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.3704824120603014e-05, | |
| "loss": 2.6542, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.3674673366834172e-05, | |
| "loss": 2.6508, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.3644522613065328e-05, | |
| "loss": 2.6648, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 2.3614673366834173e-05, | |
| "loss": 2.6623, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 2.3584522613065328e-05, | |
| "loss": 2.66, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 2.3554371859296483e-05, | |
| "loss": 2.6568, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.352422110552764e-05, | |
| "loss": 2.6591, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_bleu": 79.49958192973479, | |
| "eval_gen_len": 16.234, | |
| "eval_loss": 2.5640199184417725, | |
| "eval_meteor": 0.9025279208048376, | |
| "eval_rouge1": 94.1823, | |
| "eval_rouge2": 90.5552, | |
| "eval_rougeL": 92.428, | |
| "eval_rougeLsum": 92.4351, | |
| "eval_runtime": 25.4062, | |
| "eval_samples_per_second": 19.68, | |
| "eval_steps_per_second": 2.48, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.3494070351758794e-05, | |
| "loss": 2.6622, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.3463919597989953e-05, | |
| "loss": 2.6638, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.3433768844221108e-05, | |
| "loss": 2.6521, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.340361809045226e-05, | |
| "loss": 2.657, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.3373467336683415e-05, | |
| "loss": 2.632, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.3343316582914574e-05, | |
| "loss": 2.6544, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.331316582914573e-05, | |
| "loss": 2.6429, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.3283015075376884e-05, | |
| "loss": 2.6534, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.325286432160804e-05, | |
| "loss": 2.6827, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.3222713567839198e-05, | |
| "loss": 2.6506, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.3192562814070353e-05, | |
| "loss": 2.6396, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.316241206030151e-05, | |
| "loss": 2.6777, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.3132261306532664e-05, | |
| "loss": 2.6548, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.3102110552763823e-05, | |
| "loss": 2.6735, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.3071959798994974e-05, | |
| "loss": 2.6713, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.304180904522613e-05, | |
| "loss": 2.6752, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.3011658291457285e-05, | |
| "loss": 2.6533, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.2981507537688444e-05, | |
| "loss": 2.6623, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.29513567839196e-05, | |
| "loss": 2.6596, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.2921206030150754e-05, | |
| "loss": 2.662, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_bleu": 79.32255832423239, | |
| "eval_gen_len": 16.256, | |
| "eval_loss": 2.555393695831299, | |
| "eval_meteor": 0.9010940538998614, | |
| "eval_rouge1": 94.0079, | |
| "eval_rouge2": 90.2547, | |
| "eval_rougeL": 92.1916, | |
| "eval_rougeLsum": 92.2075, | |
| "eval_runtime": 25.4585, | |
| "eval_samples_per_second": 19.64, | |
| "eval_steps_per_second": 2.475, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.289105527638191e-05, | |
| "loss": 2.6431, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.2860904522613068e-05, | |
| "loss": 2.6577, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.2830753768844223e-05, | |
| "loss": 2.6452, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.280060301507538e-05, | |
| "loss": 2.6465, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.2770452261306534e-05, | |
| "loss": 2.6566, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.2740301507537686e-05, | |
| "loss": 2.6632, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 2.2710150753768844e-05, | |
| "loss": 2.6179, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 2.268e-05, | |
| "loss": 2.6355, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 2.2649849246231155e-05, | |
| "loss": 2.656, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 2.261969849246231e-05, | |
| "loss": 2.6485, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 2.258954773869347e-05, | |
| "loss": 2.6834, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 2.2559396984924624e-05, | |
| "loss": 2.6586, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 2.252924623115578e-05, | |
| "loss": 2.6245, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 2.2499095477386935e-05, | |
| "loss": 2.6439, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 2.2468944723618093e-05, | |
| "loss": 2.657, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 2.243879396984925e-05, | |
| "loss": 2.651, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 2.2408643216080404e-05, | |
| "loss": 2.5159, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 2.2378492462311556e-05, | |
| "loss": 1.8454, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 2.2348341708542714e-05, | |
| "loss": 1.7432, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 2.2318793969849246e-05, | |
| "loss": 1.8874, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_bleu": 79.38237465310864, | |
| "eval_gen_len": 16.272, | |
| "eval_loss": 2.2466025352478027, | |
| "eval_meteor": 0.9008152345684776, | |
| "eval_rouge1": 94.0369, | |
| "eval_rouge2": 90.1224, | |
| "eval_rougeL": 91.9066, | |
| "eval_rougeLsum": 91.944, | |
| "eval_runtime": 24.8009, | |
| "eval_samples_per_second": 20.161, | |
| "eval_steps_per_second": 2.54, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 2.22886432160804e-05, | |
| "loss": 2.5847, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 2.2258492462311557e-05, | |
| "loss": 2.623, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 2.2228341708542716e-05, | |
| "loss": 2.665, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 2.219819095477387e-05, | |
| "loss": 2.647, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 2.2168040201005026e-05, | |
| "loss": 2.6653, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 2.213788944723618e-05, | |
| "loss": 2.6546, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 2.210773869346734e-05, | |
| "loss": 2.6471, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 2.2077587939698495e-05, | |
| "loss": 2.6871, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 2.2047437185929647e-05, | |
| "loss": 2.6601, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 2.2017286432160802e-05, | |
| "loss": 2.6541, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 2.198713567839196e-05, | |
| "loss": 2.6255, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 2.1956984924623116e-05, | |
| "loss": 2.6765, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 2.192683417085427e-05, | |
| "loss": 2.6448, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 2.1896683417085427e-05, | |
| "loss": 2.6667, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 2.1866532663316582e-05, | |
| "loss": 2.6544, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 2.183638190954774e-05, | |
| "loss": 2.6492, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 2.1806231155778896e-05, | |
| "loss": 2.6541, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 2.177608040201005e-05, | |
| "loss": 2.6601, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 2.1745929648241207e-05, | |
| "loss": 2.653, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 2.1715778894472362e-05, | |
| "loss": 2.6527, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_bleu": 80.15532470386316, | |
| "eval_gen_len": 16.238, | |
| "eval_loss": 2.573094129562378, | |
| "eval_meteor": 0.9042327142167804, | |
| "eval_rouge1": 94.281, | |
| "eval_rouge2": 90.8096, | |
| "eval_rougeL": 92.559, | |
| "eval_rougeLsum": 92.5681, | |
| "eval_runtime": 25.5646, | |
| "eval_samples_per_second": 19.558, | |
| "eval_steps_per_second": 2.464, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 2.1685628140703517e-05, | |
| "loss": 2.6359, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 2.1655477386934672e-05, | |
| "loss": 2.6562, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 2.1625326633165828e-05, | |
| "loss": 2.6518, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 2.1595175879396986e-05, | |
| "loss": 2.6403, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 2.156502512562814e-05, | |
| "loss": 2.658, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 2.1534874371859297e-05, | |
| "loss": 2.6583, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 2.1504723618090452e-05, | |
| "loss": 2.6719, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 2.147457286432161e-05, | |
| "loss": 2.6451, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 2.1444422110552766e-05, | |
| "loss": 2.6701, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 2.141427135678392e-05, | |
| "loss": 2.6666, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 2.1384120603015073e-05, | |
| "loss": 2.6536, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 2.1353969849246232e-05, | |
| "loss": 2.6423, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.1323819095477387e-05, | |
| "loss": 2.6586, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.1293668341708542e-05, | |
| "loss": 2.6554, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.1263517587939698e-05, | |
| "loss": 2.6623, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.1233366834170856e-05, | |
| "loss": 2.6749, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.120321608040201e-05, | |
| "loss": 2.6446, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.1173065326633167e-05, | |
| "loss": 2.6764, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.1142914572864322e-05, | |
| "loss": 2.6522, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.1112763819095477e-05, | |
| "loss": 2.6618, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_bleu": 79.74215483171986, | |
| "eval_gen_len": 16.236, | |
| "eval_loss": 2.4326839447021484, | |
| "eval_meteor": 0.9044505502074225, | |
| "eval_rouge1": 94.3369, | |
| "eval_rouge2": 90.6586, | |
| "eval_rougeL": 92.1744, | |
| "eval_rougeLsum": 92.1895, | |
| "eval_runtime": 25.4582, | |
| "eval_samples_per_second": 19.64, | |
| "eval_steps_per_second": 2.475, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.1082613065326636e-05, | |
| "loss": 1.977, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.105246231155779e-05, | |
| "loss": 1.731, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.1022311557788943e-05, | |
| "loss": 1.7164, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.09921608040201e-05, | |
| "loss": 1.6936, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.0962010050251257e-05, | |
| "loss": 1.6888, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.0931859296482412e-05, | |
| "loss": 1.7005, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.0901708542713568e-05, | |
| "loss": 1.6815, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.0871557788944723e-05, | |
| "loss": 1.6894, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.0841708542713568e-05, | |
| "loss": 1.6858, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.0811557788944723e-05, | |
| "loss": 2.3491, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.0781407035175882e-05, | |
| "loss": 2.6593, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.0751256281407037e-05, | |
| "loss": 2.6468, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.0721407035175883e-05, | |
| "loss": 2.6608, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.069155778894472e-05, | |
| "loss": 2.6587, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.066140703517588e-05, | |
| "loss": 2.6772, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.0631256281407035e-05, | |
| "loss": 2.6861, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.060140703517588e-05, | |
| "loss": 2.6909, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.0571256281407036e-05, | |
| "loss": 2.7178, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.054110552763819e-05, | |
| "loss": 2.7448, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.0510954773869346e-05, | |
| "loss": 2.7368, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_bleu": 79.88380574425997, | |
| "eval_gen_len": 16.228, | |
| "eval_loss": 2.729801893234253, | |
| "eval_meteor": 0.9013053514191987, | |
| "eval_rouge1": 94.1293, | |
| "eval_rouge2": 90.4052, | |
| "eval_rougeL": 92.4025, | |
| "eval_rougeLsum": 92.4078, | |
| "eval_runtime": 25.6909, | |
| "eval_samples_per_second": 19.462, | |
| "eval_steps_per_second": 2.452, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.048110552763819e-05, | |
| "loss": 2.7485, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.0450954773869347e-05, | |
| "loss": 2.7678, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.0420804020100506e-05, | |
| "loss": 2.7518, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.039065326633166e-05, | |
| "loss": 2.7449, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.0360502512562813e-05, | |
| "loss": 2.7359, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.0330351758793968e-05, | |
| "loss": 2.7379, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.0300201005025127e-05, | |
| "loss": 2.743, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.0270050251256282e-05, | |
| "loss": 2.73, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.0240201005025127e-05, | |
| "loss": 2.7259, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.0210050251256282e-05, | |
| "loss": 2.6952, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.0179899497487438e-05, | |
| "loss": 2.6952, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.0149748743718593e-05, | |
| "loss": 2.6855, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.011959798994975e-05, | |
| "loss": 2.7112, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.0089447236180907e-05, | |
| "loss": 2.6988, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.0059296482412062e-05, | |
| "loss": 2.6994, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.0029145728643214e-05, | |
| "loss": 2.6774, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.9998994974874373e-05, | |
| "loss": 2.6856, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.9968844221105528e-05, | |
| "loss": 2.6875, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.9938693467336683e-05, | |
| "loss": 2.6905, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.990854271356784e-05, | |
| "loss": 2.6858, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_bleu": 80.43692634867155, | |
| "eval_gen_len": 16.234, | |
| "eval_loss": 2.6981565952301025, | |
| "eval_meteor": 0.9051906006959359, | |
| "eval_rouge1": 94.4532, | |
| "eval_rouge2": 91.0106, | |
| "eval_rougeL": 92.3314, | |
| "eval_rougeLsum": 92.3438, | |
| "eval_runtime": 24.9557, | |
| "eval_samples_per_second": 20.035, | |
| "eval_steps_per_second": 2.524, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.9878391959798994e-05, | |
| "loss": 2.6896, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.9848241206030152e-05, | |
| "loss": 2.688, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.9818090452261308e-05, | |
| "loss": 2.6817, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.9787939698492463e-05, | |
| "loss": 2.6833, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.9758090452261308e-05, | |
| "loss": 2.6626, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.9727939698492464e-05, | |
| "loss": 2.6504, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.9697788944723615e-05, | |
| "loss": 2.6603, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.9667638190954774e-05, | |
| "loss": 2.6381, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.963748743718593e-05, | |
| "loss": 2.6358, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.9607336683417085e-05, | |
| "loss": 2.6053, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.957718592964824e-05, | |
| "loss": 2.6376, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.95470351758794e-05, | |
| "loss": 2.619, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.9516884422110554e-05, | |
| "loss": 2.6132, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.948673366834171e-05, | |
| "loss": 2.6241, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.9456582914572864e-05, | |
| "loss": 2.6128, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.9426432160804023e-05, | |
| "loss": 2.6169, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.9396281407035178e-05, | |
| "loss": 2.6206, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.936613065326633e-05, | |
| "loss": 2.5968, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.9335979899497485e-05, | |
| "loss": 2.6079, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.9305829145728644e-05, | |
| "loss": 2.6236, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_bleu": 80.05686051541119, | |
| "eval_gen_len": 16.246, | |
| "eval_loss": 2.579113245010376, | |
| "eval_meteor": 0.9032701849367213, | |
| "eval_rouge1": 94.3329, | |
| "eval_rouge2": 90.4972, | |
| "eval_rougeL": 92.1838, | |
| "eval_rougeLsum": 92.1803, | |
| "eval_runtime": 25.6081, | |
| "eval_samples_per_second": 19.525, | |
| "eval_steps_per_second": 2.46, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.92756783919598e-05, | |
| "loss": 2.6189, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.9245527638190955e-05, | |
| "loss": 2.6045, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.921537688442211e-05, | |
| "loss": 2.6162, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.918522613065327e-05, | |
| "loss": 2.6158, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.9155075376884424e-05, | |
| "loss": 2.6142, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.912492462311558e-05, | |
| "loss": 2.6383, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.9095075376884424e-05, | |
| "loss": 2.6594, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.906522613065327e-05, | |
| "loss": 2.6318, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 1.9035075376884425e-05, | |
| "loss": 2.6514, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 1.9004924623115577e-05, | |
| "loss": 2.6613, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 1.8974773869346732e-05, | |
| "loss": 2.6357, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.894462311557789e-05, | |
| "loss": 2.6453, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.8914773869346733e-05, | |
| "loss": 2.6371, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.888462311557789e-05, | |
| "loss": 2.6155, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.8854773869346733e-05, | |
| "loss": 2.6097, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.882522613065327e-05, | |
| "loss": 2.6346, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.8795075376884424e-05, | |
| "loss": 2.6374, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.876492462311558e-05, | |
| "loss": 2.6266, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.8734773869346734e-05, | |
| "loss": 2.6392, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.870462311557789e-05, | |
| "loss": 2.6279, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_bleu": 80.26404550464542, | |
| "eval_gen_len": 16.212, | |
| "eval_loss": 2.613689422607422, | |
| "eval_meteor": 0.9019056042294404, | |
| "eval_rouge1": 94.1522, | |
| "eval_rouge2": 90.7421, | |
| "eval_rougeL": 92.4197, | |
| "eval_rougeLsum": 92.443, | |
| "eval_runtime": 25.3778, | |
| "eval_samples_per_second": 19.702, | |
| "eval_steps_per_second": 2.482, | |
| "step": 38000 | |
| } | |
| ], | |
| "max_steps": 100000, | |
| "num_train_epochs": 5, | |
| "total_flos": 1.8512320510138778e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |