| - 0 babi_nli/counting | |
| - 1 babi_nli/indefinite-knowledge | |
| - 2 babi_nli/simple-negation | |
| - 3 babi_nli/three-arg-relations | |
| - 4 babi_nli/basic-induction | |
| - 5 babi_nli/time-reasoning | |
| - 6 babi_nli/compound-coreference | |
| - 7 babi_nli/path-finding | |
| - 8 babi_nli/positional-reasoning | |
| - 9 babi_nli/conjunction | |
| - 10 babi_nli/size-reasoning | |
| - 11 babi_nli/yes-no-questions | |
| - 12 babi_nli/basic-coreference | |
| - 13 babi_nli/two-supporting-facts | |
| - 14 babi_nli/lists-sets | |
| - 15 babi_nli/two-arg-relations | |
| - 16 babi_nli/three-supporting-facts | |
| - 17 babi_nli/basic-deduction | |
| - 18 babi_nli/single-supporting-fact | |
| - 19 anli/a1 | |
| - 20 anli/a2 | |
| - 21 anli/a3 | |
| - 22 sick/label | |
| - 23 sick/relatedness | |
| - 24 sick/entailment_AB | |
| - 25 sick/entailment_BA | |
| - 26 snli | |
| - 27 scitail/snli_format | |
| - 28 hans | |
| - 29 WANLI | |
| - 30 recast/recast_kg_relations | |
| - 31 recast/recast_puns | |
| - 32 recast/recast_factuality | |
| - 33 recast/recast_megaveridicality | |
| - 34 recast/recast_verbcorner | |
| - 35 recast/recast_verbnet | |
| - 36 recast/recast_ner | |
| - 37 recast/recast_sentiment | |
| - 38 probability_words_nli/usnli | |
| - 39 probability_words_nli/reasoning_1hop | |
| - 40 probability_words_nli/reasoning_2hop | |
| - 41 nan-nli/joey234--nan-nli | |
| - 42 nli_fever | |
| - 43 breaking_nli | |
| - 44 conj_nli | |
| - 45 fracas | |
| - 46 dialogue_nli | |
| - 47 mpe | |
| - 48 dnc | |
| - 49 gpt3_nli | |
| - 50 recast_white/fnplus | |
| - 51 recast_white/sprl | |
| - 52 recast_white/dpr | |
| - 53 joci | |
| - 54 contrast_nli | |
| - 55 robust_nli/IS_CS | |
| - 56 robust_nli/LI_LI | |
| - 57 robust_nli/ST_WO | |
| - 58 robust_nli/PI_SP | |
| - 59 robust_nli/PI_CD | |
| - 60 robust_nli/ST_SE | |
| - 61 robust_nli/ST_NE | |
| - 62 robust_nli/ST_LM | |
| - 63 robust_nli_is_sd | |
| - 64 robust_nli_li_ts | |
| - 65 gen_debiased_nli/snli_seq_z | |
| - 66 gen_debiased_nli/snli_z_aug | |
| - 67 gen_debiased_nli/snli_par_z | |
| - 68 gen_debiased_nli/mnli_par_z | |
| - 69 gen_debiased_nli/mnli_z_aug | |
| - 70 gen_debiased_nli/mnli_seq_z | |
| - 71 add_one_rte | |
| - 72 imppres/presupposition_cleft_uniqueness/presupposition | |
| - 73 imppres/presupposition_possessed_definites_uniqueness/presupposition | |
| - 74 imppres/presupposition_possessed_definites_existence/presupposition | |
| - 75 imppres/presupposition_only_presupposition/presupposition | |
| - 76 imppres/presupposition_all_n_presupposition/presupposition | |
| - 77 imppres/presupposition_both_presupposition/presupposition | |
| - 78 imppres/presupposition_change_of_state/presupposition | |
| - 79 imppres/presupposition_cleft_existence/presupposition | |
| - 80 imppres/presupposition_question_presupposition/presupposition | |
| - 81 imppres/implicature_modals/prag | |
| - 82 imppres/implicature_numerals_10_100/prag | |
| - 83 imppres/implicature_numerals_2_3/prag | |
| - 84 imppres/implicature_gradable_adjective/prag | |
| - 85 imppres/implicature_quantifiers/prag | |
| - 86 imppres/implicature_gradable_verb/prag | |
| - 87 imppres/implicature_connectives/prag | |
| - 88 imppres/implicature_gradable_adjective/log | |
| - 89 imppres/implicature_gradable_verb/log | |
| - 90 imppres/implicature_numerals_2_3/log | |
| - 91 imppres/implicature_numerals_10_100/log | |
| - 92 imppres/implicature_modals/log | |
| - 93 imppres/implicature_quantifiers/log | |
| - 94 imppres/implicature_connectives/log | |
| - 95 glue_diagnostics/diagnostics | |
| - 96 hlgd | |
| - 97 paws/labeled_final | |
| - 98 paws/labeled_swap | |
| - 99 quora | |
| - 100 medical_questions_pairs | |
| - 101 conll2003/pos_tags | |
| - 102 conll2003/chunk_tags | |
| - 103 conll2003/ner_tags | |
| - 104 hh-rlhf | |
| - 105 model-written-evals | |
| - 106 truthful_qa/multiple_choice | |
| - 107 fig-qa | |
| - 108 bigbench/fantasy_reasoning | |
| - 109 bigbench/nonsense_words_grammar | |
| - 110 bigbench/analytic_entailment | |
| - 111 bigbench/logic_grid_puzzle | |
| - 112 bigbench/geometric_shapes | |
| - 113 bigbench/key_value_maps | |
| - 114 bigbench/analogical_similarity | |
| - 115 bigbench/metaphor_understanding | |
| - 116 bigbench/metaphor_boolean | |
| - 117 bigbench/ruin_names | |
| - 118 bigbench/cs_algorithms | |
| - 119 bigbench/physical_intuition | |
| - 120 bigbench/mnist_ascii | |
| - 121 bigbench/moral_permissibility | |
| - 122 bigbench/emoji_movie | |
| - 123 bigbench/snarks | |
| - 124 bigbench/timedial | |
| - 125 bigbench/dark_humor_detection | |
| - 126 bigbench/gre_reading_comprehension | |
| - 127 bigbench/empirical_judgments | |
| - 128 bigbench/causal_judgment | |
| - 129 bigbench/fact_checker | |
| - 130 bigbench/logical_fallacy_detection | |
| - 131 bigbench/identify_math_theorems | |
| - 132 bigbench/dyck_languages | |
| - 133 bigbench/winowhy | |
| - 134 bigbench/logical_sequence | |
| - 135 bigbench/strategyqa | |
| - 136 bigbench/unit_interpretation | |
| - 137 bigbench/authorship_verification | |
| - 138 bigbench/undo_permutation | |
| - 139 bigbench/epistemic_reasoning | |
| - 140 bigbench/human_organs_senses | |
| - 141 bigbench/misconceptions | |
| - 142 bigbench/international_phonetic_alphabet_nli | |
| - 143 bigbench/identify_odd_metaphor | |
| - 144 bigbench/mathematical_induction | |
| - 145 bigbench/odd_one_out | |
| - 146 bigbench/reasoning_about_colored_objects | |
| - 147 bigbench/strange_stories | |
| - 148 bigbench/evaluating_information_essentiality | |
| - 149 bigbench/figure_of_speech_detection | |
| - 150 bigbench/english_proverbs | |
| - 151 bigbench/general_knowledge | |
| - 152 bigbench/tracking_shuffled_objects | |
| - 153 bigbench/physics | |
| - 154 bigbench/anachronisms | |
| - 155 bigbench/simple_ethical_questions | |
| - 156 bigbench/logical_args | |
| - 157 bigbench/suicide_risk | |
| - 158 bigbench/sentence_ambiguity | |
| - 159 bigbench/temporal_sequences | |
| - 160 bigbench/penguins_in_a_table | |
| - 161 bigbench/sports_understanding | |
| - 162 bigbench/hyperbaton | |
| - 163 bigbench/code_line_description | |
| - 164 bigbench/question_selection | |
| - 165 bigbench/disambiguation_qa | |
| - 166 bigbench/date_understanding | |
| - 167 bigbench/play_dialog_same_or_different | |
| - 168 bigbench/salient_translation_error_detection | |
| - 169 bigbench/irony_identification | |
| - 170 bigbench/emojis_emotion_prediction | |
| - 171 bigbench/hindu_knowledge | |
| - 172 bigbench/conceptual_combinations | |
| - 173 bigbench/implicatures | |
| - 174 bigbench/movie_dialog_same_or_different | |
| - 175 bigbench/social_support | |
| - 176 bigbench/presuppositions_as_nli | |
| - 177 bigbench/vitaminc_fact_verification | |
| - 178 bigbench/hhh_alignment | |
| - 179 bigbench/implicit_relations | |
| - 180 bigbench/bbq_lite_json | |
| - 181 bigbench/phrase_relatedness | |
| - 182 bigbench/logical_deduction | |
| - 183 bigbench/discourse_marker_prediction | |
| - 184 bigbench/movie_recommendation | |
| - 185 bigbench/real_or_fake_text | |
| - 186 bigbench/formal_fallacies_syllogisms_negation | |
| - 187 bigbench/crass_ai | |
| - 188 blimp/inchoative | |
| - 189 blimp/principle_A_c_command | |
| - 190 blimp/matrix_question_npi_licensor_present | |
| - 191 blimp/wh_questions_subject_gap_long_distance | |
| - 192 blimp/sentential_subject_island | |
| - 193 blimp/existential_there_quantifiers_2 | |
| - 194 blimp/sentential_negation_npi_scope | |
| - 195 blimp/complex_NP_island | |
| - 196 blimp/principle_A_reconstruction | |
| - 197 blimp/animate_subject_passive | |
| - 198 blimp/tough_vs_raising_1 | |
| - 199 blimp/wh_vs_that_with_gap | |
| - 200 blimp/principle_A_domain_2 | |
| - 201 blimp/npi_present_1 | |
| - 202 blimp/wh_vs_that_with_gap_long_distance | |
| - 203 blimp/superlative_quantifiers_1 | |
| - 204 blimp/npi_present_2 | |
| - 205 blimp/wh_questions_object_gap | |
| - 206 blimp/coordinate_structure_constraint_complex_left_branch | |
| - 207 blimp/coordinate_structure_constraint_object_extraction | |
| - 208 blimp/left_branch_island_echo_question | |
| - 209 blimp/drop_argument | |
| - 210 cos_e/v1.0 | |
| - 211 cosmos_qa | |
| - 212 dream | |
| - 213 openbookqa | |
| - 214 qasc | |
| - 215 quartz | |
| - 216 quail | |
| - 217 head_qa/en | |
| - 218 sciq | |
| - 219 social_i_qa | |
| - 220 wiki_hop | |
| - 221 wiqa | |
| - 222 piqa | |
| - 223 hellaswag | |
| - 224 super_glue/copa | |
| - 225 art | |
| - 226 hendrycks_test/moral_disputes | |
| - 227 hendrycks_test/moral_scenarios | |
| - 228 hendrycks_test/nutrition | |
| - 229 hendrycks_test/philosophy | |
| - 230 hendrycks_test/prehistory | |
| - 231 hendrycks_test/professional_accounting | |
| - 232 hendrycks_test/professional_law | |
| - 233 hendrycks_test/world_religions | |
| - 234 hendrycks_test/professional_psychology | |
| - 235 hendrycks_test/public_relations | |
| - 236 hendrycks_test/security_studies | |
| - 237 hendrycks_test/sociology | |
| - 238 hendrycks_test/us_foreign_policy | |
| - 239 hendrycks_test/virology | |
| - 240 hendrycks_test/miscellaneous | |
| - 241 hendrycks_test/professional_medicine | |
| - 242 hendrycks_test/medical_genetics | |
| - 243 hendrycks_test/college_mathematics | |
| - 244 hendrycks_test/management | |
| - 245 hendrycks_test/high_school_computer_science | |
| - 246 hendrycks_test/astronomy | |
| - 247 hendrycks_test/high_school_chemistry | |
| - 248 hendrycks_test/high_school_biology | |
| - 249 hendrycks_test/global_facts | |
| - 250 hendrycks_test/formal_logic | |
| - 251 hendrycks_test/elementary_mathematics | |
| - 252 hendrycks_test/high_school_european_history | |
| - 253 hendrycks_test/electrical_engineering | |
| - 254 hendrycks_test/conceptual_physics | |
| - 255 hendrycks_test/computer_security | |
| - 256 hendrycks_test/college_physics | |
| - 257 hendrycks_test/college_medicine | |
| - 258 hendrycks_test/college_computer_science | |
| - 259 hendrycks_test/college_chemistry | |
| - 260 hendrycks_test/college_biology | |
| - 261 hendrycks_test/econometrics | |
| - 262 hendrycks_test/clinical_knowledge | |
| - 263 hendrycks_test/anatomy | |
| - 264 hendrycks_test/marketing | |
| - 265 hendrycks_test/machine_learning | |
| - 266 hendrycks_test/logical_fallacies | |
| - 267 hendrycks_test/jurisprudence | |
| - 268 hendrycks_test/international_law | |
| - 269 hendrycks_test/human_sexuality | |
| - 270 hendrycks_test/human_aging | |
| - 271 hendrycks_test/high_school_world_history | |
| - 272 hendrycks_test/abstract_algebra | |
| - 273 hendrycks_test/high_school_us_history | |
| - 274 hendrycks_test/high_school_psychology | |
| - 275 hendrycks_test/high_school_physics | |
| - 276 hendrycks_test/high_school_microeconomics | |
| - 277 hendrycks_test/high_school_mathematics | |
| - 278 hendrycks_test/high_school_macroeconomics | |
| - 279 hendrycks_test/high_school_government_and_politics | |
| - 280 hendrycks_test/high_school_geography | |
| - 281 hendrycks_test/high_school_statistics | |
| - 282 hendrycks_test/business_ethics | |
| - 283 winogrande/winogrande_xl | |
| - 284 codah/codah | |
| - 285 ai2_arc/ARC-Challenge/challenge | |
| - 286 ai2_arc/ARC-Easy/challenge | |
| - 287 definite_pronoun_resolution | |
| - 288 swag | |
| - 289 math_qa | |
| - 290 utilitarianism | |
| - 291 TuringBench | |
| - 292 trec | |
| - 293 vitaminc/tals--vitaminc | |
| - 294 hope_edi/english | |
| - 295 rumoureval_2019/RumourEval2019 | |
| - 296 ethos/binary | |
| - 297 ethos/multilabel | |
| - 298 glue/cola | |
| - 299 glue/sst2 | |
| - 300 glue/mrpc | |
| - 301 glue/qqp | |
| - 302 glue/stsb | |
| - 303 glue/mnli | |
| - 304 glue/qnli | |
| - 305 glue/rte | |
| - 306 glue/wnli | |
| - 307 super_glue/boolq | |
| - 308 super_glue/cb | |
| - 309 super_glue/multirc | |
| - 310 super_glue/wic | |
| - 311 super_glue/axg | |
| - 312 tweet_eval/stance_feminist | |
| - 313 tweet_eval/stance_atheism | |
| - 314 tweet_eval/stance_hillary | |
| - 315 tweet_eval/stance_abortion | |
| - 316 tweet_eval/sentiment | |
| - 317 tweet_eval/offensive | |
| - 318 tweet_eval/stance_climate | |
| - 319 tweet_eval/irony | |
| - 320 tweet_eval/emotion | |
| - 321 tweet_eval/emoji | |
| - 322 tweet_eval/hate | |
| - 323 discovery/discovery | |
| - 324 pragmeval/switchboard | |
| - 325 pragmeval/squinky-informativeness | |
| - 326 pragmeval/emobank-arousal | |
| - 327 pragmeval/emobank-dominance | |
| - 328 pragmeval/emobank-valence | |
| - 329 pragmeval/mrda | |
| - 330 pragmeval/verifiability | |
| - 331 pragmeval/squinky-implicature | |
| - 332 pragmeval/squinky-formality | |
| - 333 pragmeval/gum | |
| - 334 pragmeval/emergent | |
| - 335 pragmeval/persuasiveness-premisetype | |
| - 336 pragmeval/pdtb | |
| - 337 pragmeval/persuasiveness-eloquence | |
| - 338 pragmeval/persuasiveness-specificity | |
| - 339 pragmeval/persuasiveness-strength | |
| - 340 pragmeval/sarcasm | |
| - 341 pragmeval/stac | |
| - 342 pragmeval/persuasiveness-claimtype | |
| - 343 pragmeval/persuasiveness-relevance | |
| - 344 lex_glue/eurlex | |
| - 345 lex_glue/scotus | |
| - 346 lex_glue/ledgar | |
| - 347 lex_glue/unfair_tos | |
| - 348 lex_glue/case_hold | |
| - 349 imdb | |
| - 350 rotten_tomatoes | |
| - 351 ag_news | |
| - 352 yelp_review_full/yelp_review_full | |
| - 353 financial_phrasebank/sentences_allagree | |
| - 354 poem_sentiment | |
| - 355 dbpedia_14/dbpedia_14 | |
| - 356 amazon_polarity/amazon_polarity | |
| - 357 app_reviews | |
| - 358 hate_speech18 | |
| - 359 sms_spam | |
| - 360 humicroedit/subtask-1 | |
| - 361 humicroedit/subtask-2 | |
| - 362 snips_built_in_intents | |
| - 363 banking77 | |
| - 364 hate_speech_offensive | |
| - 365 hyperpartisan_news_detection/byarticle | |
| - 366 hyperpartisan_news_detection/bypublisher | |
| - 367 go_emotions/simplified | |
| - 368 scicite | |
| - 369 liar | |
| - 370 lexical_relation_classification/ROOT09 | |
| - 371 lexical_relation_classification/EVALution | |
| - 372 lexical_relation_classification/CogALexV | |
| - 373 lexical_relation_classification/BLESS | |
| - 374 lexical_relation_classification/K&H+N | |
| - 375 linguisticprobing/coordination_inversion | |
| - 376 linguisticprobing/odd_man_out | |
| - 377 linguisticprobing/word_content | |
| - 378 linguisticprobing/obj_number | |
| - 379 linguisticprobing/past_present | |
| - 380 linguisticprobing/tree_depth | |
| - 381 linguisticprobing/sentence_length | |
| - 382 linguisticprobing/top_constituents | |
| - 383 linguisticprobing/bigram_shift | |
| - 384 linguisticprobing/subj_number | |
| - 385 crowdflower/sentiment_nuclear_power | |
| - 386 crowdflower/tweet_global_warming | |
| - 387 crowdflower/airline-sentiment | |
| - 388 crowdflower/economic-news | |
| - 389 crowdflower/political-media-audience | |
| - 390 crowdflower/political-media-bias | |
| - 391 crowdflower/political-media-message | |
| - 392 crowdflower/text_emotion | |
| - 393 crowdflower/corporate-messaging | |
| - 394 ethics/commonsense | |
| - 395 ethics/deontology | |
| - 396 ethics/justice | |
| - 397 ethics/virtue | |
| - 398 emo/emo2019 | |
| - 399 google_wellformed_query | |
| - 400 tweets_hate_speech_detection | |
| - 401 adv_glue/adv_sst2 | |
| - 402 adv_glue/adv_qqp | |
| - 403 adv_glue/adv_mnli | |
| - 404 adv_glue/adv_mnli_mismatched | |
| - 405 adv_glue/adv_qnli | |
| - 406 adv_glue/adv_rte | |
| - 407 has_part | |
| - 408 wnut_17/wnut_17 | |
| - 409 ncbi_disease/ncbi_disease | |
| - 410 acronym_identification | |
| - 411 jnlpba/jnlpba | |
| - 412 species_800/species_800 | |
| - 413 ontonotes_english/SpeedOfMagic--ontonotes_english | |
| - 414 blog_authorship_corpus/gender | |
| - 415 blog_authorship_corpus/age | |
| - 416 blog_authorship_corpus/horoscope | |
| - 417 blog_authorship_corpus/job | |
| - 418 open_question_type | |
| - 419 health_fact | |
| - 420 commonsense_qa | |
| - 421 mc_taco | |
| - 422 ade_corpus_v2/Ade_corpus_v2_classification | |
| - 423 discosense | |
| - 424 circa | |
| - 425 code_x_glue_cc_defect_detection | |
| - 426 code_x_glue_cc_clone_detection_big_clone_bench | |
| - 427 code_x_glue_cc_code_refinement/medium | |
| - 428 EffectiveFeedbackStudentWriting | |
| - 429 promptSentiment | |
| - 430 promptNLI | |
| - 431 promptSpoke | |
| - 432 promptProficiency | |
| - 433 promptGrammar | |
| - 434 promptCoherence | |
| - 435 phrase_similarity | |
| - 436 scientific-exaggeration-detection | |
| - 437 quarel | |
| - 438 fever-evidence-related/mwong--fever-related | |
| - 439 numer_sense | |
| - 440 dynasent/dynabench.dynasent.r1.all/r1 | |
| - 441 dynasent/dynabench.dynasent.r2.all/r2 | |
| - 442 Sarcasm_News_Headline | |
| - 443 sem_eval_2010_task_8 |