"""Retrieval leaderboard tab: data loading, UI callbacks, and tab construction."""

from pathlib import Path

from leaderboard_tab import (
    create_leaderboard_tab,
    search_leaderboard,
    update_columns_to_show,
)
from utils import load_json_results

# Constants
# Markdown shown in the tab's "about" area; passed verbatim to
# create_leaderboard_tab as `about_section`.
RETRIEVAL_ABOUT_SECTION = """
## About Retrieval Evaluation

The retrieval evaluation assesses a model's ability to find and retrieve relevant documents from large Arabic text corpora. Models are evaluated across **three diverse datasets** to ensure robust performance across different domains and dialects.

### Evaluation Datasets

| Dataset | Description |
|---------|-------------|
| **Web Search Dataset** | Real-world Arabic web search queries with relevant documents |
| **Islamic Knowledge Dataset** | Religious and scholarly Arabic content retrieval |
| **Arabic Dialect Dataset** | Dialectal Arabic QA pairs covering regional variations |

### Metrics

| Metric | Description | Range |
|--------|-------------|-------|
| **MRR** (Mean Reciprocal Rank) | Measures where the first relevant document appears in the ranking. Higher is better. | 0-100 |
| **nDCG** (Normalized DCG) | Evaluates overall ranking quality, rewarding relevant documents appearing earlier. | 0-100 |
| **Recall@5** | Percentage of queries where the relevant document is in the top 5 results. | 0-100 |
| **Overall Score** | Average of MRR, nDCG, and Recall@5 across all datasets. | 0-100 |

### Model Requirements

1. **Library Compatibility**: Must work with `sentence-transformers` library
2. **Arabic Support**: Must produce meaningful embeddings for Arabic text
3. **Public Access**: Model must be publicly available on HuggingFace Hub
4. **Output Format**: Fixed-dimension dense embeddings

> **Special Model Support**: Models like `jinaai/jina-embeddings-v4` that require `task` and `prompt_name` parameters are automatically detected and handled.

### How to Submit Your Model

1. **Ensure your model is public** on [HuggingFace Hub](https://huggingface.co)
2. **Verify compatibility** by testing locally:
```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("your-model-name", trust_remote_code=True)
embeddings = model.encode(["مرحبا بالعالم", "كيف حالك؟"])
print(embeddings.shape) # Should output (2, embedding_dim)
```
3. **Submit via the submit tab** on the second tab up there.

Happy retrieving! 🥳
"""

# Global variables
# Module-level cache of the prepared leaderboard dataframe. It stays None
# until load_retrieval_leaderboard() has completed successfully.
retrieval_df = None


def load_retrieval_leaderboard():
    """Load the retrieval results, add a "Rank" column, and cache the result.

    Reads ``results/retrieval_results.json`` located next to this file via
    ``load_json_results``, passing ``True`` and ``"Average Score"``
    positionally and ``drop_cols=["Revision", "Task"]``.
    NOTE(review): the positional arguments are presumably a sort flag and the
    sort column -- confirm against ``utils.load_json_results``.

    A "Rank" column holding 1..N (in the row order returned by the loader) is
    inserted as the first column.

    Returns:
        The prepared dataframe, which is also stored in the module-level
        ``retrieval_df``.
    """
    global retrieval_df

    dataframe_path = Path(__file__).parent / "results" / "retrieval_results.json"
    loaded = load_json_results(
        dataframe_path,
        True,
        "Average Score",
        drop_cols=["Revision", "Task"],
    )
    loaded.insert(0, "Rank", range(1, 1 + len(loaded)))

    # Publish only after preparation succeeded, so a failure above never
    # leaves a frame without the "Rank" column in the cache.
    retrieval_df = loaded
    return retrieval_df


def _get_retrieval_df():
    """Return the cached dataframe, loading it first if nothing is cached."""
    if retrieval_df is None:
        return load_retrieval_leaderboard()
    return retrieval_df


def retrieval_search_leaderboard(model_name, columns_to_show):
    """Search function for the retrieval leaderboard.

    Delegates to ``search_leaderboard`` with the retrieval dataframe, loading
    the data on demand so the helper never receives ``None``.
    """
    return search_leaderboard(_get_retrieval_df(), model_name, columns_to_show)


def update_retrieval_columns_to_show(columns_to_show):
    """Update the displayed columns for the retrieval leaderboard.

    Delegates to ``update_columns_to_show`` with the retrieval dataframe,
    loading the data on demand so the helper never receives ``None``.
    """
    return update_columns_to_show(_get_retrieval_df(), columns_to_show)


def create_retrieval_tab():
    """Create the complete retrieval leaderboard tab.

    Ensures the leaderboard data is loaded, then hands the dataframe, the
    default visible columns, the two callbacks above, and the about text to
    ``create_leaderboard_tab``.

    Returns:
        Whatever ``create_leaderboard_tab`` returns.
    """
    # Load data if not already loaded
    leaderboard_df = _get_retrieval_df()

    # Define default columns to show
    default_columns = [
        "Rank",
        "Model",
        "Average Score",
        "Model Size (MB)",
        "Context Length",
        "Embedding Dimension",
        "Web Search Dataset",
        "Islamic Knowledge Dataset",
        "Arabic Dialect Dataset",
    ]

    # Create and return the tab
    return create_leaderboard_tab(
        df=leaderboard_df,
        initial_columns_to_show=default_columns,
        search_function=retrieval_search_leaderboard,
        update_function=update_retrieval_columns_to_show,
        about_section=RETRIEVAL_ABOUT_SECTION,
        task_type="Retriever",
    )