yangzhitao committed
Commit 6c930b9 · Parent: c842956

refactor: reorganize schema definitions and enhance data loading functions for improved clarity and maintainability

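In short, the commit moves the Pydantic schema classes out of src/prepare.py into a new src/schemas package and splits the snapshot downloads into dedicated helpers. A minimal sketch of the resulting import surface (the call sites below are hypothetical; module paths and names are taken from the diffs that follow):

```python
# Sketch only: how downstream code can use the relocated schemas after this commit.
from src.prepare import load_display_toml, load_meta_toml, prepare_space
from src.schemas.display_toml import DisplayToml
from src.schemas.meta_toml import MetaToml, MetaToml_Benchmark

prepare_space()                              # download queue/results once, then warm the TOML caches
meta: MetaToml = load_meta_toml()            # parsed once per process via lru_cache(maxsize=1)
display: DisplayToml = load_display_toml()
# "vsi_bench" is an assumed key; any benchmark key present in meta.toml works here.
bench: MetaToml_Benchmark = meta.benchmark_key_to_benchmark["vsi_bench"]
```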
src/about.py CHANGED
@@ -7,56 +7,11 @@ from loguru import logger
 from src.prepare import load_display_toml, load_meta_toml, prepare_space
 
 if typing.TYPE_CHECKING:
-    from src.prepare import MetaToml_Benchmark
+    from src.schemas.meta_toml import MetaToml_Benchmark
 
 prepare_space()
 
 
-# class _Task(BaseModel):
-#     benchmark: Annotated[str, Field(description="The benchmark name")]
-#     metric: Annotated[str, Field(description="The metric name")]
-#     col_name: Annotated[str, Field(description="The column name")]
-
-
-# Select your tasks here
-# ---------------------------------------------------
-# class _Tasks(Enum):
-#     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-
-#     # acc
-#     task1_1 = _Task(benchmark="MindCube", metric="acc", col_name="MindCube(acc)")
-#     task2_1 = _Task(benchmark="MMSI", metric="acc", col_name="MMSI(acc)")
-#     task3_1 = _Task(benchmark="Omni", metric="acc", col_name="Omni(acc)")
-#     task4_1 = _Task(benchmark="Core", metric="acc", col_name="Core(acc)")
-#     task5_1 = _Task(benchmark="SpatialViz", metric="acc", col_name="SpatialViz(acc)")
-#     task6_1 = _Task(benchmark="STARE", metric="acc", col_name="STARE(acc)")
-#     task7_1 = _Task(benchmark="SITEBench", metric="acc", col_name="SITEBench(acc)")
-#     task8_1 = _Task(benchmark="VSI (MCQ)", metric="acc", col_name="VSI (MCQ)(acc)")
-
-#     # caa
-#     task1_2 = _Task(benchmark="MindCube", metric="caa", col_name="MindCube(caa)")
-#     task2_2 = _Task(benchmark="MMSI", metric="caa", col_name="MMSI(caa)")
-#     task3_2 = _Task(benchmark="Omni", metric="caa", col_name="Omni(caa)")
-#     task4_2 = _Task(benchmark="Core", metric="caa", col_name="Core(caa)")
-#     task5_2 = _Task(benchmark="SpatialViz", metric="caa", col_name="SpatialViz(caa)")
-#     task6_2 = _Task(benchmark="STARE", metric="caa", col_name="STARE(caa)")
-#     task7_2 = _Task(benchmark="SITEBench", metric="caa", col_name="SITEBench(caa)")
-#     task8_2 = _Task(benchmark="VSI (MCQ)", metric="caa", col_name="VSI (MCQ)(caa)")
-
-#     # rand
-#     task1_3 = _Task(benchmark="MindCube", metric="rand", col_name="MindCube(rand)")
-#     task2_3 = _Task(benchmark="MMSI", metric="rand", col_name="MMSI(rand)")
-#     task3_3 = _Task(benchmark="Omni", metric="rand", col_name="Omni(rand)")
-#     task4_3 = _Task(benchmark="Core", metric="rand", col_name="Core(rand)")
-#     task5_3 = _Task(benchmark="SpatialViz", metric="rand", col_name="SpatialViz(rand)")
-#     task6_3 = _Task(benchmark="STARE", metric="rand", col_name="STARE(rand)")
-#     task7_3 = _Task(benchmark="SITEBench", metric="rand", col_name="SITEBench(rand)")
-#     task8_3 = _Task(benchmark="VSI (MCQ)", metric="rand", col_name="VSI (MCQ)(rand)")
-
-
-# BENCHMARKS = {m.value.benchmark for m in Tasks}
-# METRICS = {m.value.metric for m in Tasks}
-# COL_NAMES = {m.value.col_name for m in Tasks}
 @lru_cache(maxsize=1)
 def get_benchmarks() -> list["MetaToml_Benchmark"]:
     meta_toml = load_meta_toml()
@@ -81,22 +36,22 @@ NUM_FEWSHOT = 0 # Change with your few shot
 TITLE = """<h1 align="center" id="space-title">EASI Leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
-INTRODUCTION_TEXT = """
+INTRODUCTION_TEXT = dedent("""
 **EASI: Holistic Evaluation of Multimodal LLMs on Spatial Intelligence**
 
 EASI conceptualizes a comprehensive taxonomy of spatial tasks that unifies existing benchmarks and a standardized protocol for the fair evaluation of state-of-the-art proprietary and open-source models.
-"""
+""")
 
 # Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = """
+LLM_BENCHMARKS_TEXT = dedent("""
 ## Leaderboard
 
 You can find the documentation of EASI here: [EvolvingLMMs-Lab/EASI](https://github.com/EvolvingLMMs-Lab/EASI).
 
 And the dataset for this leaderboard: [lmms-lab-si/EASI-Leaderboard-Data](https://huggingface.co/datasets/lmms-lab-si/EASI-Leaderboard-Data)
-"""
+""")
 
-EVALUATION_QUEUE_TEXT = """
+EVALUATION_QUEUE_TEXT = dedent("""
 ## Some good practices before submitting an evaluation with EASI
 
 ### 1) Make sure you can load your model and tokenizer using AutoClasses:
@@ -124,7 +79,7 @@ When we add extra information about models to the leaderboard, it will be automa
 If your model is displayed in the `FAILED` category, its execution stopped.
 Make sure you have followed the above steps first.
 If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
-"""
+""")
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = dedent("""
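The only change to the text constants in src/about.py is wrapping them in dedent(...), matching how CITATION_BUTTON_TEXT was already defined. A small, self-contained illustration of what that buys (textwrap.dedent is the stdlib helper the module presumably imports):

```python
from textwrap import dedent

# dedent() strips the whitespace prefix shared by all lines, so triple-quoted
# markdown stays clean even if the constant is ever defined inside an indented block.
INTRO = dedent("""
    **EASI: Holistic Evaluation of Multimodal LLMs on Spatial Intelligence**

    EASI conceptualizes a comprehensive taxonomy of spatial tasks ...
""")
print(INTRO)  # lines come out flush-left, with no stray four-space indentation
```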
src/prepare.py CHANGED
@@ -1,15 +1,14 @@
 import os
 import sys
-from functools import cached_property, lru_cache
+from functools import lru_cache
 from pathlib import Path
-from typing import Annotated
 
 from huggingface_hub import snapshot_download
 from loguru import logger
-from pydantic import BaseModel, ConfigDict, Field
-from typing_extensions import Self
 
 from src.envs import API, settings
+from src.schemas.display_toml import DisplayToml
+from src.schemas.meta_toml import MetaToml
 
 if sys.version_info < (3, 11):
     from tomli import load as toml_load
@@ -22,132 +21,49 @@ PREPARED_FLAG: bool = os.getenv("NO_DOWNLOAD", 0) == 1
 def prepare_space():
     """Space initialisation"""
 
-    def _restart_space():
-        API.restart_space(repo_id=settings.REPO_ID)
-
     global PREPARED_FLAG
     if not PREPARED_FLAG:
-        try:
-            snapshot_download(
-                repo_id=settings.QUEUE_REPO_ID,
-                local_dir=settings.EVAL_REQUESTS_PATH,
-                repo_type="dataset",
-                tqdm_class=None,
-                etag_timeout=30,
-                token=settings.HF_TOKEN.get_secret_value(),
-            )
-        except Exception as e:
-            logger.error(f"Error downloading eval queue: {e!s}")
-            _restart_space()
-        try:
-            snapshot_download(
-                repo_id=settings.RESULTS_REPO_ID,
-                local_dir=settings.EVAL_RESULTS_PATH,
-                repo_type="dataset",
-                tqdm_class=None,
-                etag_timeout=30,
-                allow_patterns=["leaderboard/*.toml", "leaderboard/**/*.json"],
-                token=settings.HF_TOKEN.get_secret_value(),
-            )
-        except Exception as e:
-            logger.error(f"Error downloading eval queue: {e!s}")
-            _restart_space()
+        download_results()
+        download_queue()
         PREPARED_FLAG = True
 
     load_meta_toml()
     load_display_toml()
 
 
-class MetaToml(BaseModel):
-    model_config = ConfigDict(extra="allow", frozen=True)
-
-    models: list["MetaToml_Model"]
-
-    @cached_property
-    def model_key_to_model(self) -> dict[str, "MetaToml_Model"]:
-        return {model.key: model for model in self.models}
-
-    @cached_property
-    def model_title_to_model(self) -> dict[str, "MetaToml_Model"]:
-        """Model title (lower case) to model mapping"""
-        return {model.title.lower(): model for model in self.models}
-
-    benchmarks: list["MetaToml_Benchmark"]
-
-    @cached_property
-    def benchmark_key_to_benchmark(self) -> dict[str, "MetaToml_Benchmark"]:
-        return {benchmark.key: benchmark for benchmark in self.benchmarks}
-
-    model_repos: list["MetaToml_ModelRepo"]
-
-    @cached_property
-    def model_key_to_repo(self) -> dict[str, "MetaToml_ModelRepo"]:
-        return {repo.key: repo for repo in self.model_repos}
-
-    # --- Helper properties ---
-    @cached_property
-    def model_title_to_repo(self) -> dict[str, "MetaToml_ModelRepo"]:
-        """Model title (lower case) to model repo mapping"""
-        mapping = {}
-        for model in self.models:
-            title = model.title.lower()
-            key = model.key
-            repo = self.model_key_to_repo.get(key)
-            if repo:
-                mapping[title] = repo
-        return mapping
-
-    @cached_property
-    def model_title_to_key(self) -> dict[str, str]:
-        return {model.title.lower(): model.key for model in self.models}
-
-    @cached_property
-    def benchmark_title_to_key(self) -> dict[str, str]:
-        return {benchmark.title.lower(): benchmark.key for benchmark in self.benchmarks}
-
-    @cached_property
-    def model_key_to_repo_id(self) -> dict[str, str]:
-        return {model.key: model.repo_id for model in self.model_repos if model.repo_id is not None}
-
-
-class _HashableComparableMixin(BaseModel):
-    model_config = ConfigDict(extra="allow", frozen=True)
-
-    key: str
-    title: str
-
-    def __hash__(self) -> int:
-        return hash(self.key)
-
-    def __eq__(self, other: Self) -> bool:
-        return (self.key, self.title) == (other.key, other.title)
-
-    def __lt__(self, other: Self) -> bool:
-        return (self.key, self.title) < (other.key, other.title)
-
-    def __gt__(self, other: Self) -> bool:
-        return (self.key, self.title) > (other.key, other.title)
-
-    def __le__(self, other: Self) -> bool:
-        return (self.key, self.title) <= (other.key, other.title)
-
-    def __ge__(self, other: Self) -> bool:
-        return (self.key, self.title) >= (other.key, other.title)
-
-
-class MetaToml_Benchmark(_HashableComparableMixin):
-    disabled: bool = False
-
-
-class MetaToml_Model(_HashableComparableMixin): ...
-
-
-class MetaToml_ModelRepo(BaseModel):
-    model_config = ConfigDict(extra="allow", frozen=True)
-
-    key: str
-    repo_id: str | None = None
-    link: str | None = None
+def _restart_space():
+    API.restart_space(repo_id=settings.REPO_ID)
+
+
+def download_results():
+    try:
+        snapshot_download(
+            repo_id=settings.QUEUE_REPO_ID,
+            local_dir=settings.EVAL_REQUESTS_PATH,
+            repo_type="dataset",
+            tqdm_class=None,
+            etag_timeout=30,
+            token=settings.HF_TOKEN.get_secret_value(),
+        )
+    except Exception as e:
+        logger.error(f"Error downloading eval queue: {e!s}")
+        _restart_space()
+
+
+def download_queue():
+    try:
+        snapshot_download(
+            repo_id=settings.RESULTS_REPO_ID,
+            local_dir=settings.EVAL_RESULTS_PATH,
+            repo_type="dataset",
+            tqdm_class=None,
+            etag_timeout=30,
+            allow_patterns=["leaderboard/*.toml", "leaderboard/**/*.json"],
+            token=settings.HF_TOKEN.get_secret_value(),
+        )
+    except Exception as e:
+        logger.error(f"Error downloading eval queue: {e!s}")
+        _restart_space()
+
 
 
 @lru_cache(maxsize=1)
@@ -162,25 +78,6 @@ def load_meta_toml() -> MetaToml:
     return meta_toml
 
 
-class DisplayToml(BaseModel):
-    model_config = ConfigDict(extra="allow", frozen=True)
-
-    version: Annotated[str, Field(..., description="The version of the results.")]
-    benchmarks_order: Annotated[
-        list[str],
-        Field(
-            default_factory=lambda: [
-                "vsi_bench",
-                "mmsi_bench",
-                "mindcube_tiny",
-                "viewspatial",
-                "site",
-            ],
-            description="The predefined order of the benchmarks.",
-        ),
-    ]
-
-
 @lru_cache(maxsize=1)
 def load_display_toml() -> DisplayToml:
     display_toml_path = Path(settings.EVAL_RESULTS_PATH) / "leaderboard" / "display.toml"
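Aside from splitting the two snapshot_download calls into download_results() and download_queue(), src/prepare.py keeps its cached TOML loaders. For reference, a minimal sketch of that loader pattern under the same Python-version gate (the file path and return type below are placeholders, not the module's real ones):

```python
import sys
from functools import lru_cache
from pathlib import Path

# Python 3.11+ ships tomllib; older interpreters fall back to the tomli backport,
# mirroring the version gate kept in src/prepare.py.
if sys.version_info < (3, 11):
    from tomli import load as toml_load
else:
    from tomllib import load as toml_load


@lru_cache(maxsize=1)
def load_some_toml(path: str = "leaderboard/meta.toml") -> dict:
    # maxsize=1 means the file is read and parsed once per process;
    # subsequent calls return the same cached object.
    with Path(path).open("rb") as f:
        return toml_load(f)
```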
src/schemas/display_toml.py ADDED
@@ -0,0 +1,22 @@
+from typing import Annotated
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class DisplayToml(BaseModel):
+    model_config = ConfigDict(extra="allow", frozen=True)
+
+    version: Annotated[str, Field(..., description="The version of the results.")]
+    benchmarks_order: Annotated[
+        list[str],
+        Field(
+            default_factory=lambda: [
+                "vsi_bench",
+                "mmsi_bench",
+                "mindcube_tiny",
+                "viewspatial",
+                "site",
+            ],
+            description="The predefined order of the benchmarks.",
+        ),
+    ]
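A hypothetical use of the new DisplayToml model (not part of the commit), showing the default_factory fallback for benchmarks_order and the frozen config:

```python
from src.schemas.display_toml import DisplayToml

cfg = DisplayToml(version="v1")  # benchmarks_order omitted -> default_factory list is used
print(cfg.benchmarks_order)
# ['vsi_bench', 'mmsi_bench', 'mindcube_tiny', 'viewspatial', 'site']

# extra="allow" keeps unknown keys from display.toml; frozen=True makes instances
# immutable, so a later `cfg.version = "v2"` would raise a ValidationError.
cfg2 = DisplayToml(version="v1", benchmarks_order=["site", "vsi_bench"], theme="dark")
```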
src/schemas/__init__.py ADDED
File without changes
src/schemas/meta_toml.py ADDED
@@ -0,0 +1,96 @@
+from functools import cached_property
+
+from pydantic import BaseModel, ConfigDict
+from typing_extensions import Self
+
+
+class MetaToml(BaseModel):
+    model_config = ConfigDict(extra="allow", frozen=True)
+
+    models: list["MetaToml_Model"]
+
+    @cached_property
+    def model_key_to_model(self) -> dict[str, "MetaToml_Model"]:
+        return {model.key: model for model in self.models}
+
+    @cached_property
+    def model_title_to_model(self) -> dict[str, "MetaToml_Model"]:
+        """Model title (lower case) to model mapping"""
+        return {model.title.lower(): model for model in self.models}
+
+    benchmarks: list["MetaToml_Benchmark"]
+
+    @cached_property
+    def benchmark_key_to_benchmark(self) -> dict[str, "MetaToml_Benchmark"]:
+        return {benchmark.key: benchmark for benchmark in self.benchmarks}
+
+    model_repos: list["MetaToml_ModelRepo"]
+
+    @cached_property
+    def model_key_to_repo(self) -> dict[str, "MetaToml_ModelRepo"]:
+        return {repo.key: repo for repo in self.model_repos}
+
+    # --- Helper properties ---
+    @cached_property
+    def model_title_to_repo(self) -> dict[str, "MetaToml_ModelRepo"]:
+        """Model title (lower case) to model repo mapping"""
+        mapping = {}
+        for model in self.models:
+            title = model.title.lower()
+            key = model.key
+            repo = self.model_key_to_repo.get(key)
+            if repo:
+                mapping[title] = repo
+        return mapping
+
+    @cached_property
+    def model_title_to_key(self) -> dict[str, str]:
+        return {model.title.lower(): model.key for model in self.models}
+
+    @cached_property
+    def benchmark_title_to_key(self) -> dict[str, str]:
+        return {benchmark.title.lower(): benchmark.key for benchmark in self.benchmarks}
+
+    @cached_property
+    def model_key_to_repo_id(self) -> dict[str, str]:
+        return {model.key: model.repo_id for model in self.model_repos if model.repo_id is not None}
+
+
+class _HashableComparableMixin(BaseModel):
+    model_config = ConfigDict(extra="allow", frozen=True)
+
+    key: str
+    title: str
+
+    def __hash__(self) -> int:
+        return hash(self.key)
+
+    def __eq__(self, other: Self) -> bool:
+        return (self.key, self.title) == (other.key, other.title)
+
+    def __lt__(self, other: Self) -> bool:
+        return (self.key, self.title) < (other.key, other.title)
+
+    def __gt__(self, other: Self) -> bool:
+        return (self.key, self.title) > (other.key, other.title)
+
+    def __le__(self, other: Self) -> bool:
+        return (self.key, self.title) <= (other.key, other.title)
+
+    def __ge__(self, other: Self) -> bool:
+        return (self.key, self.title) >= (other.key, other.title)
+
+
+class MetaToml_Benchmark(_HashableComparableMixin):
+    disabled: bool = False
+
+
+class MetaToml_Model(_HashableComparableMixin): ...
+
+
+class MetaToml_ModelRepo(BaseModel):
+    model_config = ConfigDict(extra="allow", frozen=True)
+
+    key: str
+    repo_id: str | None = None
+    link: str | None = None
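And a hypothetical use of the relocated meta-schema classes (again, not part of the commit; keys, titles, and the repo id are made up), illustrating the lazily built lookup tables and the hashable/comparable mixin:

```python
from src.schemas.meta_toml import MetaToml, MetaToml_Benchmark

# Nested dicts are validated into MetaToml_Model / MetaToml_Benchmark / MetaToml_ModelRepo.
meta = MetaToml(
    models=[{"key": "gpt_4o", "title": "GPT-4o"}],
    benchmarks=[{"key": "vsi_bench", "title": "VSI (MCQ)"}],
    model_repos=[{"key": "gpt_4o", "repo_id": "openai/gpt-4o"}],
)
assert meta.benchmark_key_to_benchmark["vsi_bench"].title == "VSI (MCQ)"  # cached_property table
assert meta.model_title_to_key["gpt-4o"] == "gpt_4o"                      # titles are lower-cased
assert meta.model_key_to_repo_id == {"gpt_4o": "openai/gpt-4o"}

# _HashableComparableMixin hashes on `key` and orders on (key, title),
# so benchmark objects can be used as dict keys and sorted deterministically.
b = MetaToml_Benchmark(key="vsi_bench", title="VSI (MCQ)")
assert hash(b) == hash("vsi_bench") and not b.disabled
```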