OliverPerrin committed on
Commit
d9dbe7c
·
1 Parent(s): 1ec7405

Update Gradio demo, inference factory, and evaluation results

Browse files
outputs/evaluation_report.json CHANGED
@@ -1,79 +1,79 @@
1
  {
2
  "split": "val",
3
  "summarization": {
4
- "rouge_like": 0.13567121660564777,
5
- "bleu": 0.014673668103097205
6
  },
7
  "emotion": {
8
- "f1_macro": 0.1939181685447693
9
  },
10
  "topic": {
11
- "accuracy": 0.741687849517031,
12
  "classification_report": {
13
  "Business & Finance": {
14
- "precision": 0.6439114391143912,
15
- "recall": 0.527190332326284,
16
- "f1-score": 0.579734219269103,
17
  "support": 1986
18
  },
19
  "Computers & Internet": {
20
- "precision": 0.8251038301799724,
21
- "recall": 0.9044006069802731,
22
- "f1-score": 0.862934362934363,
23
  "support": 1977
24
  },
25
  "Education & Reference": {
26
- "precision": 0.6439444076770351,
27
- "recall": 0.49642857142857144,
28
- "f1-score": 0.560645347162201,
29
  "support": 1960
30
  },
31
  "Entertainment & Music": {
32
- "precision": 0.7064310260186549,
33
- "recall": 0.7360613810741689,
34
- "f1-score": 0.7209418837675351,
35
  "support": 1955
36
  },
37
  "Family & Relationships": {
38
- "precision": 0.7182971014492754,
39
- "recall": 0.8071246819338422,
40
- "f1-score": 0.7601246105919003,
41
  "support": 1965
42
  },
43
  "Health": {
44
- "precision": 0.7610579115367077,
45
- "recall": 0.8489318413021363,
46
- "f1-score": 0.8025967780716519,
47
  "support": 1966
48
  },
49
  "Politics & Government": {
50
- "precision": 0.7711132437619962,
51
- "recall": 0.8173957273652085,
52
- "f1-score": 0.7935802469135802,
53
  "support": 1966
54
  },
55
  "Science & Mathematics": {
56
- "precision": 0.7456647398843931,
57
- "recall": 0.7885888945491595,
58
- "f1-score": 0.7665263679128497,
59
  "support": 1963
60
  },
61
  "Society & Culture": {
62
- "precision": 0.6496559633027523,
63
- "recall": 0.5783563042368556,
64
- "f1-score": 0.6119362678908993,
65
  "support": 1959
66
  },
67
  "Sports": {
68
- "precision": 0.8888339920948617,
69
- "recall": 0.9118094272681196,
70
- "f1-score": 0.9001751313485113,
71
  "support": 1973
72
  },
73
  "macro avg": {
74
- "precision": 0.735401365502004,
75
- "recall": 0.7416287768464619,
76
- "f1-score": 0.7359195215862595,
77
  "support": 19670
78
  }
79
  }
 
1
  {
2
  "split": "val",
3
  "summarization": {
4
+ "rouge_like": 0.2817535277055523,
5
+ "bleu": 0.06501593900536834
6
  },
7
  "emotion": {
8
+ "f1_macro": 0.4053446650505066
9
  },
10
  "topic": {
11
+ "accuracy": 0.7548042704626334,
12
  "classification_report": {
13
  "Business & Finance": {
14
+ "precision": 0.6826859776168532,
15
+ "recall": 0.5221550855991943,
16
+ "f1-score": 0.5917261055634807,
17
  "support": 1986
18
  },
19
  "Computers & Internet": {
20
+ "precision": 0.8468166586883676,
21
+ "recall": 0.894790085988872,
22
+ "f1-score": 0.8701426463354648,
23
  "support": 1977
24
  },
25
  "Education & Reference": {
26
+ "precision": 0.6067106710671067,
27
+ "recall": 0.5627551020408164,
28
+ "f1-score": 0.5839068290100582,
29
  "support": 1960
30
  },
31
  "Entertainment & Music": {
32
+ "precision": 0.732976653696498,
33
+ "recall": 0.7708439897698209,
34
+ "f1-score": 0.7514335577162802,
35
  "support": 1955
36
  },
37
  "Family & Relationships": {
38
+ "precision": 0.7356746765249538,
39
+ "recall": 0.8101781170483461,
40
+ "f1-score": 0.7711310244611286,
41
  "support": 1965
42
  },
43
  "Health": {
44
+ "precision": 0.7917267917267917,
45
+ "recall": 0.8372329603255341,
46
+ "f1-score": 0.8138442521631644,
47
  "support": 1966
48
  },
49
  "Politics & Government": {
50
+ "precision": 0.7916459472899056,
51
+ "recall": 0.8097660223804679,
52
+ "f1-score": 0.8006034699522253,
53
  "support": 1966
54
  },
55
  "Science & Mathematics": {
56
+ "precision": 0.749162278602202,
57
+ "recall": 0.7972491085073866,
58
+ "f1-score": 0.7724580454096742,
59
  "support": 1963
60
  },
61
  "Society & Culture": {
62
+ "precision": 0.6588683351468988,
63
+ "recall": 0.6181725370086779,
64
+ "f1-score": 0.637872004213853,
65
  "support": 1959
66
  },
67
  "Sports": {
68
+ "precision": 0.909317389138017,
69
+ "recall": 0.9249873289406995,
70
+ "f1-score": 0.9170854271356784,
71
  "support": 1973
72
  },
73
  "macro avg": {
74
+ "precision": 0.7505585379497595,
75
+ "recall": 0.7548130337609815,
76
+ "f1-score": 0.7510203361961008,
77
  "support": 19670
78
  }
79
  }
pyproject.toml CHANGED
@@ -29,7 +29,7 @@ kaggle = ">=1.5.12"
29
  streamlit = ">=1.25.0"
30
  plotly = ">=5.18.0"
31
  faiss-cpu = "1.9.0"
32
- huggingface_hub = ">=0.19.0"
33
  hydra-core = "^1.3.0"
34
  bitsandbytes = ">=0.41.0"
35
  accelerate = ">=0.21.0"
 
29
  streamlit = ">=1.25.0"
30
  plotly = ">=5.18.0"
31
  faiss-cpu = "1.9.0"
32
+ huggingface_hub = ">=0.34.0,<1.0"
33
  hydra-core = "^1.3.0"
34
  bitsandbytes = ">=0.41.0"
35
  accelerate = ">=0.21.0"
scripts/delete_hf_repo.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Delete Hugging Face repository."""
from huggingface_hub import HfApi

# Auth: the hub client reads the HF_TOKEN environment variable automatically,
# so no explicit login call is needed here.

# Repository slated for deletion.
REPO_ID = "OliverPerrin/LexiMind"

api = HfApi()

try:
    print(f"Deleting model repository {REPO_ID}...")
    api.delete_repo(repo_id=REPO_ID, repo_type="model")
    print(f"✓ Successfully deleted {REPO_ID}")
except Exception as e:
    # Best-effort script: report the failure rather than traceback.
    print(f"Error deleting repository: {e}")
scripts/demo_gradio.py CHANGED
@@ -65,13 +65,13 @@ def get_pipeline():
65
  repo_id="OliverPerrin/LexiMind-Model",
66
  filename="best.pt",
67
  local_dir="checkpoints",
68
- local_dir_use_symlinks=False,
69
  )
70
 
71
  _pipeline, _ = create_inference_pipeline(
72
  tokenizer_dir="artifacts/hf_tokenizer/",
73
  checkpoint_path="checkpoints/best.pt",
74
  labels_path="artifacts/labels.json",
 
75
  )
76
  return _pipeline
77
 
 
65
  repo_id="OliverPerrin/LexiMind-Model",
66
  filename="best.pt",
67
  local_dir="checkpoints",
 
68
  )
69
 
70
  _pipeline, _ = create_inference_pipeline(
71
  tokenizer_dir="artifacts/hf_tokenizer/",
72
  checkpoint_path="checkpoints/best.pt",
73
  labels_path="artifacts/labels.json",
74
+ model_config_path="configs/model/base.yaml",
75
  )
76
  return _pipeline
77
 
scripts/upload_model_to_hf.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Upload model checkpoint to Hugging Face Hub."""
from pathlib import Path

from huggingface_hub import HfApi, create_repo

# Auth: the hub client reads the HF_TOKEN environment variable automatically,
# so no explicit login call is needed here.

# Target model repository and the local checkpoint to publish.
repo_id = "OliverPerrin/LexiMind-Model"
model_file = "checkpoints/best.pt"

# Fail fast with a clear message if the checkpoint is missing, instead of a
# deep error from inside the upload call.
if not Path(model_file).is_file():
    raise SystemExit(f"Checkpoint not found: {model_file}")

api = HfApi()

# Create the repository if it doesn't exist (exist_ok makes this idempotent).
try:
    print(f"Creating model repository {repo_id}...")
    create_repo(repo_id=repo_id, repo_type="model", exist_ok=True, private=False)
    print("✓ Repository created/verified")
except Exception as e:
    # Best-effort: report and continue — upload_file will surface a real
    # auth/permission failure on its own.
    print(f"Repository creation: {e}")

print(f"Uploading {model_file} to {repo_id}...")

# Upload the checkpoint into the repo root as "best.pt".
api.upload_file(
    path_or_fileobj=model_file,
    path_in_repo="best.pt",
    repo_id=repo_id,
    repo_type="model",
)

print("✓ Model uploaded successfully!")
src/inference/factory.py CHANGED
@@ -58,7 +58,8 @@ def create_inference_pipeline(
58
 
59
  tokenizer = Tokenizer(resolved_tokenizer_config)
60
 
61
- # Default to base config if not specified (checkpoint was trained with base config)
 
62
  if model_config_path is None:
63
  model_config_path = (
64
  Path(__file__).resolve().parent.parent.parent / "configs" / "model" / "base.yaml"
 
58
 
59
  tokenizer = Tokenizer(resolved_tokenizer_config)
60
 
61
+ # Default to the base config because the published checkpoints were trained
62
+ # with the 12-layer FLAN-T5-base alignment (vocab 32128, rel pos bias).
63
  if model_config_path is None:
64
  model_config_path = (
65
  Path(__file__).resolve().parent.parent.parent / "configs" / "model" / "base.yaml"