Spaces:
Sleeping
Sleeping
OliverPerrin
committed on
Commit
·
d9dbe7c
1
Parent(s):
1ec7405
Update Gradio demo, inference factory, and evaluation results
Browse files- outputs/evaluation_report.json +37 -37
- pyproject.toml +1 -1
- scripts/delete_hf_repo.py +17 -0
- scripts/demo_gradio.py +1 -1
- scripts/upload_model_to_hf.py +32 -0
- src/inference/factory.py +2 -1
outputs/evaluation_report.json
CHANGED
|
@@ -1,79 +1,79 @@
|
|
| 1 |
{
|
| 2 |
"split": "val",
|
| 3 |
"summarization": {
|
| 4 |
-
"rouge_like": 0.
|
| 5 |
-
"bleu": 0.
|
| 6 |
},
|
| 7 |
"emotion": {
|
| 8 |
-
"f1_macro": 0.
|
| 9 |
},
|
| 10 |
"topic": {
|
| 11 |
-
"accuracy": 0.
|
| 12 |
"classification_report": {
|
| 13 |
"Business & Finance": {
|
| 14 |
-
"precision": 0.
|
| 15 |
-
"recall": 0.
|
| 16 |
-
"f1-score": 0.
|
| 17 |
"support": 1986
|
| 18 |
},
|
| 19 |
"Computers & Internet": {
|
| 20 |
-
"precision": 0.
|
| 21 |
-
"recall": 0.
|
| 22 |
-
"f1-score": 0.
|
| 23 |
"support": 1977
|
| 24 |
},
|
| 25 |
"Education & Reference": {
|
| 26 |
-
"precision": 0.
|
| 27 |
-
"recall": 0.
|
| 28 |
-
"f1-score": 0.
|
| 29 |
"support": 1960
|
| 30 |
},
|
| 31 |
"Entertainment & Music": {
|
| 32 |
-
"precision": 0.
|
| 33 |
-
"recall": 0.
|
| 34 |
-
"f1-score": 0.
|
| 35 |
"support": 1955
|
| 36 |
},
|
| 37 |
"Family & Relationships": {
|
| 38 |
-
"precision": 0.
|
| 39 |
-
"recall": 0.
|
| 40 |
-
"f1-score": 0.
|
| 41 |
"support": 1965
|
| 42 |
},
|
| 43 |
"Health": {
|
| 44 |
-
"precision": 0.
|
| 45 |
-
"recall": 0.
|
| 46 |
-
"f1-score": 0.
|
| 47 |
"support": 1966
|
| 48 |
},
|
| 49 |
"Politics & Government": {
|
| 50 |
-
"precision": 0.
|
| 51 |
-
"recall": 0.
|
| 52 |
-
"f1-score": 0.
|
| 53 |
"support": 1966
|
| 54 |
},
|
| 55 |
"Science & Mathematics": {
|
| 56 |
-
"precision": 0.
|
| 57 |
-
"recall": 0.
|
| 58 |
-
"f1-score": 0.
|
| 59 |
"support": 1963
|
| 60 |
},
|
| 61 |
"Society & Culture": {
|
| 62 |
-
"precision": 0.
|
| 63 |
-
"recall": 0.
|
| 64 |
-
"f1-score": 0.
|
| 65 |
"support": 1959
|
| 66 |
},
|
| 67 |
"Sports": {
|
| 68 |
-
"precision": 0.
|
| 69 |
-
"recall": 0.
|
| 70 |
-
"f1-score": 0.
|
| 71 |
"support": 1973
|
| 72 |
},
|
| 73 |
"macro avg": {
|
| 74 |
-
"precision": 0.
|
| 75 |
-
"recall": 0.
|
| 76 |
-
"f1-score": 0.
|
| 77 |
"support": 19670
|
| 78 |
}
|
| 79 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"split": "val",
|
| 3 |
"summarization": {
|
| 4 |
+
"rouge_like": 0.2817535277055523,
|
| 5 |
+
"bleu": 0.06501593900536834
|
| 6 |
},
|
| 7 |
"emotion": {
|
| 8 |
+
"f1_macro": 0.4053446650505066
|
| 9 |
},
|
| 10 |
"topic": {
|
| 11 |
+
"accuracy": 0.7548042704626334,
|
| 12 |
"classification_report": {
|
| 13 |
"Business & Finance": {
|
| 14 |
+
"precision": 0.6826859776168532,
|
| 15 |
+
"recall": 0.5221550855991943,
|
| 16 |
+
"f1-score": 0.5917261055634807,
|
| 17 |
"support": 1986
|
| 18 |
},
|
| 19 |
"Computers & Internet": {
|
| 20 |
+
"precision": 0.8468166586883676,
|
| 21 |
+
"recall": 0.894790085988872,
|
| 22 |
+
"f1-score": 0.8701426463354648,
|
| 23 |
"support": 1977
|
| 24 |
},
|
| 25 |
"Education & Reference": {
|
| 26 |
+
"precision": 0.6067106710671067,
|
| 27 |
+
"recall": 0.5627551020408164,
|
| 28 |
+
"f1-score": 0.5839068290100582,
|
| 29 |
"support": 1960
|
| 30 |
},
|
| 31 |
"Entertainment & Music": {
|
| 32 |
+
"precision": 0.732976653696498,
|
| 33 |
+
"recall": 0.7708439897698209,
|
| 34 |
+
"f1-score": 0.7514335577162802,
|
| 35 |
"support": 1955
|
| 36 |
},
|
| 37 |
"Family & Relationships": {
|
| 38 |
+
"precision": 0.7356746765249538,
|
| 39 |
+
"recall": 0.8101781170483461,
|
| 40 |
+
"f1-score": 0.7711310244611286,
|
| 41 |
"support": 1965
|
| 42 |
},
|
| 43 |
"Health": {
|
| 44 |
+
"precision": 0.7917267917267917,
|
| 45 |
+
"recall": 0.8372329603255341,
|
| 46 |
+
"f1-score": 0.8138442521631644,
|
| 47 |
"support": 1966
|
| 48 |
},
|
| 49 |
"Politics & Government": {
|
| 50 |
+
"precision": 0.7916459472899056,
|
| 51 |
+
"recall": 0.8097660223804679,
|
| 52 |
+
"f1-score": 0.8006034699522253,
|
| 53 |
"support": 1966
|
| 54 |
},
|
| 55 |
"Science & Mathematics": {
|
| 56 |
+
"precision": 0.749162278602202,
|
| 57 |
+
"recall": 0.7972491085073866,
|
| 58 |
+
"f1-score": 0.7724580454096742,
|
| 59 |
"support": 1963
|
| 60 |
},
|
| 61 |
"Society & Culture": {
|
| 62 |
+
"precision": 0.6588683351468988,
|
| 63 |
+
"recall": 0.6181725370086779,
|
| 64 |
+
"f1-score": 0.637872004213853,
|
| 65 |
"support": 1959
|
| 66 |
},
|
| 67 |
"Sports": {
|
| 68 |
+
"precision": 0.909317389138017,
|
| 69 |
+
"recall": 0.9249873289406995,
|
| 70 |
+
"f1-score": 0.9170854271356784,
|
| 71 |
"support": 1973
|
| 72 |
},
|
| 73 |
"macro avg": {
|
| 74 |
+
"precision": 0.7505585379497595,
|
| 75 |
+
"recall": 0.7548130337609815,
|
| 76 |
+
"f1-score": 0.7510203361961008,
|
| 77 |
"support": 19670
|
| 78 |
}
|
| 79 |
}
|
pyproject.toml
CHANGED
|
@@ -29,7 +29,7 @@ kaggle = ">=1.5.12"
|
|
| 29 |
streamlit = ">=1.25.0"
|
| 30 |
plotly = ">=5.18.0"
|
| 31 |
faiss-cpu = "1.9.0"
|
| 32 |
-
huggingface_hub = ">=0.
|
| 33 |
hydra-core = "^1.3.0"
|
| 34 |
bitsandbytes = ">=0.41.0"
|
| 35 |
accelerate = ">=0.21.0"
|
|
|
|
| 29 |
streamlit = ">=1.25.0"
|
| 30 |
plotly = ">=5.18.0"
|
| 31 |
faiss-cpu = "1.9.0"
|
| 32 |
+
huggingface_hub = ">=0.34.0,<1.0"
|
| 33 |
hydra-core = "^1.3.0"
|
| 34 |
bitsandbytes = ">=0.41.0"
|
| 35 |
accelerate = ">=0.21.0"
|
scripts/delete_hf_repo.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Delete Hugging Face repository."""
|
| 2 |
+
from huggingface_hub import HfApi
|
| 3 |
+
|
| 4 |
+
# Login uses HF_TOKEN environment variable automatically
|
| 5 |
+
|
| 6 |
+
# Initialize API
|
| 7 |
+
api = HfApi()
|
| 8 |
+
|
| 9 |
+
# Delete the OliverPerrin/LexiMind model repository
|
| 10 |
+
repo_id = "OliverPerrin/LexiMind"
|
| 11 |
+
|
| 12 |
+
try:
|
| 13 |
+
print(f"Deleting model repository {repo_id}...")
|
| 14 |
+
api.delete_repo(repo_id=repo_id, repo_type="model")
|
| 15 |
+
print(f"✓ Successfully deleted {repo_id}")
|
| 16 |
+
except Exception as e:
|
| 17 |
+
print(f"Error deleting repository: {e}")
|
scripts/demo_gradio.py
CHANGED
|
@@ -65,13 +65,13 @@ def get_pipeline():
|
|
| 65 |
repo_id="OliverPerrin/LexiMind-Model",
|
| 66 |
filename="best.pt",
|
| 67 |
local_dir="checkpoints",
|
| 68 |
-
local_dir_use_symlinks=False,
|
| 69 |
)
|
| 70 |
|
| 71 |
_pipeline, _ = create_inference_pipeline(
|
| 72 |
tokenizer_dir="artifacts/hf_tokenizer/",
|
| 73 |
checkpoint_path="checkpoints/best.pt",
|
| 74 |
labels_path="artifacts/labels.json",
|
|
|
|
| 75 |
)
|
| 76 |
return _pipeline
|
| 77 |
|
|
|
|
| 65 |
repo_id="OliverPerrin/LexiMind-Model",
|
| 66 |
filename="best.pt",
|
| 67 |
local_dir="checkpoints",
|
|
|
|
| 68 |
)
|
| 69 |
|
| 70 |
_pipeline, _ = create_inference_pipeline(
|
| 71 |
tokenizer_dir="artifacts/hf_tokenizer/",
|
| 72 |
checkpoint_path="checkpoints/best.pt",
|
| 73 |
labels_path="artifacts/labels.json",
|
| 74 |
+
model_config_path="configs/model/base.yaml",
|
| 75 |
)
|
| 76 |
return _pipeline
|
| 77 |
|
scripts/upload_model_to_hf.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Upload model checkpoint to Hugging Face Hub."""
|
| 2 |
+
import os
|
| 3 |
+
from huggingface_hub import HfApi, create_repo
|
| 4 |
+
|
| 5 |
+
# Login uses HF_TOKEN environment variable automatically
|
| 6 |
+
|
| 7 |
+
# Initialize API
|
| 8 |
+
api = HfApi()
|
| 9 |
+
|
| 10 |
+
# Model repository
|
| 11 |
+
repo_id = "OliverPerrin/LexiMind-Model"
|
| 12 |
+
model_file = "checkpoints/best.pt"
|
| 13 |
+
|
| 14 |
+
# Create repository if it doesn't exist
|
| 15 |
+
try:
|
| 16 |
+
print(f"Creating model repository {repo_id}...")
|
| 17 |
+
create_repo(repo_id=repo_id, repo_type="model", exist_ok=True, private=False)
|
| 18 |
+
print("✓ Repository created/verified")
|
| 19 |
+
except Exception as e:
|
| 20 |
+
print(f"Repository creation: {e}")
|
| 21 |
+
|
| 22 |
+
print(f"Uploading {model_file} to {repo_id}...")
|
| 23 |
+
|
| 24 |
+
# Upload the model file
|
| 25 |
+
api.upload_file(
|
| 26 |
+
path_or_fileobj=model_file,
|
| 27 |
+
path_in_repo="best.pt",
|
| 28 |
+
repo_id=repo_id,
|
| 29 |
+
repo_type="model",
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
print("✓ Model uploaded successfully!")
|
src/inference/factory.py
CHANGED
|
@@ -58,7 +58,8 @@ def create_inference_pipeline(
|
|
| 58 |
|
| 59 |
tokenizer = Tokenizer(resolved_tokenizer_config)
|
| 60 |
|
| 61 |
-
# Default to base config
|
|
|
|
| 62 |
if model_config_path is None:
|
| 63 |
model_config_path = (
|
| 64 |
Path(__file__).resolve().parent.parent.parent / "configs" / "model" / "base.yaml"
|
|
|
|
| 58 |
|
| 59 |
tokenizer = Tokenizer(resolved_tokenizer_config)
|
| 60 |
|
| 61 |
+
# Default to the base config because the published checkpoints were trained
|
| 62 |
+
# with the 12-layer FLAN-T5-base alignment (vocab 32128, rel pos bias).
|
| 63 |
if model_config_path is None:
|
| 64 |
model_config_path = (
|
| 65 |
Path(__file__).resolve().parent.parent.parent / "configs" / "model" / "base.yaml"
|