OliverPerrin committed on
Commit
d9dbe7c
·
1 Parent(s): 1ec7405

Update Gradio demo, inference factory, and evaluation results

Browse files
outputs/evaluation_report.json CHANGED
@@ -1,79 +1,79 @@
1
  {
2
  "split": "val",
3
  "summarization": {
4
- "rouge_like": 0.13567121660564777,
5
- "bleu": 0.014673668103097205
6
  },
7
  "emotion": {
8
- "f1_macro": 0.1939181685447693
9
  },
10
  "topic": {
11
- "accuracy": 0.741687849517031,
12
  "classification_report": {
13
  "Business & Finance": {
14
- "precision": 0.6439114391143912,
15
- "recall": 0.527190332326284,
16
- "f1-score": 0.579734219269103,
17
  "support": 1986
18
  },
19
  "Computers & Internet": {
20
- "precision": 0.8251038301799724,
21
- "recall": 0.9044006069802731,
22
- "f1-score": 0.862934362934363,
23
  "support": 1977
24
  },
25
  "Education & Reference": {
26
- "precision": 0.6439444076770351,
27
- "recall": 0.49642857142857144,
28
- "f1-score": 0.560645347162201,
29
  "support": 1960
30
  },
31
  "Entertainment & Music": {
32
- "precision": 0.7064310260186549,
33
- "recall": 0.7360613810741689,
34
- "f1-score": 0.7209418837675351,
35
  "support": 1955
36
  },
37
  "Family & Relationships": {
38
- "precision": 0.7182971014492754,
39
- "recall": 0.8071246819338422,
40
- "f1-score": 0.7601246105919003,
41
  "support": 1965
42
  },
43
  "Health": {
44
- "precision": 0.7610579115367077,
45
- "recall": 0.8489318413021363,
46
- "f1-score": 0.8025967780716519,
47
  "support": 1966
48
  },
49
  "Politics & Government": {
50
- "precision": 0.7711132437619962,
51
- "recall": 0.8173957273652085,
52
- "f1-score": 0.7935802469135802,
53
  "support": 1966
54
  },
55
  "Science & Mathematics": {
56
- "precision": 0.7456647398843931,
57
- "recall": 0.7885888945491595,
58
- "f1-score": 0.7665263679128497,
59
  "support": 1963
60
  },
61
  "Society & Culture": {
62
- "precision": 0.6496559633027523,
63
- "recall": 0.5783563042368556,
64
- "f1-score": 0.6119362678908993,
65
  "support": 1959
66
  },
67
  "Sports": {
68
- "precision": 0.8888339920948617,
69
- "recall": 0.9118094272681196,
70
- "f1-score": 0.9001751313485113,
71
  "support": 1973
72
  },
73
  "macro avg": {
74
- "precision": 0.735401365502004,
75
- "recall": 0.7416287768464619,
76
- "f1-score": 0.7359195215862595,
77
  "support": 19670
78
  }
79
  }
 
1
  {
2
  "split": "val",
3
  "summarization": {
4
+ "rouge_like": 0.2817535277055523,
5
+ "bleu": 0.06501593900536834
6
  },
7
  "emotion": {
8
+ "f1_macro": 0.4053446650505066
9
  },
10
  "topic": {
11
+ "accuracy": 0.7548042704626334,
12
  "classification_report": {
13
  "Business & Finance": {
14
+ "precision": 0.6826859776168532,
15
+ "recall": 0.5221550855991943,
16
+ "f1-score": 0.5917261055634807,
17
  "support": 1986
18
  },
19
  "Computers & Internet": {
20
+ "precision": 0.8468166586883676,
21
+ "recall": 0.894790085988872,
22
+ "f1-score": 0.8701426463354648,
23
  "support": 1977
24
  },
25
  "Education & Reference": {
26
+ "precision": 0.6067106710671067,
27
+ "recall": 0.5627551020408164,
28
+ "f1-score": 0.5839068290100582,
29
  "support": 1960
30
  },
31
  "Entertainment & Music": {
32
+ "precision": 0.732976653696498,
33
+ "recall": 0.7708439897698209,
34
+ "f1-score": 0.7514335577162802,
35
  "support": 1955
36
  },
37
  "Family & Relationships": {
38
+ "precision": 0.7356746765249538,
39
+ "recall": 0.8101781170483461,
40
+ "f1-score": 0.7711310244611286,
41
  "support": 1965
42
  },
43
  "Health": {
44
+ "precision": 0.7917267917267917,
45
+ "recall": 0.8372329603255341,
46
+ "f1-score": 0.8138442521631644,
47
  "support": 1966
48
  },
49
  "Politics & Government": {
50
+ "precision": 0.7916459472899056,
51
+ "recall": 0.8097660223804679,
52
+ "f1-score": 0.8006034699522253,
53
  "support": 1966
54
  },
55
  "Science & Mathematics": {
56
+ "precision": 0.749162278602202,
57
+ "recall": 0.7972491085073866,
58
+ "f1-score": 0.7724580454096742,
59
  "support": 1963
60
  },
61
  "Society & Culture": {
62
+ "precision": 0.6588683351468988,
63
+ "recall": 0.6181725370086779,
64
+ "f1-score": 0.637872004213853,
65
  "support": 1959
66
  },
67
  "Sports": {
68
+ "precision": 0.909317389138017,
69
+ "recall": 0.9249873289406995,
70
+ "f1-score": 0.9170854271356784,
71
  "support": 1973
72
  },
73
  "macro avg": {
74
+ "precision": 0.7505585379497595,
75
+ "recall": 0.7548130337609815,
76
+ "f1-score": 0.7510203361961008,
77
  "support": 19670
78
  }
79
  }
pyproject.toml CHANGED
@@ -29,7 +29,7 @@ kaggle = ">=1.5.12"
29
  streamlit = ">=1.25.0"
30
  plotly = ">=5.18.0"
31
  faiss-cpu = "1.9.0"
32
- huggingface_hub = ">=0.19.0"
33
  hydra-core = "^1.3.0"
34
  bitsandbytes = ">=0.41.0"
35
  accelerate = ">=0.21.0"
 
29
  streamlit = ">=1.25.0"
30
  plotly = ">=5.18.0"
31
  faiss-cpu = "1.9.0"
32
+ huggingface_hub = ">=0.34.0,<1.0"
33
  hydra-core = "^1.3.0"
34
  bitsandbytes = ">=0.41.0"
35
  accelerate = ">=0.21.0"
scripts/delete_hf_repo.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Delete Hugging Face repository."""
from huggingface_hub import HfApi

# Auth: the hub client reads the HF_TOKEN environment variable automatically,
# so no explicit login call is needed here.

# Repository slated for deletion.
REPO_ID = "OliverPerrin/LexiMind"

api = HfApi()

try:
    print(f"Deleting model repository {REPO_ID}...")
    api.delete_repo(repo_id=REPO_ID, repo_type="model")
    print(f"✓ Successfully deleted {REPO_ID}")
except Exception as e:
    # Best-effort script: report the failure rather than traceback.
    print(f"Error deleting repository: {e}")
scripts/demo_gradio.py CHANGED
@@ -65,13 +65,13 @@ def get_pipeline():
65
  repo_id="OliverPerrin/LexiMind-Model",
66
  filename="best.pt",
67
  local_dir="checkpoints",
68
- local_dir_use_symlinks=False,
69
  )
70
 
71
  _pipeline, _ = create_inference_pipeline(
72
  tokenizer_dir="artifacts/hf_tokenizer/",
73
  checkpoint_path="checkpoints/best.pt",
74
  labels_path="artifacts/labels.json",
 
75
  )
76
  return _pipeline
77
 
 
65
  repo_id="OliverPerrin/LexiMind-Model",
66
  filename="best.pt",
67
  local_dir="checkpoints",
 
68
  )
69
 
70
  _pipeline, _ = create_inference_pipeline(
71
  tokenizer_dir="artifacts/hf_tokenizer/",
72
  checkpoint_path="checkpoints/best.pt",
73
  labels_path="artifacts/labels.json",
74
+ model_config_path="configs/model/base.yaml",
75
  )
76
  return _pipeline
77
 
scripts/upload_model_to_hf.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Upload model checkpoint to Hugging Face Hub."""
from pathlib import Path

from huggingface_hub import HfApi, create_repo

# Auth: the hub client reads the HF_TOKEN environment variable automatically,
# so no explicit login call is needed here.

# Target model repository and the local checkpoint to publish.
repo_id = "OliverPerrin/LexiMind-Model"
model_file = "checkpoints/best.pt"

# Fail fast with a clear message if the checkpoint is missing, instead of a
# deep error from inside the upload call.
if not Path(model_file).is_file():
    raise SystemExit(f"Checkpoint not found: {model_file}")

api = HfApi()

# Create the repository if it doesn't exist (exist_ok makes this idempotent).
try:
    print(f"Creating model repository {repo_id}...")
    create_repo(repo_id=repo_id, repo_type="model", exist_ok=True, private=False)
    print("✓ Repository created/verified")
except Exception as e:
    # Best-effort: report and continue — upload_file will surface a real
    # auth/permission failure on its own.
    print(f"Repository creation: {e}")

print(f"Uploading {model_file} to {repo_id}...")

# Upload the checkpoint into the repo root as "best.pt".
api.upload_file(
    path_or_fileobj=model_file,
    path_in_repo="best.pt",
    repo_id=repo_id,
    repo_type="model",
)

print("✓ Model uploaded successfully!")
src/inference/factory.py CHANGED
@@ -58,7 +58,8 @@ def create_inference_pipeline(
58
 
59
  tokenizer = Tokenizer(resolved_tokenizer_config)
60
 
61
- # Default to base config if not specified (checkpoint was trained with base config)
 
62
  if model_config_path is None:
63
  model_config_path = (
64
  Path(__file__).resolve().parent.parent.parent / "configs" / "model" / "base.yaml"
 
58
 
59
  tokenizer = Tokenizer(resolved_tokenizer_config)
60
 
61
+ # Default to the base config because the published checkpoints were trained
62
+ # with the 12-layer FLAN-T5-base alignment (vocab 32128, rel pos bias).
63
  if model_config_path is None:
64
  model_config_path = (
65
  Path(__file__).resolve().parent.parent.parent / "configs" / "model" / "base.yaml"