Spaces:

umerforsure
/

AI-Study-Assistant

Running

App Files Files Community

umerforsure committed on Aug 6

Commit

c577877

0 Parent(s):

🚀 Initial commit

Browse files

Files changed (3) hide show

Read Me.txt +78 -0
app.py +173 -0
requirements.txt +7 -0

Read Me.txt ADDED Viewed

	@@ -0,0 +1,78 @@

+# 📚 AI Study Assistant (Powered by Microsoft Phi-3)
+Welcome to the **AI Study Assistant** — your personalized academic learning tool, built to help students understand complex course material with clarity, precision, and speed.
+### 🚀 What it does
+- ✅ **Upload Study Material**: PDF, DOCX, TXT, or PPTX
+- 🤖 **Ask Questions**: About any concept in the uploaded files
+- 🧠 **AI-Powered Reasoning**: Uses Microsoft’s Phi-3 (128k) to provide deep, structured, academic responses
+- 📊 **Retrieval-Augmented Generation**: Combines vector-based document search with LLMs
+- 📘 **Context-Based Answers**: Designed to stay grounded — the model is instructed to answer only from what’s in your file
+- ✨ **Auto-Summary**: Long answers get a short academic recap appended at the end
+---
+### 🎓 Who it's for
+- Students revising for exams
+- Professionals needing quick insights from documents
+- Educators building tutoring tools
+- Anyone learning from dense academic material
+---
+### 🛠 How it works
+1. **Upload** a lecture file (PDF, DOCX, PPTX, or TXT)
+2. Ask a question like:
+   - *"Define supervised learning"*
+   - *"Explain the difference between classification and regression"*
+3. The AI will:
+   - Extract the text from your document
+   - Find the most relevant parts
+   - Think deeply using a powerful LLM
+   - Give a clean, well-structured, academic answer
+---
+### 🧠 Under the hood
+| Component        | Description                                              |
+|------------------|----------------------------------------------------------|
+| **LLM**           | `microsoft/phi-3-mini-128k-instruct` (loaded in bfloat16) |
+| **Vector Search** | FAISS + HuggingFace MiniLM Embeddings                   |
+| **Framework**     | Gradio Interface via Hugging Face Spaces               |
+| **Auto Summary**  | DistilBART summarization for long answers               |
+---
+### 🔒 Privacy
+This app does **not** store or log any of your uploaded files or questions. All processing happens in memory and is discarded after your session.
+---
+### 💬 Example Prompts
+> - "List all types of memory in operating systems"
+> - "What does this document say about reinforcement learning?"
+> - "Explain deadlock prevention based on the slides"
+---
+### 💡 Inspiration
+Built to replicate how a personal AI tutor should behave: focused, accurate, context-aware, and reliable — not a chatbot, but a **study partner**.
+---
+### 🧑‍💻 Built by
+> Designed & developed by a student — for students — using only open-source models.
+---
+### 📎 Tags
+`#Education` `#AI` `#Phi3` `#StudentTools` `#NLP` `#Gradio` `#DocumentQA` `#OpenSource`

app.py ADDED Viewed

	@@ -0,0 +1,173 @@

+# app.py
+import os
+import tempfile
+import re
+import torch
+import gradio as gr
+from PyPDF2 import PdfReader
+from docx import Document as DocxDocument
+from pptx import Presentation
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_core.documents import Document
+# Load the reasoning model: tokenizer and causal-LM weights for the Phi-3 Mini 128k instruct checkpoint.
+model_id = "microsoft/phi-3-mini-128k-instruct"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,  # weights are requested in bfloat16; no quantization config is passed
+    device_map="auto"  # NOTE(review): device_map usually needs `accelerate`, which requirements.txt does not list - confirm
+)
+reasoning_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer,  # shared generator called by ask_question()
+                              max_new_tokens=512, temperature=0.7, top_p=0.9)  # NOTE(review): temperature/top_p presumably only apply when sampling is enabled (do_sample=True) - confirm decoding mode
+# Embedding model used to build the FAISS index in process_file().
+embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+vectorstore = None  # module-level FAISS index; stays None until process_file() succeeds, replaced on every upload
+# Summarization pipeline used by post_process_output() to recap long answers.
+summary_pipeline = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
+def clean_text(text):
+    lines = text.split("\n")
+    cleaned = []
+    for line in lines:
+        line = line.strip()
+        if re.search(r'(Page \d+|Slide \d+|CS583|UIC|Bing Liu)', line, re.IGNORECASE):
+            continue
+        if len(line) < 3:
+            continue
+        line = re.sub(r'[^\x00-\x7F]+', ' ', line)
+        cleaned.append(line)
+    return "\n".join(cleaned)
+def extract_text(file_path, ext):
+    if ext == ".pdf":
+        reader = PdfReader(file_path)
+        return "\n".join([page.extract_text() or "" for page in reader.pages])
+    elif ext == ".docx":
+        doc = DocxDocument(file_path)
+        return "\n".join([p.text for p in doc.paragraphs])
+    elif ext == ".txt":
+        with open(file_path, "r", encoding="utf-8") as f:
+            return f.read()
+    elif ext == ".pptx":
+        prs = Presentation(file_path)
+        return "\n".join(shape.text for slide in prs.slides for shape in slide.shapes if hasattr(shape, "text"))
+    else:
+        raise ValueError("Unsupported file format")
+def process_file(file):
+    global vectorstore
+    try:
+        ext = os.path.splitext(file.name)[1].lower()
+        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
+            tmp.write(file.read())
+            tmp.flush()
+            full_text = extract_text(tmp.name, ext)
+        cleaned = clean_text(full_text)
+        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
+        chunks = splitter.split_text(cleaned)
+        docs = [Document(page_content=c) for c in chunks]
+        vectorstore = FAISS.from_documents(docs, embedding_model)
+        return "✅ File processed. You can now ask questions."
+    except Exception as e:
+        return f"❌ Error: {str(e)}"
+def generate_prompt(context, question):
+    return f"""
+You are a helpful academic tutor assisting a student strictly based on course slides or textbook material.
+Context:
+{context}
+Question:
+{question}
+Instructions:
+- Answer ONLY using the above context. Do NOT add outside knowledge.
+- Think clearly and deeply before answering.
+- Use structured academic language based strictly on the context.
+- Use clean formatting with helpful headings and minimal bullet points.
+- Do NOT repeat the question or include prompt labels.
+- If the context lacks an answer, say: "The provided material does not contain sufficient information to answer this question accurately."
+- Output must be academically concise, well-organized, and visually clear.
+""".strip()
+def detect_question_type(q):
+    q = q.lower().strip()
+    if q.startswith(("what is", "define", "give definition")):
+        return "definition"
+    elif q.startswith(("how", "explain", "why")):
+        return "explanation"
+    elif "difference between" in q or "compare" in q:
+        return "comparison"
+    elif q.startswith("list") or "types of" in q:
+        return "list"
+    return "general"
+def post_process_output(answer_text, question):
+    qtype = detect_question_type(question)
+    label_map = {
+        "definition": "📘 **Definition**",
+        "explanation": "📘 **Explanation**",
+        "comparison": "📘 **Comparison**",
+        "list": "📘 **Key Points**",
+        "general": "📘 **Insight**",
+    }
+    answer_text = f"{label_map.get(qtype)}\n\n{answer_text}"
+    if len(answer_text.split()) > 80:
+        summary = summary_pipeline(answer_text, max_length=60, min_length=25, do_sample=False)[0]['summary_text']
+        answer_text += f"\n\n📝 **Summary:** {summary.strip()}"
+    return answer_text
+def ask_question(question):
+    global vectorstore
+    if vectorstore is None:
+        return "❌ Please upload and process a file first."
+    docs = vectorstore.similarity_search(question, k=3)
+    if not docs:
+        return "❌ No relevant information found."
+    context = "\n".join([doc.page_content for doc in docs])
+    prompt = generate_prompt(context, question)
+    result = reasoning_pipeline(prompt)[0]['generated_text']
+    for marker in ["Context:", "Question:", "Instructions:"]:
+        if marker in result:
+            result = result.split(marker)[-1].strip()
+    if "." in result:
+        result = result.rsplit(".", 1)[0] + "."
+    return post_process_output(result.strip(), question)
+# Gradio UI: one upload row, a status box, a question box and a Markdown answer area.
+title = "📚 AI Study Assistant"  # NOTE(review): not referenced anywhere else in the visible code
+with gr.Blocks(css="footer {display:none !important}") as demo:  # the CSS rule hides the page footer
+    gr.Markdown("""# 📘 AI Study Assistant
+Upload your lecture notes and ask deep academic questions. Powered by Phi-3 & FAISS.""")  # page heading and tagline
+    with gr.Row():  # file picker and its button share one row
+        file_input = gr.File(label="Upload Course Material (PDF, DOCX, TXT, PPTX)")
+        upload_btn = gr.Button("Process File")
+    status = gr.Textbox(label="Status", interactive=False)  # read-only; shows the message returned by process_file()
+    question = gr.Textbox(label="Ask a Question", placeholder="E.g., What is demand paging?")
+    ask_btn = gr.Button("Get Answer")
+    answer = gr.Markdown("", elem_id="answer-box")  # rendered as Markdown so headings and bold text display
+    upload_btn.click(fn=process_file, inputs=file_input, outputs=status)  # (re)builds the vector index from the upload
+    ask_btn.click(fn=ask_question, inputs=question, outputs=answer)  # retrieval + generation for the typed question
+if __name__ == "__main__":  # start the server only when run as a script
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+transformers
+torch
+pypdf
+faiss-cpu
+python-pptx
+python-docx
+gradio