# LOTUS — "Lecture Overwritten To Unique Story" (Hugging Face Spaces app)
| import gradio as gr | |
| import asyncio | |
| import os | |
| import sys | |
| from langchain_mcp_adapters.client import MultiServerMCPClient | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain.agents import create_agent | |
| import tempfile | |
| import shutil | |
| from datetime import datetime | |
| import re | |
# --- Environment configuration (runs at import time) ---

# API keys are injected via Hugging Face Spaces "Secrets"; the app cannot
# function without both, so fail fast on import.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
if not GEMINI_API_KEY or not ELEVENLABS_API_KEY:
    raise ValueError("API keys must be set in Hugging Face Secrets")
# Re-export the ElevenLabs key so spawned MCP server subprocesses (which
# inherit this environment) can read it.
os.environ["ELEVENLABS_API_KEY"] = ELEVENLABS_API_KEY

# Absolute directory of this file; used below to locate the bundled MCP servers.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Make the vendored MCP server packages importable as top-level modules.
sys.path.insert(0, os.path.join(BASE_DIR, "mcp_servers", "elevenlabs-mcp"))
sys.path.insert(0, os.path.join(BASE_DIR, "mcp_servers", "mcp_pdf_reader"))
sys.path.insert(0, os.path.join(BASE_DIR, "mcp_servers", "ai_writers_workshop"))
class ReasoningLogger:
    """Accumulates the agent's reasoning trace and renders it as clean text."""

    # Regexes matching verbose LangChain object-repr noise; every logged
    # entry has these stripped before display.
    _NOISE = (
        r'messages=\[.*?\]',
        r'content=\'.*?\'(?=\s|$)',
        r'ToolMessage\(.*?\)',
        r'additional_kwargs=\{.*?\}',
        r'response_metadata=\{.*?\}',
        r'id=\'.*?\'',
        r'usage_metadata=\{.*?\}',
    )

    def __init__(self):
        # Ordered list of pre-formatted log fragments, joined by get_log().
        self.logs = []
        # Name of the most recently logged phase (None until the first phase).
        self.current_phase = None

    @staticmethod
    def _now():
        """Current wall-clock time formatted as HH:MM:SS."""
        return datetime.now().strftime("%H:%M:%S")

    def log_phase(self, phase, content):
        """Record a major phase header framed by horizontal-rule banners."""
        rule = "─" * 80
        body = self._clean_content(content)
        self.logs.append(f"\n{rule}\n {self._now()} | {phase}\n{rule}\n{body}\n")
        self.current_phase = phase

    def log_action(self, action, details):
        """Record a timestamped action line followed by its cleaned details."""
        self.logs.append(f"\n {self._now()} | {action}\n{self._clean_content(details)}\n")

    def log_result(self, result):
        """Record a timestamped one-line result."""
        self.logs.append(f"\n {self._now()} | {self._clean_content(result)}\n")

    def log_step(self, step_num, description):
        """Record an indented numbered sub-step (no timestamp)."""
        self.logs.append(f" └─ Step {step_num}: {description}\n")

    def _clean_content(self, content):
        """Strip repr noise, tidy whitespace, and truncate very long output."""
        if not content:
            return ""
        text = str(content)
        # Collapse runs of three-or-more (possibly padded) newlines.
        text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text)
        for pattern in self._NOISE:
            text = re.sub(pattern, '', text, flags=re.DOTALL)
        if 'AgentFinish' in text:
            # Keep only the 'output' payload of an AgentFinish repr.
            found = re.search(r'output[\'"]:\s*[\'"](.+?)[\'"]', text, re.DOTALL)
            if found:
                text = found.group(1)
        # Present tool invocations on their own labelled lines.
        text = re.sub(r'name=\'(\w+)\'', r'\n Tool: \1', text)
        text = re.sub(
            r'args=\{([^}]+)\}',
            lambda m: f'\n Parameters: {self._format_args(m.group(1))}',
            text,
        )
        rows = text.split('\n')
        if len(rows) > 30:
            text = '\n'.join(rows[:25]) + f'\n\n... ({len(rows) - 25} more lines) ...\n'
        return text.strip()

    def _format_args(self, args_str):
        """Render tool arguments without quote clutter, capped at 100 chars."""
        shown = args_str.replace('\'', '').replace('"', '')
        return shown[:100] + '...' if len(shown) > 100 else shown

    def get_log(self):
        """Return the complete formatted log as a single string."""
        return "\n" + "".join(self.logs)
async def run_agent_with_reasoning(age: int, gender: str, topic: str, pdf_temp_path: str, progress=gr.Progress()):
    """Run the full PDF → story → audio pipeline as an async generator.

    Yields (log_text, audio_path) tuples after each phase so the UI can
    refresh; audio_path stays None until a generated .mp3 is located.

    Args:
        age: Student age, used to tune the story's reading level.
        gender: Student gender (interpolated into the prompts).
        topic: Lecture topic/concept the story should teach.
        pdf_temp_path: Filesystem path of the uploaded lecture PDF.
        progress: Gradio progress reporter driving the UI progress bar.
    """
    logger = ReasoningLogger()
    # Directory the agent is instructed to write generated audio into.
    # NOTE(review): never removed in this function — leaks one temp dir per
    # run; confirm whether that is acceptable in the Spaces container.
    output_dir = tempfile.mkdtemp()

    # Phase 1: Planning — ask Gemini for a short numbered plan before acting.
    progress(0.1, desc="Agent is analyzing task and creating plan...")
    planning_prompt = f"""
You are an autonomous teaching agent. Analyze this task and create a concise plan:
TASK: Create an engaging audio story for a {age}-year-old {gender} student about "{topic}" based on a lecture PDF.
Provide a brief, numbered plan (4-5 steps maximum) without excessive detail.
"""
    # Initialize LLM for planning; the same instance is reused as the agent's model.
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=GEMINI_API_KEY,
        temperature=0.7
    )
    try:
        planning_response = await llm.ainvoke(planning_prompt)
        plan_text = planning_response.content if hasattr(planning_response, 'content') else str(planning_response)
        # Keep only lines that look like plan steps (start with a digit or "-").
        plan_lines = [line for line in plan_text.split('\n') if line.strip() and (line.strip()[0].isdigit() or line.strip().startswith('-'))]
        clean_plan = '\n'.join(plan_lines[:5])  # Limit to 5 steps
        logger.log_phase("PLANNING", clean_plan)
        yield logger.get_log(), None
    except Exception as e:
        # Planning is mandatory: surface the error in the log and stop the run.
        logger.log_phase("PLANNING ERROR", str(e))
        yield logger.get_log(), None
        return

    # Phase 2: Tool Setup — one stdio MCP server subprocess per capability.
    progress(0.2, desc="🔧 Setting up MCP tools...")
    logger.log_action("TOOL INITIALIZATION", "Connecting to: PDF Reader, AI Writer, ElevenLabs TTS")
    yield logger.get_log(), None

    # Spawn the bundled servers with the same interpreter running this app.
    python_exe = sys.executable
    client = MultiServerMCPClient({
        "pdf-reader": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "mcp_pdf_reader", "src", "server.py")]
        },
        "ai-writer": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "ai_writers_workshop", "mcp_server", "server.py")]
        },
        "ElevenLabs": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "elevenlabs-mcp", "elevenlabs_mcp", "server.py")],
            # The TTS server subprocess reads its API key from its environment.
            "env": {"ELEVENLABS_API_KEY": ELEVENLABS_API_KEY}
        }
    })
    # Collect tools from every server, de-duplicating by tool name.
    # NOTE(review): get_tools() appears to return tools across servers, so
    # calling it once per session may be redundant — confirm against the
    # langchain-mcp-adapters documentation.
    all_tools = []
    seen = set()
    for server_name in ["pdf-reader", "ai-writer", "ElevenLabs"]:
        async with client.session(server_name):
            tools = await client.get_tools()
            for t in tools:
                if t.name not in seen:
                    all_tools.append(t)
                    seen.add(t.name)
    logger.log_result(f"Loaded {len(all_tools)} tools: {', '.join([t.name for t in all_tools])}")
    yield logger.get_log(), None

    # Phase 3: Autonomous Execution — hand the task to a tool-using agent.
    progress(0.3, desc="🤖 Agent executing plan autonomously...")
    system_instruction = f"""
You are an autonomous teaching agent. Be concise in your responses.
CONTEXT:
- Student: {age}-year-old {gender}
- Topic: "{topic}"
- PDF Path: {pdf_temp_path}
- Audio Output Directory: {output_dir}
YOUR WORKFLOW:
1. Read PDF and extract relevant content about the topic
2. Write an age-appropriate story teaching key concepts
3. Generate audio with output_directory: "{output_dir}"
Execute autonomously. Provide brief status updates only when starting a new major step.
"""
    agent = create_agent(model=llm, tools=all_tools)
    agent_input = {
        "messages": [
            {"role": "system", "content": system_instruction},
            {
                "role": "user",
                "content": f"Execute the plan. Give brief updates for each major step."
            }
        ]
    }
    logger.log_phase("EXECUTION", "Agent is working autonomously...")
    yield logger.get_log(), None
    progress(0.5, desc="📖 Processing content...")
    try:
        result = await agent.ainvoke(agent_input)
        # Extract a clean, short summary from the (potentially huge) agent result.
        result_text = str(result)
        # Try to extract key information.
        # NOTE(review): create_agent results are keyed by "messages", so the
        # 'output' branch may never trigger — verify with the LangChain docs.
        if 'output' in result:
            summary = result.get('output', 'Execution completed')
        else:
            # Extract just the essential info from the raw repr.
            summary_match = re.search(r'(Story.*?generated|Audio.*?created|File saved.*?\.mp3)', result_text, re.IGNORECASE | re.DOTALL)
            summary = summary_match.group(0) if summary_match else "Task completed successfully"
        if len(summary) > 200:
            summary = summary[:200] + "..."
        logger.log_phase("EXECUTION COMPLETE", summary)
        progress(0.9, desc="🎵 Finalizing audio generation...")
        yield logger.get_log(), None
        # Look for the audio file: primary location is the directory we asked for.
        audio_path = None
        if output_dir and os.path.exists(output_dir):
            mp3_files = [f for f in os.listdir(output_dir) if f.endswith('.mp3')]
            if mp3_files:
                audio_path = os.path.join(output_dir, mp3_files[0])
                logger.log_result(f"Audio generated: {mp3_files[0]}")
        # Fallback: parse the agent's textual output for a saved-file path.
        if not audio_path and "File saved as:" in result_text:
            match = re.search(r'File saved as:\s*([^\s]+\.mp3)', result_text)
            if match:
                file_path = match.group(1)
                if os.path.exists(file_path):
                    audio_path = file_path
                    logger.log_result(f"Audio file: {os.path.basename(file_path)}")
        if not audio_path:
            logger.log_result("⚠️ Audio generation completed but file location uncertain")
        progress(1.0, desc="✅ Complete!")
        yield logger.get_log(), audio_path
    except Exception as e:
        logger.log_phase("ERROR", str(e))
        yield logger.get_log(), None
def gradio_handler(age, gender, topic, pdf_file, progress=gr.Progress()):
    """Synchronous Gradio callback for the "Start" button.

    Copies the uploaded PDF to a private temp directory, drives the async
    agent pipeline to completion, and returns the final (log_text, audio_path)
    pair. Returns an error message (with None audio) on missing upload or
    any failure.
    """
    if not pdf_file:
        return "❌ Please upload a PDF.", None

    # Copy the upload so MCP subprocesses get a stable, predictable path.
    temp_dir = tempfile.mkdtemp()
    pdf_path = os.path.join(temp_dir, "lecture.pdf")
    shutil.copy(pdf_file, pdf_path)
    try:
        async def run_generator():
            """Drain the async generator; keep only the last (log, audio) pair."""
            final_log, final_audio = None, None
            async for log, audio in run_agent_with_reasoning(age, gender, topic, pdf_path, progress):
                final_log, final_audio = log, audio
            return final_log, final_audio

        # asyncio.run creates and closes a fresh event loop and — unlike the
        # manual new_event_loop/run_until_complete/close sequence — also
        # finalizes pending async generators before tearing the loop down.
        return asyncio.run(run_generator())
    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
    finally:
        # Always remove the temp copy of the PDF, even on error.
        shutil.rmtree(temp_dir, ignore_errors=True)
# --- Gradio UI (module level: building `demo` is an intentional side effect,
# required by Hugging Face Spaces, which imports this module to find `demo`) ---
with gr.Blocks() as demo:
    # App header, rendered as HTML inside a Markdown component.
    gr.Markdown(
        """
<h1 style='text-align:center;'>LOTUS</h1>
<p style='text-align:center; font-size:18px;'>
"Lecture Overwritten To Unique Story"<br>
</p>
"""
    )
    with gr.Row():
        # Left column: student configuration and PDF upload.
        with gr.Column(scale=1):
            gr.Markdown("### Student Configuration")
            age = gr.Number(label="Student Age", value=12, minimum=5, maximum=18)
            gender = gr.Radio(["male", "female"], value="female", label="Student Gender")
            topic = gr.Textbox(label="Topic / Concept", placeholder="e.g., Introduction to chemical reactions...")
            pdf_input = gr.File(label="Upload Lecture PDF", file_types=[".pdf"])
            generate_btn = gr.Button("Start Autonomous Agent", variant="primary", size="lg")
        # Right column: the agent's formatted reasoning/execution log.
        with gr.Column(scale=2):
            gr.Markdown("### Agent Reasoning & Execution Log")
            output_text = gr.Textbox(
                label="Autonomous Agent Process",
                lines=20,
                max_lines=25
            )
    with gr.Row():
        # Player for the generated story audio (filled when a .mp3 is found).
        audio_out = gr.Audio(label="🎵 Generated The Audio Story")
    # Wire the button to the synchronous handler; outputs fill log + audio.
    generate_btn.click(
        fn=gradio_handler,
        inputs=[age, gender, topic, pdf_input],
        outputs=[output_text, audio_out]
    )

if __name__ == "__main__":
    # Launch the Gradio server when run directly (local dev / Spaces entry point).
    demo.launch()