import gradio as gr
import asyncio
import os
import sys
from langchain_mcp_adapters.client import MultiServerMCPClient
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import create_agent
import tempfile
import shutil
from datetime import datetime
import re

# Get API keys from Hugging Face Secrets
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")

if not GEMINI_API_KEY or not ELEVENLABS_API_KEY:
    raise ValueError("API keys must be set in Hugging Face Secrets")

os.environ["ELEVENLABS_API_KEY"] = ELEVENLABS_API_KEY

# Get the base directory
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Add MCP server paths to Python path for module imports
sys.path.insert(0, os.path.join(BASE_DIR, "mcp_servers", "elevenlabs-mcp"))
sys.path.insert(0, os.path.join(BASE_DIR, "mcp_servers", "mcp_pdf_reader"))
sys.path.insert(0, os.path.join(BASE_DIR, "mcp_servers", "ai_writers_workshop"))


class ReasoningLogger:
    """Captures and formats the agent's reasoning process with clean output"""

    def __init__(self):
        self.logs = []
        self.current_phase = None

    def log_phase(self, phase, content):
        """Log a major phase with enhanced formatting"""
        timestamp = datetime.now().strftime("%H:%M:%S")
        # Clean and format content
        cleaned_content = self._clean_content(content)
        separator = "─" * 80
        formatted = f"\n{separator}\n {timestamp} | {phase}\n{separator}\n{cleaned_content}\n"
        self.logs.append(formatted)
        self.current_phase = phase

    def log_action(self, action, details):
        """Log an action with clean formatting"""
        timestamp = datetime.now().strftime("%H:%M:%S")
        cleaned_details = self._clean_content(details)
        formatted = f"\n {timestamp} | {action}\n{cleaned_details}\n"
        self.logs.append(formatted)

    def log_result(self, result):
        """Log a result with success formatting"""
        timestamp = datetime.now().strftime("%H:%M:%S")
        cleaned_result = self._clean_content(result)
        formatted = f"\n {timestamp} | {cleaned_result}\n"
        self.logs.append(formatted)

    def log_step(self, step_num, description):
        """Log a numbered step"""
        formatted = f" └─ Step {step_num}: {description}\n"
        self.logs.append(formatted)

    def _clean_content(self, content):
        """Clean and format content for better readability"""
        if not content:
            return ""

        # Convert to string
        content = str(content)

        # Remove excessive whitespace
        content = re.sub(r'\n\s*\n\s*\n+', '\n\n', content)

        # Remove technical noise patterns
        noise_patterns = [
            r'messages=\[.*?\]',
            r'content=\'.*?\'(?=\s|$)',
            r'ToolMessage\(.*?\)',
            r'additional_kwargs=\{.*?\}',
            r'response_metadata=\{.*?\}',
            r'id=\'.*?\'',
            r'usage_metadata=\{.*?\}',
        ]
        for pattern in noise_patterns:
            content = re.sub(pattern, '', content, flags=re.DOTALL)

        # Clean up result formatting
        if 'AgentFinish' in content:
            # Extract only the meaningful output
            match = re.search(r'output[\'"]:\s*[\'"](.+?)[\'"]', content, re.DOTALL)
            if match:
                content = match.group(1)

        # Format tool calls nicely
        content = re.sub(r'name=\'(\w+)\'', r'\n Tool: \1', content)
        content = re.sub(r'args=\{([^}]+)\}', lambda m: f'\n Parameters: {self._format_args(m.group(1))}', content)

        # Truncate very long outputs
        lines = content.split('\n')
        if len(lines) > 30:
            content = '\n'.join(lines[:25]) + f'\n\n... ({len(lines) - 25} more lines) ...\n'

        return content.strip()

    def _format_args(self, args_str):
        """Format tool arguments nicely"""
        # Simplify argument display
        args_str = args_str.replace('\'', '').replace('"', '')
        if len(args_str) > 100:
            return args_str[:100] + '...'
        return args_str

    def get_log(self):
        """Return formatted log output"""
        header = """ """
        return header + "".join(self.logs)
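

# run_agent_with_reasoning is an async generator: after each phase it yields a
# (reasoning_log, audio_path) tuple so the UI can stream progress. The phases are:
#   1. Planning  - a single Gemini call drafts a short numbered plan.
#   2. Tool setup - the MCP servers (PDF reader, AI writer, ElevenLabs TTS) are
#      launched over stdio and their tools are collected and de-duplicated by name.
#   3. Execution - the agent runs autonomously and the generated MP3 is looked up
#      in a temporary output directory.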
async def run_agent_with_reasoning(age: int, gender: str, topic: str, pdf_temp_path: str, progress=gr.Progress()):
    logger = ReasoningLogger()
    output_dir = tempfile.mkdtemp()

    # Phase 1: Planning
    progress(0.1, desc="Agent is analyzing task and creating plan...")

    planning_prompt = f"""
You are an autonomous teaching agent. Analyze this task and create a concise plan:

TASK: Create an engaging audio story for a {age}-year-old {gender} student about "{topic}" based on a lecture PDF.

Provide a brief, numbered plan (4-5 steps maximum) without excessive detail.
"""

    # Initialize LLM for planning
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=GEMINI_API_KEY,
        temperature=0.7
    )

    try:
        planning_response = await llm.ainvoke(planning_prompt)
        plan_text = planning_response.content if hasattr(planning_response, 'content') else str(planning_response)

        # Extract only the plan steps
        plan_lines = [line for line in plan_text.split('\n')
                      if line.strip() and (line.strip()[0].isdigit() or line.strip().startswith('-'))]
        clean_plan = '\n'.join(plan_lines[:5])  # Limit to 5 steps

        logger.log_phase("PLANNING", clean_plan)
        yield logger.get_log(), None
    except Exception as e:
        logger.log_phase("PLANNING ERROR", str(e))
        yield logger.get_log(), None
        return

    # Phase 2: Tool Setup
    progress(0.2, desc="🔧 Setting up MCP tools...")
    logger.log_action("TOOL INITIALIZATION", "Connecting to: PDF Reader, AI Writer, ElevenLabs TTS")
    yield logger.get_log(), None

    python_exe = sys.executable
    client = MultiServerMCPClient({
        "pdf-reader": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "mcp_pdf_reader", "src", "server.py")]
        },
        "ai-writer": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "ai_writers_workshop", "mcp_server", "server.py")]
        },
        "ElevenLabs": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "elevenlabs-mcp", "elevenlabs_mcp", "server.py")],
            "env": {"ELEVENLABS_API_KEY": ELEVENLABS_API_KEY}
        }
    })

    all_tools = []
    seen = set()
    for server_name in ["pdf-reader", "ai-writer", "ElevenLabs"]:
        async with client.session(server_name):
            tools = await client.get_tools()
            for t in tools:
                if t.name not in seen:
                    all_tools.append(t)
                    seen.add(t.name)

    logger.log_result(f"Loaded {len(all_tools)} tools: {', '.join([t.name for t in all_tools])}")
    yield logger.get_log(), None

    # Phase 3: Autonomous Execution
    progress(0.3, desc="🤖 Agent executing plan autonomously...")

    system_instruction = f"""
You are an autonomous teaching agent. Be concise in your responses.

CONTEXT:
- Student: {age}-year-old {gender}
- Topic: "{topic}"
- PDF Path: {pdf_temp_path}
- Audio Output Directory: {output_dir}

YOUR WORKFLOW:
1. Read PDF and extract relevant content about the topic
2. Write an age-appropriate story teaching key concepts
3. Generate audio with output_directory: "{output_dir}"

Execute autonomously. Provide brief status updates only when starting a new major step.
"""

    agent = create_agent(model=llm, tools=all_tools)

    agent_input = {
        "messages": [
            {"role": "system", "content": system_instruction},
            {
                "role": "user",
                "content": f"Execute the plan. Give brief updates for each major step."
            }
        ]
    }
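
    # The parsing below does not assume a fixed result shape from agent.ainvoke():
    # it first checks for an 'output' key, otherwise falls back to regex extraction
    # from the stringified result, and finally scans output_dir for the generated .mp3.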
    logger.log_phase("EXECUTION", "Agent is working autonomously...")
    yield logger.get_log(), None

    progress(0.5, desc="📖 Processing content...")

    try:
        result = await agent.ainvoke(agent_input)

        # Extract clean summary from result
        result_text = str(result)

        # Try to extract key information
        if 'output' in result:
            summary = result.get('output', 'Execution completed')
        else:
            # Extract just the essential info
            summary_match = re.search(r'(Story.*?generated|Audio.*?created|File saved.*?\.mp3)', result_text, re.IGNORECASE | re.DOTALL)
            summary = summary_match.group(0) if summary_match else "Task completed successfully"

        if len(summary) > 200:
            summary = summary[:200] + "..."

        logger.log_phase("EXECUTION COMPLETE", summary)
        progress(0.9, desc="🎵 Finalizing audio generation...")
        yield logger.get_log(), None

        # Look for audio file
        audio_path = None
        if output_dir and os.path.exists(output_dir):
            mp3_files = [f for f in os.listdir(output_dir) if f.endswith('.mp3')]
            if mp3_files:
                audio_path = os.path.join(output_dir, mp3_files[0])
                logger.log_result(f"Audio generated: {mp3_files[0]}")

        # Check result for file paths
        if not audio_path and "File saved as:" in result_text:
            match = re.search(r'File saved as:\s*([^\s]+\.mp3)', result_text)
            if match:
                file_path = match.group(1)
                if os.path.exists(file_path):
                    audio_path = file_path
                    logger.log_result(f"Audio file: {os.path.basename(file_path)}")

        if not audio_path:
            logger.log_result("⚠️ Audio generation completed but file location uncertain")

        progress(1.0, desc="✅ Complete!")
        yield logger.get_log(), audio_path

    except Exception as e:
        logger.log_phase("ERROR", str(e))
        yield logger.get_log(), None


def gradio_handler(age, gender, topic, pdf_file, progress=gr.Progress()):
    if not pdf_file:
        return "❌ Please upload a PDF.", None

    temp_dir = tempfile.mkdtemp()
    pdf_path = os.path.join(temp_dir, "lecture.pdf")
    shutil.copy(pdf_file, pdf_path)

    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            # Use async generator to get updates
            generator = run_agent_with_reasoning(age, gender, topic, pdf_path, progress)
            final_log = None
            final_audio = None

            # Run through all updates
            async def run_generator():
                nonlocal final_log, final_audio
                async for log, audio in generator:
                    final_log = log
                    final_audio = audio

            loop.run_until_complete(run_generator())
            return final_log, final_audio
        finally:
            loop.close()
    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
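

# Gradio UI: the Blocks layout gathers the inputs expected by gradio_handler
# (age, gender, topic and a lecture PDF) and presents the reasoning log alongside
# the generated audio.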
"Lecture Overwritten To Unique Story"