import gradio as gr
import asyncio
import os
import sys
from langchain_mcp_adapters.client import MultiServerMCPClient
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import create_agent
import tempfile
import shutil
from datetime import datetime
import re

# Get API keys from Hugging Face Secrets
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")

if not GEMINI_API_KEY or not ELEVENLABS_API_KEY:
    raise ValueError("API keys must be set in Hugging Face Secrets")

os.environ["ELEVENLABS_API_KEY"] = ELEVENLABS_API_KEY

# Get the base directory
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Add MCP server paths to Python path for module imports
sys.path.insert(0, os.path.join(BASE_DIR, "mcp_servers", "elevenlabs-mcp"))
sys.path.insert(0, os.path.join(BASE_DIR, "mcp_servers", "mcp_pdf_reader"))
sys.path.insert(0, os.path.join(BASE_DIR, "mcp_servers", "ai_writers_workshop"))


class ReasoningLogger:
    """Captures and formats the agent's reasoning process with clean output"""

    def __init__(self):
        self.logs = []
        self.current_phase = None

    def log_phase(self, phase, content):
        """Log a major phase with enhanced formatting"""
        timestamp = datetime.now().strftime("%H:%M:%S")
        # Clean and format content
        cleaned_content = self._clean_content(content)
        separator = "─" * 80
        formatted = f"\n{separator}\n {timestamp} | {phase}\n{separator}\n{cleaned_content}\n"
        self.logs.append(formatted)
        self.current_phase = phase

    def log_action(self, action, details):
        """Log an action with clean formatting"""
        timestamp = datetime.now().strftime("%H:%M:%S")
        cleaned_details = self._clean_content(details)
        formatted = f"\n {timestamp} | {action}\n{cleaned_details}\n"
        self.logs.append(formatted)

    def log_result(self, result):
        """Log a result with success formatting"""
        timestamp = datetime.now().strftime("%H:%M:%S")
        cleaned_result = self._clean_content(result)
        formatted = f"\n {timestamp} | {cleaned_result}\n"
        self.logs.append(formatted)

    def log_step(self, step_num, description):
        """Log a numbered step"""
        formatted = f" └─ Step {step_num}: {description}\n"
        self.logs.append(formatted)

    def _clean_content(self, content):
        """Clean and format content for better readability"""
        if not content:
            return ""

        # Convert to string
        content = str(content)

        # Remove excessive whitespace
        content = re.sub(r'\n\s*\n\s*\n+', '\n\n', content)

        # Remove technical noise patterns
        noise_patterns = [
            r'messages=\[.*?\]',
            r'content=\'.*?\'(?=\s|$)',
            r'ToolMessage\(.*?\)',
            r'additional_kwargs=\{.*?\}',
            r'response_metadata=\{.*?\}',
            r'id=\'.*?\'',
            r'usage_metadata=\{.*?\}',
        ]
        for pattern in noise_patterns:
            content = re.sub(pattern, '', content, flags=re.DOTALL)

        # Clean up result formatting
        if 'AgentFinish' in content:
            # Extract only the meaningful output
            match = re.search(r'output[\'"]:\s*[\'"](.+?)[\'"]', content, re.DOTALL)
            if match:
                content = match.group(1)

        # Format tool calls nicely
        content = re.sub(r'name=\'(\w+)\'', r'\n Tool: \1', content)
        content = re.sub(r'args=\{([^}]+)\}', lambda m: f'\n Parameters: {self._format_args(m.group(1))}', content)

        # Truncate very long outputs
        lines = content.split('\n')
        if len(lines) > 30:
            content = '\n'.join(lines[:25]) + f'\n\n... ({len(lines) - 25} more lines) ...\n'

        return content.strip()

    def _format_args(self, args_str):
        """Format tool arguments nicely"""
        # Simplify argument display
        args_str = args_str.replace('\'', '').replace('"', '')
        if len(args_str) > 100:
            return args_str[:100] + '...'
        return args_str

    def get_log(self):
        """Return formatted log output"""
        header = """ """
        return header + "".join(self.logs)
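

# run_agent_with_reasoning is an async generator: after each phase it yields a
# (reasoning_log, audio_path) tuple so the UI can stream progress. The phases are:
#   1. Planning  - a single Gemini call drafts a short numbered plan.
#   2. Tool setup - the MCP servers (PDF reader, AI writer, ElevenLabs TTS) are
#      launched over stdio and their tools are collected and de-duplicated by name.
#   3. Execution - the agent runs autonomously and the generated MP3 is looked up
#      in a temporary output directory.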
async def run_agent_with_reasoning(age: int, gender: str, topic: str, pdf_temp_path: str, progress=gr.Progress()):
    logger = ReasoningLogger()
    output_dir = tempfile.mkdtemp()

    # Phase 1: Planning
    progress(0.1, desc="Agent is analyzing task and creating plan...")

    planning_prompt = f"""
You are an autonomous teaching agent. Analyze this task and create a concise plan:

TASK: Create an engaging audio story for a {age}-year-old {gender} student about "{topic}" based on a lecture PDF.

Provide a brief, numbered plan (4-5 steps maximum) without excessive detail.
"""

    # Initialize LLM for planning
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=GEMINI_API_KEY,
        temperature=0.7
    )

    try:
        planning_response = await llm.ainvoke(planning_prompt)
        plan_text = planning_response.content if hasattr(planning_response, 'content') else str(planning_response)

        # Extract only the plan steps
        plan_lines = [line for line in plan_text.split('\n')
                      if line.strip() and (line.strip()[0].isdigit() or line.strip().startswith('-'))]
        clean_plan = '\n'.join(plan_lines[:5])  # Limit to 5 steps

        logger.log_phase("PLANNING", clean_plan)
        yield logger.get_log(), None
    except Exception as e:
        logger.log_phase("PLANNING ERROR", str(e))
        yield logger.get_log(), None
        return

    # Phase 2: Tool Setup
    progress(0.2, desc="🔧 Setting up MCP tools...")
    logger.log_action("TOOL INITIALIZATION", "Connecting to: PDF Reader, AI Writer, ElevenLabs TTS")
    yield logger.get_log(), None

    python_exe = sys.executable
    client = MultiServerMCPClient({
        "pdf-reader": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "mcp_pdf_reader", "src", "server.py")]
        },
        "ai-writer": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "ai_writers_workshop", "mcp_server", "server.py")]
        },
        "ElevenLabs": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "elevenlabs-mcp", "elevenlabs_mcp", "server.py")],
            "env": {"ELEVENLABS_API_KEY": ELEVENLABS_API_KEY}
        }
    })

    all_tools = []
    seen = set()
    for server_name in ["pdf-reader", "ai-writer", "ElevenLabs"]:
        async with client.session(server_name):
            tools = await client.get_tools()
            for t in tools:
                if t.name not in seen:
                    all_tools.append(t)
                    seen.add(t.name)

    logger.log_result(f"Loaded {len(all_tools)} tools: {', '.join([t.name for t in all_tools])}")
    yield logger.get_log(), None

    # Phase 3: Autonomous Execution
    progress(0.3, desc="🤖 Agent executing plan autonomously...")

    system_instruction = f"""
You are an autonomous teaching agent. Be concise in your responses.

CONTEXT:
- Student: {age}-year-old {gender}
- Topic: "{topic}"
- PDF Path: {pdf_temp_path}
- Audio Output Directory: {output_dir}

YOUR WORKFLOW:
1. Read PDF and extract relevant content about the topic
2. Write an age-appropriate story teaching key concepts
3. Generate audio with output_directory: "{output_dir}"

Execute autonomously. Provide brief status updates only when starting a new major step.
"""

    agent = create_agent(model=llm, tools=all_tools)

    agent_input = {
        "messages": [
            {"role": "system", "content": system_instruction},
            {
                "role": "user",
                "content": f"Execute the plan. Give brief updates for each major step."
            }
        ]
    }
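
    # The parsing below does not assume a fixed result shape from agent.ainvoke():
    # it first checks for an 'output' key, otherwise falls back to regex extraction
    # from the stringified result, and finally scans output_dir for the generated .mp3.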
    logger.log_phase("EXECUTION", "Agent is working autonomously...")
    yield logger.get_log(), None

    progress(0.5, desc="📖 Processing content...")

    try:
        result = await agent.ainvoke(agent_input)

        # Extract clean summary from result
        result_text = str(result)

        # Try to extract key information
        if 'output' in result:
            summary = result.get('output', 'Execution completed')
        else:
            # Extract just the essential info
            summary_match = re.search(r'(Story.*?generated|Audio.*?created|File saved.*?\.mp3)', result_text, re.IGNORECASE | re.DOTALL)
            summary = summary_match.group(0) if summary_match else "Task completed successfully"

        if len(summary) > 200:
            summary = summary[:200] + "..."

        logger.log_phase("EXECUTION COMPLETE", summary)
        progress(0.9, desc="🎵 Finalizing audio generation...")
        yield logger.get_log(), None

        # Look for audio file
        audio_path = None
        if output_dir and os.path.exists(output_dir):
            mp3_files = [f for f in os.listdir(output_dir) if f.endswith('.mp3')]
            if mp3_files:
                audio_path = os.path.join(output_dir, mp3_files[0])
                logger.log_result(f"Audio generated: {mp3_files[0]}")

        # Check result for file paths
        if not audio_path and "File saved as:" in result_text:
            match = re.search(r'File saved as:\s*([^\s]+\.mp3)', result_text)
            if match:
                file_path = match.group(1)
                if os.path.exists(file_path):
                    audio_path = file_path
                    logger.log_result(f"Audio file: {os.path.basename(file_path)}")

        if not audio_path:
            logger.log_result("⚠️ Audio generation completed but file location uncertain")

        progress(1.0, desc="✅ Complete!")
        yield logger.get_log(), audio_path

    except Exception as e:
        logger.log_phase("ERROR", str(e))
        yield logger.get_log(), None


def gradio_handler(age, gender, topic, pdf_file, progress=gr.Progress()):
    if not pdf_file:
        return "❌ Please upload a PDF.", None

    temp_dir = tempfile.mkdtemp()
    pdf_path = os.path.join(temp_dir, "lecture.pdf")
    shutil.copy(pdf_file, pdf_path)

    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            # Use async generator to get updates
            generator = run_agent_with_reasoning(age, gender, topic, pdf_path, progress)
            final_log = None
            final_audio = None

            # Run through all updates
            async def run_generator():
                nonlocal final_log, final_audio
                async for log, audio in generator:
                    final_log = log
                    final_audio = audio

            loop.run_until_complete(run_generator())
            return final_log, final_audio
        finally:
            loop.close()
    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
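

# Gradio UI: the Blocks layout gathers the inputs expected by gradio_handler
# (age, gender, topic and a lecture PDF) and presents the reasoning log alongside
# the generated audio.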
"Lecture Overwritten To Unique Story"