Spaces:
Running
Running
File size: 12,990 Bytes
dc7d0df caa6e5b d5e5c85 caa6e5b 3ddcfc3 caa6e5b d633719 3ddcfc3 ac373a0 d5e5c85 caa6e5b dc7d0df b0f36b0 d5e5c85 b0f36b0 7dd3916 38fa382 7dd3916 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 e1fff8c 3ddcfc3 e1fff8c 3ddcfc3 e1fff8c 3ddcfc3 e1fff8c 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 e1fff8c 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 bd8e399 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d6669e6 b0f36b0 caa6e5b b0f36b0 d5e5c85 caa6e5b b0f36b0 d5e5c85 caa6e5b b0f36b0 d5e5c85 caa6e5b d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 caa6e5b 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 d633719 3ddcfc3 caa6e5b fc3767e 3ddcfc3 caa6e5b d5e5c85 caa6e5b d5e5c85 3ddcfc3 d5e5c85 b4cbd7a d5e5c85 f603017 caa6e5b d5e5c85 caa6e5b 2ccef2a caa6e5b 2cfbc51 caa6e5b 2cfbc51 caa6e5b d633719 e1fff8c d633719 1af8470 d633719 e1fff8c d633719 e1fff8c d633719 3ddcfc3 d633719 fb7ebd7 caa6e5b d5e5c85 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 |
import gradio as gr
import asyncio
import os
import sys
from langchain_mcp_adapters.client import MultiServerMCPClient
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import create_agent
import tempfile
import shutil
from datetime import datetime
import re
# Credentials are read from environment variables (Hugging Face Secrets).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")

# Refuse to start unless both keys are present and non-empty.
if not (GEMINI_API_KEY and ELEVENLABS_API_KEY):
    raise ValueError("API keys must be set in Hugging Face Secrets")

# Write the ElevenLabs key back into the process environment.
os.environ["ELEVENLABS_API_KEY"] = ELEVENLABS_API_KEY

# Directory that contains this file; the bundled MCP servers live below it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Prepend the MCP server directories to the module search path. They are
# listed here in their final sys.path order (first entry is searched first).
sys.path[:0] = [
    os.path.join(BASE_DIR, "mcp_servers", server_dir)
    for server_dir in ("ai_writers_workshop", "mcp_pdf_reader", "elevenlabs-mcp")
]
class ReasoningLogger:
    """Collect the agent's progress messages and render them as one text log.

    Entries are appended to ``self.logs`` as pre-formatted strings;
    ``get_log()`` concatenates them. ``self.current_phase`` holds the name
    passed to the most recent ``log_phase`` call (``None`` before the first).
    """

    # Repr-style fragments (e.g. ``ToolMessage(...)``, ``usage_metadata={...}``)
    # that are stripped from logged content. Compiled once for all instances.
    _NOISE_PATTERNS = tuple(
        re.compile(pattern, re.DOTALL)
        for pattern in (
            r'messages=\[.*?\]',
            r'content=\'.*?\'(?=\s|$)',
            r'ToolMessage\(.*?\)',
            r'additional_kwargs=\{.*?\}',
            r'response_metadata=\{.*?\}',
            r'id=\'.*?\'',
            r'usage_metadata=\{.*?\}',
        )
    )
    _BLANK_RUN = re.compile(r'\n\s*\n\s*\n+')
    _AGENT_OUTPUT = re.compile(r'output[\'"]:\s*[\'"](.+?)[\'"]', re.DOTALL)
    _TOOL_NAME = re.compile(r'name=\'(\w+)\'')
    _TOOL_ARGS = re.compile(r'args=\{([^}]+)\}')

    _SEPARATOR = "─" * 80
    _MAX_LINES = 30       # content longer than this many lines is truncated
    _KEPT_LINES = 25      # number of leading lines kept when truncating
    _MAX_ARGS_CHARS = 100  # tool-argument text is cut to this many characters

    def __init__(self):
        self.logs = []
        self.current_phase = None

    @staticmethod
    def _timestamp():
        """Return the current local time formatted as ``HH:MM:SS``."""
        return datetime.now().strftime("%H:%M:%S")

    def log_phase(self, phase, content):
        """Append a phase banner (separator, timestamp, name) followed by the
        cleaned *content*, and remember *phase* as the current phase."""
        cleaned_content = self._clean_content(content)
        separator = self._SEPARATOR
        self.logs.append(
            f"\n{separator}\n {self._timestamp()} | {phase}\n{separator}\n{cleaned_content}\n"
        )
        self.current_phase = phase

    def log_action(self, action, details):
        """Append a timestamped action line followed by the cleaned *details*."""
        cleaned_details = self._clean_content(details)
        self.logs.append(f"\n {self._timestamp()} | {action}\n{cleaned_details}\n")

    def log_result(self, result):
        """Append a single timestamped line containing the cleaned *result*."""
        cleaned_result = self._clean_content(result)
        self.logs.append(f"\n {self._timestamp()} | {cleaned_result}\n")

    def log_step(self, step_num, description):
        """Append a numbered step line; *description* is logged verbatim."""
        self.logs.append(f" └─ Step {step_num}: {description}\n")

    def _clean_content(self, content):
        """Return *content* as a trimmed, de-noised string.

        Falsy input yields ``""``. Otherwise the value is stringified, noise
        fragments are removed, runs of blank lines are collapsed to a single
        blank line, an ``AgentFinish`` output is unwrapped when present, tool
        name/argument fragments are reformatted, and output longer than
        ``_MAX_LINES`` lines is cut to the first ``_KEPT_LINES`` lines plus a
        "more lines" marker.
        """
        if not content:
            return ""

        text = str(content)

        for pattern in self._NOISE_PATTERNS:
            text = pattern.sub('', text)

        # Collapse blank-line runs only after the noise is gone: deleting a
        # fragment that sat between blank lines would otherwise leave a run of
        # three or more newlines in the output.
        text = self._BLANK_RUN.sub('\n\n', text)

        # Keep only the agent's final output when one is embedded in the text.
        if 'AgentFinish' in text:
            found = self._AGENT_OUTPUT.search(text)
            if found:
                text = found.group(1)

        # Put each tool name and its parameters on a separate line.
        text = self._TOOL_NAME.sub(r'\n Tool: \1', text)
        text = self._TOOL_ARGS.sub(
            lambda m: f'\n Parameters: {self._format_args(m.group(1))}', text
        )

        lines = text.split('\n')
        if len(lines) > self._MAX_LINES:
            hidden = len(lines) - self._KEPT_LINES
            text = '\n'.join(lines[:self._KEPT_LINES]) + f'\n\n... ({hidden} more lines) ...\n'

        return text.strip()

    def _format_args(self, args_str):
        """Strip quote characters from *args_str* and cap it at
        ``_MAX_ARGS_CHARS`` characters, appending ``...`` when cut."""
        unquoted = args_str.replace('\'', '').replace('"', '')
        if len(unquoted) > self._MAX_ARGS_CHARS:
            return unquoted[:self._MAX_ARGS_CHARS] + '...'
        return unquoted

    def get_log(self):
        """Return every logged entry concatenated, preceded by one newline."""
        header = "\n"
        return header + "".join(self.logs)
def _extract_plan_steps(plan_text, max_steps=5):
    """Return the numbered or dashed lines of *plan_text*, at most *max_steps*.

    Falls back to the whole stripped text when no line looks like a step, so
    the caller never logs an empty plan.
    """
    steps = []
    for line in plan_text.split('\n'):
        stripped = line.strip()
        if stripped and (stripped[0].isdigit() or stripped.startswith('-')):
            steps.append(line)
    if not steps:
        return plan_text.strip()
    return '\n'.join(steps[:max_steps])


def _newest_mp3(directory):
    """Return the name of the most recently modified ``.mp3`` file directly
    inside *directory*, or ``None`` when there is none."""
    if not directory or not os.path.isdir(directory):
        return None
    names = [name for name in os.listdir(directory) if name.endswith('.mp3')]
    if not names:
        return None
    return max(names, key=lambda name: os.path.getmtime(os.path.join(directory, name)))


async def run_agent_with_reasoning(age: int, gender: str, topic: str, pdf_temp_path: str, progress=gr.Progress()):
    """Plan, set up tools, and run the agent, yielding the log as it grows.

    Args:
        age: Student age, interpolated into the prompts.
        gender: Student gender, interpolated into the prompts.
        topic: Topic the story should teach.
        pdf_temp_path: Path of the lecture PDF handed to the agent.
        progress: Gradio progress callback, called with a fraction and ``desc``.

    Yields:
        ``(log_text, audio_path)`` tuples. ``audio_path`` is ``None`` on every
        update except possibly the last, where it is the located ``.mp3`` path
        (still ``None`` if no audio file could be found).

    Planning errors and errors during execution are written to the log and
    end the generator; errors while loading tools propagate to the caller.
    """
    logger = ReasoningLogger()
    # This directory is not deleted here: the audio path yielded at the end
    # may point inside it.
    output_dir = tempfile.mkdtemp()

    # Phase 1: Planning
    progress(0.1, desc="Agent is analyzing task and creating plan...")
    planning_prompt = (
        "\n"
        "You are an autonomous teaching agent. Analyze this task and create a concise plan:\n"
        f'TASK: Create an engaging audio story for a {age}-year-old {gender} student about "{topic}" based on a lecture PDF.\n'
        "Provide a brief, numbered plan (4-5 steps maximum) without excessive detail.\n"
    )

    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=GEMINI_API_KEY,
        temperature=0.7
    )

    try:
        planning_response = await llm.ainvoke(planning_prompt)
        plan_text = planning_response.content if hasattr(planning_response, 'content') else str(planning_response)
        # Message content is not guaranteed to be a plain string; coerce it so
        # the line splitting below cannot fail on a list of content parts.
        if not isinstance(plan_text, str):
            plan_text = str(plan_text)
        logger.log_phase("PLANNING", _extract_plan_steps(plan_text))
        yield logger.get_log(), None
    except Exception as e:
        logger.log_phase("PLANNING ERROR", str(e))
        yield logger.get_log(), None
        return

    # Phase 2: Tool Setup
    progress(0.2, desc="🔧 Setting up MCP tools...")
    logger.log_action("TOOL INITIALIZATION", "Connecting to: PDF Reader, AI Writer, ElevenLabs TTS")
    yield logger.get_log(), None

    python_exe = sys.executable
    servers_root = os.path.join(BASE_DIR, "mcp_servers")
    client = MultiServerMCPClient({
        "pdf-reader": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(servers_root, "mcp_pdf_reader", "src", "server.py")]
        },
        "ai-writer": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(servers_root, "ai_writers_workshop", "mcp_server", "server.py")]
        },
        "ElevenLabs": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(servers_root, "elevenlabs-mcp", "elevenlabs_mcp", "server.py")],
            "env": {"ELEVENLABS_API_KEY": ELEVENLABS_API_KEY}
        }
    })

    # get_tools() called without a server name already returns the tools of
    # every configured server, so one call is enough. Tools are de-duplicated
    # by name, keeping the first occurrence.
    all_tools = []
    seen = set()
    for tool in await client.get_tools():
        if tool.name not in seen:
            seen.add(tool.name)
            all_tools.append(tool)

    logger.log_result(f"Loaded {len(all_tools)} tools: {', '.join([t.name for t in all_tools])}")
    yield logger.get_log(), None

    # Phase 3: Autonomous Execution
    progress(0.3, desc="🤖 Agent executing plan autonomously...")
    system_instruction = (
        "\n"
        "You are an autonomous teaching agent. Be concise in your responses.\n"
        "CONTEXT:\n"
        f"- Student: {age}-year-old {gender}\n"
        f'- Topic: "{topic}"\n'
        f"- PDF Path: {pdf_temp_path}\n"
        f"- Audio Output Directory: {output_dir}\n"
        "YOUR WORKFLOW:\n"
        "1. Read PDF and extract relevant content about the topic\n"
        "2. Write an age-appropriate story teaching key concepts\n"
        f'3. Generate audio with output_directory: "{output_dir}"\n'
        "Execute autonomously. Provide brief status updates only when starting a new major step.\n"
    )

    agent = create_agent(model=llm, tools=all_tools)
    # NOTE(review): the plan text produced in phase 1 is only logged; the agent
    # receives the fixed workflow above, not that plan. Confirm this is intended.
    agent_input = {
        "messages": [
            {"role": "system", "content": system_instruction},
            {
                "role": "user",
                "content": "Execute the plan. Give brief updates for each major step."
            }
        ]
    }

    logger.log_phase("EXECUTION", "Agent is working autonomously...")
    yield logger.get_log(), None
    progress(0.5, desc="📖 Processing content...")

    try:
        result = await agent.ainvoke(agent_input)
        result_text = str(result)

        # Build a short summary: an explicit 'output' entry when the result
        # has one, otherwise the first success-looking phrase in its text.
        if isinstance(result, dict) and 'output' in result:
            summary = str(result.get('output', 'Execution completed'))
        else:
            summary_match = re.search(
                r'(Story.*?generated|Audio.*?created|File saved.*?\.mp3)',
                result_text,
                re.IGNORECASE | re.DOTALL,
            )
            summary = summary_match.group(0) if summary_match else "Task completed successfully"
        if len(summary) > 200:
            summary = summary[:200] + "..."

        logger.log_phase("EXECUTION COMPLETE", summary)
        progress(0.9, desc="🎵 Finalizing audio generation...")
        yield logger.get_log(), None

        # Prefer an mp3 written into the dedicated output directory.
        audio_path = None
        mp3_name = _newest_mp3(output_dir)
        if mp3_name:
            audio_path = os.path.join(output_dir, mp3_name)
            logger.log_result(f"Audio generated: {mp3_name}")

        # Otherwise fall back to a path reported in the agent's result text.
        if not audio_path and "File saved as:" in result_text:
            match = re.search(r'File saved as:\s*([^\s]+\.mp3)', result_text)
            if match:
                file_path = match.group(1)
                if os.path.exists(file_path):
                    audio_path = file_path
                    logger.log_result(f"Audio file: {os.path.basename(file_path)}")

        if not audio_path:
            logger.log_result("⚠️ Audio generation completed but file location uncertain")

        progress(1.0, desc="✅ Complete!")
        yield logger.get_log(), audio_path
    except Exception as e:
        logger.log_phase("ERROR", str(e))
        yield logger.get_log(), None
def gradio_handler(age, gender, topic, pdf_file, progress=gr.Progress()):
    """Run the agent for one uploaded PDF and return its final log and audio.

    Args:
        age, gender, topic: Forwarded unchanged to ``run_agent_with_reasoning``.
        pdf_file: The uploaded PDF, either a filesystem path or an object
            exposing the path as ``.name``.
        progress: Gradio progress callback, forwarded to the agent run.

    Returns:
        ``(log_text, audio_path)`` taken from the last update the agent
        yielded. If no file was uploaded, or any step raises, the first
        element is an error message and the second is ``None``.
    """
    if not pdf_file:
        return "❌ Please upload a PDF.", None

    # Accept a plain path as well as a file-like wrapper carrying the path in
    # ``.name`` (presumably what some Gradio versions pass — TODO confirm).
    if isinstance(pdf_file, (str, os.PathLike)):
        source_path = pdf_file
    else:
        source_path = getattr(pdf_file, "name", pdf_file)

    temp_dir = tempfile.mkdtemp()
    try:
        # The copy happens inside the try so that a failure is reported like
        # any other error and the temporary directory is still removed.
        pdf_path = os.path.join(temp_dir, "lecture.pdf")
        shutil.copy(source_path, pdf_path)

        async def consume_updates():
            """Drain the agent's updates, keeping only the last one."""
            final_log, final_audio = None, None
            async for log, audio in run_agent_with_reasoning(age, gender, topic, pdf_path, progress):
                final_log, final_audio = log, audio
            return final_log, final_audio

        # asyncio.run creates a fresh event loop, finalizes pending async
        # generators and closes the loop, leaving no closed loop installed
        # as the current loop of this thread.
        return asyncio.run(consume_updates())
    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
# Gradio UI: inputs and the start button on the left, the agent log on the
# right, and the generated audio in a row of its own.
# NOTE(review): the source had lost its indentation, so the nesting below is
# reconstructed; in particular, confirm whether the audio row belongs at the
# top level of the Blocks (as here) or inside the right-hand column.
with gr.Blocks() as demo:
    gr.Markdown(
        "\n"
        "<h1 style='text-align:center;'>LOTUS</h1>\n"
        "<p style='text-align:center; font-size:18px;'>\n"
        "\"Lecture Overwritten To Unique Story\"<br>\n"
        "</p>\n"
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Student Configuration")
            age = gr.Number(label="Student Age", value=12, minimum=5, maximum=18)
            gender = gr.Radio(["male", "female"], value="female", label="Student Gender")
            topic = gr.Textbox(label="Topic / Concept", placeholder="e.g., Introduction to chemical reactions...")
            pdf_input = gr.File(label="Upload Lecture PDF", file_types=[".pdf"])
            generate_btn = gr.Button("Start Autonomous Agent", variant="primary", size="lg")

        with gr.Column(scale=2):
            gr.Markdown("### Agent Reasoning & Execution Log")
            output_text = gr.Textbox(
                label="Autonomous Agent Process",
                lines=20,
                max_lines=25
            )

    with gr.Row():
        audio_out = gr.Audio(label="🎵 Generated The Audio Story")

    # The handler's (log, audio) return value fills the log box and the player.
    generate_btn.click(
        fn=gradio_handler,
        inputs=[age, gender, topic, pdf_input],
        outputs=[output_text, audio_out]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()