# mcp_servers.py (reads the Gemini key from the GOOGLE_API_KEY secret)
import asyncio
import os
import httpx
import json
import google.generativeai as genai
import anthropic
import openai
from personas import PERSONAS_DATA

# --- 1. Load API Keys from Blaxel Secrets ---
# The Gemini key is stored under the Blaxel secret name GOOGLE_API_KEY (not GEMINI_API_KEY).
GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
SAMBANOVA_API_KEY = os.getenv("SAMBANOVA_API_KEY")
SAMBANOVA_BASE_URL = os.getenv("SAMBANOVA_BASE_URL", "https://api.sambanova.ai/v1")

# --- 2. Configure API Clients ---
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel('gemini-1.5-pro-latest')
anthropic_client = anthropic.AsyncAnthropic(api_key=ANTHROPIC_API_KEY)

sambanova_client = openai.AsyncOpenAI(
    api_key=SAMBANOVA_API_KEY,
    base_url=SAMBANOVA_BASE_URL
)

# Evaluation prompt adapted from 'LLM judges prompt v3.0.docx'.
# Literal JSON braces in the output example are doubled ({{ }}) so that str.format()
# below treats them as text rather than as replacement fields.
EVALUATION_PROMPT_TEMPLATE = """
You are an impartial and objective AI evaluator specializing in assessing business solutions.
Your task is to critically analyze a proposed solution to a given business problem.
You will evaluate the solution across five specific dimensions: Novelty, Usefulness/Feasibility, Flexibility, Elaboration, and Cultural Appropriateness/Sensitivity.

**Evaluation Criteria:**
Assign a score from 1 to 5 for each criterion (1=Very Low, 5=Very High).
You MUST provide a brief, specific justification (1-3 sentences) for each score.

**Definitions:**
1.  **Novelty:** How original, unexpected, or non-obvious is the solution?
2.  **Usefulness/Feasibility:** Is the solution practical, implementable, and likely to be effective?
3.  **Flexibility:** Does the solution offer diverse approaches or adaptable ideas?
4.  **Elaboration:** Is the solution well-explained, clear, and sufficiently detailed?
5.  **Cultural Appropriateness/Sensitivity:** How well does the solution consider and align with potential cultural factors?

**Business Problem:**
{problem}

**Proposed Solution:**
{solution_text}

**Output Format:**
You MUST return *only* a valid JSON object in the following format:
{{
  "Novelty": {{"score": <score_int>, "justification": "<justification_str>"}},
  "Usefulness_Feasibility": {{"score": <score_int>, "justification": "<justification_str>"}},
  "Flexibility": {{"score": <score_int>, "justification": "<justification_str>"}},
  "Elaboration": {{"score": <score_int>, "justification": "<justification_str>"}},
  "Cultural_Appropriateness": {{"score": <score_int>, "justification": "<justification_str>"}}
}}
"""

class BusinessSolutionEvaluator:
    """Implements the "LLM-as-a-Judge" with a live call to Gemini."""
    
    async def evaluate(self, problem: str, solution_text: str) -> dict:
        print(f"Evaluating solution (live): {solution_text[:50]}...")
        
        prompt = EVALUATION_PROMPT_TEMPLATE.format(problem=problem, solution_text=solution_text)
        
        try:
            response = await gemini_model.generate_content_async(
                prompt,
                generation_config=genai.types.GenerationConfig(
                    response_mime_type="application/json"
                )
            )
            
            json_text = response.text.strip().replace("```json", "").replace("```", "")
            v_fitness = json.loads(json_text)
            
            print(f"Evaluation complete (live): {v_fitness}")
            return v_fitness
        except Exception as e:
            print(f"ERROR: BusinessSolutionEvaluator failed: {e}")
            return {
                "Novelty": {"score": 1, "justification": "Error during evaluation."},
                "Usefulness_Feasibility": {"score": 1, "justification": "Error during evaluation."},
                "Flexibility": {"score": 1, "justification": "Error during evaluation."},
                "Elaboration": {"score": 1, "justification": "Error during evaluation."},
                "Cultural_Appropriateness": {"score": 1, "justification": "Error during evaluation."}
            }
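
# Illustrative usage sketch (assumption: awaited from an async context with GOOGLE_API_KEY
# configured; the problem and solution strings below are hypothetical):
#
#   evaluator = BusinessSolutionEvaluator()
#   scores = await evaluator.evaluate(
#       problem="A regional coffee chain wants to grow revenue without opening new stores.",
#       solution_text="Launch a subscription program built around local roaster collaborations...",
#   )
#   print(scores["Novelty"]["score"], scores["Novelty"]["justification"])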

# --- 3. Unified API Call Function ---
async def get_llm_response(client_name: str, system_prompt: str, user_prompt: str) -> str:
    """A single function to handle calling any of the three sponsor LLMs."""
    try:
        if client_name == "Gemini":
            chat = gemini_model.start_chat(history=[
                {'role': 'user', 'parts': [system_prompt]},
                {'role': 'model', 'parts': ["Understood. I will act as this persona."]}
            ])
            response = await chat.send_message_async(user_prompt)
            return response.text

        elif client_name == "Anthropic":
            response = await anthropic_client.messages.create(
                model="claude-3-opus-20240229",
                max_tokens=2048,
                system=system_prompt,
                messages=[{"role": "user", "content": user_prompt}]
            )
            return response.content[0].text
        
        elif client_name == "SambaNova":
            completion = await sambanova_client.chat.completions.create(
                model="Meta-Llama-3.1-8B-Instruct",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ]
            )
            return completion.choices[0].message.content

        else:
            # Guard against an unrecognized client_name so callers always get a string back.
            return f"Error generating response from {client_name} (unknown client name)."

    except Exception as e:
        print(f"ERROR: API call to {client_name} failed: {e}")
        return f"Error generating response from {client_name}."


class AgentCalibrator:
    """Tests the sponsor LLMs with live API calls."""
    
    def __init__(self, evaluator: BusinessSolutionEvaluator):
        self.evaluator = evaluator
        self.sponsor_llms = ["Gemini", "Anthropic", "SambaNova"]
        
    async def calibrate_team(self, problem: str) -> dict:
        print("Running LIVE calibration test for specialist team...")
        
        roles_to_test = {
            "Plant": PERSONAS_DATA["Culture_5"]["description"],
            "Implementer": PERSONAS_DATA["Culture_Expert"]["description"],
            "Monitor": PERSONAS_DATA["Culture_11"]["description"]
        }
        
        test_problem = f"For the business problem '{problem}', generate a single, brief, one-paragraph concept-level solution."
        
        tasks = []
        for role, persona in roles_to_test.items():
            for llm in self.sponsor_llms:
                tasks.append(self.run_calibration_test(problem, role, llm, persona, test_problem))
        
        results = await asyncio.gather(*tasks)
        
        best_llms = {}
        role_metrics = {
            "Plant": "Novelty",
            "Implementer": "Usefulness_Feasibility",
            "Monitor": "Cultural_Appropriateness"
        }
        
        for role in roles_to_test.keys():
            best_score = -1
            best_llm = "None"
            for res in results:
                if res["role"] == role:
                    metric = role_metrics[role]
                    score = res.get("score", {}).get(metric, {}).get("score", 0)
                    if score > best_score:
                        best_score = score
                        best_llm = res["llm"]
            best_llms[role] = best_llm
            
        team_plan = {
            "Plant": {"persona": "Culture_5", "llm": best_llms["Plant"]},
            "Implementer": {"persona": "Culture_Expert", "llm": best_llms["Implementer"]},
            "Monitor": {"persona": "Culture_11", "llm": best_llms["Monitor"]}
        }
        
        print(f"Calibration complete (live). Team plan: {team_plan}")
        return team_plan

    async def run_calibration_test(self, problem, role, llm, persona, test_problem):
        """Helper to run a single test and evaluation."""
        print(f"...Calibrating {role} on {llm}...")
        solution = await get_llm_response(llm, persona, test_problem)
        if "Error generating response" in solution:
            return {"role": role, "llM": llm, "score": {
                "Novelty": {"score": 0}, 
                "Usefulness_Feasibility": {"score": 0}, 
                "Cultural_Appropriateness": {"score": 0}
            }}
        
        score = await self.evaluator.evaluate(problem, solution)
        return {"role": role, "llm": llm, "score": score}