Spaces:

baldassarre
/

mamma-ai

Sleeping

File size: 4,850 Bytes

4dace6a
 
 
 
 
 
 
f900124
4dace6a
 
e14e91c
4dace6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e14e91c
4dace6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e14e91c
 
4dace6a
 
 
 
 
 
 
 
 
 
 
 
 
 
e14e91c
4dace6a
 
 
 
 
 
 
e14e91c
 
4dace6a
 
 
e14e91c
 
4dace6a
 
 
e14e91c
4dace6a
 
e14e91c
4dace6a
 
 
 
 
 
 
 
 
 
 
e14e91c
4dace6a
 
 
 
f900124
4dace6a
 
 
 
e14e91c
 
4dace6a

from __future__ import annotations

import base64
import json
import logging
import os
from pathlib import Path
from typing import Any

import dotenv
import gradio as gr
import requests
from gradio.components.chatbot import (
    FileDataDict,
    FileMessageDict,
    NormalizedMessageContent,
    NormalizedMessageDict,
    TextMessageDict,
)
from gradio.components.multimodal_textbox import MultimodalValue

# Root logger, shared by the whole module.
logger = logging.getLogger()
# OpenRouter chat-completions endpoint and the model requested from it.
API_URL = "https://openrouter.ai/api/v1/chat/completions"
MODEL = "google/gemini-2.5-flash-lite-preview-09-2025"
# System prompt, read once at import time from the current working directory.
# Decoded explicitly as UTF-8 so the text does not depend on the platform's locale encoding.
SYSTEM_PROMPT = Path("system-prompt.md").read_text(encoding="utf-8").strip()
# Lowercase file extensions (without the dot) accepted as audio attachments.
AUDIO_FORMATS = {"wav", "mp3", "m4a", "flac"}


def chat_fn(user_msg: MultimodalValue, history: list[NormalizedMessageDict], api_key: str | None) -> str:
    """
    Answer one chat turn by sending the conversation to the OpenRouter chat-completions API.

    Args:
        user_msg: The message being submitted; its optional "text" and "files" entries are read.
        history: Earlier messages of the conversation. They are forwarded to the API in the
            given order, after the system prompt and before the current message.
        api_key: API key used as bearer token. If it equals the ``PASSWORD`` environment
            variable, the ``OPENROUTER_API_KEY`` environment variable is used instead.

    Returns:
        The stripped text content of the first choice in the API response, or ``"Boh!"``
        when no API key was provided.

    Raises:
        KeyError: If the password matched but ``OPENROUTER_API_KEY`` is not set, or if the
            response JSON lacks the expected fields.
        ValueError: If an attached file has an unsupported format, or if a successful
            response does not contain valid JSON.
        requests.HTTPError: If the API answers with an error status code.
    """
    import hmac

    # History is already chronological (it is forwarded as-is below), so log it unreversed
    logger.info(f"History (oldest first):\n{json.dumps(history, indent=2)}")
    logger.info(f"User message:\n{json.dumps(user_msg, indent=2)}")

    # Determine API key
    if not api_key:
        return "Boh!"
    # A missing PASSWORD variable simply disables the password shortcut instead of crashing
    password = os.environ.get("PASSWORD")
    if password and hmac.compare_digest(api_key.encode("utf-8"), password.encode("utf-8")):
        api_key = os.environ["OPENROUTER_API_KEY"]

    # Build message history including system prompt, conversation history, and current user message
    user_content: list[NormalizedMessageContent] = []
    if "text" in user_msg and len(text := user_msg["text"].strip()) > 0:
        user_content.append(TextMessageDict(type="text", text=text))
    user_content.extend(
        FileMessageDict(type="file", file=FileDataDict(path=path)) for path in user_msg.get("files", [])
    )
    user_msg_dict = NormalizedMessageDict(role="user", content=user_content)
    history = [
        NormalizedMessageDict(role="system", content=[TextMessageDict(type="text", text=SYSTEM_PROMPT)]),
        *history,
        user_msg_dict,
    ]

    # Call the model API
    payload = {
        "model": MODEL,
        "messages": history_to_messages(history),
        "max_tokens": 4096,
        "temperature": 0.2,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
    try:
        body = response.json()
    except ValueError:
        # Non-JSON body (e.g. an HTML error page): log it raw and report the HTTP error,
        # if any, rather than the less informative JSON decoding failure
        logger.info(f"Response (not JSON):\n{response.text}")
        response.raise_for_status()
        raise
    logger.info(f"Response:\n{json.dumps(body, indent=2)}")
    response.raise_for_status()
    msg = body["choices"][0]["message"]["content"].strip()

    return msg


def history_to_messages(history: list[NormalizedMessageDict]) -> list[dict[str, Any]]:
    """
    Convert chat history into the message list sent to openrouter.

    Only the "role" and "content" of each history item are kept. Content
    entries of type "file" are replaced by inline audio; every other entry
    is passed through untouched.

    A file entry such as:

        {
            "type": "file",
            "file": {"path": "file.wav"}
        }

    becomes:

        {
            "type": "input_audio",
            "input_audio": {
                "data": "<base64-encoded-audio>",
                "format": "wav",
            }
        }

    Raises ValueError when a file's extension is not in AUDIO_FORMATS.
    """

    def to_openrouter(entry: NormalizedMessageContent) -> dict[str, Any]:
        # Anything that is not a file attachment is forwarded unchanged
        if entry["type"] != "file":
            return entry  # pyright: ignore[reportReturnType]

        audio_path = Path(entry["file"]["path"])
        # The lowercased extension without the dot doubles as the audio format name
        audio_format = audio_path.suffix.lstrip(".").lower()
        if audio_format not in AUDIO_FORMATS:
            raise ValueError(f"Unsupported file format: {audio_format}")

        encoded_audio = file_to_base64(audio_path)
        return {
            "type": "input_audio",
            "input_audio": {"data": encoded_audio, "format": audio_format},
        }

    messages: list[dict[str, Any]] = []
    for message in history:
        converted = [to_openrouter(entry) for entry in message["content"]]
        messages.append({"role": message["role"], "content": converted})
    return messages


def file_to_base64(path: str | Path) -> str:
    """Read the file at *path* in binary mode and return its content base64-encoded as a string."""
    raw_bytes = Path(path).read_bytes()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


# Set up logging: INFO level on the root logger, with timestamp and level in each line
logging.basicConfig(level="INFO", format="%(asctime)s %(levelname)s: %(message)s")
# Route messages issued through the warnings module into the logging system
logging.captureWarnings(True)

# Load environment variables from .env file
# (chat_fn reads PASSWORD and OPENROUTER_API_KEY from the environment at call time)
dotenv.load_dotenv()

# Chat (top-level demo variable to allow live reloading)
demo = gr.ChatInterface(
    chat_fn,
    multimodal=True,
    chatbot=gr.Chatbot(placeholder="Ready!"),
    # Text input plus at most one audio attachment, recorded from the microphone
    textbox=gr.MultimodalTextbox(
        placeholder="Your message",
        file_count="single",
        file_types=["audio"],
        sources=["microphone"],
    ),
    # Passed to chat_fn as its third argument (api_key)
    additional_inputs=[
        gr.Textbox(type="password", label="Openrouter API Key"),
    ],
    additional_inputs_accordion=gr.Accordion("Options", open=True),
    title="Mamma AI",
    description="Parla con la mamma più saggia del mondo! Puoi inviare messaggi di testo o audio.\n\nPrima di usare la chat, inserisci un'API key di [Openrouter](https://openrouter.ai/) (oppure la password segreta).",
    autofocus=True,
    # Each example is [message, value for the API key input]; the key is left unset
    examples=[
        ["È meglio lavare i piatti a mano o in lavastoviglie?", None],
        ["Aiuto! Ho sporcato la camicia di vino!", None],
    ],
)
# Bind to all network interfaces rather than only localhost
demo.launch(server_name="0.0.0.0")