# mamma-ai / app.py — commit f900124
from __future__ import annotations
import base64
import json
import logging
import os
from pathlib import Path
from typing import Any
import dotenv
import gradio as gr
import requests
from gradio.components.chatbot import (
FileDataDict,
FileMessageDict,
NormalizedMessageContent,
NormalizedMessageDict,
TextMessageDict,
)
from gradio.components.multimodal_textbox import MultimodalValue
# Module-level logger; handlers/format are configured via logging.basicConfig()
# at the bottom of this file.
logger = logging.getLogger(__name__)

# OpenRouter chat-completions endpoint and the model used for every request.
API_URL = "https://openrouter.ai/api/v1/chat/completions"
MODEL = "google/gemini-2.5-flash-lite-preview-09-2025"

# System prompt loaded once at import time from a file next to this script.
# Explicit encoding avoids platform-dependent defaults (e.g. cp1252 on Windows).
SYSTEM_PROMPT = Path("system-prompt.md").read_text(encoding="utf-8").strip()

# Audio file extensions accepted by history_to_messages().
AUDIO_FORMATS = {"wav", "mp3", "m4a", "flac"}
def chat_fn(user_msg: MultimodalValue, history: list[NormalizedMessageDict], api_key: str | None) -> str:
    """Handle one chat turn: forward the conversation to OpenRouter and return the reply.

    Args:
        user_msg: Current user input (optional text plus optional audio file paths).
        history: Prior conversation turns, oldest first, in Gradio's normalized format.
        api_key: An OpenRouter API key, or the shared secret password, or None/empty.

    Returns:
        The assistant's reply text, or a fallback message when no key is given.

    Raises:
        requests.HTTPError: If the OpenRouter API responds with an error status.
        ValueError: If a successful response does not contain JSON.
    """
    # Lazy %-style args so json.dumps only runs when INFO logging is enabled.
    # NOTE: the original reversed the history here while labelling it "oldest
    # first"; Gradio history is already oldest first, so log it as-is.
    logger.info("History (oldest first):\n%s", json.dumps(history, indent=2))
    logger.info("User message:\n%s", json.dumps(user_msg, indent=2))
    # Determine API key
    if not api_key:
        return "Boh!"
    # The shared secret password unlocks the server-side OpenRouter key.
    # .get() avoids a KeyError when PASSWORD is not configured in the env.
    if api_key == os.environ.get("PASSWORD"):
        api_key = os.environ["OPENROUTER_API_KEY"]
    # Build message history including system prompt, conversation history, and current user message
    user_content: list[NormalizedMessageContent] = []
    # `or ""` guards against a missing or None "text" entry.
    if text := (user_msg.get("text") or "").strip():
        user_content.append(TextMessageDict(type="text", text=text))
    for path in user_msg.get("files", []):
        user_content.append(FileMessageDict(type="file", file=FileDataDict(path=path)))
    user_msg_dict = NormalizedMessageDict(role="user", content=user_content)
    history = [
        NormalizedMessageDict(role="system", content=[TextMessageDict(type="text", text=SYSTEM_PROMPT)]),
        *history,
        user_msg_dict,
    ]
    # Call the model API
    payload = {
        "model": MODEL,
        "messages": history_to_messages(history),
        "max_tokens": 4096,
        "temperature": 0.2,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
    # Decode the body exactly once, tolerating non-JSON error pages so that
    # raise_for_status() surfaces the real HTTP error instead of this line
    # raising a JSONDecodeError first.
    try:
        data = response.json()
    except ValueError:
        data = None
        logger.info("Non-JSON response (status %s):\n%s", response.status_code, response.text)
    else:
        logger.info("Response:\n%s", json.dumps(data, indent=2))
    response.raise_for_status()
    if data is None:
        raise ValueError("Expected a JSON response from the API")
    return data["choices"][0]["message"]["content"].strip()
def history_to_messages(history: list[NormalizedMessageDict]) -> list[dict[str, Any]]:
    """Convert Gradio chat history into the OpenRouter messages format.

    A file content entry such as::

        {"type": "file", "file": {"path": "file.wav"}}

    is rewritten to::

        {
            "type": "input_audio",
            "input_audio": {"data": "<base64-encoded-audio>", "format": "wav"},
        }

    All other entries (e.g. text) pass through unchanged.

    Raises:
        ValueError: If a file's extension is not in AUDIO_FORMATS.
    """

    def convert(entry: NormalizedMessageContent) -> dict[str, Any]:
        # Non-file entries already match the API schema.
        if entry["type"] != "file":
            return entry  # pyright: ignore[reportReturnType]
        audio_path = Path(entry["file"]["path"])
        suffix = audio_path.suffix.lstrip(".").lower()
        if suffix not in AUDIO_FORMATS:
            raise ValueError(f"Unsupported file format: {suffix}")
        return {
            "type": "input_audio",
            "input_audio": {
                "data": file_to_base64(audio_path),
                "format": suffix,
            },
        }

    messages: list[dict[str, Any]] = []
    for item in history:
        messages.append(
            {
                "role": item["role"],
                "content": [convert(part) for part in item["content"]],
            }
        )
    return messages
def file_to_base64(path: str | Path) -> str:
    """Return the contents of the file at *path* as a base64 string."""
    raw = Path(path).read_bytes()
    return base64.b64encode(raw).decode("utf-8")
# Set up logging: timestamped INFO-level output; route warnings through logging.
logging.basicConfig(level="INFO", format="%(asctime)s %(levelname)s: %(message)s")
logging.captureWarnings(True)

# Load environment variables (PASSWORD, OPENROUTER_API_KEY) from a .env file.
dotenv.load_dotenv()

# Chat (top-level demo variable to allow live reloading)
demo = gr.ChatInterface(
    chat_fn,
    multimodal=True,
    chatbot=gr.Chatbot(placeholder="Ready!"),
    textbox=gr.MultimodalTextbox(
        placeholder="Your message",
        file_count="single",
        file_types=["audio"],
        sources=["microphone"],
    ),
    additional_inputs=[
        gr.Textbox(type="password", label="Openrouter API Key"),
    ],
    additional_inputs_accordion=gr.Accordion("Options", open=True),
    title="Mamma AI",
    description="Parla con la mamma più saggia del mondo! Puoi inviare messaggi di testo o audio.\n\nPrima di usare la chat, inserisci un'API key di [Openrouter](https://openrouter.ai/) (oppure la password segreta).",
    autofocus=True,
    examples=[
        ["È meglio lavare i piatti a mano o in lavastoviglie?", None],
        ["Aiuto! Ho sporcato la camicia di vino!", None],
    ],
)

# Launch only when run as a script; Gradio reload mode and HF Spaces discover
# the top-level `demo` object themselves and must not double-launch on import.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")