Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import base64 | |
| import json | |
| import logging | |
| import os | |
| from pathlib import Path | |
| from typing import Any | |
| import dotenv | |
| import gradio as gr | |
| import requests | |
| from gradio.components.chatbot import ( | |
| FileDataDict, | |
| FileMessageDict, | |
| NormalizedMessageContent, | |
| NormalizedMessageDict, | |
| TextMessageDict, | |
| ) | |
| from gradio.components.multimodal_textbox import MultimodalValue | |
# Root logger, shared by the whole module.
logger = logging.getLogger()

# OpenRouter chat-completions endpoint and the model requested from it.
API_URL = "https://openrouter.ai/api/v1/chat/completions"
MODEL = "google/gemini-2.5-flash-lite-preview-09-2025"

# System prompt, read once at import time (path is relative to the working
# directory). Decoded explicitly as UTF-8 so that non-ASCII characters in the
# prompt do not depend on the platform's locale encoding.
SYSTEM_PROMPT = Path("system-prompt.md").read_text(encoding="utf-8").strip()

# File extensions (lower-case, without the leading dot) accepted as audio input.
AUDIO_FORMATS = {"wav", "mp3", "m4a", "flac"}
def chat_fn(user_msg: MultimodalValue, history: list[NormalizedMessageDict], api_key: str | None) -> str:
    """Answer one chat turn by forwarding the conversation to the model API.

    Args:
        user_msg: The current multimodal message; its optional ``"text"`` and
            ``"files"`` entries are read.
        history: Earlier messages of the conversation, sent to the model in
            the order given.
        api_key: Value typed into the API-key box. When it equals the
            ``PASSWORD`` environment variable, the ``OPENROUTER_API_KEY``
            environment variable is used in its place.

    Returns:
        The stripped text of the first choice in the API response (empty if
        the model returned no text), or ``"Boh!"`` when no API key was given.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        ValueError: If an attached file has an unsupported extension (raised
            by ``history_to_messages``) or a successful response is not JSON.
        KeyError: If the password matched but ``OPENROUTER_API_KEY`` is unset.
    """
    # The history is logged in the same order it is sent to the model below;
    # reversing it here would contradict the "oldest first" label.
    logger.info("History (oldest first):\n%s", json.dumps(history, indent=2))
    logger.info("User message:\n%s", json.dumps(user_msg, indent=2))

    # Determine API key
    if not api_key:
        return "Boh!"
    # A missing PASSWORD variable must not break users who bring their own key.
    password = os.environ.get("PASSWORD")
    if password and api_key == password:
        api_key = os.environ["OPENROUTER_API_KEY"]

    # Build message history including system prompt, conversation history, and current user message
    user_content: list[NormalizedMessageContent] = []
    text = (user_msg.get("text") or "").strip()
    if text:
        user_content.append(TextMessageDict(type="text", text=text))
    for path in user_msg.get("files") or []:
        user_content.append(FileMessageDict(type="file", file=FileDataDict(path=path)))
    user_msg_dict = NormalizedMessageDict(role="user", content=user_content)
    conversation = [
        NormalizedMessageDict(role="system", content=[TextMessageDict(type="text", text=SYSTEM_PROMPT)]),
        *history,
        user_msg_dict,
    ]

    # Call the model API
    payload = {
        "model": MODEL,
        "messages": history_to_messages(conversation),
        "max_tokens": 4096,
        "temperature": 0.2,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    response = requests.post(API_URL, headers=headers, json=payload, timeout=30)

    # Parse the body once. An error response is not guaranteed to be JSON, so a
    # decode failure here must not hide the HTTP error raised just below.
    try:
        body = response.json()
    except ValueError:
        body = None
        logger.info("Response (not JSON):\n%s", response.text)
    else:
        logger.info("Response:\n%s", json.dumps(body, indent=2))

    response.raise_for_status()
    if body is None:
        # Successful status but undecodable body: surface the decode error.
        body = response.json()

    # The API may return a null content; treat it as an empty reply.
    content = body["choices"][0]["message"]["content"]
    return (content or "").strip()
def history_to_messages(history: list[NormalizedMessageDict]) -> list[dict[str, Any]]:
    """
    Convert chat history into the message list expected by openrouter.

    Each message keeps its role. Content entries of type "file" are rewritten
    as inline audio; every other entry is passed through as-is.

    A file entry such as:
        {
            "type": "file",
            "file": {"path": "file.wav"}
        }
    becomes:
        {
            "type": "input_audio",
            "input_audio": {
                "data": "<base64-encoded-audio>",
                "format": "wav",
            }
        }

    Raises:
        ValueError: If a file's extension is not listed in AUDIO_FORMATS.
    """

    def to_openrouter(entry: NormalizedMessageContent) -> dict[str, Any]:
        # Anything that is not a file is forwarded untouched (same object).
        if entry["type"] != "file":
            return entry  # pyright: ignore[reportReturnType]

        audio_path = Path(entry["file"]["path"])
        # The lower-cased extension without its dot doubles as the format name.
        suffix = audio_path.suffix.lstrip(".").lower()
        if suffix not in AUDIO_FORMATS:
            raise ValueError(f"Unsupported file format: {suffix}")

        encoded_audio = file_to_base64(audio_path)
        return {
            "type": "input_audio",
            "input_audio": {"data": encoded_audio, "format": suffix},
        }

    messages: list[dict[str, Any]] = []
    for message in history:
        role = message["role"]
        converted = [to_openrouter(entry) for entry in message["content"]]
        messages.append({"role": role, "content": converted})
    return messages
def file_to_base64(path: str | Path) -> str:
    """Return the contents of the file at *path* as a base64-encoded string."""
    raw_bytes = Path(path).read_bytes()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
# Set up logging (INFO level on the root logger; warnings are routed to logging too)
logging.basicConfig(level="INFO", format="%(asctime)s %(levelname)s: %(message)s")
logging.captureWarnings(True)

# Load environment variables from .env file
dotenv.load_dotenv()

# Chat (top-level demo variable to allow live reloading)
demo = gr.ChatInterface(
    chat_fn,
    multimodal=True,
    chatbot=gr.Chatbot(placeholder="Ready!"),
    # Input box restricted to a single audio attachment recorded from the microphone.
    textbox=gr.MultimodalTextbox(
        placeholder="Your message",
        file_count="single",
        file_types=["audio"],
        sources=["microphone"],
    ),
    # The value of this textbox is passed to chat_fn as its third argument (api_key).
    additional_inputs=[
        gr.Textbox(type="password", label="Openrouter API Key"),
    ],
    additional_inputs_accordion=gr.Accordion("Options", open=True),
    title="Mamma AI",
    # User-facing Italian text; "inserici" was a typo for "inserisci".
    description=(
        "Parla con la mamma più saggia del mondo! Puoi inviare messaggi di testo o audio.\n\n"
        "Prima di usare la chat, inserisci un'API key di [Openrouter](https://openrouter.ai/) "
        "(oppure la password segreta)."
    ),
    autofocus=True,
    # Each example is [message, api_key]; the key is left empty.
    examples=[
        ["È meglio lavare i piatti a mano o in lavastoviglie?", None],
        ["Aiuto! Ho sporcato la camicia di vino!", None],
    ],
)

# Start the server, listening on all network interfaces.
demo.launch(server_name="0.0.0.0")