# Source: Hugging Face Space "MCP-1st-Birthday/GameSmith" (status: Running).
# File size: 9,054 bytes.

import gradio as gr
import os
import base64
from io import BytesIO
from PIL import Image
import tempfile
from tools import generate_pixel_character, animate_pixel_character, extract_sprite_frames

# --- Helper Functions for Gradio Logic ---

def process_generate_sprite(prompt, ref_img):
    """
    Generates a static 2D sprite character based on a text description in any art style.

    Args:
        prompt: Description of the character and style (e.g., "A cute cat wizard, cartoon style" or "anime cat hero").
        ref_img: Optional reference image to influence style. May be a file path,
            a PIL image (any object exposing ``save``), or an array accepted by
            ``PIL.Image.fromarray``. ``None`` means no reference is sent.

    Returns:
        A tuple of the generated sprite as a PIL image and its base64 encoding.

    Raises:
        gr.Error: Wraps any failure (unsupported reference type, unreadable file,
            generator error) so Gradio can surface the message to the user.
    """
    try:
        ref_b64 = None
        if ref_img is not None:
            if isinstance(ref_img, str):
                # A string is treated as a path to an already-encoded image file.
                with open(ref_img, "rb") as f:
                    ref_b64 = base64.b64encode(f.read()).decode('utf-8')
            else:
                if not hasattr(ref_img, "save"):
                    # Not a PIL image: try the array route instead of silently
                    # dropping the reference the caller supplied.
                    try:
                        ref_img = Image.fromarray(ref_img)
                    except (AttributeError, TypeError) as exc:
                        raise ValueError(
                            f"Unsupported reference image type: {type(ref_img).__name__}"
                        ) from exc
                buffered = BytesIO()
                ref_img.save(buffered, format="PNG")
                ref_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')

        b64_result = generate_pixel_character(prompt, ref_b64)

        # Decode the generator's base64 output back into a PIL image for display.
        img_data = base64.b64decode(b64_result)
        return Image.open(BytesIO(img_data)), b64_result
    except Exception as e:
        # Keep the original exception attached as the cause for server-side logs.
        raise gr.Error(str(e)) from e

def process_animate_sprite(sprite_img, animation_type, extra_prompt):
    """
    Animates a static 2D sprite using Google's Veo model.

    Args:
        sprite_img: The input static sprite image. May be a file path, a PIL image
            (any object exposing ``save``), an array accepted by
            ``PIL.Image.fromarray``, or a tuple whose first element is one of
            those (presumably a ``(path, meta)`` pair — confirm against Gradio).
        animation_type: Type of animation - one of "idle", "walk", "run", "jump".
        extra_prompt: Optional additional instructions for the motion.

    Returns:
        A tuple of the generated animation video path (a temporary ``.mp4`` file
        that is not deleted automatically) and the video's base64 encoding.

    Raises:
        gr.Error: Wraps any failure (missing or unsupported image, unreadable
            file, animation error) so Gradio can surface the message to the user.
    """
    try:
        # Unwrap tuple inputs so the payload goes through the normal branches
        # below instead of falling into the array conversion and failing there.
        if isinstance(sprite_img, tuple):
            sprite_img = sprite_img[0] if sprite_img else None

        if sprite_img is None:
            raise ValueError("Please provide a sprite image first.")

        if isinstance(sprite_img, str):
            # Path provided by a Gradio example or upload: encode the file bytes as-is.
            with open(sprite_img, "rb") as f:
                sprite_b64 = base64.b64encode(f.read()).decode('utf-8')
        else:
            if not hasattr(sprite_img, "save"):
                # Not a PIL image: assume an array and convert it, turning an
                # unsupported type into a readable error message.
                try:
                    sprite_img = Image.fromarray(sprite_img)
                except (AttributeError, TypeError) as exc:
                    raise ValueError(
                        f"Unsupported sprite image type: {type(sprite_img).__name__}"
                    ) from exc
            buffered = BytesIO()
            sprite_img.save(buffered, format="PNG")
            sprite_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')

        video_b64 = animate_pixel_character(sprite_b64, animation_type, extra_prompt)

        # Write the video to a temp file for Gradio to display; delete=False keeps
        # the file on disk after the handle is closed.
        video_bytes = base64.b64decode(video_b64)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
            f.write(video_bytes)
            video_path = f.name

        return video_path, video_b64
    except Exception as e:
        # Keep the original exception attached as the cause for server-side logs.
        raise gr.Error(str(e)) from e

def process_extract_frames(video_file, fps):
    """
    Extracts frames from an MP4 video animation and returns them as individual images and a ZIP file.

    Args:
        video_file: The input MP4 video file path.
        fps: Frames per second to extract (default 8).

    Returns:
        A gallery of extracted frames and a ZIP file containing all frames.
    """
    try:
        if video_file is None:
            raise ValueError("Please upload a video file.")

        # Encode the raw video bytes as base64 text for the extraction helper.
        with open(video_file, "rb") as source:
            encoded_video = base64.b64encode(source.read()).decode('utf-8')

        archive_b64, encoded_frames = extract_sprite_frames(encoded_video, fps)

        # Write the archive to a temp file that stays on disk (delete=False)
        # so its path can be returned for download.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as archive:
            archive.write(base64.b64decode(archive_b64))
            archive_path = archive.name

        # Decode every frame into a PIL image for the gallery.
        frames = [
            Image.open(BytesIO(base64.b64decode(frame_b64)))
            for frame_b64 in encoded_frames
        ]

        return frames, archive_path
    except Exception as e:
        raise gr.Error(str(e))


# --- Gradio UI Layout ---

# Three-tab workflow (generate -> animate -> extract frames). Each tab's result
# component is forwarded to the next tab's input via a `.change` listener.
with gr.Blocks(title="GameSmith AI - Game Asset Studio") as demo:
    # Page header.
    gr.Markdown(
        """
        # 🎮 GameSmith AI
        ### The Intelligent Game Asset Studio
        
        Generate, animate, and export 2D game sprites in any art style using Google Gemini & Veo.
        *Built for the Hugging Face MCP 1st Birthday Hackathon.*
        """
    )
    
    # --- Tab 1: text prompt (+ optional style reference) -> sprite image ---
    with gr.Tab("1. Generate Sprite"):
        with gr.Row():
            # Left column: inputs.
            with gr.Column():
                prompt_input = gr.Textbox(
                    label="Character Description",
                    placeholder="A cute cat wearing a wizard hat, side view... (cartoon, anime, pixel art, etc.)",
                    lines=3
                )
                # type="pil": the handler receives a PIL image (or None when empty).
                ref_input = gr.Image(label="Style Reference (Optional)", type="pil")
                gen_btn = gr.Button("Generate Sprite", variant="primary")
            
            # Right column: outputs.
            with gr.Column():
                result_image = gr.Image(label="Generated Sprite", type="pil", interactive=False)
                # Hidden state to pass base64 to next tab if needed
                # (written by the click handler below; not read anywhere in this block).
                sprite_b64_state = gr.State()
        
        # process_generate_sprite returns (image, base64), matching the two outputs.
        gen_btn.click(
            process_generate_sprite,
            inputs=[prompt_input, ref_input],
            outputs=[result_image, sprite_b64_state],
            api_name="generate_pixel_character"
        )

    # --- Tab 2: sprite image -> animation video ---
    with gr.Tab("2. Animate"):
        with gr.Row():
            with gr.Column():
                # Allow user to use generated image or upload new
                anim_input_image = gr.Image(label="Input Sprite", type="pil")
                # Choices mirror the animation types listed in process_animate_sprite's docstring.
                anim_type = gr.Dropdown(
                    choices=["idle", "walk", "run", "jump"],
                    value="idle",
                    label="Animation Type"
                )
                extra_anim_prompt = gr.Textbox(
                    label="Motion Tweaks (Optional)",
                    placeholder="Make it bounce more..."
                )
                anim_btn = gr.Button("Animate", variant="primary")
            
            with gr.Column():
                result_video = gr.Video(label="Generated Animation", interactive=False)
                # Receives the video's base64 from the click handler below;
                # not read anywhere in this block.
                video_b64_state = gr.State()
        
        # Link previous tab result to this input
        # (identity function: copies the generated sprite into the Animate input).
        result_image.change(
            lambda x: x,
            inputs=[result_image],
            outputs=[anim_input_image]
        )
        
        # process_animate_sprite returns (video path, base64), matching the two outputs.
        anim_btn.click(
            process_animate_sprite,
            inputs=[anim_input_image, anim_type, extra_anim_prompt],
            outputs=[result_video, video_b64_state],
            api_name="animate_pixel_character"
        )

    # --- Tab 3: animation video -> frame gallery + ZIP download ---
    with gr.Tab("3. Extract Frames"):
        with gr.Row():
            with gr.Column():
                # Allow user to use generated video or upload new
                extract_input_video = gr.Video(label="Input Animation")
                # Slider default (8) matches the default stated in process_extract_frames' docstring.
                fps_slider = gr.Slider(minimum=4, maximum=24, value=8, step=1, label="FPS")
                extract_btn = gr.Button("Extract Frames", variant="primary")
            
            with gr.Column():
                frames_gallery = gr.Gallery(label="Sprite Sheet Frames")
                download_zip = gr.File(label="Download Sprite Sheet (ZIP)")
        
        # Link previous tab result
        # (identity function: copies the generated video into the Extract input).
        result_video.change(
            lambda x: x,
            inputs=[result_video],
            outputs=[extract_input_video]
        )
        
        # process_extract_frames returns (list of images, zip path), matching the two outputs.
        extract_btn.click(
            process_extract_frames,
            inputs=[extract_input_video, fps_slider],
            outputs=[frames_gallery, download_zip],
            api_name="extract_sprite_frames"
        )

    # --- Footer: MCP usage notes ---
    gr.Markdown("---")
    gr.Markdown("### 🤖 Model Context Protocol (MCP)")
    # NOTE(review): the signatures listed below resemble the helpers imported from
    # `tools` (base64 in/out), while the endpoints registered above take image/video
    # inputs and extra arguments — confirm which form the text should document.
    gr.Markdown(
        """
        This app doubles as an MCP Server! Connect it to Claude or Cursor to generate assets directly in your chat.
        
        **Tools Exposed:**
        - `generate_pixel_character(prompt)`
        - `animate_pixel_character(sprite_b64, animation_type)`
        - `extract_sprite_frames(video_b64)`
        """
    )

if __name__ == "__main__":
    # Launch the app only when this file is executed as a script.
    # launch() is called without arguments, so the MCP server is not enabled
    # explicitly here; per the original note it relies on the GRADIO_MCP_SERVER
    # env var or newer Gradio versions — TODO confirm for the deployed version.
    demo.launch()