Spaces:

celinah
/

text-to-image-to-video

Paused

App Files Files Community

celinah HF Staff committed on Jun 2

Commit

389b237

1 Parent(s): 1f0c608

video

Browse files

Files changed (2) hide show

app.py +210 -9
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import tempfile
 import gradio as gr
 import numpy as np
 import requests
@@ -9,6 +10,9 @@ from dotenv import load_dotenv
 from huggingface_hub import InferenceClient
 load_dotenv()
 MAX_SEED = np.iinfo(np.int32).max
@@ -39,14 +43,50 @@ def download_image_locally(image_url: str, local_path: str = "downloaded_image.p
     return local_path
-def login(oauth_token: gr.OAuthToken | None):
-    global TOKEN
     if oauth_token and oauth_token.token:
-        print("Received OAuth token, logging in...")
         TOKEN = oauth_token.token
     else:
-        print("No OAuth token provided, using environment variable HF_TOKEN.")
-        TOKEN = os.environ.get("HF_TOKEN")
 def generate(prompt: str, seed: int = 42, width: int = 1024, height: int = 1024, num_inference_steps: int = 25):
@@ -78,6 +118,79 @@ def generate(prompt: str, seed: int = 42, width: int = 1024, height: int = 1024,
     return image, seed
 examples = [
     "a tiny astronaut hatching from an egg on the moon",
     "a cat holding a sign that says hello world",
@@ -98,8 +211,14 @@ with gr.Blocks(css=css) as demo:
         gr.Markdown(
             "This Space showcases the black‑forest‑labs/FLUX.1‑dev model, served by the nebius API. Sign in with your Hugging Face account to use this API."
         )
-        button = gr.LoginButton("Sign in")
-        button.click(fn=login, inputs=[], outputs=[])
     with gr.Column(elem_id="col-container"):
         gr.Markdown(
             """# FLUX.1 [schnell] with fal‑ai through HF Inference Providers ⚡\nLearn more about HF Inference Providers [here](https://huggingface.co/docs/inference-providers/index)"""
@@ -115,9 +234,9 @@ with gr.Blocks(css=css) as demo:
             )
             run_button = gr.Button("Run", scale=0)
-        result = gr.Image(label="Result", show_label=False, format="png")
         download_btn = gr.DownloadButton(
-            label="Download result",
             visible=False,
             value=None,
             variant="primary",
@@ -164,12 +283,94 @@ with gr.Blocks(css=css) as demo:
             cache_examples="lazy",
         )
         run_button.click(
             fn=generate,
             inputs=[prompt, seed_slider, width_slider, height_slider, steps_slider],
             outputs=[result, seed_number],
         )
         with gr.Accordion("Download Image from URL", open=False):
             image_url_input = gr.Text(label="Image URL", placeholder="Enter image URL (e.g., http://.../image.png)")
             filename_input = gr.Text(

 import os
 import tempfile
+import fal_client
 import gradio as gr
 import numpy as np
 import requests
 from huggingface_hub import InferenceClient
+FAL_KEY = os.environ.get("FAL_KEY")  # Load FAL_KEY
 load_dotenv()
 MAX_SEED = np.iinfo(np.int32).max
     return local_path
+def login(oauth_token: gr.OAuthToken | None, fal_key_from_ui: str | None):
+    """
+    Login to Hugging Face and FAL.
+    Args:
+        oauth_token (gr.OAuthToken | None): The OAuth token from Hugging Face.
+        fal_key_from_ui (str | None): The FAL key from the UI.
+    """
+    global TOKEN, FAL_KEY
     if oauth_token and oauth_token.token:
+        print("Received OAuth token, logging in for Hugging Face...")
         TOKEN = oauth_token.token
     else:
+        env_hf_token = os.environ.get("HF_TOKEN")
+        if env_hf_token:
+            TOKEN = env_hf_token
+            print("Using environment variable HF_TOKEN for Hugging Face.")
+        else:
+            print("No Hugging Face OAuth token received and HF_TOKEN environment variable not set.")
+    if fal_key_from_ui and fal_key_from_ui.strip():
+        FAL_KEY = fal_key_from_ui.strip()
+    elif os.environ.get("FAL_KEY"):
+        if FAL_KEY == os.environ.get("FAL_KEY"):
+            print("Using FAL_KEY from environment variable.")
+        else:
+            FAL_KEY = os.environ.get("FAL_KEY")
+            print("Using FAL_KEY from environment variable (UI input was blank).")
+            gr.Info("FAL_KEY has been set from environment variable.")
+    else:
+        print("FAL_KEY not provided in UI or environment.")
+        FAL_KEY = None
+    if not TOKEN:
+        gr.Warning("Hugging Face token not set. Image generation via HF Inference Providers might fail.")
+    else:
+        gr.Info("Hugging Face token is configured.")
+    if not FAL_KEY:
+        gr.Warning("FAL_KEY not set. Video generation will not work.")
+    else:
+        gr.Info("FAL_KEY is configured.")
 def generate(prompt: str, seed: int = 42, width: int = 1024, height: int = 1024, num_inference_steps: int = 25):
     return image, seed
+def generate_video_from_image(
+    image_filepath: str,  # This will be the path to the image from gr.Image output
+    video_prompt: str,
+    duration: str,  # "5" or "10"
+    aspect_ratio: str,  # "16:9", "9:16", "1:1"
+    video_negative_prompt: str,
+    cfg_scale_video: float,
+    progress=gr.Progress(track_tqdm=True),
+):
+    """
+    Generates a video from an image using fal-ai/kling-video API.
+    """
+    if not FAL_KEY:
+        gr.Error("FAL_KEY is not set. Cannot generate video.")
+        return None
+    if not image_filepath:
+        gr.Warning("No image provided to generate video from.")
+        return None
+    if not os.path.exists(image_filepath):
+        gr.Error(f"Image file not found at: {image_filepath}")
+        return None
+    print(f"Video generation started for image: {image_filepath}")
+    progress(0, desc="Preparing for video generation...")
+    try:
+        progress(0.1, desc="Uploading image...")
+        print("Uploading image to fal.ai storage...")
+        image_url = fal_client.upload_file(image_filepath)
+        print(f"Image uploaded, URL: {image_url}")
+        progress(0.3, desc="Image uploaded. Submitting video request...")
+        def on_queue_update(update):
+            if isinstance(update, fal_client.InProgress):
+                if update.logs:
+                    for log in update.logs:
+                        print(f"[fal-ai log] {log['message']}")
+                        # Try to update progress description if logs are available
+                        # progress(progress.current_progress_value, desc=f"Video processing: {log['message'][:50]}...")
+        print("Subscribing to fal-ai/kling-video/v2.1/master/image-to-video...")
+        api_result = fal_client.subscribe(
+            "fal-ai/kling-video/v2.1/master/image-to-video",
+            arguments={
+                "prompt": video_prompt,
+                "image_url": image_url,
+                "duration": duration,
+                "aspect_ratio": aspect_ratio,
+                "negative_prompt": video_negative_prompt,
+                "cfg_scale": cfg_scale_video,
+            },
+            with_logs=True,  # Get logs
+            on_queue_update=on_queue_update,  # Callback for logs
+        )
+        progress(0.9, desc="Video processing complete.")
+        video_output_url = api_result.get("video", {}).get("url")
+        if video_output_url:
+            print(f"Video generated successfully: {video_output_url}")
+            progress(1, desc="Video ready!")
+            return video_output_url
+        else:
+            print(f"Video generation failed or no URL in response. API Result: {api_result}")
+            gr.Error("Video generation failed or no video URL returned.")
+            return None
+    except Exception as e:
+        print(f"Error during video generation: {e}")
+        gr.Error(f"An error occurred: {str(e)}")
+        return None
 examples = [
     "a tiny astronaut hatching from an egg on the moon",
     "a cat holding a sign that says hello world",
         gr.Markdown(
             "This Space showcases the black‑forest‑labs/FLUX.1‑dev model, served by the nebius API. Sign in with your Hugging Face account to use this API."
         )
+        hf_login_button = gr.LoginButton("Sign in")
+        fal_key_input = gr.Textbox(
+            label="FAL_KEY",
+            placeholder="Enter your FAL API Key here",
+            type="password",
+            value=FAL_KEY if FAL_KEY else "",  # Pre-fill if loaded from env
+        )
+        hf_login_button.click(fn=login, inputs=[hf_login_button, fal_key_input], outputs=None)
     with gr.Column(elem_id="col-container"):
         gr.Markdown(
             """# FLUX.1 [schnell] with fal‑ai through HF Inference Providers ⚡\nLearn more about HF Inference Providers [here](https://huggingface.co/docs/inference-providers/index)"""
             )
             run_button = gr.Button("Run", scale=0)
+        result = gr.Image(label="Generated Image", show_label=False, format="png", type="filepath")
         download_btn = gr.DownloadButton(
+            label="Download result image",
             visible=False,
             value=None,
             variant="primary",
             cache_examples="lazy",
         )
+        def update_image_outputs(image_pil, seed_val):
+            return {
+                result: image_pil,
+                seed_number: seed_val,
+                download_btn: gr.DownloadButton(value=image_pil, visible=True)
+                if image_pil
+                else gr.DownloadButton(visible=False),
+            }
         run_button.click(
             fn=generate,
             inputs=[prompt, seed_slider, width_slider, height_slider, steps_slider],
             outputs=[result, seed_number],
+        ).then(
+            lambda img_path, vid_accordion, vid_btn: {  # Make video section interactive
+                vid_accordion: gr.Accordion(open=True, interactive=True),
+                vid_btn: gr.Button(interactive=True),
+            },
+            inputs=[result],
+            outputs=[],
+        )
+        # Player that displays the generated video; it is created before the
+        # settings accordion.
+        video_result_output = gr.Video(label="Generated Video", show_label=False)
+        # Video settings: the accordion starts collapsed and non-interactive.
+        with gr.Accordion("Video Generation from Image", open=False, interactive=False) as video_gen_accordion:
+            video_prompt_input = gr.Text(
+                label="Prompt for Video",
+                placeholder="Describe the animation or changes for the video (e.g., 'camera zooms out slowly')",
+                value="A gentle breeze rustles the leaves, subtle camera movement.",  # Default prompt
+            )
+            with gr.Row():
+                video_duration_input = gr.Dropdown(label="Duration (seconds)", choices=["5", "10"], value="5")
+                video_aspect_ratio_input = gr.Dropdown(
+                    label="Aspect Ratio",
+                    choices=["16:9", "9:16", "1:1"],
+                    value="16:9",  # Default from API
+                )
+            video_negative_prompt_input = gr.Text(
+                label="Negative Prompt for Video",
+                value="blur, distort, low quality",  # Default from API
+            )
+            video_cfg_scale_input = gr.Slider(
+                label="CFG Scale for Video",
+                minimum=0.0,
+                maximum=10.0,
+                value=0.5,  # Default from the API docs; looks low for a 0-10 range -- TODO confirm it is not a typo
+                step=0.1,
+            )
+            # Starts disabled; the follow-up step of run_button enables it once an
+            # image path exists.
+            generate_video_btn = gr.Button("Generate Video", interactive=False)
+        # Wire the "Generate Video" button: the current image path and the video
+        # settings are passed to generate_video_from_image, and its return value is
+        # shown in the video player.
+        # The follow-up step of run_button needs video_gen_accordion and
+        # generate_video_btn, so it is attached further down, after these components
+        # have been defined.
+        generate_video_btn.click(
+            fn=generate_video_from_image,
+            inputs=[
+                result,  # gr.Image created with type="filepath", so the handler receives a path
+                video_prompt_input,
+                video_duration_input,
+                video_aspect_ratio_input,
+                video_negative_prompt_input,
+                video_cfg_scale_input,
+            ],
+            outputs=[video_result_output],
         )
+        # Now, correctly chain the .then() for the image generation button
+        run_button.click(
+            fn=generate,
+            inputs=[prompt, seed_slider, width_slider, height_slider, steps_slider],
+            outputs=[result, seed_number],
+        ).then(
+            # This function will run after 'generate' and will update the UI
+            # It receives the outputs of 'generate' as its inputs.
+            # We use `result` (the gr.Image component's output which is a filepath)
+            # to enable the video section.
+            lambda image_filepath: {  # image_filepath will be the path from the `result` gr.Image
+                video_gen_accordion: gr.Accordion(open=True, interactive=True if image_filepath else False),
+                generate_video_btn: gr.Button(interactive=True if image_filepath else False),
+                download_btn: gr.DownloadButton(value=image_filepath, visible=True if image_filepath else False),
+            },
+            inputs=[result],  # Input to this lambda is the output of `result` (gr.Image)
+            outputs=[video_gen_accordion, generate_video_btn, download_btn],
+        )
         with gr.Accordion("Download Image from URL", open=False):
             image_url_input = gr.Text(label="Image URL", placeholder="Enter image URL (e.g., http://.../image.png)")
             filename_input = gr.Text(

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 huggingface-hub
 numpy
-python-dotenv

 huggingface-hub
 numpy
+python-dotenv
+fal-client