ai-image-editor

Running on Zero

App Files Files Community

frogleo committed 10 days ago

Commit

56a627f

verified ·

1 Parent(s): 7a45fb3

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -5

app.py CHANGED Viewed

@@ -29,16 +29,13 @@ hf_client = InferenceClient(
 VLM_MODEL = "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"
 SYSTEM_PROMPT_TEXT_ONLY = """You are an expert prompt engineer for FLUX.2 by Black Forest Labs. Rewrite user prompts to be more descriptive while strictly preserving their core subject and intent.
 Guidelines:
 1. Structure: Keep structured inputs structured (enhance within fields). Convert natural language to detailed paragraphs.
 2. Details: Add concrete visual specifics - form, scale, textures, materials, lighting (quality, direction, color), shadows, spatial relationships, and environmental context.
 3. Text in Images: Put ALL text in quotation marks, matching the prompt's language. Always provide explicit quoted text for objects that would contain text in reality (signs, labels, screens, etc.) - without it, the model generates gibberish.
 Output only the revised prompt and nothing else."""
 SYSTEM_PROMPT_WITH_IMAGES = """You are FLUX.2 by Black Forest Labs, an image-editing expert. You convert editing requests into one concise instruction (50-80 words, ~30 for brief requests).
 Rules:
 - Single instruction only, no commentary
 - Use clear, analytical language (avoid "whimsical," "cascading," etc.)
@@ -47,7 +44,6 @@ Rules:
 - Turn negatives into positives ("don't change X" → "keep X")
 - Make abstractions concrete ("futuristic" → "glowing cyan neon, metallic panels")
 - Keep content PG-13
 Output only the final instruction in plain text and nothing else."""
 def remote_text_encoder(prompts):
@@ -81,7 +77,11 @@ pipe = Flux2Pipeline.from_pretrained(
 pipe.to(device)
 # Pull pre-compiled Flux2 Transformer blocks from HF hub
-spaces.aoti_blocks_load(pipe.transformer, "zerogpu-aoti/FLUX.2", variant="fa3")
 def image_to_data_uri(img):
     buffered = io.BytesIO()

 VLM_MODEL = "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"
 SYSTEM_PROMPT_TEXT_ONLY = """You are an expert prompt engineer for FLUX.2 by Black Forest Labs. Rewrite user prompts to be more descriptive while strictly preserving their core subject and intent.
 Guidelines:
 1. Structure: Keep structured inputs structured (enhance within fields). Convert natural language to detailed paragraphs.
 2. Details: Add concrete visual specifics - form, scale, textures, materials, lighting (quality, direction, color), shadows, spatial relationships, and environmental context.
 3. Text in Images: Put ALL text in quotation marks, matching the prompt's language. Always provide explicit quoted text for objects that would contain text in reality (signs, labels, screens, etc.) - without it, the model generates gibberish.
 Output only the revised prompt and nothing else."""
 SYSTEM_PROMPT_WITH_IMAGES = """You are FLUX.2 by Black Forest Labs, an image-editing expert. You convert editing requests into one concise instruction (50-80 words, ~30 for brief requests).
 Rules:
 - Single instruction only, no commentary
 - Use clear, analytical language (avoid "whimsical," "cascading," etc.)
 - Turn negatives into positives ("don't change X" → "keep X")
 - Make abstractions concrete ("futuristic" → "glowing cyan neon, metallic panels")
 - Keep content PG-13
 Output only the final instruction in plain text and nothing else."""
 def remote_text_encoder(prompts):
 pipe.to(device)
 # Pull pre-compiled Flux2 Transformer blocks from HF hub
+# spaces.aoti_blocks_load(pipe.transformer, "zerogpu-aoti/FLUX.2", variant="fa3")
+# Disable compilation; use SDPA attention
+# pipe.transformer.config._attn_implementation = "sdpa"
+# spaces.aoti_blocks_load(pipe.transformer, "zerogpu-aoti/FLUX.2", variant="sdpa")
 def image_to_data_uri(img):
     buffered = io.BytesIO()