Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
from PIL import Image
|
| 4 |
-
|
|
|
|
| 5 |
import os
|
| 6 |
from playwright.sync_api import sync_playwright
|
| 7 |
import time
|
| 8 |
import numpy as np
|
| 9 |
|
| 10 |
# --- Configuration ---
|
| 11 |
-
#
|
| 12 |
MODEL_OPTIONS = {
|
| 13 |
"Standard (BF16)": "Qwen/Qwen3-VL-2B-Instruct",
|
| 14 |
-
"Quantized (
|
| 15 |
}
|
| 16 |
|
| 17 |
# --- DETAILED PROMPT TEMPLATE ---
|
|
@@ -58,37 +59,34 @@ Describe the content of the sidebar, including any navigation, filters, or adver
|
|
| 58 |
def load_model(model_name):
|
| 59 |
"""Loads the specified model and processor from Hugging Face."""
|
| 60 |
model_id = MODEL_OPTIONS[model_name]
|
|
|
|
| 61 |
|
| 62 |
-
|
| 63 |
-
# MUST return a value for all 4 outputs.
|
| 64 |
-
yield f"Status: Loading {model_name} model ({model_id})... Please wait.", gr.update(interactive=False), None, None
|
| 65 |
-
|
| 66 |
-
model = None
|
| 67 |
-
processor = None
|
| 68 |
-
|
| 69 |
try:
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
| 71 |
model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 72 |
model_id,
|
| 73 |
-
|
| 74 |
device_map="auto",
|
| 75 |
trust_remote_code=True
|
| 76 |
)
|
| 77 |
else:
|
|
|
|
| 78 |
model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 79 |
model_id,
|
| 80 |
device_map="auto",
|
| 81 |
trust_remote_code=True
|
| 82 |
)
|
|
|
|
|
|
|
| 83 |
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
|
| 84 |
|
| 85 |
-
# Final yield on success: Update status, enable button, and return the loaded model/processor.
|
| 86 |
-
# MUST return a value for all 4 outputs.
|
| 87 |
yield f"Status: {model_name} model loaded successfully.", gr.update(interactive=True), model, processor
|
| 88 |
|
| 89 |
except Exception as e:
|
| 90 |
-
# Final yield on error: Update status, enable button, return None for model/processor.
|
| 91 |
-
# MUST return a value for all 4 outputs.
|
| 92 |
yield f"Status: Error loading model: {e}", gr.update(interactive=True), None, None
|
| 93 |
|
| 94 |
# --- Playwright Screenshot Function ---
|
|
@@ -153,7 +151,11 @@ with gr.Blocks() as demo:
|
|
| 153 |
|
| 154 |
with gr.Accordion("Controls", open=True):
|
| 155 |
with gr.Row():
|
| 156 |
-
model_selector = gr.Radio(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
load_model_button = gr.Button("Load/Switch Model")
|
| 158 |
status_text = gr.Textbox(label="Status", value="Status: No model loaded.", interactive=False)
|
| 159 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
from PIL import Image
|
| 4 |
+
# --- ADDED: Import BitsAndBytesConfig for CPU quantization ---
|
| 5 |
+
from transformers import Qwen3VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
|
| 6 |
import os
|
| 7 |
from playwright.sync_api import sync_playwright
|
| 8 |
import time
|
| 9 |
import numpy as np
|
| 10 |
|
| 11 |
# --- Configuration ---
# --- CHANGED: Updated model options to reflect INT8 CPU quantization ---
# Maps the UI radio label -> Hugging Face model id. Both entries point at the
# same checkpoint on purpose: the "Quantized" label does not select a different
# repo, it triggers the 8-bit loading path inside load_model().
MODEL_OPTIONS = {
    "Standard (BF16)": "Qwen/Qwen3-VL-2B-Instruct",
    "Quantized (INT8) - Faster on CPU": "Qwen/Qwen3-VL-2B-Instruct",  # We use the same base model for quantization
}
|
| 17 |
|
| 18 |
# --- DETAILED PROMPT TEMPLATE ---
|
|
|
|
| 59 |
def load_model(model_name):
    """Load the selected model and processor from Hugging Face.

    Generator used as a Gradio event handler: every ``yield`` MUST produce a
    value for all four outputs bound to this event —
    (status_text, load-button update, model state, processor state).

    Args:
        model_name: A key of ``MODEL_OPTIONS`` choosing which variant to load.

    Yields:
        tuple: (status message, ``gr.update`` for the load button,
        model or ``None``, processor or ``None``).
    """
    model_id = MODEL_OPTIONS[model_name]

    # Progress yield: show status and disable the button while loading.
    yield f"Status: Loading {model_name} ({model_id})... Please wait.", gr.update(interactive=False), None, None

    model, processor = None, None
    try:
        # Shared kwargs for both variants. torch_dtype="auto" honors the
        # checkpoint's native dtype (bfloat16) instead of transformers'
        # float32 default — without it the "Standard (BF16)" option would
        # actually load in FP32.
        load_kwargs = {
            "device_map": "auto",
            "trust_remote_code": True,
            "torch_dtype": "auto",
        }
        if "Quantized" in model_name:
            # 8-bit quantization via bitsandbytes.
            # NOTE(review): classic bitsandbytes load_in_8bit requires CUDA;
            # on a CPU-only host this load may raise and surface through the
            # except path below — confirm the bitsandbytes CPU backend is
            # available before relying on the "Faster on CPU" label.
            load_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)

        model = Qwen3VLForConditionalGeneration.from_pretrained(model_id, **load_kwargs)

        # The processor is the same for both versions.
        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

        # Success yield: re-enable the button and hand model/processor to state.
        yield f"Status: {model_name} model loaded successfully.", gr.update(interactive=True), model, processor

    except Exception as e:
        # UI boundary handler: report any load failure in the status box and
        # re-enable the button so the user can retry; state stays None.
        yield f"Status: Error loading model: {e}", gr.update(interactive=True), None, None
|
| 91 |
|
| 92 |
# --- Playwright Screenshot Function ---
|
|
|
|
| 151 |
|
| 152 |
with gr.Accordion("Controls", open=True):
    with gr.Row():
        # Radio choices are the MODEL_OPTIONS labels; the selected label is
        # what load_model() receives and maps back to a model id.
        model_selector = gr.Radio(
            choices=list(MODEL_OPTIONS.keys()),
            value="Quantized (INT8) - Faster on CPU",  # Default to the faster option
            label="Select Model"
        )
        load_model_button = gr.Button("Load/Switch Model")
        # Read-only status line driven by load_model()'s status yields.
        status_text = gr.Textbox(label="Status", value="Status: No model loaded.", interactive=False)
|
| 161 |
|