broadfield-dev committed on
Commit
2d4657b
·
verified ·
1 Parent(s): 2f41c97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -17
app.py CHANGED
@@ -1,17 +1,18 @@
1
  import gradio as gr
2
  import torch
3
  from PIL import Image
4
- from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
 
5
  import os
6
  from playwright.sync_api import sync_playwright
7
  import time
8
  import numpy as np
9
 
10
  # --- Configuration ---
11
- # Define the model options
12
  MODEL_OPTIONS = {
13
  "Standard (BF16)": "Qwen/Qwen3-VL-2B-Instruct",
14
- "Quantized (FP8) - Faster": "Qwen/Qwen3-VL-2B-Instruct-FP8",
15
  }
16
 
17
  # --- DETAILED PROMPT TEMPLATE ---
@@ -58,37 +59,34 @@ Describe the content of the sidebar, including any navigation, filters, or adver
58
  def load_model(model_name):
59
  """Loads the specified model and processor from Hugging Face."""
60
  model_id = MODEL_OPTIONS[model_name]
 
61
 
62
- # First yield: Update status to loading, disable button, clear previous model state.
63
- # MUST return a value for all 4 outputs.
64
- yield f"Status: Loading {model_name} model ({model_id})... Please wait.", gr.update(interactive=False), None, None
65
-
66
- model = None
67
- processor = None
68
-
69
  try:
70
- if "FP8" in model_id:
 
 
 
71
  model = Qwen3VLForConditionalGeneration.from_pretrained(
72
  model_id,
73
- torch_dtype=torch.float8_e4m3fn,
74
  device_map="auto",
75
  trust_remote_code=True
76
  )
77
  else:
 
78
  model = Qwen3VLForConditionalGeneration.from_pretrained(
79
  model_id,
80
  device_map="auto",
81
  trust_remote_code=True
82
  )
 
 
83
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
84
 
85
- # Final yield on success: Update status, enable button, and return the loaded model/processor.
86
- # MUST return a value for all 4 outputs.
87
  yield f"Status: {model_name} model loaded successfully.", gr.update(interactive=True), model, processor
88
 
89
  except Exception as e:
90
- # Final yield on error: Update status, enable button, return None for model/processor.
91
- # MUST return a value for all 4 outputs.
92
  yield f"Status: Error loading model: {e}", gr.update(interactive=True), None, None
93
 
94
  # --- Playwright Screenshot Function ---
@@ -153,7 +151,11 @@ with gr.Blocks() as demo:
153
 
154
  with gr.Accordion("Controls", open=True):
155
  with gr.Row():
156
- model_selector = gr.Radio(choices=list(MODEL_OPTIONS.keys()), value="Quantized (FP8) - Faster", label="Select Model")
 
 
 
 
157
  load_model_button = gr.Button("Load/Switch Model")
158
  status_text = gr.Textbox(label="Status", value="Status: No model loaded.", interactive=False)
159
 
 
1
  import gradio as gr
2
  import torch
3
  from PIL import Image
4
+ # --- ADDED: Import BitsAndBytesConfig for CPU quantization ---
5
+ from transformers import Qwen3VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
6
  import os
7
  from playwright.sync_api import sync_playwright
8
  import time
9
  import numpy as np
10
 
11
  # --- Configuration ---
12
+ # --- CHANGED: Updated model options to reflect INT8 CPU quantization ---
13
  MODEL_OPTIONS = {
14
  "Standard (BF16)": "Qwen/Qwen3-VL-2B-Instruct",
15
+ "Quantized (INT8) - Faster on CPU": "Qwen/Qwen3-VL-2B-Instruct", # We use the same base model for quantization
16
  }
17
 
18
  # --- DETAILED PROMPT TEMPLATE ---
 
59
  def load_model(model_name):
60
  """Loads the specified model and processor from Hugging Face."""
61
  model_id = MODEL_OPTIONS[model_name]
62
+ yield f"Status: Loading {model_name} ({model_id})... Please wait.", gr.update(interactive=False), None, None
63
 
64
+ model, processor = None, None
 
 
 
 
 
 
65
  try:
66
+ # --- CHANGED: New logic for CPU-compatible quantization ---
67
+ if "Quantized" in model_name:
68
+ # Use bitsandbytes for 8-bit quantization on CPU
69
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
70
  model = Qwen3VLForConditionalGeneration.from_pretrained(
71
  model_id,
72
+ quantization_config=quantization_config,
73
  device_map="auto",
74
  trust_remote_code=True
75
  )
76
  else:
77
+ # Standard loading for the full-precision model
78
  model = Qwen3VLForConditionalGeneration.from_pretrained(
79
  model_id,
80
  device_map="auto",
81
  trust_remote_code=True
82
  )
83
+
84
+ # The processor is the same for both versions
85
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
86
 
 
 
87
  yield f"Status: {model_name} model loaded successfully.", gr.update(interactive=True), model, processor
88
 
89
  except Exception as e:
 
 
90
  yield f"Status: Error loading model: {e}", gr.update(interactive=True), None, None
91
 
92
  # --- Playwright Screenshot Function ---
 
151
 
152
  with gr.Accordion("Controls", open=True):
153
  with gr.Row():
154
+ model_selector = gr.Radio(
155
+ choices=list(MODEL_OPTIONS.keys()),
156
+ value="Quantized (INT8) - Faster on CPU", # Default to the faster option
157
+ label="Select Model"
158
+ )
159
  load_model_button = gr.Button("Load/Switch Model")
160
  status_text = gr.Textbox(label="Status", value="Status: No model loaded.", interactive=False)
161