broadfield-dev committed on
Commit
2d4657b
·
verified ·
1 Parent(s): 2f41c97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -17
app.py CHANGED
@@ -1,17 +1,18 @@
1
  import gradio as gr
2
  import torch
3
  from PIL import Image
4
- from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
 
5
  import os
6
  from playwright.sync_api import sync_playwright
7
  import time
8
  import numpy as np
9
 
10
  # --- Configuration ---
11
- # Define the model options
12
  MODEL_OPTIONS = {
13
  "Standard (BF16)": "Qwen/Qwen3-VL-2B-Instruct",
14
- "Quantized (FP8) - Faster": "Qwen/Qwen3-VL-2B-Instruct-FP8",
15
  }
16
 
17
  # --- DETAILED PROMPT TEMPLATE ---
@@ -58,37 +59,34 @@ Describe the content of the sidebar, including any navigation, filters, or adver
58
  def load_model(model_name):
59
  """Loads the specified model and processor from Hugging Face."""
60
  model_id = MODEL_OPTIONS[model_name]
 
61
 
62
- # First yield: Update status to loading, disable button, clear previous model state.
63
- # MUST return a value for all 4 outputs.
64
- yield f"Status: Loading {model_name} model ({model_id})... Please wait.", gr.update(interactive=False), None, None
65
-
66
- model = None
67
- processor = None
68
-
69
  try:
70
- if "FP8" in model_id:
 
 
 
71
  model = Qwen3VLForConditionalGeneration.from_pretrained(
72
  model_id,
73
- torch_dtype=torch.float8_e4m3fn,
74
  device_map="auto",
75
  trust_remote_code=True
76
  )
77
  else:
 
78
  model = Qwen3VLForConditionalGeneration.from_pretrained(
79
  model_id,
80
  device_map="auto",
81
  trust_remote_code=True
82
  )
 
 
83
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
84
 
85
- # Final yield on success: Update status, enable button, and return the loaded model/processor.
86
- # MUST return a value for all 4 outputs.
87
  yield f"Status: {model_name} model loaded successfully.", gr.update(interactive=True), model, processor
88
 
89
  except Exception as e:
90
- # Final yield on error: Update status, enable button, return None for model/processor.
91
- # MUST return a value for all 4 outputs.
92
  yield f"Status: Error loading model: {e}", gr.update(interactive=True), None, None
93
 
94
  # --- Playwright Screenshot Function ---
@@ -153,7 +151,11 @@ with gr.Blocks() as demo:
153
 
154
  with gr.Accordion("Controls", open=True):
155
  with gr.Row():
156
- model_selector = gr.Radio(choices=list(MODEL_OPTIONS.keys()), value="Quantized (FP8) - Faster", label="Select Model")
 
 
 
 
157
  load_model_button = gr.Button("Load/Switch Model")
158
  status_text = gr.Textbox(label="Status", value="Status: No model loaded.", interactive=False)
159
 
 
1
  import gradio as gr
2
  import torch
3
  from PIL import Image
4
+ # --- ADDED: Import BitsAndBytesConfig for CPU quantization ---
5
+ from transformers import Qwen3VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
6
  import os
7
  from playwright.sync_api import sync_playwright
8
  import time
9
  import numpy as np
10
 
11
  # --- Configuration ---
12
+ # --- CHANGED: Updated model options to reflect INT8 CPU quantization ---
13
  MODEL_OPTIONS = {
14
  "Standard (BF16)": "Qwen/Qwen3-VL-2B-Instruct",
15
+ "Quantized (INT8) - Faster on CPU": "Qwen/Qwen3-VL-2B-Instruct", # We use the same base model for quantization
16
  }
17
 
18
  # --- DETAILED PROMPT TEMPLATE ---
 
59
  def load_model(model_name):
60
  """Loads the specified model and processor from Hugging Face."""
61
  model_id = MODEL_OPTIONS[model_name]
62
+ yield f"Status: Loading {model_name} ({model_id})... Please wait.", gr.update(interactive=False), None, None
63
 
64
+ model, processor = None, None
 
 
 
 
 
 
65
  try:
66
+ # --- CHANGED: New logic for CPU-compatible quantization ---
67
+ if "Quantized" in model_name:
68
+ # Use bitsandbytes for 8-bit quantization on CPU
69
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
70
  model = Qwen3VLForConditionalGeneration.from_pretrained(
71
  model_id,
72
+ quantization_config=quantization_config,
73
  device_map="auto",
74
  trust_remote_code=True
75
  )
76
  else:
77
+ # Standard loading for the full-precision model
78
  model = Qwen3VLForConditionalGeneration.from_pretrained(
79
  model_id,
80
  device_map="auto",
81
  trust_remote_code=True
82
  )
83
+
84
+ # The processor is the same for both versions
85
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
86
 
 
 
87
  yield f"Status: {model_name} model loaded successfully.", gr.update(interactive=True), model, processor
88
 
89
  except Exception as e:
 
 
90
  yield f"Status: Error loading model: {e}", gr.update(interactive=True), None, None
91
 
92
  # --- Playwright Screenshot Function ---
 
151
 
152
  with gr.Accordion("Controls", open=True):
153
  with gr.Row():
154
+ model_selector = gr.Radio(
155
+ choices=list(MODEL_OPTIONS.keys()),
156
+ value="Quantized (INT8) - Faster on CPU", # Default to the faster option
157
+ label="Select Model"
158
+ )
159
  load_model_button = gr.Button("Load/Switch Model")
160
  status_text = gr.Textbox(label="Status", value="Status: No model loaded.", interactive=False)
161