Novaciano committed on
Commit
53ce1a4
·
verified ·
1 Parent(s): 8d40d9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -103
app.py CHANGED
@@ -3,35 +3,34 @@ import subprocess
3
  import signal
4
  import tempfile
5
  from pathlib import Path
6
- from textwrap import dedent
7
  from dataclasses import dataclass, field
 
8
 
9
  import gradio as gr
10
  from datasets import load_dataset
11
- from huggingface_hub import HfApi, ModelCard, whoami
 
12
 
13
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 
 
 
 
14
 
15
  @dataclass
16
  class QuantizationConfig:
17
  method: str
18
- use_imatrix: bool = False
19
- imatrix_method: str = "IQ4_NL"
20
- train_data: str = ""
21
  quant_embedding: bool = False
22
- embedding_tensor_method: str = "Q8_0"
23
  leave_output: bool = False
24
  quant_output: bool = False
25
- output_tensor_method: str = "Q8_0"
26
  fp16_model: str = field(default="", init=False)
27
  quantized_gguf: str = field(default="", init=False)
28
- imatrix_file: str = field(default="", init=False)
29
 
30
  @dataclass
31
  class SplitConfig:
32
  enabled: bool = False
33
  max_tensors: int = 256
34
- max_size: str = None
35
 
36
  @dataclass
37
  class OutputConfig:
@@ -55,76 +54,87 @@ class GGUFConverterError(Exception):
55
  pass
56
 
57
  class HuggingFaceModelProcessor:
58
- QUANTIZE_TIMEOUT = 86400
59
  HF_TO_GGUF_TIMEOUT = 3600
60
- IMATRIX_TIMEOUT = 86400
61
- SPLIT_TIMEOUT = 3600
62
  KILL_TIMEOUT = 5
63
 
64
  DOWNLOAD_FOLDER = "./downloads"
65
  OUTPUT_FOLDER = "./outputs"
66
- CALIBRATION_FILE = "calibration_data_v5_rc.txt"
67
 
68
  def __init__(self):
69
  self.HF_TOKEN = os.environ.get("HF_TOKEN")
70
- self._create_folder(self.DOWNLOAD_FOLDER)
71
- self._create_folder(self.OUTPUT_FOLDER)
72
-
73
- def _create_folder(self, folder_name: str):
74
- if not os.path.exists(folder_name):
75
- os.makedirs(folder_name)
76
- return folder_name
77
-
78
- def _download_dataset(self, dataset_name: str):
79
- print(f"Cargando dataset desde HuggingFace Hub: {dataset_name}")
80
- dataset = load_dataset(dataset_name, use_auth_token=self.HF_TOKEN)
81
- return dataset
82
-
83
- def _download_model(self, processing_config: ModelProcessingConfig):
84
- print(f"Descargando modelo {processing_config.model_name}")
85
- if os.path.exists(processing_config.quant_config.fp16_model):
86
- print("FP16 ya existe, omitiendo conversión.")
87
- return processing_config.quant_config.fp16_model
88
-
89
- with tempfile.TemporaryDirectory(dir=self.DOWNLOAD_FOLDER) as tmpdir:
90
- local_dir = f"{Path(tmpdir)}/{processing_config.model_name}"
91
- api = HfApi(token=processing_config.token)
92
- pattern = "*.safetensors"
93
- api.snapshot_download(repo_id=processing_config.model_id, local_dir=local_dir, allow_patterns=[pattern])
94
- convert_command = [
95
- "python3", "/app/convert_hf_to_gguf.py", local_dir,
96
- "--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
97
- ]
98
- process = subprocess.Popen(convert_command, shell=False, stderr=subprocess.STDOUT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  try:
100
- process.wait(timeout=self.HF_TO_GGUF_TIMEOUT)
101
  except subprocess.TimeoutExpired:
102
- process.send_signal(signal.SIGINT)
103
- try:
104
- process.wait(timeout=self.KILL_TIMEOUT)
105
- except subprocess.TimeoutExpired:
106
- process.kill()
107
- raise GGUFConverterError("Error convirtiendo a FP16: timeout")
108
- if process.returncode != 0:
109
- raise GGUFConverterError(f"Error convirtiendo a FP16: code={process.returncode}")
110
- print("Modelo convertido a FP16 correctamente")
111
- return processing_config.quant_config.fp16_model
112
-
113
- def _quantize_model(self, quant_config: QuantizationConfig):
114
- quantize_cmd = ["llama-quantize"]
115
- if quant_config.quant_embedding:
116
- quantize_cmd.extend(["--token-embedding-type", quant_config.embedding_tensor_method])
117
- if quant_config.leave_output:
118
- quantize_cmd.append("--leave-output-tensor")
119
- else:
120
- if quant_config.quant_output:
121
- quantize_cmd.extend(["--output-tensor-type", quant_config.output_tensor_method])
122
- if quant_config.use_imatrix:
123
- raise NotImplementedError("imatrix no implementado para esta demo automática")
124
- quantize_cmd.append(quant_config.fp16_model)
125
- quantize_cmd.append(quant_config.quantized_gguf)
126
- quantize_cmd.append(quant_config.method)
127
-
128
  process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
129
  try:
130
  process.wait(timeout=self.QUANTIZE_TIMEOUT)
@@ -137,25 +147,12 @@ class HuggingFaceModelProcessor:
137
  raise GGUFConverterError("Error cuantizando: timeout")
138
  if process.returncode != 0:
139
  raise GGUFConverterError(f"Error cuantizando: code={process.returncode}")
140
- print("Cuantización completada")
141
- return quant_config.quantized_gguf
142
-
143
- def _create_repo(self, processing_config: ModelProcessingConfig):
144
- api = HfApi(token=processing_config.token)
145
- new_repo_url = api.create_repo(repo_id=processing_config.output_config.repo_name, exist_ok=True, private=processing_config.output_config.private_repo)
146
- processing_config.new_repo_url = new_repo_url.url
147
- processing_config.new_repo_id = new_repo_url.repo_id
148
- print("Repositorio creado:", processing_config.new_repo_url)
149
- return new_repo_url
150
-
151
- def run_full_pipeline(self, token, model_id, model_name, dataset_name):
152
  logs = []
153
  try:
154
- # 1. Cargar dataset
155
- dataset = self._download_dataset(dataset_name)
156
- logs.append(f"Dataset cargado: {dataset_name}")
157
-
158
- # 2. Configuración inicial
159
  outdir = self.OUTPUT_FOLDER
160
  quant_config = QuantizationConfig(method="Q4_0")
161
  quant_config.fp16_model = f"{outdir}/{model_name}.f16"
@@ -163,24 +160,27 @@ class HuggingFaceModelProcessor:
163
  split_config = SplitConfig()
164
  output_config = OutputConfig(private_repo=False, repo_name=f"{model_name}-gguf")
165
  processing_config = ModelProcessingConfig(
166
- token=token, model_id=model_id, model_name=model_name, outdir=outdir,
167
- quant_config=quant_config, split_config=split_config, output_config=output_config
 
168
  )
169
 
170
- # 3. Descargar modelo
171
- self._download_model(processing_config)
172
- logs.append("Modelo descargado y convertido a FP16")
 
 
 
 
173
 
174
- # 4. Cuantizar modelo
175
- self._quantize_model(quant_config)
176
  logs.append("Modelo cuantizado a GGUF")
177
 
178
- # 5. Crear repo
179
- self._create_repo(processing_config)
180
- logs.append(f"Repositorio creado: {processing_config.new_repo_url}")
181
 
182
  except Exception as e:
183
- logs.append(f"ERROR: {e}")
184
 
185
  return "\n".join(logs)
186
 
@@ -188,16 +188,15 @@ class HuggingFaceModelProcessor:
188
  processor = HuggingFaceModelProcessor()
189
 
190
  with gr.Blocks() as demo:
191
- gr.Markdown("## Pipeline Automática GGUF desde HuggingFace Hub")
192
- dataset_input = gr.Textbox(label="Nombre del dataset HuggingFace", placeholder="openerotica/erotiquant3")
193
  model_input = gr.Textbox(label="ID del modelo HF", placeholder="ochoa/your-model")
194
- token_input = gr.Textbox(label="Tu token HF (opcional, si está en HF_TOKEN puede dejarse vacío)", type="password")
195
- run_button = gr.Button("Ejecutar pipeline automática")
196
  output_logs = gr.Textbox(label="Logs", lines=20)
197
 
198
  run_button.click(
199
- fn=lambda token, model_id, model_name, dataset_name: processor.run_full_pipeline(token, model_id, model_name, dataset_name),
200
- inputs=[token_input, model_input, model_input, dataset_input],
201
  outputs=[output_logs]
202
  )
203
 
 
3
  import signal
4
  import tempfile
5
  from pathlib import Path
 
6
  from dataclasses import dataclass, field
7
+ from textwrap import dedent
8
 
9
  import gradio as gr
10
  from datasets import load_dataset
11
+ from huggingface_hub import HfApi
12
+ from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
13
 
14
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
15
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
16
+
17
+ # ----------------- Configuración -----------------
18
+ DATASET_NAME = "openerotica/erotiquant3"
19
 
20
@dataclass
class QuantizationConfig:
    """Settings for one llama.cpp quantization run.

    The two `init=False` fields are populated by the pipeline after
    construction (e.g. `run_full_pipeline` assigns `fp16_model`), not by
    the caller.
    """
    method: str  # llama-quantize method name, e.g. "Q4_0"
    train_data: str = DATASET_NAME  # presumably the fine-tuning dataset id — verify against pipeline
    quant_embedding: bool = False
    leave_output: bool = False
    quant_output: bool = False
    fp16_model: str = field(default="", init=False)  # path of the FP16 GGUF, set by the pipeline
    quantized_gguf: str = field(default="", init=False)  # path of the quantized GGUF, set by the pipeline
 
29
 
30
  @dataclass
31
  class SplitConfig:
32
  enabled: bool = False
33
  max_tensors: int = 256
 
34
 
35
  @dataclass
36
  class OutputConfig:
 
54
  pass
55
 
56
  class HuggingFaceModelProcessor:
 
57
  HF_TO_GGUF_TIMEOUT = 3600
58
+ QUANTIZE_TIMEOUT = 86400
 
59
  KILL_TIMEOUT = 5
60
 
61
  DOWNLOAD_FOLDER = "./downloads"
62
  OUTPUT_FOLDER = "./outputs"
 
63
 
64
  def __init__(self):
65
  self.HF_TOKEN = os.environ.get("HF_TOKEN")
66
+ os.makedirs(self.DOWNLOAD_FOLDER, exist_ok=True)
67
+ os.makedirs(self.OUTPUT_FOLDER, exist_ok=True)
68
+
69
+ # ----------------- Entrenamiento -----------------
70
+ def _train_model(self, model_id: str, dataset_name: str, outdir: str):
71
+ print(f"Iniciando entrenamiento de {model_id} con dataset {dataset_name}")
72
+ dataset = load_dataset(dataset_name, split="train", use_auth_token=self.HF_TOKEN)
73
+
74
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=self.HF_TOKEN)
75
+ model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=self.HF_TOKEN)
76
+
77
+ def tokenize_function(examples):
78
+ return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
79
+
80
+ tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)
81
+
82
+ data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
83
+
84
+ train_args = TrainingArguments(
85
+ output_dir=f"{outdir}/{Path(model_id).name}_trained",
86
+ overwrite_output_dir=True,
87
+ num_train_epochs=1, # ajustar según recursos
88
+ per_device_train_batch_size=2,
89
+ save_total_limit=1,
90
+ logging_dir=f"{outdir}/logs",
91
+ logging_steps=10,
92
+ save_steps=200,
93
+ report_to=[],
94
+ )
95
+
96
+ trainer = Trainer(
97
+ model=model,
98
+ args=train_args,
99
+ train_dataset=tokenized_dataset,
100
+ data_collator=data_collator,
101
+ )
102
+
103
+ trainer.train()
104
+ trainer.save_model(train_args.output_dir)
105
+ tokenizer.save_pretrained(train_args.output_dir)
106
+
107
+ print("Entrenamiento finalizado.")
108
+ return train_args.output_dir
109
+
110
+ # ----------------- Conversión a FP16 -----------------
111
+ def _convert_to_fp16(self, model_dir: str, out_fp16_path: str):
112
+ print(f"Convirtiendo modelo a FP16: {out_fp16_path}")
113
+ convert_command = [
114
+ "python3", "/app/convert_hf_to_gguf.py",
115
+ model_dir,
116
+ "--outtype", "f16",
117
+ "--outfile", out_fp16_path
118
+ ]
119
+ process = subprocess.Popen(convert_command, shell=False, stderr=subprocess.STDOUT)
120
+ try:
121
+ process.wait(timeout=self.HF_TO_GGUF_TIMEOUT)
122
+ except subprocess.TimeoutExpired:
123
+ process.send_signal(signal.SIGINT)
124
  try:
125
+ process.wait(timeout=self.KILL_TIMEOUT)
126
  except subprocess.TimeoutExpired:
127
+ process.kill()
128
+ raise GGUFConverterError("Error convirtiendo a FP16: timeout")
129
+ if process.returncode != 0:
130
+ raise GGUFConverterError(f"Error FP16: código {process.returncode}")
131
+ print("Conversión a FP16 completa.")
132
+ return out_fp16_path
133
+
134
+ # ----------------- Cuantización -----------------
135
+ def _quantize_model(self, fp16_path: str, gguf_path: str, method: str):
136
+ print(f"Cuantizando modelo con método {method}")
137
+ quantize_cmd = ["llama-quantize", fp16_path, gguf_path, method]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
139
  try:
140
  process.wait(timeout=self.QUANTIZE_TIMEOUT)
 
147
  raise GGUFConverterError("Error cuantizando: timeout")
148
  if process.returncode != 0:
149
  raise GGUFConverterError(f"Error cuantizando: code={process.returncode}")
150
+ print("Cuantización completada.")
151
+ return gguf_path
152
+
153
+ def run_full_pipeline(self, token, model_id, model_name):
 
 
 
 
 
 
 
 
154
  logs = []
155
  try:
 
 
 
 
 
156
  outdir = self.OUTPUT_FOLDER
157
  quant_config = QuantizationConfig(method="Q4_0")
158
  quant_config.fp16_model = f"{outdir}/{model_name}.f16"
 
160
  split_config = SplitConfig()
161
  output_config = OutputConfig(private_repo=False, repo_name=f"{model_name}-gguf")
162
  processing_config = ModelProcessingConfig(
163
+ token=token, model_id=model_id, model_name=model_name,
164
+ outdir=outdir, quant_config=quant_config, split_config=split_config,
165
+ output_config=output_config
166
  )
167
 
168
+ # 1. Entrenar modelo
169
+ trained_dir = self._train_model(model_id, DATASET_NAME, outdir)
170
+ logs.append("Entrenamiento completado")
171
+
172
+ # 2. Convertir a FP16
173
+ self._convert_to_fp16(trained_dir, quant_config.fp16_model)
174
+ logs.append("Conversión a FP16 completada")
175
 
176
+ # 3. Cuantizar
177
+ self._quantize_model(quant_config.fp16_model, quant_config.quantized_gguf, quant_config.method)
178
  logs.append("Modelo cuantizado a GGUF")
179
 
180
+ logs.append("✅ Pipeline completada correctamente")
 
 
181
 
182
  except Exception as e:
183
+ logs.append(f"ERROR: {e}")
184
 
185
  return "\n".join(logs)
186
 
 
188
# Single shared processor instance backing the UI.
processor = HuggingFaceModelProcessor()

with gr.Blocks() as demo:
    gr.Markdown("## Pipeline Automática GGUF con entrenamiento (dataset openerotica/erotiquant3)")
    model_input = gr.Textbox(label="ID del modelo HF", placeholder="ochoa/your-model")
    token_input = gr.Textbox(label="Tu token HF (opcional si ya está en HF_TOKEN)", type="password")
    run_button = gr.Button("Entrenar y cuantizar automáticamente")
    output_logs = gr.Textbox(label="Logs", lines=20)

    def _on_run(token, model_id):
        # The repo-local model name is the last path segment of the model id.
        return processor.run_full_pipeline(token, model_id, Path(model_id).name)

    run_button.click(
        fn=_on_run,
        inputs=[token_input, model_input],
        outputs=[output_logs]
    )
202