import os
import subprocess
import signal
from pathlib import Path
from dataclasses import dataclass, field

import gradio as gr
from datasets import load_dataset
from huggingface_hub import HfApi
from transformers import AutoTokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
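
# Disable Gradio telemetry and silence the tokenizers fork/parallelism warning.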
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# ----------------- Configuration -----------------
DATASET_NAME = "openerotica/erotiquant3"
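# NOTE: the tokenization step below assumes this dataset exposes a "text"
# column; if the dataset uses a different schema, adjust tokenize_function.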
@dataclass
class QuantizationConfig:
    """Quantization method plus derived artifact paths (filled in later)."""
    method: str
    train_data: str = DATASET_NAME
    fp16_model: str = field(default="", init=False)
    quantized_gguf: str = field(default="", init=False)

@dataclass
class SplitConfig:
    """Optional GGUF sharding settings (unused by the default pipeline)."""
    enabled: bool = False
    max_tensors: int = 256

@dataclass
class OutputConfig:
    """Where and how the result is published on the Hub."""
    private_repo: bool = False
    repo_name: str = ""
    filename: str = ""

@dataclass
class ModelProcessingConfig:
    """Aggregates everything a single pipeline run needs."""
    token: str
    model_id: str
    model_name: str
    outdir: str
    quant_config: QuantizationConfig
    split_config: SplitConfig
    output_config: OutputConfig
class GGUFConverterError(Exception):
    """Raised when an external conversion or quantization step fails or times out."""
class HuggingFaceModelProcessor:
    HF_TO_GGUF_TIMEOUT = 3600   # 1 hour for the HF -> GGUF (FP16) conversion
    QUANTIZE_TIMEOUT = 86400    # 24 hours for quantization
    KILL_TIMEOUT = 5            # grace period after SIGINT before SIGKILL
    DOWNLOAD_FOLDER = "./downloads"
    OUTPUT_FOLDER = "./outputs"

    def __init__(self):
        self.HF_TOKEN = os.environ.get("HF_TOKEN")
        os.makedirs(self.DOWNLOAD_FOLDER, exist_ok=True)
        os.makedirs(self.OUTPUT_FOLDER, exist_ok=True)
    # ----------------- PPO training -----------------
    def _train_model_with_ppo(self, model_id: str, dataset_name: str, outdir: str, token: str):
        print(f"Starting PPO training of {model_id} on dataset {dataset_name}")
        dataset = load_dataset(dataset_name, split="train", token=token)
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        # Causal LMs often ship without a pad token; the padding below needs one.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLMWithValueHead.from_pretrained(model_id, token=token)

        # Tokenization
        def tokenize_function(examples):
            return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)

        tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)

        # PPO configuration. NOTE: PPOConfig/PPOTrainer signatures changed
        # substantially across trl releases; the keyword names below follow the
        # TrainingArguments-style API of recent trl and may need adjusting for
        # the version pinned in this Space.
        ppo_config = PPOConfig(
            output_dir=outdir,
            per_device_train_batch_size=2,
            gradient_accumulation_steps=1,
            learning_rate=5e-6,
            num_train_epochs=1,
        )
        # NOTE: newer trl renames `tokenizer` to `processing_class` and also
        # expects reference/reward models; this minimal call keeps the original
        # structure and assumes a trl version that accepts it.
        ppo_trainer = PPOTrainer(
            model=model,
            args=ppo_config,
            train_dataset=tokenized_dataset,
            tokenizer=tokenizer,
        )

        # Training
        ppo_trainer.train()
        ppo_trainer.save_model(outdir)
        tokenizer.save_pretrained(outdir)
        print("PPO training finished.")
        return outdir
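
    # For reference: trl versions before 0.12 drove PPO manually instead of
    # calling .train(). A minimal sketch of that loop, with a hypothetical
    # dataloader and a placeholder constant reward:
    #
    #   for batch in dataloader:
    #       query_tensors = batch["input_ids"]
    #       response_tensors = ppo_trainer.generate(query_tensors)
    #       rewards = [torch.tensor(1.0) for _ in response_tensors]  # placeholder
    #       ppo_trainer.step(query_tensors, response_tensors, rewards)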
    # ----------------- FP16 conversion -----------------
    def _convert_to_fp16(self, model_dir: str, out_fp16_path: str):
        print(f"Converting model to FP16: {out_fp16_path}")
        convert_command = [
            "python3", "/app/convert_hf_to_gguf.py",
            model_dir,
            "--outtype", "f16",
            "--outfile", out_fp16_path
        ]
        process = subprocess.Popen(convert_command, shell=False, stderr=subprocess.STDOUT)
        try:
            process.wait(timeout=self.HF_TO_GGUF_TIMEOUT)
        except subprocess.TimeoutExpired:
            # Ask politely first, then force-kill if the process ignores SIGINT.
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=self.KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
            raise GGUFConverterError("FP16 conversion failed: timeout")
        if process.returncode != 0:
            raise GGUFConverterError(f"FP16 conversion failed: exit code {process.returncode}")
        print("FP16 conversion complete.")
        return out_fp16_path
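
    # NOTE: convert_hf_to_gguf.py is llama.cpp's conversion script; the /app
    # path assumes it was copied into this Space's Docker image at build time.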
    # ----------------- Quantization -----------------
    def _quantize_model(self, fp16_path: str, gguf_path: str, method: str):
        print(f"Quantizing model with method {method}")
        quantize_cmd = ["llama-quantize", fp16_path, gguf_path, method]
        process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
        try:
            process.wait(timeout=self.QUANTIZE_TIMEOUT)
        except subprocess.TimeoutExpired:
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=self.KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
            raise GGUFConverterError("Quantization failed: timeout")
        if process.returncode != 0:
            raise GGUFConverterError(f"Quantization failed: exit code {process.returncode}")
        print("Quantization complete.")
        return gguf_path
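
    # llama-quantize is llama.cpp's quantization binary and is assumed to be
    # on PATH. Equivalent manual invocation:
    #   llama-quantize model.f16.gguf model.Q4_0.gguf Q4_0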
    # ----------------- Publishing to the Hub -----------------
    def _push_to_hf(self, local_dir: str, repo_name: str, token: str, private=False):
        print(f"Uploading model to Hugging Face: {repo_name}")
        # Repository is deprecated in huggingface_hub; upload via HfApi instead.
        api = HfApi(token=token)
        api.create_repo(repo_id=repo_name, private=private, exist_ok=True)
        api.upload_folder(
            folder_path=local_dir,
            repo_id=repo_name,
            commit_message="Trained and quantized model",
        )
        print("Upload complete.")
    # ----------------- Full pipeline -----------------
    def run_full_pipeline(self, token, model_id, hf_repo_name):
        logs = []
        try:
            outdir = self.OUTPUT_FOLDER
            model_name = Path(model_id).name
            quant_config = QuantizationConfig(method="Q4_0")
            quant_config.fp16_model = f"{outdir}/{model_name}.f16"
            quant_config.quantized_gguf = f"{outdir}/{model_name}.gguf"
            split_config = SplitConfig()
            output_config = OutputConfig(private_repo=False, repo_name=hf_repo_name)
            # Aggregated for reference; the steps below use the pieces directly.
            processing_config = ModelProcessingConfig(
                token=token, model_id=model_id, model_name=model_name,
                outdir=outdir, quant_config=quant_config, split_config=split_config,
                output_config=output_config
            )

            # 1. Train the model with PPO
            trained_dir = self._train_model_with_ppo(model_id, DATASET_NAME, outdir, token)
            logs.append("Training complete")

            # 2. Convert to FP16
            self._convert_to_fp16(trained_dir, quant_config.fp16_model)
            logs.append("FP16 conversion complete")

            # 3. Quantize
            self._quantize_model(quant_config.fp16_model, quant_config.quantized_gguf, quant_config.method)
            logs.append("Model quantized to GGUF")

            # 4. Upload to Hugging Face
            self._push_to_hf(trained_dir, hf_repo_name, token, private=output_config.private_repo)
            logs.append(f"Model uploaded to Hugging Face: {hf_repo_name}")
            logs.append("✅ Pipeline finished successfully")
        except Exception as e:
            logs.append(f"❌ ERROR: {e}")
        return "\n".join(logs)
# ----------------- Gradio interface -----------------
processor = HuggingFaceModelProcessor()

with gr.Blocks() as demo:
    gr.Markdown("## Automatic GGUF pipeline with PPO training and HF publishing")
    model_input = gr.Textbox(label="HF model ID (to train)", placeholder="ochoa/your-model")
    repo_input = gr.Textbox(label="HF repo name to publish to", placeholder="user/new-model")
    token_input = gr.Textbox(label="Your HF token", type="password")
    run_button = gr.Button("Train, quantize, and publish")
    output_logs = gr.Textbox(label="Logs", lines=20)
    # The bound method already matches (token, model_id, repo_name); no lambda needed.
    run_button.click(
        fn=processor.run_full_pipeline,
        inputs=[token_input, model_input, repo_input],
        outputs=[output_logs]
    )

if __name__ == "__main__":
    demo.launch()