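"""Gradio Space: fine-tune a causal LM with PPO (trl), convert the result to
GGUF FP16 with llama.cpp's convert_hf_to_gguf.py, quantize it with
llama-quantize, and publish everything to the Hugging Face Hub."""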
import os
import subprocess
import signal
from pathlib import Path
from dataclasses import dataclass, field

import gradio as gr
from datasets import load_dataset
from huggingface_hub import Repository
from transformers import AutoTokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead

os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# ----------------- Configuration -----------------
DATASET_NAME = "openerotica/erotiquant3"
@dataclass
class QuantizationConfig:
    method: str
    train_data: str = DATASET_NAME
    fp16_model: str = field(default="", init=False)
    quantized_gguf: str = field(default="", init=False)


@dataclass
class SplitConfig:
    enabled: bool = False
    max_tensors: int = 256


@dataclass
class OutputConfig:
    private_repo: bool = False
    repo_name: str = ""
    filename: str = ""


@dataclass
class ModelProcessingConfig:
    token: str
    model_id: str
    model_name: str
    outdir: str
    quant_config: QuantizationConfig
    split_config: SplitConfig
    output_config: OutputConfig
class GGUFConverterError(Exception):
    pass
class HuggingFaceModelProcessor:
    HF_TO_GGUF_TIMEOUT = 3600   # seconds allowed for HF -> GGUF conversion
    QUANTIZE_TIMEOUT = 86400    # seconds allowed for quantization
    KILL_TIMEOUT = 5            # grace period after SIGINT before kill
    DOWNLOAD_FOLDER = "./downloads"
    OUTPUT_FOLDER = "./outputs"

    def __init__(self):
        self.HF_TOKEN = os.environ.get("HF_TOKEN")
        os.makedirs(self.DOWNLOAD_FOLDER, exist_ok=True)
        os.makedirs(self.OUTPUT_FOLDER, exist_ok=True)
    # ----------------- PPO training -----------------
    def _train_model_with_ppo(self, model_id: str, dataset_name: str, outdir: str, token: str):
        print(f"Starting PPO training of {model_id} on dataset {dataset_name}")
        dataset = load_dataset(dataset_name, split="train", token=token)
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        model = AutoModelForCausalLMWithValueHead.from_pretrained(model_id, token=token)

        # Tokenization (assumes the dataset exposes a "text" column)
        def tokenize_function(examples):
            return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)

        tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)

        # PPO configuration. Note: PPOConfig/PPOTrainer signatures vary across trl
        # releases, and full PPO normally also needs a reward signal; adjust these
        # arguments to match the installed trl version.
        ppo_config = PPOConfig(
            model_name=model_id,
            batch_size=2,
            gradient_accumulation_steps=1,
            learning_rate=5e-6,
        )
        ppo_trainer = PPOTrainer(
            model=model,
            args=ppo_config,
            train_dataset=tokenized_dataset,
            tokenizer=tokenizer,
        )

        # Training
        ppo_trainer.train()
        ppo_trainer.save_model(outdir)
        tokenizer.save_pretrained(outdir)
        print("PPO training finished.")
        return outdir
    # ----------------- FP16 conversion -----------------
    def _convert_to_fp16(self, model_dir: str, out_fp16_path: str):
        print(f"Converting model to FP16: {out_fp16_path}")
        # Uses llama.cpp's conversion script, expected at /app/convert_hf_to_gguf.py
        convert_command = [
            "python3", "/app/convert_hf_to_gguf.py",
            model_dir,
            "--outtype", "f16",
            "--outfile", out_fp16_path,
        ]
        process = subprocess.Popen(convert_command, shell=False, stderr=subprocess.STDOUT)
        try:
            process.wait(timeout=self.HF_TO_GGUF_TIMEOUT)
        except subprocess.TimeoutExpired:
            # Ask the process to stop via SIGINT first; kill it if it does not comply
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=self.KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
            raise GGUFConverterError("Error converting to FP16: timeout")
        if process.returncode != 0:
            raise GGUFConverterError(f"FP16 error: exit code {process.returncode}")
        print("FP16 conversion complete.")
        return out_fp16_path
    # ----------------- Quantization -----------------
    def _quantize_model(self, fp16_path: str, gguf_path: str, method: str):
        print(f"Quantizing model with method {method}")
        # llama-quantize is the llama.cpp quantization binary; it must be on PATH
        quantize_cmd = ["llama-quantize", fp16_path, gguf_path, method]
        process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
        try:
            process.wait(timeout=self.QUANTIZE_TIMEOUT)
        except subprocess.TimeoutExpired:
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=self.KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
            raise GGUFConverterError("Quantization error: timeout")
        if process.returncode != 0:
            raise GGUFConverterError(f"Quantization error: exit code {process.returncode}")
        print("Quantization complete.")
        return gguf_path
    # ----------------- Publishing to HF -----------------
    def _push_to_hf(self, local_dir: str, repo_name: str, token: str, private=False):
        print(f"Uploading model to Hugging Face: {repo_name}")
        # Repository is deprecated in recent huggingface_hub releases;
        # it still works here but see the HfApi sketch below for the current route.
        repo = Repository(local_dir, clone_from=repo_name, use_auth_token=token)
        repo.push_to_hub(commit_message="Trained and quantized model")
        print("Upload complete.")
    # ----------------- Full pipeline -----------------
    def run_full_pipeline(self, token, model_id, hf_repo_name):
        logs = []
        try:
            outdir = self.OUTPUT_FOLDER
            model_name = Path(model_id).name
            quant_config = QuantizationConfig(method="Q4_0")
            quant_config.fp16_model = f"{outdir}/{model_name}.f16"
            quant_config.quantized_gguf = f"{outdir}/{model_name}.gguf"
            split_config = SplitConfig()
            output_config = OutputConfig(private_repo=False, repo_name=hf_repo_name)
            # Bundled for future use; the steps below take their arguments directly
            processing_config = ModelProcessingConfig(
                token=token, model_id=model_id, model_name=model_name,
                outdir=outdir, quant_config=quant_config, split_config=split_config,
                output_config=output_config,
            )

            # 1. Train the model with PPO
            trained_dir = self._train_model_with_ppo(model_id, DATASET_NAME, outdir, token)
            logs.append("Training complete")
            # 2. Convert to FP16
            self._convert_to_fp16(trained_dir, quant_config.fp16_model)
            logs.append("FP16 conversion complete")
            # 3. Quantize
            self._quantize_model(quant_config.fp16_model, quant_config.quantized_gguf, quant_config.method)
            logs.append("Model quantized to GGUF")
            # 4. Upload to Hugging Face
            self._push_to_hf(trained_dir, hf_repo_name, token)
            logs.append(f"Model uploaded to Hugging Face: {hf_repo_name}")
            logs.append("✅ Pipeline finished successfully")
        except Exception as e:
            logs.append(f"❌ ERROR: {e}")
        return "\n".join(logs)
# ----------------- Gradio interface -----------------
processor = HuggingFaceModelProcessor()

with gr.Blocks() as demo:
    gr.Markdown("## Automated GGUF pipeline with PPO training and HF publishing")
    model_input = gr.Textbox(label="HF model ID (for training)", placeholder="ochoa/your-model")
    repo_input = gr.Textbox(label="HF repo name to publish to", placeholder="user/new-model")
    token_input = gr.Textbox(label="Your HF token", type="password")
    run_button = gr.Button("Train, quantize and publish")
    output_logs = gr.Textbox(label="Logs", lines=20)

    run_button.click(
        fn=processor.run_full_pipeline,
        inputs=[token_input, model_input, repo_input],
        outputs=[output_logs],
    )

demo.launch()
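# Usage: run this file (e.g. `python app.py`) and open the Gradio URL it prints.
# Assumes the llama.cpp tools referenced above (/app/convert_hf_to_gguf.py and
# llama-quantize) are present in the environment, as in a suitably built Space image.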