import os
import signal
import subprocess
from dataclasses import dataclass, field
from pathlib import Path

import torch
import gradio as gr
from datasets import load_dataset
from huggingface_hub import HfApi
from transformers import AutoTokenizer
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer

os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# ----------------- Configuration -----------------
DATASET_NAME = "openerotica/erotiquant3"

@dataclass
class QuantizationConfig:
    method: str
    train_data: str = DATASET_NAME
    fp16_model: str = field(default="", init=False)
    quantized_gguf: str = field(default="", init=False)

@dataclass
class SplitConfig:
    enabled: bool = False
    max_tensors: int = 256

@dataclass
class OutputConfig:
    private_repo: bool = False
    repo_name: str = ""
    filename: str = ""

@dataclass
class ModelProcessingConfig:
    token: str
    model_id: str
    model_name: str
    outdir: str
    quant_config: QuantizationConfig
    split_config: SplitConfig
    output_config: OutputConfig
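# ModelProcessingConfig bundles the settings for one run; run_full_pipeline
# below fills it in. SplitConfig (GGUF sharding) and OutputConfig.filename
# appear reserved for options this pipeline does not use yet.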
class GGUFConverterError(Exception):
    pass

class HuggingFaceModelProcessor:
    HF_TO_GGUF_TIMEOUT = 3600
    QUANTIZE_TIMEOUT = 86400
    KILL_TIMEOUT = 5
    DOWNLOAD_FOLDER = "./downloads"
    OUTPUT_FOLDER = "./outputs"

    def __init__(self):
        self.HF_TOKEN = os.environ.get("HF_TOKEN")
        os.makedirs(self.DOWNLOAD_FOLDER, exist_ok=True)
        os.makedirs(self.OUTPUT_FOLDER, exist_ok=True)
    # ----------------- PPO training -----------------
    def _train_model_with_ppo(self, model_id: str, dataset_name: str, outdir: str, token: str):
        print(f"Starting PPO training of {model_id} on dataset {dataset_name}")
        dataset = load_dataset(dataset_name, split="train", token=token)
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLMWithValueHead.from_pretrained(model_id, token=token)

        # Tokenization: PPO consumes variable-length query tensors, so no padding here
        def tokenize_function(examples):
            return tokenizer(examples["text"], truncation=True, max_length=512)

        tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)
        tokenized_dataset.set_format("torch")

        # PPO configuration (classic TRL value-head API, trl<=0.11)
        ppo_config = PPOConfig(
            model_name=model_id,
            learning_rate=5e-6,
            batch_size=2,
            mini_batch_size=2,
            gradient_accumulation_steps=1,
        )
        ppo_trainer = PPOTrainer(
            config=ppo_config,
            model=model,
            tokenizer=tokenizer,
            dataset=tokenized_dataset,
        )

        # PPOTrainer has no .train(): each step generates responses and scores
        # them. The constant reward below is a placeholder; see the sketch after
        # this method for a real reward signal.
        generation_kwargs = {"max_new_tokens": 64, "pad_token_id": tokenizer.eos_token_id}
        for batch in ppo_trainer.dataloader:
            query_tensors = list(batch["input_ids"])
            response_tensors = ppo_trainer.generate(query_tensors, return_prompt=False, **generation_kwargs)
            rewards = [torch.tensor(1.0) for _ in response_tensors]
            ppo_trainer.step(query_tensors, response_tensors, rewards)

        model.save_pretrained(outdir)
        tokenizer.save_pretrained(outdir)
        print("PPO training finished.")
        return outdir
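    # Sketch of a real reward signal (assumption: any sequence-classification
    # scorer works; "lvwerra/distilbert-imdb" is the model used in TRL's
    # sentiment example, not part of this pipeline):
    #
    #   from transformers import pipeline
    #   reward_pipe = pipeline("text-classification", model="lvwerra/distilbert-imdb")
    #   texts = [tokenizer.decode(torch.cat((q, r))) for q, r in zip(query_tensors, response_tensors)]
    #   rewards = [torch.tensor(out["score"]) for out in reward_pipe(texts)]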
    # ----------------- FP16 conversion -----------------
    def _convert_to_fp16(self, model_dir: str, out_fp16_path: str):
        print(f"Converting model to FP16: {out_fp16_path}")
        convert_command = [
            "python3", "/app/convert_hf_to_gguf.py",
            model_dir,
            "--outtype", "f16",
            "--outfile", out_fp16_path,
        ]
        process = subprocess.Popen(convert_command, shell=False, stderr=subprocess.STDOUT)
        try:
            process.wait(timeout=self.HF_TO_GGUF_TIMEOUT)
        except subprocess.TimeoutExpired:
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=self.KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
            raise GGUFConverterError("Error converting to FP16: timeout")
        if process.returncode != 0:
            raise GGUFConverterError(f"FP16 error: exit code {process.returncode}")
        print("FP16 conversion complete.")
        return out_fp16_path
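    # Note: convert_hf_to_gguf.py ships with llama.cpp; the /app path above
    # assumes the Space's Docker image copies the script there.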
    # ----------------- Quantization -----------------
    def _quantize_model(self, fp16_path: str, gguf_path: str, method: str):
        print(f"Quantizing model with method {method}")
        quantize_cmd = ["llama-quantize", fp16_path, gguf_path, method]
        process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
        try:
            process.wait(timeout=self.QUANTIZE_TIMEOUT)
        except subprocess.TimeoutExpired:
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=self.KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
            raise GGUFConverterError("Quantization error: timeout")
        if process.returncode != 0:
            raise GGUFConverterError(f"Quantization error: code={process.returncode}")
        print("Quantization complete.")
        return gguf_path
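    # llama-quantize accepts the standard llama.cpp type names, e.g. Q4_0
    # (used below), Q4_K_M, Q5_K_M and Q8_0; larger types trade file size
    # for output quality.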
    # ----------------- Publishing to HF -----------------
    def _push_to_hf(self, local_dir: str, repo_name: str, token: str, private: bool = False):
        print(f"Uploading model to Hugging Face: {repo_name}")
        # Repository is deprecated; upload the folder through the HTTP API instead
        api = HfApi(token=token)
        api.create_repo(repo_id=repo_name, private=private, exist_ok=True)
        api.upload_folder(folder_path=local_dir, repo_id=repo_name,
                          commit_message="Trained and quantized model")
        print("Upload complete.")
    # ----------------- Full pipeline -----------------
    def run_full_pipeline(self, token, model_id, hf_repo_name):
        logs = []
        try:
            outdir = self.OUTPUT_FOLDER
            model_name = Path(model_id).name
            quant_config = QuantizationConfig(method="Q4_0")
            quant_config.fp16_model = f"{outdir}/{model_name}.f16"
            quant_config.quantized_gguf = f"{outdir}/{model_name}.gguf"
            split_config = SplitConfig()
            output_config = OutputConfig(private_repo=False, repo_name=hf_repo_name)
            processing_config = ModelProcessingConfig(
                token=token, model_id=model_id, model_name=model_name,
                outdir=outdir, quant_config=quant_config, split_config=split_config,
                output_config=output_config,
            )
            # 1. Train the model with PPO
            trained_dir = self._train_model_with_ppo(model_id, DATASET_NAME, outdir, token)
            logs.append("Training complete")
            # 2. Convert to FP16
            self._convert_to_fp16(trained_dir, quant_config.fp16_model)
            logs.append("FP16 conversion complete")
            # 3. Quantize
            self._quantize_model(quant_config.fp16_model, quant_config.quantized_gguf, quant_config.method)
            logs.append("Model quantized to GGUF")
            # 4. Upload to Hugging Face
            self._push_to_hf(trained_dir, hf_repo_name, token, private=output_config.private_repo)
            logs.append(f"Model uploaded to Hugging Face: {hf_repo_name}")
            logs.append("✅ Pipeline finished successfully")
        except Exception as e:
            logs.append(f"❌ ERROR: {e}")
        return "\n".join(logs)
# ----------------- Gradio interface -----------------
processor = HuggingFaceModelProcessor()

with gr.Blocks() as demo:
    gr.Markdown("## Automated GGUF pipeline with PPO training and HF publishing")
    model_input = gr.Textbox(label="HF model ID (to train)", placeholder="ochoa/your-model")
    repo_input = gr.Textbox(label="HF repo name to publish to", placeholder="user/new-model")
    token_input = gr.Textbox(label="Your HF token", type="password")
    run_button = gr.Button("Train, quantize and publish")
    output_logs = gr.Textbox(label="Logs", lines=20)
    run_button.click(
        fn=processor.run_full_pipeline,
        inputs=[token_input, model_input, repo_input],
        outputs=[output_logs],
    )

demo.launch()
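# On a Gradio Space this file is the app entrypoint and runs as-is. The
# pipeline can also be driven without the UI; the ids below are placeholders:
#
#   processor.run_full_pipeline(os.environ["HF_TOKEN"], "gpt2", "user/gpt2-ppo-gguf")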