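"""Gradio app: PPO-train a causal LM on a Hugging Face dataset, convert the
trained model to GGUF FP16, quantize it with llama-quantize, and publish the
results to the Hugging Face Hub."""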
import os
import subprocess
import signal
from pathlib import Path
from dataclasses import dataclass, field

import torch
import gradio as gr
from datasets import load_dataset
from huggingface_hub import HfApi
from transformers import AutoTokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead

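# Disable Gradio telemetry and silence the tokenizers fork-parallelism warning
# that would otherwise appear once subprocesses are spawned.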
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# ----------------- Configuration -----------------
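# Default training dataset; the PPO tokenization step below assumes it exposes
# a plain "text" column.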
DATASET_NAME = "openerotica/erotiquant3"

@dataclass
class QuantizationConfig:
    method: str
    train_data: str = DATASET_NAME
    fp16_model: str = field(default="", init=False)
    quantized_gguf: str = field(default="", init=False)

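# Sharding options, presumably intended for llama.cpp's gguf-split tool; not
# consumed anywhere in the pipeline below.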
@dataclass
class SplitConfig:
    enabled: bool = False
    max_tensors: int = 256

@dataclass
class OutputConfig:
    private_repo: bool = False
    repo_name: str = ""
    filename: str = ""

@dataclass
class ModelProcessingConfig:
    token: str
    model_id: str
    model_name: str
    outdir: str
    quant_config: QuantizationConfig
    split_config: SplitConfig
    output_config: OutputConfig

class GGUFConverterError(Exception):
    """Raised when a conversion or quantization step fails or times out."""

class HuggingFaceModelProcessor:
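    # Subprocess timeouts, in seconds: 1 hour for the HF -> GGUF conversion,
    # 24 hours for quantization, and a 5-second grace period after SIGINT
    # before the child process is force-killed.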
    HF_TO_GGUF_TIMEOUT = 3600
    QUANTIZE_TIMEOUT = 86400
    KILL_TIMEOUT = 5

    DOWNLOAD_FOLDER = "./downloads"
    OUTPUT_FOLDER = "./outputs"

    def __init__(self):
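        # Optional token from the environment; note the pipeline methods below
        # use the token supplied through the UI instead.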
        self.HF_TOKEN = os.environ.get("HF_TOKEN")
        os.makedirs(self.DOWNLOAD_FOLDER, exist_ok=True)
        os.makedirs(self.OUTPUT_FOLDER, exist_ok=True)

    # ----------------- PPO training -----------------
    def _train_model_with_ppo(self, model_id: str, dataset_name: str, outdir: str, token: str):
        print(f"Starting PPO training of {model_id} with dataset {dataset_name}")
        dataset = load_dataset(dataset_name, split="train", token=token)

        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLMWithValueHead.from_pretrained(model_id, token=token)

        # Tokenization: assumes the dataset exposes a "text" column. Queries are
        # truncated but not padded, since PPO prompts can be variable-length.
        def tokenize_function(example):
            example["input_ids"] = tokenizer(
                example["text"], truncation=True, max_length=512
            )["input_ids"]
            return example

        tokenized_dataset = dataset.map(tokenize_function, remove_columns=dataset.column_names)
        tokenized_dataset.set_format(type="torch")

        # PPO configuration. This targets the classic trl (<= 0.11) API, which is
        # the one that pairs with AutoModelForCausalLMWithValueHead.
        ppo_config = PPOConfig(
            model_name=model_id,
            learning_rate=5e-6,
            batch_size=2,
            mini_batch_size=1,
            gradient_accumulation_steps=1,
        )

        ppo_trainer = PPOTrainer(
            config=ppo_config,
            model=model,
            ref_model=None,
            tokenizer=tokenizer,
            dataset=tokenized_dataset,
            data_collator=lambda data: {key: [d[key] for d in data] for key in data[0]},
        )

        # Training loop. PPO needs a scalar reward per generated response; this
        # pipeline defines no reward model, so a constant 0.0 placeholder is used.
        # Swap in a real reward signal for meaningful training.
        generation_kwargs = {"max_new_tokens": 64, "do_sample": True,
                             "pad_token_id": tokenizer.pad_token_id}
        for batch in ppo_trainer.dataloader:
            query_tensors = batch["input_ids"]
            response_tensors = ppo_trainer.generate(
                query_tensors, return_prompt=False, **generation_kwargs
            )
            rewards = [torch.tensor(0.0) for _ in response_tensors]  # placeholder reward
            ppo_trainer.step(query_tensors, response_tensors, rewards)

        # Save the plain base model (without the value head) so the HF -> GGUF
        # converter can read it, along with the tokenizer.
        model.pretrained_model.save_pretrained(outdir)
        tokenizer.save_pretrained(outdir)

        print("PPO training finished.")
        return outdir

    # ----------------- FP16 conversion -----------------
    def _convert_to_fp16(self, model_dir: str, out_fp16_path: str):
        print(f"Converting model to FP16: {out_fp16_path}")
        convert_command = [
            "python3", "/app/convert_hf_to_gguf.py",
            model_dir,
            "--outtype", "f16",
            "--outfile", out_fp16_path
        ]
        process = subprocess.Popen(convert_command, shell=False, stderr=subprocess.STDOUT)
        try:
            process.wait(timeout=self.HF_TO_GGUF_TIMEOUT)
        except subprocess.TimeoutExpired:
            # Ask the converter to exit cleanly, then force-kill if it hangs.
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=self.KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
            raise GGUFConverterError("FP16 conversion failed: timeout")
        if process.returncode != 0:
            raise GGUFConverterError(f"FP16 conversion failed: exit code {process.returncode}")
        print("FP16 conversion complete.")
        return out_fp16_path

    # ----------------- Quantization -----------------
    def _quantize_model(self, fp16_path: str, gguf_path: str, method: str):
        print(f"Quantizing model with method {method}")
        quantize_cmd = ["llama-quantize", fp16_path, gguf_path, method]
        process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
        try:
            process.wait(timeout=self.QUANTIZE_TIMEOUT)
        except subprocess.TimeoutExpired:
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=self.KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
            raise GGUFConverterError("Quantization failed: timeout")
        if process.returncode != 0:
            raise GGUFConverterError(f"Quantization failed: exit code {process.returncode}")
        print("Quantization complete.")
        return gguf_path

    # ----------------- Publishing to HF -----------------
    def _push_to_hf(self, local_dir: str, repo_name: str, token: str, private=False):
        print(f"Uploading model to Hugging Face: {repo_name}")
        # huggingface_hub's Repository class is deprecated and requires a git
        # clone; create the repo if needed and upload through HfApi instead.
        api = HfApi(token=token)
        api.create_repo(repo_id=repo_name, private=private, exist_ok=True)
        api.upload_folder(
            repo_id=repo_name,
            folder_path=local_dir,
            commit_message="Trained and quantized model",
        )
        print("Upload complete.")

    # ----------------- Full pipeline -----------------
    def run_full_pipeline(self, token, model_id, hf_repo_name):
        logs = []
        try:
            outdir = self.OUTPUT_FOLDER
            model_name = Path(model_id).name
            quant_config = QuantizationConfig(method="Q4_0")
            quant_config.fp16_model = f"{outdir}/{model_name}.f16.gguf"
            quant_config.quantized_gguf = f"{outdir}/{model_name}.gguf"
            split_config = SplitConfig()
            output_config = OutputConfig(private_repo=False, repo_name=hf_repo_name)
            # Bundle the settings; kept for reference even though the steps
            # below take their arguments directly.
            processing_config = ModelProcessingConfig(
                token=token, model_id=model_id, model_name=model_name,
                outdir=outdir, quant_config=quant_config, split_config=split_config,
                output_config=output_config
            )

            # 1. Train the model with PPO
            trained_dir = self._train_model_with_ppo(model_id, DATASET_NAME, outdir, token)
            logs.append("Training complete")

            # 2. Convert to FP16
            self._convert_to_fp16(trained_dir, quant_config.fp16_model)
            logs.append("FP16 conversion complete")

            # 3. Quantize
            self._quantize_model(quant_config.fp16_model, quant_config.quantized_gguf, quant_config.method)
            logs.append("Model quantized to GGUF")

            # 4. Upload to Hugging Face
            self._push_to_hf(trained_dir, hf_repo_name, token, private=output_config.private_repo)
            logs.append(f"Model uploaded to Hugging Face: {hf_repo_name}")

            logs.append("✅ Pipeline finished successfully")

        except Exception as e:
            logs.append(f"❌ ERROR: {e}")

        return "\n".join(logs)

# ----------------- Gradio interface -----------------
processor = HuggingFaceModelProcessor()

with gr.Blocks() as demo:
    gr.Markdown("## Automated GGUF pipeline with PPO training and HF publishing")
    model_input = gr.Textbox(label="HF model ID (to train)", placeholder="ochoa/your-model")
    repo_input = gr.Textbox(label="HF repo name to publish to", placeholder="username/new-model")
    token_input = gr.Textbox(label="Your HF token", type="password")
    run_button = gr.Button("Train, quantize and publish")
    output_logs = gr.Textbox(label="Logs", lines=20)

    run_button.click(
        fn=processor.run_full_pipeline,
        inputs=[token_input, model_input, repo_input],
        outputs=[output_logs]
    )

demo.launch()
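# Note: launch() blocks. Pass server_name="0.0.0.0" if the UI must be reachable
# from outside the container (e.g. when self-hosting rather than on a Space).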