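"""Gradio app: PPO-train a causal LM on a Hugging Face dataset, convert the
trained model to GGUF FP16, quantize it with llama-quantize, and publish the
results to the Hugging Face Hub."""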
import os
import subprocess
import signal
from pathlib import Path
from dataclasses import dataclass, field

import torch
import gradio as gr
from datasets import load_dataset
from huggingface_hub import HfApi
from transformers import AutoTokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead

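# Disable Gradio telemetry and silence the tokenizers fork-parallelism warning
# that would otherwise appear once subprocesses are spawned.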
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# ----------------- Configuration -----------------
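# Default training dataset; the PPO tokenization step below assumes it exposes
# a plain "text" column.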
DATASET_NAME = "openerotica/erotiquant3"

@dataclass
class QuantizationConfig:
    method: str
    train_data: str = DATASET_NAME
    fp16_model: str = field(default="", init=False)
    quantized_gguf: str = field(default="", init=False)

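# Sharding options, presumably intended for llama.cpp's gguf-split tool; not
# consumed anywhere in the pipeline below.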
@dataclass
class SplitConfig:
    enabled: bool = False
    max_tensors: int = 256

@dataclass
class OutputConfig:
    private_repo: bool = False
    repo_name: str = ""
    filename: str = ""

@dataclass
class ModelProcessingConfig:
    token: str
    model_id: str
    model_name: str
    outdir: str
    quant_config: QuantizationConfig
    split_config: SplitConfig
    output_config: OutputConfig

class GGUFConverterError(Exception):
    """Raised when a conversion or quantization step fails or times out."""

class HuggingFaceModelProcessor:
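    # Subprocess timeouts, in seconds: 1 hour for the HF -> GGUF conversion,
    # 24 hours for quantization, and a 5-second grace period after SIGINT
    # before the child process is force-killed.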
    HF_TO_GGUF_TIMEOUT = 3600
    QUANTIZE_TIMEOUT = 86400
    KILL_TIMEOUT = 5

    DOWNLOAD_FOLDER = "./downloads"
    OUTPUT_FOLDER = "./outputs"

    def __init__(self):
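        # Optional token from the environment; note the pipeline methods below
        # use the token supplied through the UI instead.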
        self.HF_TOKEN = os.environ.get("HF_TOKEN")
        os.makedirs(self.DOWNLOAD_FOLDER, exist_ok=True)
        os.makedirs(self.OUTPUT_FOLDER, exist_ok=True)

    # ----------------- PPO training -----------------
    def _train_model_with_ppo(self, model_id: str, dataset_name: str, outdir: str, token: str):
        print(f"Starting PPO training of {model_id} with dataset {dataset_name}")
        dataset = load_dataset(dataset_name, split="train", token=token)

        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLMWithValueHead.from_pretrained(model_id, token=token)

        # Tokenization: assumes the dataset exposes a "text" column. Queries are
        # truncated but not padded, since PPO prompts can be variable-length.
        def tokenize_function(example):
            example["input_ids"] = tokenizer(
                example["text"], truncation=True, max_length=512
            )["input_ids"]
            return example

        tokenized_dataset = dataset.map(tokenize_function, remove_columns=dataset.column_names)
        tokenized_dataset.set_format(type="torch")

        # PPO configuration. This targets the classic trl (<= 0.11) API, which is
        # the one that pairs with AutoModelForCausalLMWithValueHead.
        ppo_config = PPOConfig(
            model_name=model_id,
            learning_rate=5e-6,
            batch_size=2,
            mini_batch_size=1,
            gradient_accumulation_steps=1,
        )

        ppo_trainer = PPOTrainer(
            config=ppo_config,
            model=model,
            ref_model=None,
            tokenizer=tokenizer,
            dataset=tokenized_dataset,
            data_collator=lambda data: {key: [d[key] for d in data] for key in data[0]},
        )

        # Training loop. PPO needs a scalar reward per generated response; this
        # pipeline defines no reward model, so a constant 0.0 placeholder is used.
        # Swap in a real reward signal for meaningful training.
        generation_kwargs = {"max_new_tokens": 64, "do_sample": True,
                             "pad_token_id": tokenizer.pad_token_id}
        for batch in ppo_trainer.dataloader:
            query_tensors = batch["input_ids"]
            response_tensors = ppo_trainer.generate(
                query_tensors, return_prompt=False, **generation_kwargs
            )
            rewards = [torch.tensor(0.0) for _ in response_tensors]  # placeholder reward
            ppo_trainer.step(query_tensors, response_tensors, rewards)

        # Save the plain base model (without the value head) so the HF -> GGUF
        # converter can read it, along with the tokenizer.
        model.pretrained_model.save_pretrained(outdir)
        tokenizer.save_pretrained(outdir)

        print("PPO training finished.")
        return outdir

    # ----------------- FP16 conversion -----------------
    def _convert_to_fp16(self, model_dir: str, out_fp16_path: str):
        print(f"Converting model to FP16: {out_fp16_path}")
        convert_command = [
            "python3", "/app/convert_hf_to_gguf.py",
            model_dir,
            "--outtype", "f16",
            "--outfile", out_fp16_path
        ]
        process = subprocess.Popen(convert_command, shell=False, stderr=subprocess.STDOUT)
        try:
            process.wait(timeout=self.HF_TO_GGUF_TIMEOUT)
        except subprocess.TimeoutExpired:
            # Ask the converter to exit cleanly, then force-kill if it hangs.
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=self.KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
            raise GGUFConverterError("FP16 conversion failed: timeout")
        if process.returncode != 0:
            raise GGUFConverterError(f"FP16 conversion failed: exit code {process.returncode}")
        print("FP16 conversion complete.")
        return out_fp16_path

    # ----------------- Quantization -----------------
    def _quantize_model(self, fp16_path: str, gguf_path: str, method: str):
        print(f"Quantizing model with method {method}")
        quantize_cmd = ["llama-quantize", fp16_path, gguf_path, method]
        process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
        try:
            process.wait(timeout=self.QUANTIZE_TIMEOUT)
        except subprocess.TimeoutExpired:
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=self.KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
            raise GGUFConverterError("Quantization failed: timeout")
        if process.returncode != 0:
            raise GGUFConverterError(f"Quantization failed: exit code {process.returncode}")
        print("Quantization complete.")
        return gguf_path

    # ----------------- Publishing to HF -----------------
    def _push_to_hf(self, local_dir: str, repo_name: str, token: str, private=False):
        print(f"Uploading model to Hugging Face: {repo_name}")
        # huggingface_hub's Repository class is deprecated and requires a git
        # clone; create the repo if needed and upload through HfApi instead.
        api = HfApi(token=token)
        api.create_repo(repo_id=repo_name, private=private, exist_ok=True)
        api.upload_folder(
            repo_id=repo_name,
            folder_path=local_dir,
            commit_message="Trained and quantized model",
        )
        print("Upload complete.")

    # ----------------- Full pipeline -----------------
    def run_full_pipeline(self, token, model_id, hf_repo_name):
        logs = []
        try:
            outdir = self.OUTPUT_FOLDER
            model_name = Path(model_id).name
            quant_config = QuantizationConfig(method="Q4_0")
            quant_config.fp16_model = f"{outdir}/{model_name}.f16.gguf"
            quant_config.quantized_gguf = f"{outdir}/{model_name}.gguf"
            split_config = SplitConfig()
            output_config = OutputConfig(private_repo=False, repo_name=hf_repo_name)
            # Bundle the settings; kept for reference even though the steps
            # below take their arguments directly.
            processing_config = ModelProcessingConfig(
                token=token, model_id=model_id, model_name=model_name,
                outdir=outdir, quant_config=quant_config, split_config=split_config,
                output_config=output_config
            )

            # 1. Train the model with PPO
            trained_dir = self._train_model_with_ppo(model_id, DATASET_NAME, outdir, token)
            logs.append("Training complete")

            # 2. Convert to FP16
            self._convert_to_fp16(trained_dir, quant_config.fp16_model)
            logs.append("FP16 conversion complete")

            # 3. Quantize
            self._quantize_model(quant_config.fp16_model, quant_config.quantized_gguf, quant_config.method)
            logs.append("Model quantized to GGUF")

            # 4. Upload to Hugging Face
            self._push_to_hf(trained_dir, hf_repo_name, token, private=output_config.private_repo)
            logs.append(f"Model uploaded to Hugging Face: {hf_repo_name}")

            logs.append("✅ Pipeline finished successfully")

        except Exception as e:
            logs.append(f"❌ ERROR: {e}")

        return "\n".join(logs)

# ----------------- Gradio interface -----------------
processor = HuggingFaceModelProcessor()

with gr.Blocks() as demo:
    gr.Markdown("## Automated GGUF pipeline with PPO training and HF publishing")
    model_input = gr.Textbox(label="HF model ID (to train)", placeholder="ochoa/your-model")
    repo_input = gr.Textbox(label="HF repo name to publish to", placeholder="username/new-model")
    token_input = gr.Textbox(label="Your HF token", type="password")
    run_button = gr.Button("Train, quantize and publish")
    output_logs = gr.Textbox(label="Logs", lines=20)

    run_button.click(
        fn=processor.run_full_pipeline,
        inputs=[token_input, model_input, repo_input],
        outputs=[output_logs]
    )

demo.launch()
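# Note: launch() blocks. Pass server_name="0.0.0.0" if the UI must be reachable
# from outside the container (e.g. when self-hosting rather than on a Space).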