Spaces:
Running
Running
Execute PPT, poster, and PR separately
Browse files
app.py
CHANGED
|
@@ -54,71 +54,143 @@ def save_api_key(api_key):
|
|
| 54 |
except Exception as e:
|
| 55 |
return f"❌ 出错: {str(e)}", get_debug_info()
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
|
|
|
| 59 |
if not os.path.exists(PAPERS_DIR) or not any(f.endswith('.pdf') for f in os.listdir(PAPERS_DIR)):
|
| 60 |
return "❌ 未发现 PDF 文件", get_debug_info(), "No execution logs."
|
| 61 |
|
| 62 |
try:
|
|
|
|
| 63 |
env = os.environ.copy()
|
| 64 |
env["MINERU_FORMULA_ENABLE"] = "false"
|
| 65 |
env["MINERU_TABLE_ENABLE"] = "false"
|
| 66 |
env["MINERU_DEVICE_MODE"] = "cpu"
|
| 67 |
env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"
|
| 68 |
|
| 69 |
-
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
env=env,
|
| 74 |
capture_output=True,
|
| 75 |
text=True,
|
| 76 |
timeout=300
|
| 77 |
)
|
| 78 |
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
if
|
| 82 |
-
status = "✅ PDF解析完成"
|
| 83 |
else:
|
| 84 |
-
status = f"❌
|
| 85 |
|
| 86 |
return status, get_debug_info(), full_log
|
| 87 |
|
|
|
|
|
|
|
|
|
|
| 88 |
except Exception as e:
|
| 89 |
error_log = f"Exception occurred during execution:\n{str(e)}"
|
| 90 |
return "❌ 运行异常", get_debug_info(), error_log
|
| 91 |
|
| 92 |
-
def run_final_generation():
|
| 93 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 94 |
if not os.path.exists(OUTPUT_DIR):
|
| 95 |
return "❌ 请先执行第二步解析", get_debug_info(), "No output folder found.", None
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
result = subprocess.run(
|
| 101 |
command,
|
| 102 |
capture_output=True,
|
| 103 |
text=True,
|
| 104 |
-
timeout=600 #
|
| 105 |
)
|
|
|
|
| 106 |
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
|
|
|
| 111 |
|
| 112 |
-
#
|
| 113 |
-
# shutil.make_archive 会自动加上 .zip 后缀,所以 base_name 不带后缀
|
| 114 |
zip_base_name = ZIP_OUTPUT_PATH.replace(".zip", "")
|
| 115 |
shutil.make_archive(zip_base_name, 'zip', OUTPUT_DIR)
|
| 116 |
|
| 117 |
-
|
|
|
|
| 118 |
|
| 119 |
except Exception as e:
|
| 120 |
-
error_log = f"Exception occurred
|
| 121 |
-
return "❌ 最终生成异常", get_debug_info(), error_log, None
|
| 122 |
|
| 123 |
# --- UI ---
|
| 124 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
@@ -136,12 +208,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 136 |
|
| 137 |
with gr.Group():
|
| 138 |
gr.Markdown("### 2. 执行解析")
|
| 139 |
-
parse_btn = gr.Button("🚀 Run Mineru
|
| 140 |
parse_status = gr.Textbox(label="运行状态")
|
| 141 |
|
| 142 |
with gr.Group():
|
| 143 |
gr.Markdown("### 3. 最终生成")
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
gen_status = gr.Textbox(label="生成状态")
|
| 146 |
download_file = gr.File(label="下载压缩后的结果", interactive=False)
|
| 147 |
|
|
@@ -158,20 +236,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 158 |
)
|
| 159 |
refresh_btn = gr.Button("🔄 刷新状态")
|
| 160 |
|
| 161 |
-
# 逻辑绑定
|
| 162 |
key_btn.click(save_api_key, inputs=key_input, outputs=[parse_status, debug_view])
|
| 163 |
pdf_btn.click(save_pdf, inputs=pdf_input, outputs=[parse_status, debug_view])
|
| 164 |
|
| 165 |
parse_btn.click(
|
| 166 |
-
fn=
|
| 167 |
outputs=[parse_status, debug_view, cmd_logs]
|
| 168 |
)
|
| 169 |
|
| 170 |
-
# 最终生成逻辑绑定
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
)
|
| 175 |
|
| 176 |
refresh_btn.click(get_debug_info, outputs=debug_view)
|
| 177 |
|
|
|
|
| 54 |
except Exception as e:
|
| 55 |
return f"❌ 出错: {str(e)}", get_debug_info()
|
| 56 |
|
| 57 |
+
|
| 58 |
+
def run_mineru_parsing_and_dag_gen():
    """Parse the uploaded PDFs with Mineru, then run the DAG generation script.

    The two steps run sequentially as subprocesses, each with a 300-second
    timeout. The DAG step is skipped when Mineru exits with a non-zero code.

    Returns:
        A 3-tuple ``(status, debug_info, log)``: a short status message, the
        value returned by ``get_debug_info()``, and the captured stdout/stderr
        of every step that ran (followed by the error description when a
        timeout or another exception interrupted the run).
    """
    # Imported locally so the function does not rely on the module header,
    # which is not guaranteed to import ``sys``.
    import sys

    # ``isdir`` instead of ``exists``: a regular file at PAPERS_DIR would make
    # ``os.listdir`` raise before any error handling is in place.
    if not os.path.isdir(PAPERS_DIR) or not any(
        f.endswith('.pdf') for f in os.listdir(PAPERS_DIR)
    ):
        return "❌ 未发现 PDF 文件", get_debug_info(), "No execution logs."

    # Initialised before the ``try`` so the exception handlers can always
    # report whatever output was collected before the failure.
    full_log = ""

    try:
        # ================= Step 1: run Mineru parsing =================
        env = os.environ.copy()
        env["MINERU_FORMULA_ENABLE"] = "false"
        env["MINERU_TABLE_ENABLE"] = "false"
        env["MINERU_DEVICE_MODE"] = "cpu"
        env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"

        command_mineru = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]

        result_mineru = subprocess.run(
            command_mineru,
            env=env,
            capture_output=True,
            text=True,
            timeout=300
        )

        # Record the Mineru output.
        full_log = f"--- Mineru STDOUT ---\n{result_mineru.stdout}\n\n--- Mineru STDERR ---\n{result_mineru.stderr}\n"

        # If parsing failed, return right away and skip DAG generation.
        if result_mineru.returncode != 0:
            status = f"❌ Mineru 解析失败 (Exit Code: {result_mineru.returncode})"
            return status, get_debug_info(), full_log

        # ================= Step 2: run DAG generation =================
        # sys.executable makes the script run under the same interpreter
        # (and virtual environment) as this application.
        command_dag = [sys.executable, "gen_dag.py"]

        result_dag = subprocess.run(
            command_dag,
            capture_output=True,
            text=True,
            timeout=300  # adjust to the time DAG generation actually needs
        )

        # Append the DAG generation output.
        full_log += f"\n--- DAG Gen STDOUT ---\n{result_dag.stdout}\n\n--- DAG Gen STDERR ---\n{result_dag.stderr}"

        if result_dag.returncode == 0:
            status = "✅ PDF解析与DAG生成全部完成"
        else:
            status = f"❌ DAG生成失败 (Exit Code: {result_dag.returncode})"

        return status, get_debug_info(), full_log

    except subprocess.TimeoutExpired as e:
        # Keep the output of any step that already finished: a DAG timeout
        # must not hide the Mineru log.
        prefix = f"{full_log}\n" if full_log else ""
        error_log = f"{prefix}Execution timed out:\n{str(e)}"
        return "❌ 运行超时", get_debug_info(), error_log
    except Exception as e:
        prefix = f"{full_log}\n" if full_log else ""
        error_log = f"{prefix}Exception occurred during execution:\n{str(e)}"
        return "❌ 运行异常", get_debug_info(), error_log
|
| 116 |
|
| 117 |
+
def run_final_generation(task_type="all"):
    """Run the generation script(s) for *task_type* and zip the output folder.

    The selected scripts are started in parallel, each as its own subprocess
    with an independent 600-second timeout. The output folder is archived
    only when every script exits with code 0.

    Args:
        task_type: One of 'ppt', 'poster', 'pr' or 'all'.

    Returns:
        A 4-tuple ``(status, debug_info, log, archive_path)``. ``archive_path``
        is the path of the zip file that was written, or ``None`` whenever no
        archive was produced (missing output folder, unknown task type, a
        failed script, or an unexpected exception).
    """
    # Imported locally so the function does not rely on the module header,
    # which is not guaranteed to import these modules.
    import concurrent.futures
    import sys

    if not os.path.exists(OUTPUT_DIR):
        return "❌ 请先执行第二步解析", get_debug_info(), "No output folder found.", None

    # Scripts to launch for each supported task type.
    scripts_by_task = {
        "ppt": ["gen_ppt.py"],
        "poster": ["gen_poster.py"],
        "pr": ["gen_pr.py"],
        "all": ["gen_ppt.py", "gen_poster.py", "gen_pr.py"],
    }
    # The isinstance guard keeps unhashable or non-string input on the
    # "unknown task type" path instead of raising from the dict lookup.
    scripts_to_run = (
        scripts_by_task.get(task_type) if isinstance(task_type, str) else None
    )
    if scripts_to_run is None:
        return "❌ 未知任务类型", get_debug_info(), "Invalid task_type.", None

    # Log fragments are collected in a list and joined when returned.
    log_parts = [f"🚀 准备启动 {len(scripts_to_run)} 个任务...\n"]
    success = True

    def execute_script(script):
        """Run one script and return ``(script, CompletedProcess)``."""
        result = subprocess.run(
            [sys.executable, script],
            capture_output=True,
            text=True,
            timeout=600  # each script has its own timeout
        )
        return script, result

    try:
        # Run the scripts in parallel, one worker thread per script.
        with concurrent.futures.ThreadPoolExecutor(max_workers=len(scripts_to_run)) as executor:
            future_to_script = {executor.submit(execute_script, s): s for s in scripts_to_run}

            # as_completed yields each future as soon as its script finishes,
            # so log sections appear in completion order.
            for future in concurrent.futures.as_completed(future_to_script):
                script_name = future_to_script[future]
                try:
                    # Re-raises whatever the worker raised (e.g. a timeout).
                    _, result = future.result()
                except subprocess.TimeoutExpired as e:
                    success = False
                    log_parts.append(
                        f"\n================ ❌ 任务超时: {script_name} ================\n{str(e)}\n"
                    )
                    continue
                except Exception as e:
                    success = False
                    log_parts.append(
                        f"\n================ ❌ 任务异常: {script_name} ================\n{str(e)}\n"
                    )
                    continue

                log_parts.append(f"\n================ ✅ 执行完成: {script_name} ================\n")
                log_parts.append(f"--- STDOUT ---\n{result.stdout}\n\n--- STDERR ---\n{result.stderr}\n")

                # A non-zero exit code marks the whole run as failed.
                if result.returncode != 0:
                    success = False
                    log_parts.append(
                        f"❌ [错误] {script_name} 返回非零退出码 (Exit Code: {result.returncode})\n"
                    )

        # If any script failed, return without building the archive.
        if not success:
            return (
                f"❌ {task_type.upper()} 包含失败任务,请检查日志",
                get_debug_info(),
                "".join(log_parts),
                None,
            )

        # All scripts succeeded: zip the output folder.
        # shutil.make_archive appends ".zip" itself, so strip it from the
        # base name. Only a trailing ".zip" is removed; a ".zip" elsewhere
        # in the path (e.g. in a directory name) is left alone.
        if ZIP_OUTPUT_PATH.endswith(".zip"):
            zip_base_name = ZIP_OUTPUT_PATH[:-len(".zip")]
        else:
            zip_base_name = ZIP_OUTPUT_PATH
        # Return the path make_archive reports so the download always points
        # at the file that was actually written.
        archive_path = shutil.make_archive(zip_base_name, 'zip', OUTPUT_DIR)

        success_msg = f"✅ {task_type.upper()} 生成并压缩完成"
        return success_msg, get_debug_info(), "".join(log_parts), archive_path

    except Exception as e:
        error_log = "".join(log_parts) + f"\n[全局异常] Exception occurred:\n{str(e)}"
        return "❌ 最终生成发生全局异常", get_debug_info(), error_log, None
|
| 194 |
|
| 195 |
# --- UI ---
|
| 196 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
|
|
| 208 |
|
| 209 |
with gr.Group():
|
| 210 |
gr.Markdown("### 2. 执行解析")
|
| 211 |
+
parse_btn = gr.Button("🚀 Run Mineru & DAG Gen", variant="primary")
|
| 212 |
parse_status = gr.Textbox(label="运行状态")
|
| 213 |
|
| 214 |
with gr.Group():
|
| 215 |
gr.Markdown("### 3. 最终生成")
|
| 216 |
+
gr.Markdown("请选择要生成的内容类型:")
|
| 217 |
+
with gr.Row():
|
| 218 |
+
gen_ppt_btn = gr.Button("📊 生成 PPT")
|
| 219 |
+
gen_poster_btn = gr.Button("🖼️ 生成 Poster")
|
| 220 |
+
gen_pr_btn = gr.Button("📰 生成 PR")
|
| 221 |
+
gen_all_btn = gr.Button("🔨 生成全部 (ALL)", variant="primary")
|
| 222 |
+
|
| 223 |
gen_status = gr.Textbox(label="生成状态")
|
| 224 |
download_file = gr.File(label="下载压缩后的结果", interactive=False)
|
| 225 |
|
|
|
|
| 236 |
)
|
| 237 |
refresh_btn = gr.Button("🔄 刷新状态")
|
| 238 |
|
| 239 |
+
# ================= 逻辑绑定 =================
|
| 240 |
key_btn.click(save_api_key, inputs=key_input, outputs=[parse_status, debug_view])
|
| 241 |
pdf_btn.click(save_pdf, inputs=pdf_input, outputs=[parse_status, debug_view])
|
| 242 |
|
| 243 |
parse_btn.click(
|
| 244 |
+
fn=run_mineru_parsing_and_dag_gen,
|
| 245 |
outputs=[parse_status, debug_view, cmd_logs]
|
| 246 |
)
|
| 247 |
|
| 248 |
+
# 最终生成逻辑绑定 (使用 lambda 传递固定的 task_type 参数)
|
| 249 |
+
gen_ppt_btn.click(fn=lambda: run_final_generation("ppt"), outputs=[gen_status, debug_view, cmd_logs, download_file])
|
| 250 |
+
gen_poster_btn.click(fn=lambda: run_final_generation("poster"), outputs=[gen_status, debug_view, cmd_logs, download_file])
|
| 251 |
+
gen_pr_btn.click(fn=lambda: run_final_generation("pr"), outputs=[gen_status, debug_view, cmd_logs, download_file])
|
| 252 |
+
gen_all_btn.click(fn=lambda: run_final_generation("all"), outputs=[gen_status, debug_view, cmd_logs, download_file])
|
| 253 |
|
| 254 |
refresh_btn.click(get_debug_info, outputs=debug_view)
|
| 255 |
|