Laramie2 committed on
Commit
9aca477
·
verified ·
1 Parent(s): 1bb0075

Execute PPT, poster, and PR separately

Browse files
Files changed (1) hide show
  1. app.py +110 -32
app.py CHANGED
@@ -54,71 +54,143 @@ def save_api_key(api_key):
54
  except Exception as e:
55
  return f"❌ 出错: {str(e)}", get_debug_info()
56
 
57
- def run_mineru_parsing():
58
- """执行 PDF 解析并捕获完整日志"""
 
59
  if not os.path.exists(PAPERS_DIR) or not any(f.endswith('.pdf') for f in os.listdir(PAPERS_DIR)):
60
  return "❌ 未发现 PDF 文件", get_debug_info(), "No execution logs."
61
 
62
  try:
 
63
  env = os.environ.copy()
64
  env["MINERU_FORMULA_ENABLE"] = "false"
65
  env["MINERU_TABLE_ENABLE"] = "false"
66
  env["MINERU_DEVICE_MODE"] = "cpu"
67
  env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"
68
 
69
- command = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]
70
 
71
- result = subprocess.run(
72
- command,
73
  env=env,
74
  capture_output=True,
75
  text=True,
76
  timeout=300
77
  )
78
 
79
- full_log = f"--- STDOUT ---\n{result.stdout}\n\n--- STDERR ---\n{result.stderr}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- if result.returncode == 0:
82
- status = "✅ PDF解析完成"
83
  else:
84
- status = f"❌ 解析失败 (Exit Code: {result.returncode})"
85
 
86
  return status, get_debug_info(), full_log
87
 
 
 
 
88
  except Exception as e:
89
  error_log = f"Exception occurred during execution:\n{str(e)}"
90
  return "❌ 运行异常", get_debug_info(), error_log
91
 
92
- def run_final_generation():
93
- """执行 python main.py 并压缩结果"""
 
 
 
94
  if not os.path.exists(OUTPUT_DIR):
95
  return "❌ 请先执行第二步解析", get_debug_info(), "No output folder found.", None
96
 
97
- try:
98
- # 1. 运行 python main.py
99
- command = [sys.executable, "main.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  result = subprocess.run(
101
  command,
102
  capture_output=True,
103
  text=True,
104
- timeout=600 # 适当增加超时时间
105
  )
 
106
 
107
- full_log = f"--- STDOUT ---\n{result.stdout}\n\n--- STDERR ---\n{result.stderr}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- if result.returncode != 0:
110
- return f"❌ 生成失败 (Exit Code: {result.returncode})", get_debug_info(), full_log, None
 
111
 
112
- # 2. 压缩 mineru_outputs 文件夹
113
- # shutil.make_archive 会自动加上 .zip 后缀,所以 base_name 不带后缀
114
  zip_base_name = ZIP_OUTPUT_PATH.replace(".zip", "")
115
  shutil.make_archive(zip_base_name, 'zip', OUTPUT_DIR)
116
 
117
- return "✅ 最终生成并压缩完成", get_debug_info(), full_log, ZIP_OUTPUT_PATH
 
118
 
119
  except Exception as e:
120
- error_log = f"Exception occurred during final generation:\n{str(e)}"
121
- return "❌ 最终生成异常", get_debug_info(), error_log, None
122
 
123
  # --- UI ---
124
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -136,12 +208,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
136
 
137
  with gr.Group():
138
  gr.Markdown("### 2. 执行解析")
139
- parse_btn = gr.Button("🚀 Run Mineru (CPU Mode)", variant="primary")
140
  parse_status = gr.Textbox(label="运行状态")
141
 
142
  with gr.Group():
143
  gr.Markdown("### 3. 最终生成")
144
- gen_btn = gr.Button("🔨 执行 main.py 并打包", variant="primary")
 
 
 
 
 
 
145
  gen_status = gr.Textbox(label="生成状态")
146
  download_file = gr.File(label="下载压缩后的结果", interactive=False)
147
 
@@ -158,20 +236,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
158
  )
159
  refresh_btn = gr.Button("🔄 刷新状态")
160
 
161
- # 逻辑绑定
162
  key_btn.click(save_api_key, inputs=key_input, outputs=[parse_status, debug_view])
163
  pdf_btn.click(save_pdf, inputs=pdf_input, outputs=[parse_status, debug_view])
164
 
165
  parse_btn.click(
166
- fn=run_mineru_parsing,
167
  outputs=[parse_status, debug_view, cmd_logs]
168
  )
169
 
170
- # 最终生成逻辑绑定
171
- gen_btn.click(
172
- fn=run_final_generation,
173
- outputs=[gen_status, debug_view, cmd_logs, download_file]
174
- )
175
 
176
  refresh_btn.click(get_debug_info, outputs=debug_view)
177
 
 
54
  except Exception as e:
55
  return f"❌ 出错: {str(e)}", get_debug_info()
56
 
57
+
58
def run_mineru_parsing_and_dag_gen():
    """Parse the uploaded PDFs with Mineru, then run the DAG generation script.

    The two steps run sequentially as subprocesses; the DAG step is skipped
    when Mineru exits with a non-zero code. Output of both steps is
    accumulated into a single log, and that log is preserved even when a
    later step times out or raises.

    Returns:
        A 3-tuple ``(status, debug_info, log)``: a status message, the value
        of ``get_debug_info()``, and the captured stdout/stderr (or an error
        description).
    """
    if not os.path.exists(PAPERS_DIR) or not any(f.endswith('.pdf') for f in os.listdir(PAPERS_DIR)):
        return "❌ 未发现 PDF 文件", get_debug_info(), "No execution logs."

    # Accumulated across both steps so that a failure in the second step
    # still reports what the first step printed.
    full_log = ""

    try:
        # ---- Step 1: Mineru parsing ----
        # Mineru is configured through environment variables on a copy of the
        # current environment, so the app's own environment is not modified.
        env = os.environ.copy()
        env["MINERU_FORMULA_ENABLE"] = "false"
        env["MINERU_TABLE_ENABLE"] = "false"
        env["MINERU_DEVICE_MODE"] = "cpu"
        env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"

        command_mineru = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]

        result_mineru = subprocess.run(
            command_mineru,
            env=env,
            capture_output=True,
            text=True,
            timeout=300
        )

        full_log = f"--- Mineru STDOUT ---\n{result_mineru.stdout}\n\n--- Mineru STDERR ---\n{result_mineru.stderr}\n"

        # Parsing failed: report it and do not attempt DAG generation.
        if result_mineru.returncode != 0:
            status = f"❌ Mineru 解析失败 (Exit Code: {result_mineru.returncode})"
            return status, get_debug_info(), full_log

        # ---- Step 2: DAG generation ----
        # sys.executable runs the script with the same interpreter as this app.
        command_dag = [sys.executable, "gen_dag.py"]

        result_dag = subprocess.run(
            command_dag,
            capture_output=True,
            text=True,
            timeout=300  # adjust to how long DAG generation is expected to take
        )

        full_log += f"\n--- DAG Gen STDOUT ---\n{result_dag.stdout}\n\n--- DAG Gen STDERR ---\n{result_dag.stderr}"

        if result_dag.returncode == 0:
            status = "✅ PDF解析与DAG生成全部完成"
        else:
            status = f"❌ DAG生成失败 (Exit Code: {result_dag.returncode})"

        return status, get_debug_info(), full_log

    except subprocess.TimeoutExpired as e:
        # Keep whatever was already captured (e.g. the Mineru log when the
        # DAG step is the one that timed out).
        prefix = f"{full_log}\n" if full_log else ""
        error_log = f"{prefix}Execution timed out:\n{str(e)}"
        return "❌ 运行超时", get_debug_info(), error_log
    except Exception as e:
        prefix = f"{full_log}\n" if full_log else ""
        error_log = f"{prefix}Exception occurred during execution:\n{str(e)}"
        return "❌ 运行异常", get_debug_info(), error_log
116
 
117
def run_final_generation(task_type="all"):
    """Run the generation script(s) for ``task_type`` and zip the output folder.

    The selected scripts are launched in parallel, one subprocess per script,
    each with its own 600-second timeout. The output folder is archived only
    when every script exits with code 0.

    Args:
        task_type: One of ``'ppt'``, ``'poster'``, ``'pr'`` or ``'all'``.

    Returns:
        A 4-tuple ``(status, debug_info, log, archive_path)``. ``archive_path``
        is the path of the created zip file on success and ``None`` otherwise.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``import concurrent.futures`` being present elsewhere in the file.
    import concurrent.futures

    if not os.path.exists(OUTPUT_DIR):
        return "❌ 请先执行第二步解析", get_debug_info(), "No output folder found.", None

    # Which scripts each task type launches.
    scripts_by_task = {
        "ppt": ["gen_ppt.py"],
        "poster": ["gen_poster.py"],
        "pr": ["gen_pr.py"],
        "all": ["gen_ppt.py", "gen_poster.py", "gen_pr.py"],
    }
    if not isinstance(task_type, str) or task_type not in scripts_by_task:
        return "❌ 未知任务类型", get_debug_info(), "Invalid task_type.", None
    scripts_to_run = scripts_by_task[task_type]

    full_log = f"🚀 准备启动 {len(scripts_to_run)} 个任务...\n"
    success = True

    def execute_script(script):
        """Run one script with the current interpreter and return its result."""
        return subprocess.run(
            [sys.executable, script],
            capture_output=True,
            text=True,
            timeout=600  # independent timeout for each script
        )

    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=len(scripts_to_run)) as executor:
            future_to_script = {executor.submit(execute_script, s): s for s in scripts_to_run}

            # as_completed yields each future as soon as its script finishes,
            # so log sections appear in completion order.
            for future in concurrent.futures.as_completed(future_to_script):
                script_name = future_to_script[future]
                try:
                    result = future.result()

                    full_log += f"\n================ ✅ 执行完成: {script_name} ================\n"
                    full_log += f"--- STDOUT ---\n{result.stdout}\n\n--- STDERR ---\n{result.stderr}\n"

                    if result.returncode != 0:
                        success = False
                        full_log += f"❌ [错误] {script_name} 返回非零退出码 (Exit Code: {result.returncode})\n"

                except subprocess.TimeoutExpired as e:
                    success = False
                    full_log += f"\n================ ❌ 任务超时: {script_name} ================\n{str(e)}\n"
                except Exception as e:
                    success = False
                    full_log += f"\n================ ❌ 任务异常: {script_name} ================\n{str(e)}\n"

        # Any failed script aborts before archiving.
        if not success:
            return f"❌ {task_type.upper()} 包含失败任务,请检查日志", get_debug_info(), full_log, None

        # make_archive appends ".zip" itself, so pass the path without it.
        # Strip only a trailing ".zip"; other occurrences in the path stay.
        if ZIP_OUTPUT_PATH.endswith(".zip"):
            zip_base_name = ZIP_OUTPUT_PATH[:-len(".zip")]
        else:
            zip_base_name = ZIP_OUTPUT_PATH
        archive_path = shutil.make_archive(zip_base_name, 'zip', OUTPUT_DIR)

        success_msg = f"✅ {task_type.upper()} 生成并压缩完成"
        return success_msg, get_debug_info(), full_log, archive_path

    except Exception as e:
        error_log = full_log + f"\n[全局异常] Exception occurred:\n{str(e)}"
        return "❌ 最终生成发生全局异常", get_debug_info(), error_log, None
194
 
195
  # --- UI ---
196
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
208
 
209
  with gr.Group():
210
  gr.Markdown("### 2. 执行解析")
211
+ parse_btn = gr.Button("🚀 Run Mineru & DAG Gen", variant="primary")
212
  parse_status = gr.Textbox(label="运行状态")
213
 
214
  with gr.Group():
215
  gr.Markdown("### 3. 最终生成")
216
+ gr.Markdown("请选择要生成的内容类型:")
217
+ with gr.Row():
218
+ gen_ppt_btn = gr.Button("📊 生成 PPT")
219
+ gen_poster_btn = gr.Button("🖼️ 生成 Poster")
220
+ gen_pr_btn = gr.Button("📰 生成 PR")
221
+ gen_all_btn = gr.Button("🔨 生成全部 (ALL)", variant="primary")
222
+
223
  gen_status = gr.Textbox(label="生成状态")
224
  download_file = gr.File(label="下载压缩后的结果", interactive=False)
225
 
 
236
  )
237
  refresh_btn = gr.Button("🔄 刷新状态")
238
 
239
+ # ================= 逻辑绑定 =================
240
  key_btn.click(save_api_key, inputs=key_input, outputs=[parse_status, debug_view])
241
  pdf_btn.click(save_pdf, inputs=pdf_input, outputs=[parse_status, debug_view])
242
 
243
  parse_btn.click(
244
+ fn=run_mineru_parsing_and_dag_gen,
245
  outputs=[parse_status, debug_view, cmd_logs]
246
  )
247
 
248
+ # 最终生成逻辑绑定 (使用 lambda 传递固定的 task_type 参数)
249
+ gen_ppt_btn.click(fn=lambda: run_final_generation("ppt"), outputs=[gen_status, debug_view, cmd_logs, download_file])
250
+ gen_poster_btn.click(fn=lambda: run_final_generation("poster"), outputs=[gen_status, debug_view, cmd_logs, download_file])
251
+ gen_pr_btn.click(fn=lambda: run_final_generation("pr"), outputs=[gen_status, debug_view, cmd_logs, download_file])
252
+ gen_all_btn.click(fn=lambda: run_final_generation("all"), outputs=[gen_status, debug_view, cmd_logs, download_file])
253
 
254
  refresh_btn.click(get_debug_info, outputs=debug_view)
255