Refactor app.py to remove unused functions and comments, update UI labels to Japanese, and reorganize input sections for better clarity.
Browse files
app.py
CHANGED
|
@@ -16,19 +16,6 @@ from pydantic import BaseModel, ValidationError
|
|
| 16 |
from pypdf import PdfReader
|
| 17 |
from tenacity import retry, retry_if_exception_type
|
| 18 |
|
| 19 |
-
import re
|
| 20 |
-
|
| 21 |
-
def read_readme():
|
| 22 |
-
readme_path = Path("README.md")
|
| 23 |
-
if readme_path.exists():
|
| 24 |
-
with open(readme_path, "r") as file:
|
| 25 |
-
content = file.read()
|
| 26 |
-
# Use regex to remove metadata enclosed in -- ... --
|
| 27 |
-
content = re.sub(r'--.*?--', '', content, flags=re.DOTALL)
|
| 28 |
-
return content
|
| 29 |
-
else:
|
| 30 |
-
return "README.md not found. Please check the repository for more information."
|
| 31 |
-
|
| 32 |
# Define multiple sets of instruction templates
|
| 33 |
INSTRUCTION_TEMPLATES = {
|
| 34 |
################# PODCAST ##################
|
|
@@ -655,16 +642,6 @@ def generate_audio(
|
|
| 655 |
edited_transcript=edited_transcript_processed,
|
| 656 |
user_feedback=user_feedback_processed
|
| 657 |
)
|
| 658 |
-
# llm_output = generate_dialogue(
|
| 659 |
-
# '本ガイドブックは、政府情報システム開発におけるアジャイル開発の適用を支援するために用意されたものです。',
|
| 660 |
-
# intro_instructions='',
|
| 661 |
-
# text_instructions='',
|
| 662 |
-
# scratch_pad_instructions='',
|
| 663 |
-
# prelude_dialog='',
|
| 664 |
-
# podcast_dialog_instructions='',
|
| 665 |
-
# edited_transcript='',
|
| 666 |
-
# user_feedback=''
|
| 667 |
-
# )
|
| 668 |
print('llm_output:', llm_output)
|
| 669 |
|
| 670 |
# Generate audio from the transcript
|
|
@@ -763,108 +740,101 @@ with gr.Blocks(title="PDF to Audio", css="""
|
|
| 763 |
""") as demo:
|
| 764 |
|
| 765 |
with gr.Row(elem_id="header"):
|
| 766 |
-
|
| 767 |
-
gr.Markdown("# Convert PDFs into an audio podcast, lecture, summary and others\n\nFirst, upload one or more PDFs, select options, then push Generate Audio.\n\nYou can also select a variety of custom option and direct the way the result is generated.", elem_id="title")
|
| 768 |
-
with gr.Column(scale=1):
|
| 769 |
-
gr.HTML('''
|
| 770 |
-
<div id="logo_container">
|
| 771 |
-
<img src="https://huggingface.co/spaces/lamm-mit/PDF2Audio/resolve/main/logo.png" id="logo_image" alt="Logo">
|
| 772 |
-
</div>
|
| 773 |
-
''')
|
| 774 |
#gr.Markdown("")
|
| 775 |
-
submit_btn = gr.Button("Generate Audio", elem_id="submit_btn")
|
| 776 |
|
| 777 |
with gr.Row(elem_id="main_container"):
|
| 778 |
with gr.Column(scale=2):
|
| 779 |
-
files = gr.Files(label="
|
| 780 |
|
| 781 |
openai_api_key = gr.Textbox(
|
| 782 |
-
label="OpenAI API
|
| 783 |
visible=True, # Always show the API key field
|
| 784 |
-
placeholder="
|
| 785 |
type="password" # Hide the API key input
|
| 786 |
)
|
| 787 |
text_model = gr.Dropdown(
|
| 788 |
-
label="
|
| 789 |
choices=STANDARD_TEXT_MODELS,
|
| 790 |
value="o1-preview-2024-09-12", #"gpt-4o-mini",
|
| 791 |
-
info="
|
| 792 |
)
|
| 793 |
audio_model = gr.Dropdown(
|
| 794 |
-
label="
|
| 795 |
choices=STANDARD_AUDIO_MODELS,
|
| 796 |
value="tts-1",
|
| 797 |
-
info="
|
| 798 |
)
|
| 799 |
speaker_1_voice = gr.Dropdown(
|
| 800 |
-
label="
|
| 801 |
choices=STANDARD_VOICES,
|
| 802 |
value="alloy",
|
| 803 |
-
info="
|
| 804 |
)
|
| 805 |
speaker_2_voice = gr.Dropdown(
|
| 806 |
-
label="
|
| 807 |
choices=STANDARD_VOICES,
|
| 808 |
value="echo",
|
| 809 |
-
info="
|
| 810 |
)
|
| 811 |
api_base = gr.Textbox(
|
| 812 |
-
label="
|
| 813 |
-
placeholder="
|
| 814 |
-
info="
|
| 815 |
)
|
| 816 |
|
| 817 |
with gr.Column(scale=3):
|
| 818 |
template_dropdown = gr.Dropdown(
|
| 819 |
-
label="
|
| 820 |
choices=list(INSTRUCTION_TEMPLATES.keys()),
|
| 821 |
value="podcast",
|
| 822 |
-
info="
|
| 823 |
-
)
|
| 824 |
-
intro_instructions = gr.Textbox(
|
| 825 |
-
label="Intro Instructions",
|
| 826 |
-
lines=10,
|
| 827 |
-
value=INSTRUCTION_TEMPLATES["podcast"]["intro"],
|
| 828 |
-
info="Provide the introductory instructions for generating the dialogue.",
|
| 829 |
-
)
|
| 830 |
-
text_instructions = gr.Textbox(
|
| 831 |
-
label="Standard Text Analysis Instructions",
|
| 832 |
-
lines=10,
|
| 833 |
-
placeholder="Enter text analysis instructions...",
|
| 834 |
-
value=INSTRUCTION_TEMPLATES["podcast"]["text_instructions"],
|
| 835 |
-
info="Provide the instructions for analyzing the raw data and text.",
|
| 836 |
-
)
|
| 837 |
-
scratch_pad_instructions = gr.Textbox(
|
| 838 |
-
label="Scratch Pad Instructions",
|
| 839 |
-
lines=15,
|
| 840 |
-
value=INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"],
|
| 841 |
-
info="Provide the scratch pad instructions for brainstorming presentation/dialogue content.",
|
| 842 |
)
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
|
| 853 |
-
|
| 854 |
-
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
| 858 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 859 |
error_output = gr.Textbox(visible=False) # Hidden textbox to store error message
|
| 860 |
|
| 861 |
-
use_edited_transcript = gr.Checkbox(label="
|
| 862 |
-
edited_transcript = gr.Textbox(label="
|
| 863 |
show_copy_button=True, interactive=False)
|
| 864 |
|
| 865 |
-
user_feedback = gr.Textbox(label="
|
| 866 |
-
|
| 867 |
-
regenerate_btn = gr.Button("Regenerate Audio with Edits and Feedback")
|
| 868 |
# Function to update the interactive state of edited_transcript
|
| 869 |
def update_edit_box(checkbox_value):
|
| 870 |
return gr.update(interactive=checkbox_value, lines=20 if checkbox_value else 20, visible=True if checkbox_value else False)
|
|
@@ -933,10 +903,6 @@ with gr.Blocks(title="PDF to Audio", css="""
|
|
| 933 |
inputs=[error_output],
|
| 934 |
outputs=[]
|
| 935 |
)
|
| 936 |
-
|
| 937 |
-
# Add README content at the bottom
|
| 938 |
-
gr.Markdown("---") # Horizontal line to separate the interface from README
|
| 939 |
-
gr.Markdown(read_readme())
|
| 940 |
|
| 941 |
# Enable queueing for better performance
|
| 942 |
demo.queue(max_size=20, default_concurrency_limit=32)
|
|
|
|
| 16 |
from pypdf import PdfReader
|
| 17 |
from tenacity import retry, retry_if_exception_type
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
# Define multiple sets of instruction templates
|
| 20 |
INSTRUCTION_TEMPLATES = {
|
| 21 |
################# PODCAST ##################
|
|
|
|
| 642 |
edited_transcript=edited_transcript_processed,
|
| 643 |
user_feedback=user_feedback_processed
|
| 644 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
print('llm_output:', llm_output)
|
| 646 |
|
| 647 |
# Generate audio from the transcript
|
|
|
|
| 740 |
""") as demo:
|
| 741 |
|
| 742 |
with gr.Row(elem_id="header"):
|
| 743 |
+
gr.Markdown("# PDFを音声ポッドキャスト・講義・要約などに変換\n\nまず、1つ以上のPDFをアップロードし、オプションを選択してから「音声を生成」ボタンを押してください。\n\nカスタムオプションも選択でき、生成方法を細かく指示できます。", elem_id="title")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 744 |
#gr.Markdown("")
|
|
|
|
| 745 |
|
| 746 |
with gr.Row(elem_id="main_container"):
|
| 747 |
with gr.Column(scale=2):
|
| 748 |
+
files = gr.Files(label="PDFファイル", file_types=[], )
|
| 749 |
|
| 750 |
openai_api_key = gr.Textbox(
|
| 751 |
+
label="OpenAI APIキー",
|
| 752 |
visible=True, # Always show the API key field
|
| 753 |
+
placeholder="ここにOpenAI APIキーを入力してください...",
|
| 754 |
type="password" # Hide the API key input
|
| 755 |
)
|
| 756 |
text_model = gr.Dropdown(
|
| 757 |
+
label="テキスト生成モデル",
|
| 758 |
choices=STANDARD_TEXT_MODELS,
|
| 759 |
value="o1-preview-2024-09-12", #"gpt-4o-mini",
|
| 760 |
+
info="対話テキストを生成するモデルを選択してください。",
|
| 761 |
)
|
| 762 |
audio_model = gr.Dropdown(
|
| 763 |
+
label="音声生成モデル",
|
| 764 |
choices=STANDARD_AUDIO_MODELS,
|
| 765 |
value="tts-1",
|
| 766 |
+
info="音声を生成するモデルを選択してください。",
|
| 767 |
)
|
| 768 |
speaker_1_voice = gr.Dropdown(
|
| 769 |
+
label="話者1の声",
|
| 770 |
choices=STANDARD_VOICES,
|
| 771 |
value="alloy",
|
| 772 |
+
info="話者1の音声を選択してください。",
|
| 773 |
)
|
| 774 |
speaker_2_voice = gr.Dropdown(
|
| 775 |
+
label="話者2の声",
|
| 776 |
choices=STANDARD_VOICES,
|
| 777 |
value="echo",
|
| 778 |
+
info="話者2の音声を選択してください。",
|
| 779 |
)
|
| 780 |
api_base = gr.Textbox(
|
| 781 |
+
label="カスタムAPIベースURL",
|
| 782 |
+
placeholder="カスタム/ローカルモデルを使う場合はAPIベースURLを入力してください...",
|
| 783 |
+
info="カスタムやローカルモデルを使う場合、ここにAPIベースURLを入力してください。例: http://localhost:8080/v1 (llama.cpp RESTサーバー用)",
|
| 784 |
)
|
| 785 |
|
| 786 |
with gr.Column(scale=3):
|
| 787 |
template_dropdown = gr.Dropdown(
|
| 788 |
+
label="指示テンプレート",
|
| 789 |
choices=list(INSTRUCTION_TEMPLATES.keys()),
|
| 790 |
value="podcast",
|
| 791 |
+
info="使用する指示テンプレートを選択してください。各フィールドを編集してカスタマイズも可能です。",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 792 |
)
|
| 793 |
+
with gr.Accordion("プロンプト", open=False):
|
| 794 |
+
intro_instructions = gr.Textbox(
|
| 795 |
+
label="イントロ指示",
|
| 796 |
+
lines=10,
|
| 797 |
+
value=INSTRUCTION_TEMPLATES["podcast"]["intro"],
|
| 798 |
+
info="対話生成のためのイントロ指示を入力してください。",
|
| 799 |
+
)
|
| 800 |
+
text_instructions = gr.Textbox(
|
| 801 |
+
label="テキスト分析指示",
|
| 802 |
+
lines=10,
|
| 803 |
+
placeholder="テキスト分析の指示を入力してください...",
|
| 804 |
+
value=INSTRUCTION_TEMPLATES["podcast"]["text_instructions"],
|
| 805 |
+
info="生データやテキストの分析指示を入力してください。",
|
| 806 |
+
)
|
| 807 |
+
scratch_pad_instructions = gr.Textbox(
|
| 808 |
+
label="ブレインストーミング指示",
|
| 809 |
+
lines=15,
|
| 810 |
+
value=INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"],
|
| 811 |
+
info="プレゼン/対話内容のブレインストーミング指示を入力してください。",
|
| 812 |
+
)
|
| 813 |
+
prelude_dialog = gr.Textbox(
|
| 814 |
+
label="プレリュード指示",
|
| 815 |
+
lines=5,
|
| 816 |
+
value=INSTRUCTION_TEMPLATES["podcast"]["prelude"],
|
| 817 |
+
info="プレゼン/対話作成前のプレリュード指示を入力してください。",
|
| 818 |
+
)
|
| 819 |
+
podcast_dialog_instructions = gr.Textbox(
|
| 820 |
+
label="ポッドキャスト対話指示",
|
| 821 |
+
lines=20,
|
| 822 |
+
value=INSTRUCTION_TEMPLATES["podcast"]["dialog"],
|
| 823 |
+
info="プレゼンやポッドキャスト対話生成の指示を入力してください。",
|
| 824 |
+
)
|
| 825 |
+
|
| 826 |
+
submit_btn = gr.Button("音声を生成", elem_id="submit_btn", variant="primary")
|
| 827 |
+
audio_output = gr.Audio(label="音声", format="mp3", interactive=False, autoplay=False)
|
| 828 |
+
transcript_output = gr.Textbox(label="書き起こしテキスト", lines=20, show_copy_button=True)
|
| 829 |
+
original_text_output = gr.Textbox(label="元テキスト", lines=10, visible=False)
|
| 830 |
error_output = gr.Textbox(visible=False) # Hidden textbox to store error message
|
| 831 |
|
| 832 |
+
use_edited_transcript = gr.Checkbox(label="書き起こしテキストを編集する(チェックすると編集欄が有効化)", value=False)
|
| 833 |
+
edited_transcript = gr.Textbox(label="編集用テキスト欄(例: '[ここに定義を追加]' など明確な指示を記載)", lines=20, visible=False,
|
| 834 |
show_copy_button=True, interactive=False)
|
| 835 |
|
| 836 |
+
user_feedback = gr.Textbox(label="フィードバック・メモ", lines=10)
|
| 837 |
+
regenerate_btn = gr.Button("編集・フィードバックで再生成")
|
|
|
|
| 838 |
# Function to update the interactive state of edited_transcript
|
| 839 |
def update_edit_box(checkbox_value):
|
| 840 |
return gr.update(interactive=checkbox_value, lines=20 if checkbox_value else 20, visible=True if checkbox_value else False)
|
|
|
|
| 903 |
inputs=[error_output],
|
| 904 |
outputs=[]
|
| 905 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
|
| 907 |
# Enable queueing for better performance
|
| 908 |
demo.queue(max_size=20, default_concurrency_limit=32)
|