Add speed adjustment feature to audio generation in app.py, including UI slider for playback speed control.
Browse files
app.py
CHANGED
|
@@ -15,6 +15,7 @@ from promptic import llm
|
|
| 15 |
from pydantic import BaseModel, ValidationError
|
| 16 |
from pypdf import PdfReader
|
| 17 |
from tenacity import retry, retry_if_exception_type
|
|
|
|
| 18 |
|
| 19 |
# Define multiple sets of instruction templates
|
| 20 |
INSTRUCTION_TEMPLATES = {
|
|
@@ -576,6 +577,7 @@ def generate_audio(
|
|
| 576 |
user_feedback: str = None,
|
| 577 |
original_text: str = None,
|
| 578 |
debug = False,
|
|
|
|
| 579 |
) -> tuple:
|
| 580 |
# Validate API Key
|
| 581 |
if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
|
|
@@ -677,6 +679,17 @@ def generate_audio(
|
|
| 677 |
temporary_file.write(audio)
|
| 678 |
temporary_file.close()
|
| 679 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
# Delete any files in the temp directory that end with .mp3 and are over a day old
|
| 681 |
for file in glob.glob(f"{temporary_directory}*.mp3"):
|
| 682 |
if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
|
|
@@ -782,6 +795,14 @@ with gr.Blocks(title="PDF to Audio", css="""
|
|
| 782 |
placeholder="カスタム/ローカルモデルを使う場合はAPIベースURLを入力してください...",
|
| 783 |
info="カスタムやローカルモデルを使う場合、ここにAPIベースURLを入力してください。例: http://localhost:8080/v1 (llama.cpp RESTサーバー用)",
|
| 784 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 785 |
|
| 786 |
with gr.Column(scale=3):
|
| 787 |
template_dropdown = gr.Dropdown(
|
|
@@ -861,6 +882,7 @@ with gr.Blocks(title="PDF to Audio", css="""
|
|
| 861 |
prelude_dialog, podcast_dialog_instructions,
|
| 862 |
edited_transcript, # placeholder for edited_transcript
|
| 863 |
user_feedback, # placeholder for user_feedback
|
|
|
|
| 864 |
],
|
| 865 |
outputs=[audio_output, transcript_output, original_text_output, error_output]
|
| 866 |
).then(
|
|
@@ -880,7 +902,8 @@ with gr.Blocks(title="PDF to Audio", css="""
|
|
| 880 |
fn=lambda use_edit, edit, *args: validate_and_generate_audio(
|
| 881 |
*args[:12], # All inputs up to podcast_dialog_instructions
|
| 882 |
edit if use_edit else "", # Use edited transcript if checkbox is checked, otherwise empty string
|
| 883 |
-
*args[12:]
|
|
|
|
| 884 |
),
|
| 885 |
inputs=[
|
| 886 |
use_edited_transcript, edited_transcript,
|
|
@@ -888,7 +911,8 @@ with gr.Blocks(title="PDF to Audio", css="""
|
|
| 888 |
speaker_1_voice, speaker_2_voice, api_base,
|
| 889 |
intro_instructions, text_instructions, scratch_pad_instructions,
|
| 890 |
prelude_dialog, podcast_dialog_instructions,
|
| 891 |
-
user_feedback, original_text_output
|
|
|
|
| 892 |
],
|
| 893 |
outputs=[audio_output, transcript_output, original_text_output, error_output]
|
| 894 |
).then(
|
|
|
|
| 15 |
from pydantic import BaseModel, ValidationError
|
| 16 |
from pypdf import PdfReader
|
| 17 |
from tenacity import retry, retry_if_exception_type
|
| 18 |
+
from pydub import AudioSegment
|
| 19 |
|
| 20 |
# Define multiple sets of instruction templates
|
| 21 |
INSTRUCTION_TEMPLATES = {
|
|
|
|
| 577 |
user_feedback: str = None,
|
| 578 |
original_text: str = None,
|
| 579 |
debug = False,
|
| 580 |
+
speed: float = 1.0, # 追加
|
| 581 |
) -> tuple:
|
| 582 |
# Validate API Key
|
| 583 |
if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
|
|
|
|
| 679 |
temporary_file.write(audio)
|
| 680 |
temporary_file.close()
|
| 681 |
|
| 682 |
+
# ここから再生速度変更処理
|
| 683 |
+
if speed != 1.0:
|
| 684 |
+
# pydubでmp3を読み込み、速度変更
|
| 685 |
+
sound = AudioSegment.from_file(temporary_file.name, format="mp3")
|
| 686 |
+
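# Change playback speed by reinterpreting the raw samples at a scaled frame rate, then resampling back to the original rate (NOTE: this shifts pitch together with tempo; it is not pitch-preserving)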
|
| 687 |
+
sound = sound._spawn(sound.raw_data, overrides={
|
| 688 |
+
"frame_rate": int(sound.frame_rate * speed)
|
| 689 |
+
}).set_frame_rate(sound.frame_rate)
|
| 690 |
+
# 上書き保存
|
| 691 |
+
sound.export(temporary_file.name, format="mp3")
|
| 692 |
+
|
| 693 |
# Delete any files in the temp directory that end with .mp3 and are over a day old
|
| 694 |
for file in glob.glob(f"{temporary_directory}*.mp3"):
|
| 695 |
if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
|
|
|
|
| 795 |
placeholder="カスタム/ローカルモデルを使う場合はAPIベースURLを入力してください...",
|
| 796 |
info="カスタムやローカルモデルを使う場合、ここにAPIベースURLを入力してください。例: http://localhost:8080/v1 (llama.cpp RESTサーバー用)",
|
| 797 |
)
|
| 798 |
+
speed_slider = gr.Slider(
|
| 799 |
+
minimum=0.5,
|
| 800 |
+
maximum=2.0,
|
| 801 |
+
value=1.0,
|
| 802 |
+
step=0.05,
|
| 803 |
+
label="再生速度 (0.5x~2.0x)",
|
| 804 |
+
info="音声の再生速度を調整できます。デフォルトは1.0(等倍)です。"
|
| 805 |
+
)
|
| 806 |
|
| 807 |
with gr.Column(scale=3):
|
| 808 |
template_dropdown = gr.Dropdown(
|
|
|
|
| 882 |
prelude_dialog, podcast_dialog_instructions,
|
| 883 |
edited_transcript, # placeholder for edited_transcript
|
| 884 |
user_feedback, # placeholder for user_feedback
|
| 885 |
+
speed_slider, # 追加
|
| 886 |
],
|
| 887 |
outputs=[audio_output, transcript_output, original_text_output, error_output]
|
| 888 |
).then(
|
|
|
|
| 902 |
fn=lambda use_edit, edit, *args: validate_and_generate_audio(
|
| 903 |
*args[:12], # All inputs up to podcast_dialog_instructions
|
| 904 |
edit if use_edit else "", # Use edited transcript if checkbox is checked, otherwise empty string
|
| 905 |
+
*args[12:],
|
| 906 |
+
speed_slider,
|
| 907 |
),
|
| 908 |
inputs=[
|
| 909 |
use_edited_transcript, edited_transcript,
|
|
|
|
| 911 |
speaker_1_voice, speaker_2_voice, api_base,
|
| 912 |
intro_instructions, text_instructions, scratch_pad_instructions,
|
| 913 |
prelude_dialog, podcast_dialog_instructions,
|
| 914 |
+
user_feedback, original_text_output,
|
| 915 |
+
speed_slider,
|
| 916 |
],
|
| 917 |
outputs=[audio_output, transcript_output, original_text_output, error_output]
|
| 918 |
).then(
|