"""
Audiobook Creator
Copyright (C) 2025 Prakhar Sharma
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
import gradio as gr
import os
import traceback
from fastapi import FastAPI
from book_to_txt import process_book_and_extract_text, save_book
from identify_characters_and_output_book_to_jsonl import process_book_and_identify_characters
from generate_audiobook import process_audiobook_generation, validate_book_for_m4b_generation, sanitize_filename
from add_emotion_tags import process_emotion_tags
from dotenv import load_dotenv
# Load settings from the .env file before os.environ is consulted further down
# (TTS_MODEL and TTS_BASE_URL are read from the environment in this module).
load_dotenv()

# Extra stylesheet passed to gr.Blocks; declares the "step-heading" class.
css = """
.step-heading {font-size: 1.2rem; font-weight: bold; margin-bottom: 0.5rem}
"""

# FastAPI application; the Gradio UI is mounted onto it at the end of this module.
app = FastAPI()
def validate_book_upload(book_file, book_title):
    """Check that a book was uploaded and publish a sanitized title.

    Generator handler for the "Validate Book" button.

    Args:
        book_file: Uploaded file object (its ``name`` attribute is read), or
            None when nothing has been uploaded.
        book_title: Title typed by the user; may be empty.

    Yields:
        The sanitized title, which the click handler writes back into the
        title textbox. Nothing is yielded when no file was uploaded.
    """
    if book_file is None:
        return gr.Warning("Please upload a book file first.")

    # No title typed: use the uploaded file's base name without its extension.
    title = book_title or os.path.splitext(os.path.basename(book_file.name))[0]
    title = sanitize_filename(title)

    yield title
    return gr.Info(f"Book '{title}' ready for processing.", duration=5)
def text_extraction_wrapper(book_file, text_decoding_option):
    """Stream text-extraction progress to the UI and report the outcome.

    Generator handler for the "Extract Text" button.

    Args:
        book_file: Uploaded book file, or None when nothing was uploaded.
        text_decoding_option: Extraction method selected in the UI.

    Yields:
        Every value produced by ``process_book_and_extract_text``, then the
        last of those values once more. ``None`` is yielded instead when no
        file was uploaded or when extraction raised.
    """
    if book_file is None:
        yield None
        return gr.Warning("Please upload a book file and enter a title first.")

    try:
        # The loop variable keeps the most recent update; it stays None if the
        # extractor produces nothing.
        latest = None
        for latest in process_book_and_extract_text(book_file, text_decoding_option):
            yield latest

        # Re-emit the final value before raising the success notification.
        yield latest
        return gr.Info("Text extracted successfully! You can now edit the content.", duration=5)
    except ValueError as exc:
        # Validation problems get their own message.
        print(exc)
        traceback.print_exc()
        yield None
        return gr.Warning(f"Book validation error: {str(exc)}")
    except Exception as exc:
        print(exc)
        traceback.print_exc()
        yield None
        return gr.Warning(f"Error extracting text: {str(exc)}")
def save_book_wrapper(text_content):
    """Persist the edited book text and notify the user of the result.

    Args:
        text_content: Current contents of the editor textbox.

    Returns:
        Whatever the Gradio notification call returns; the click handler
        declares no outputs, so the value is not displayed.
    """
    # Refuse to save an empty or missing text.
    if not text_content:
        return gr.Warning("No text content to save.")

    try:
        save_book(text_content)
        return gr.Info("📖 Book saved successfully as 'converted_book.txt'!", duration=10)
    except Exception as exc:
        # Log the failure to the console and surface it as a warning toast.
        print(exc)
        traceback.print_exc()
        return gr.Warning(f"Error saving book: {str(exc)}")
async def identify_characters_wrapper():
    """Stream character-identification progress to the UI.

    Async generator handler for the "Identify Characters" button.

    Yields:
        Every update from ``process_book_and_identify_characters``, then the
        return value of the success notification call, then the last update
        again. On any exception: the return value of the warning notification
        call followed by ``None``.
    """
    try:
        # The loop variable doubles as the "most recent update" tracker.
        latest = None
        async for latest in process_book_and_identify_characters():
            yield latest

        # Notify, then restore the final progress message in the textbox.
        yield gr.Info("Character identification complete! You can now add emotion tags or proceed to audiobook generation.", duration=5)
        yield latest
    except Exception as exc:
        print(exc)
        traceback.print_exc()
        yield gr.Warning(f"Error identifying characters: {str(exc)}")
        yield None
async def add_emotion_tags_wrapper(characters_identified_state):
    """Stream emotion-tag processing progress to the UI.

    Async generator handler for the "Add Emotion Tags" button.

    Args:
        characters_identified_state: Session flag forwarded unchanged to
            ``process_emotion_tags``.

    Yields:
        Every update from ``process_emotion_tags``, then the return value of
        the success notification call, then the last update again. When the
        configured engine is not Orpheus, or on any exception: the return
        value of the warning notification call followed by ``None``.
    """
    # Emotion tags are gated on the TTS_MODEL environment variable.
    engine = os.environ.get("TTS_MODEL", "kokoro").lower()
    if engine != "orpheus":
        yield gr.Warning(f"Emotion tags are only supported with Orpheus TTS engine. Current engine: {engine}")
        yield None
        return

    try:
        # The loop variable doubles as the "most recent update" tracker.
        latest = None
        async for latest in process_emotion_tags(characters_identified_state):
            yield latest

        # Notify, then restore the final progress message in the textbox.
        yield gr.Info("Emotion tags added successfully! You can now generate the audiobook.", duration=5)
        yield latest
    except Exception as exc:
        print(exc)
        traceback.print_exc()
        yield gr.Warning(f"Error adding emotion tags: {str(exc)}")
        yield None
def _resolve_audiobook_title(book_file, book_title):
    """Return a non-empty, sanitized file stem for the finished audiobook."""
    title = (book_title or "").strip()
    if not title:
        # Same fallback as validate_book_upload: the upload's base name
        # without its extension.
        source_name = getattr(book_file, "name", book_file)
        title = os.path.splitext(os.path.basename(str(source_name)))[0]
    # The title may not have gone through "Validate Book", so sanitize it here
    # too. NOTE(review): assumes sanitize_filename is idempotent — confirm.
    title = sanitize_filename(title)
    return title or "audiobook"


async def generate_audiobook_wrapper(voice_type, narrator_gender, output_format, book_file, emotion_tags_processed_state, book_title):
    """Validate the inputs, run audiobook generation and expose the result file.

    Async generator handler for the "Generate Audiobook" button.

    Args:
        voice_type: Narration type selected in the UI.
        narrator_gender: Narrator voice selection.
        output_format: Output format label; "M4B (Chapters & Cover)" selects
            the M4B path, any other value is lower-cased into the extension.
        book_file: Uploaded book file, or None when nothing was uploaded.
        emotion_tags_processed_state: Session flag; truthy when emotion tags
            were processed and should be used.
        book_title: Title used to name the output file. When empty, the
            uploaded file's base name is used instead.

    Yields:
        ``(progress, file_path)`` tuples. ``file_path`` is None for every
        update except the final successful one, which carries the path of the
        renamed audiobook inside ``generated_audiobooks``.
    """
    if book_file is None:
        yield gr.Warning("Please upload a book file first."), None
        yield None, None
        return

    if not voice_type or not output_format:
        yield gr.Warning("Please select voice type and output format."), None
        yield None, None
        return

    is_m4b = output_format == "M4B (Chapters & Cover)"

    # Early validation for M4B format
    if is_m4b:
        yield gr.Info("Validating book file for M4B audiobook generation..."), None
        is_valid, error_message, metadata = validate_book_for_m4b_generation(book_file)
        if not is_valid:
            yield gr.Warning(f"❌ Book validation failed: {error_message}"), None
            yield None, None
            return
        yield gr.Info(f"✅ Book validation successful! Title: {metadata.get('Title', 'Unknown')}, Author: {metadata.get('Author(s)', 'Unknown')}"), None

    # Use session state to determine if emotion tags should be used
    add_emotion_tags = emotion_tags_processed_state
    if add_emotion_tags:
        yield gr.Info("🎭 Using emotion tags (processed in current session)"), None
    else:
        yield gr.Info("📖 Using standard narration"), None

    try:
        last_output = None
        # Pass through all yield values from the original function
        async for output in process_audiobook_generation(voice_type, narrator_gender, output_format, book_file, add_emotion_tags):
            last_output = output
            yield output, None  # Yield each progress update without file path

        # The generated file is looked up as "audiobook.<ext>" in the output directory.
        file_extension = "m4b" if is_m4b else output_format.lower()
        output_dir = "generated_audiobooks"
        generated_path = os.path.join(output_dir, f"audiobook.{file_extension}")

        # Rename the audiobook file to the (sanitized, never empty) book title.
        final_title = _resolve_audiobook_title(book_file, book_title)
        audiobook_path = os.path.join(output_dir, f"{final_title}.{file_extension}")
        if audiobook_path != generated_path:
            # os.replace overwrites a file left by an earlier run on every
            # platform; os.rename raises on Windows when the target exists.
            os.replace(generated_path, audiobook_path)
        elif not os.path.exists(generated_path):
            # No rename needed, but still fail loudly if nothing was produced.
            raise FileNotFoundError(generated_path)

        # Final yield with success notification and file path
        yield gr.Info(f"Audiobook generated successfully in {output_format} format! You can now download it in the Download section. Click on the blue download link next to the file name.", duration=10), None
        yield last_output, audiobook_path
    except Exception as e:
        print(e)
        traceback.print_exc()
        yield gr.Warning(f"Error generating audiobook: {str(e)}"), None
        yield None, None
def update_emotion_tags_status_and_state():
    """Build the post-processing values for the emotion-tag status and flag.

    Returns:
        A 2-tuple: a Gradio update selecting the "processed" status text, and
        True for the session flag.
    """
    status_update = gr.update(value="✅ Emotion tags processed - will be used in audiobook")
    processed_flag = True
    return status_update, processed_flag
def update_characters_identified_state():
    """Return the value stored in the characters_identified state after identification."""
    identified = True
    return identified
def _characters_state_after_run(progress_text, already_identified):
    """Return the characters_identified value to store after an identification run.

    identify_characters_wrapper swallows exceptions and finishes by yielding
    None, which leaves the progress textbox empty. The flag is only switched on
    when the textbox still holds a progress message; otherwise the previous
    value is kept.
    NOTE(review): assumes a successful run ends with a non-empty message — confirm.
    """
    if progress_text:
        return update_characters_identified_state()
    return already_identified


def _emotion_tags_state_after_run(progress_text, current_status, already_processed):
    """Return (status display value, emotion_tags_processed) after a tagging run.

    add_emotion_tags_wrapper finishes by yielding None when the engine is not
    Orpheus or when processing raised, which leaves the progress textbox empty.
    In that case the current status and flag are returned unchanged.
    NOTE(review): assumes a successful run ends with a non-empty message — confirm.
    """
    if progress_text:
        return update_emotion_tags_status_and_state()
    return current_status, already_processed


with gr.Blocks(css=css, theme=gr.themes.Default()) as gradio_app:
    gr.Markdown("# 📖 Audiobook Creator")
    gr.Markdown("Create professional audiobooks from your ebooks in just a few steps.")

    # Session state to track if emotion tags were processed
    emotion_tags_processed = gr.State(False)
    # Session state to track if characters were identified
    characters_identified = gr.State(False)

    # Get TTS configuration from environment variables
    current_tts_engine = os.environ.get("TTS_MODEL", "kokoro").lower()
    tts_base_url = os.environ.get("TTS_BASE_URL", "Not configured")

    # ---- Step 1: book details -------------------------------------------
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown('<div class="step-heading">📚 Step 1: Book Details</div>')
            book_title = gr.Textbox(
                label="Book Title",
                placeholder="Enter the title of your book",
                info="This will be used for naming the audiobook file",
            )
            book_input = gr.File(
                label="Upload Book",
            )
            text_decoding_option = gr.Radio(
                ["textract", "calibre"],
                label="Text Extraction Method",
                value="textract",
                info="Use calibre for better formatted results, wider compatibility for ebook formats. You can try both methods and choose based on the output result.",
            )
            validate_btn = gr.Button("Validate Book", variant="primary")

    # ---- Step 2: extract and edit the text ------------------------------
    with gr.Row():
        with gr.Column():
            gr.Markdown('<div class="step-heading">✂️ Step 2: Extract & Edit Content</div>')
            convert_btn = gr.Button("Extract Text", variant="primary")
            with gr.Accordion("Editing Tips", open=True):
                gr.Markdown(
                    "\n"
                    "* Remove unwanted sections: Table of Contents, About the Author, Acknowledgements\n"
                    "* Fix formatting issues or OCR errors\n"
                    "* Check for chapter breaks and paragraph formatting\n"
                )
            # Navigation buttons for the textbox
            with gr.Row():
                top_btn = gr.Button("↑ Go to Top", size="sm", variant="secondary")
                bottom_btn = gr.Button("↓ Go to Bottom", size="sm", variant="secondary")
            text_output = gr.Textbox(
                label="Edit Book Content",
                placeholder="Extracted text will appear here for editing",
                interactive=True,
                lines=15,
                elem_id="text_editor",
            )
            save_btn = gr.Button("Save Edited Text", variant="primary")

    # ---- Step 3: character identification -------------------------------
    with gr.Row():
        with gr.Column():
            gr.Markdown('<div class="step-heading">🧩 Step 3: Character Identification (Optional - Requires LLM)</div>')
            identify_btn = gr.Button("Identify Characters", variant="primary")
            with gr.Accordion("Why Identify Characters?", open=True):
                gr.Markdown(
                    "\n"
                    "* Improves multi-voice narration by assigning different voices to characters\n"
                    "* Creates more engaging audiobooks with distinct character voices\n"
                    "* Skip this step if you prefer single-voice narration\n"
                )
            character_output = gr.Textbox(
                label="Character Identification Progress",
                placeholder="Character identification progress will be shown here",
                interactive=False,
                lines=3,
            )

    # ---- Step 3.5: emotion tags -----------------------------------------
    # Add emotion tags step (only visible if Orpheus TTS engine is configured).
    # Computed once and reused for the status display in Step 4.
    emotion_tags_visible = current_tts_engine == "orpheus"
    with gr.Row(visible=emotion_tags_visible):
        with gr.Column():
            gr.Markdown('<div class="step-heading">🎭 Step 3.5: Add Emotion Tags (Optional - Requires LLM)</div>')
            emotion_tags_btn = gr.Button("Add Emotion Tags", variant="primary")
            with gr.Accordion("What are Emotion Tags?", open=True):
                # Blank lines around the list keep the closing sentence from
                # being rendered as part of the last bullet.
                gr.Markdown(
                    "\n"
                    "**Emotion Tags enhance your audiobook by adding natural expressions:**\n"
                    "\n"
                    "* **`<laugh>`** - For laughter or when text indicates laughing\n"
                    "* **`<chuckle>`** - For light laughter or chuckling sounds\n"
                    "* **`<sigh>`** - For sighing or expressions of resignation/relief\n"
                    "* **`<cough>`** - For coughing sounds or throat clearing\n"
                    "* **`<sniffle>`** - For sniffling or nasal sounds (emotion, cold, etc.)\n"
                    "* **`<groan>`** - For groaning sounds expressing discomfort/frustration\n"
                    "* **`<yawn>`** - For yawning or expressions of tiredness\n"
                    "* **`<gasp>`** - For gasping sounds of surprise/shock\n"
                    "\n"
                    "These tags are automatically placed based on the text context and work only with **Orpheus TTS**.\n"
                )
            emotion_tags_output = gr.Textbox(
                label="Emotion Tags Processing Progress",
                placeholder="Emotion tags processing progress will be shown here",
                interactive=False,
                lines=3,
            )

    # ---- Step 4: audiobook generation -----------------------------------
    with gr.Row():
        with gr.Column():
            gr.Markdown('<div class="step-heading">🎧 Step 4: Generate Audiobook</div>')
            with gr.Row():
                voice_type = gr.Radio(
                    ["Single Voice", "Multi-Voice"],
                    label="Narration Type",
                    value="Single Voice",
                    info="Multi-Voice requires character identification",
                )
                narrator_gender = gr.Radio(
                    ["male", "female"],
                    label="Choose whether you want the book to be read in a male or female voice",
                    value="female",
                )
                tts_engine_display = gr.Radio(
                    ["kokoro", "orpheus"],
                    label="TTS Engine",
                    value=current_tts_engine,
                    interactive=False,
                    info="Configure TTS engine in .env file. Orpheus supports emotion tags.",
                )
                output_format = gr.Dropdown(
                    ["M4B (Chapters & Cover)", "AAC", "M4A", "MP3", "WAV", "OPUS", "FLAC", "PCM"],
                    label="Output Format",
                    value="M4B (Chapters & Cover)",
                    info="M4B supports chapters and cover art",
                )
            # Emotion tags status display (conditional visibility based on TTS engine in .env)
            with gr.Group(visible=emotion_tags_visible) as emotion_tags_group:
                emotion_tags_status_display = gr.Radio(
                    choices=["✅ Emotion tags processed - will be used in audiobook", "❌ No emotion tags - standard narration will be used"],
                    value="❌ No emotion tags - standard narration will be used",  # Always start with default
                    label="Emotion Tags Status",
                    interactive=False,
                    info="This will update automatically when you process emotion tags in Step 3.5",
                )
            generate_btn = gr.Button("Generate Audiobook", variant="primary")
            audio_output = gr.Textbox(
                label="Generation Progress",
                placeholder="Generation progress will be shown here",
                interactive=False,
                lines=3,
            )
            # File component for downloading the audiobook; hidden until a file exists
            with gr.Group(visible=False) as download_box:
                gr.Markdown("### 📥 Download Your Audiobook")
                audiobook_file = gr.File(
                    label="Download Generated Audiobook",
                    interactive=False,
                    type="filepath",
                )

    # ---- Event wiring ----------------------------------------------------
    # Connections with proper handling of Gradio notifications
    validate_btn.click(
        validate_book_upload,
        inputs=[book_input, book_title],
        outputs=[book_title],
    )
    convert_btn.click(
        text_extraction_wrapper,
        inputs=[book_input, text_decoding_option],
        outputs=[text_output],
        queue=True,
    )
    save_btn.click(
        save_book_wrapper,
        inputs=[text_output],
        outputs=[],
        queue=True,
    )
    identify_btn.click(
        identify_characters_wrapper,
        inputs=[],
        outputs=[character_output],
        queue=True,
    ).then(
        # Mark characters as identified only when the run left a progress
        # message behind; the wrapper blanks the textbox when it fails.
        _characters_state_after_run,
        inputs=[character_output, characters_identified],
        outputs=[characters_identified],
    )
    emotion_tags_btn.click(
        add_emotion_tags_wrapper,
        inputs=[characters_identified],
        outputs=[emotion_tags_output],
        queue=True,
    ).then(
        # Update the status display and session flag only after a run that
        # left a progress message behind; a failed run keeps both unchanged.
        _emotion_tags_state_after_run,
        inputs=[emotion_tags_output, emotion_tags_status_display, emotion_tags_processed],
        outputs=[emotion_tags_status_display, emotion_tags_processed],
    )
    # generate_audiobook_wrapper outputs both the progress text and the file path
    generate_btn.click(
        generate_audiobook_wrapper,
        inputs=[voice_type, narrator_gender, output_format, book_input, emotion_tags_processed, book_title],
        outputs=[audio_output, audiobook_file],
        queue=True,
    ).then(
        # Show the download box only when generation produced a file
        lambda generated_file: gr.update(visible=generated_file is not None),
        inputs=[audiobook_file],
        outputs=[download_box],
    )

    # Navigation button functionality for textbox scrolling (client-side JS only)
    top_btn.click(
        None,
        inputs=[],
        outputs=[],
        js=(
            "\n"
            "function() {\n"
            "const textbox = document.querySelector('#text_editor textarea');\n"
            "if (textbox) {\n"
            "textbox.scrollTop = 0;\n"
            "}\n"
            "}\n"
        ),
    )
    bottom_btn.click(
        None,
        inputs=[],
        outputs=[],
        js=(
            "\n"
            "function() {\n"
            "const textbox = document.querySelector('#text_editor textarea');\n"
            "if (textbox) {\n"
            "textbox.scrollTop = textbox.scrollHeight;\n"
            "}\n"
            "}\n"
        ),
    )
# Attach the Gradio UI to the FastAPI application; `app` is rebound to the
# value returned by mount_gradio_app.
app = gr.mount_gradio_app(app, gradio_app, path="/") # Mount Gradio at root

if __name__ == "__main__":
    # uvicorn is imported only when this module is executed directly.
    import uvicorn

    # Serve on all network interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)