""" Audiobook Creator Copyright (C) 2025 Prakhar Sharma This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . """ import re import os import time import json import random import asyncio import traceback from openai import OpenAI, AsyncOpenAI import warnings from utils.file_utils import empty_file from utils.llm_utils import check_if_llm_is_up from utils.character_extraction_llm import llm_identify_characters_and_output_to_jsonl from dotenv import load_dotenv load_dotenv() CHARACTER_IDENTIFICATION_LLM_BASE_URL=os.environ.get("CHARACTER_IDENTIFICATION_LLM_BASE_URL", "http://localhost:1234/v1") CHARACTER_IDENTIFICATION_LLM_API_KEY=os.environ.get("CHARACTER_IDENTIFICATION_LLM_API_KEY", "lm-studio") CHARACTER_IDENTIFICATION_LLM_MODEL_NAME=os.environ.get("CHARACTER_IDENTIFICATION_LLM_MODEL_NAME", "Qwen/Qwen3-30B-A3B-Instruct-2507") async_openai_client = AsyncOpenAI(base_url=CHARACTER_IDENTIFICATION_LLM_BASE_URL, api_key=CHARACTER_IDENTIFICATION_LLM_API_KEY) model_name = CHARACTER_IDENTIFICATION_LLM_MODEL_NAME def extract_dialogues(text): """Extract dialogue lines enclosed in quotes.""" return re.findall(r'("[^"]+")', text) async def identify_characters_and_output_book_to_jsonl(text: str): """ TWO-PASS APPROACH: Extract all characters first, then attribute dialogue. Pass 1: Extract ALL characters from entire text (including dialogue) Pass 2: Attribute speakers to dialogue (pure matching, no character creation) Args: text (str): The input text to be processed, typically a book or script. Outputs: - speaker_attributed_book.jsonl: A JSONL file where each line contains a speaker and their corresponding dialogue or narration. - character_gender_map.json: A JSON file containing gender and age scores for each character. """ # Clear the output JSONL file empty_file("speaker_attributed_book.jsonl") yield("Identifying Characters. Progress 0%") async for update in llm_identify_characters_and_output_to_jsonl( text, async_openai_client, model_name ): yield update yield "Character Identification Completed. You can now move onto the next step (Audiobook generation)." async def process_book_and_identify_characters(): is_llm_up, message = await check_if_llm_is_up(async_openai_client, model_name) if not is_llm_up: raise Exception(message) f = open("converted_book.txt", "r", encoding='utf-8') book_text = f.read() async for update in identify_characters_and_output_book_to_jsonl(book_text): yield update async def main(): f = open("converted_book.txt", "r", encoding='utf-8') book_text = f.read() # Start processing start_time = time.time() print("\nšŸ” Identifying characters and processing the book...") async for update in identify_characters_and_output_book_to_jsonl(book_text): print(update) end_time = time.time() # Calculate execution time execution_time = end_time - start_time print(f"\nā±ļø **Execution Time:** {execution_time:.6f} seconds") # Completion message print("\nāœ… **Character identification complete!**") print("šŸŽ§ Next, run the following script to generate the audiobook:") print(" āžœ `python generate_audiobook.py`") print("\nšŸš€ Happy audiobook creation!\n") if __name__ == "__main__": asyncio.run(main())