""" Audiobook Creator Copyright (C) 2025 Prakhar Sharma This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . """ import traceback import os from dotenv import load_dotenv import random import asyncio from openai import AsyncOpenAI import traceback import re load_dotenv() NO_THINK_MODE = os.environ.get("NO_THINK_MODE", "true") # Retry configuration MAX_RETRIES = 3 BASE_DELAY = 0.1 # Base delay in seconds MAX_DELAY = 10 # Maximum delay in seconds def clean_thinking_tags(response_text): """ Remove XML tags and their content from LLM responses. Args: response_text (str): The raw response from the LLM Returns: str: The cleaned response with thinking tags removed """ if not response_text: return response_text # Remove ... blocks (including multiline) cleaned = re.sub(r'.*?', '', response_text, flags=re.DOTALL) # Remove content before closing tag if opening tag is missing cleaned = re.sub(r'^.*?', '', cleaned, flags=re.DOTALL) # Remove any remaining standalone thinking tags cleaned = re.sub(r'', '', cleaned) # Clean up extra whitespace and newlines cleaned = cleaned.strip() return cleaned def check_if_have_to_include_no_think_token(): if NO_THINK_MODE == True or NO_THINK_MODE == "true": return "/no_think" else: return "" async def check_if_llm_is_up(async_openai_client, model_name): try: response = await async_openai_client.chat.completions.create( model=model_name, messages=[ {"role": "user", "content": f"{check_if_have_to_include_no_think_token()} Hello, this is a health test. Reply with any word if you're working."} ] ) # Clean the response from thinking tags raw_content = response.choices[0].message.content.strip() cleaned_content = clean_thinking_tags(raw_content) return True, cleaned_content except Exception as e: traceback.print_exc() return False, "Your configured LLM is not working. Please check if the .env file is correctly set up. Error: " + str(e) async def generate_audio_with_retry(client: AsyncOpenAI, tts_model: str, text_to_speak: str, voice_to_speak_in: str, max_retries=MAX_RETRIES): """ Generate audio with retry mechanism and exponential backoff. Args: client: The AsyncOpenAI client instance tts_model: The TTS model to use text_to_speak: The text to convert to speech voice_to_speak_in: The voice to use for TTS max_retries: Maximum number of retry attempts Returns: bytearray: Audio data buffer Raises: Exception: If all retry attempts fail """ last_exception = None for attempt in range(max_retries + 1): try: # Create an in-memory buffer for the audio data audio_buffer = bytearray() # Generate audio for the part async with client.audio.speech.with_streaming_response.create( model=tts_model, voice=voice_to_speak_in, response_format="wav", speed=0.85, input=text_to_speak, timeout=600 ) as response: async for chunk in response.iter_bytes(): audio_buffer.extend(chunk) # If we reach here, the request was successful if attempt > 0: print(f"Successfully generated audio after {attempt} retry attempts") return audio_buffer except Exception as e: traceback.print_exc() print(f"Error: {e}") last_exception = e if attempt < max_retries: # Calculate delay with exponential backoff and jitter delay = min(BASE_DELAY * (2 ** attempt), MAX_DELAY) # Add jitter to prevent thundering herd jitter = random.uniform(0, 0.1) * delay total_delay = delay + jitter print(f"Connection error on attempt {attempt + 1}/{max_retries + 1}: {e}") print(f"Retrying in {total_delay:.2f} seconds...") await asyncio.sleep(total_delay) continue else: # Either max retries reached or non-connection error print(f"Failed to generate audio after {attempt + 1} attempts: {e}") break # If we reach here, all retry attempts failed raise Exception(f"Failed to generate audio after {max_retries + 1} attempts. Last error: {last_exception}")