import os
import re
import time
from urllib.parse import urljoin, urlsplit

import requests
import urllib3
from bs4 import BeautifulSoup

# --- CONFIG ---
# Root folder for saved issues; files are stored under ROOT_PATH/<year>/<month>/.
ROOT_PATH = r"\\BOXICUBE\Docs\MAGAZINES\EIR"
# Archive index; the page for a given year is BASE_URL + "<year>/".
BASE_URL = "https://larouchepub.com/eiw/public/"

# Silence urllib3's InsecureRequestWarning, which would otherwise be emitted
# because requests in this script are made with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# --- FUNCTIONS ---

def ask_year_range():
    """Prompt until the user enters a valid ``YYYY..YYYY`` year range.

    Only malformed input triggers a re-prompt. End-of-input (``EOFError``)
    and Ctrl-C (``KeyboardInterrupt``) propagate to the caller instead of
    being swallowed and retried.

    Returns:
        A ``range`` covering the start year through the end year, inclusive.
    """
    # Loop rather than recurse so repeated bad input cannot exhaust the stack.
    while True:
        yr_range = input("Enter year range (YYYY..YYYY): ").strip()
        try:
            start, end = map(int, yr_range.split(".."))
        except ValueError:
            # Raised both for non-numeric parts and for a wrong number of parts.
            print("Invalid format. Use YYYY..YYYY")
            continue
        return range(start, end + 1)

def detect_month(filename):
    """Return the two-digit month embedded in an issue file name.

    The first eight consecutive digits in *filename* are read as YYYYMMDD,
    so ``eirv13n02-19860110.pdf`` yields ``"01"``. When the name contains no
    such digit run, ``"00"`` is returned.
    """
    date_match = re.search(r'(\d{4})(\d{2})(\d{2})', filename)
    return date_match.group(2) if date_match else "00"

def safe_get(url, retries=3, delay=3):
    """Fetch *url* with retries, skipping SSL certificate verification.

    Args:
        url: Address to request.
        retries: Maximum number of attempts.
        delay: Seconds to wait between a failed attempt and the next one.

    Returns:
        The response of the first attempt that completes with a non-error
        HTTP status, or ``None`` when every attempt fails.
    """
    for attempt in range(1, retries + 1):
        try:
            r = requests.get(url, timeout=15, verify=False)
            r.raise_for_status()
        except requests.RequestException as e:
            print(f"  Attempt {attempt} failed for {url}: {e}")
            # Pause only when another attempt follows; sleeping after the
            # last failure would just delay reporting it to the caller.
            if attempt < retries:
                time.sleep(delay)
        else:
            return r
    return None

def download_file(file_url, year, counter):
    """Download one issue file into ``ROOT_PATH/<year>/<month>/``.

    The month folder comes from the date embedded in the file name (see
    ``detect_month``). A file that already exists at the destination is not
    requested again.

    Args:
        file_url: Absolute URL of the file to fetch.
        year: Year used for the first-level destination folder.
        counter: Number of files downloaded so far.

    Returns:
        ``counter + 1`` when a new file was saved, otherwise ``counter``.
    """
    # Take the name from the URL path only, so a query string or fragment
    # never becomes part of the file name on disk.
    filename = urlsplit(file_url).path.rsplit("/", 1)[-1]
    if not filename:
        # Without a name the destination would be the folder itself, which
        # always exists and would be misreported as already downloaded.
        print(f"  Could not derive a file name from {file_url}")
        return counter

    # A .php download link serves the PDF; swap the suffix only, leaving any
    # earlier ".php" inside the name untouched.
    if filename.endswith(".php"):
        filename = filename[: -len(".php")] + ".pdf"

    month = detect_month(filename)
    folder = os.path.join(ROOT_PATH, str(year), month)
    os.makedirs(folder, exist_ok=True)
    filepath = os.path.join(folder, filename)

    # Check before making any request.
    if os.path.exists(filepath):
        print(f"Already downloaded: {filename}")
        return counter

    print(f"Downloading {filename} → {folder}")
    r = safe_get(file_url)
    if not r:
        return counter

    # Write to a temporary name and rename on success, so an interrupted
    # write never leaves a truncated file that later runs would skip.
    temp_path = filepath + ".part"
    try:
        with open(temp_path, "wb") as f:
            f.write(r.content)
        os.replace(temp_path, filepath)
    except OSError as e:
        print("  ERROR writing file:", e)
        try:
            os.remove(temp_path)
        except OSError:
            pass  # Nothing to clean up, or the share is unreachable.
        return counter
    return counter + 1

def process_issue(issue_url, year, counter):
    """Find the download link on one issue page and fetch the file.

    The page's first ``<button class="buttonA">`` is expected to carry an
    ``onclick`` handler that assigns the file URL to ``window.location``.
    That URL is resolved against *issue_url* and handed to ``download_file``.

    Args:
        issue_url: URL of the issue's landing page.
        year: Year passed through to ``download_file``.
        counter: Number of files downloaded so far.

    Returns:
        The updated download count; unchanged when the page could not be
        fetched or no download link could be extracted.
    """
    response = safe_get(issue_url)
    if not response:
        print("  Failed to process issue page")
        return counter

    page = BeautifulSoup(response.text, "html.parser")
    button = page.find("button", class_="buttonA")
    if not button:
        print("  No download button found")
        return counter

    handler = button.get("onclick", "")
    target = re.search(r"window.location=['\"](.+?)['\"]", handler)
    if not target:
        print("  Could not parse file URL")
        return counter

    file_url = urljoin(issue_url, target.group(1))
    return download_file(file_url, year, counter)

def process_year(year):
    """Download every issue linked from one year's index page.

    Links are collected from each ``<div>`` whose class matches ``column\\d``.
    A link ending in ``.html`` is used as given; any other link is treated as
    a folder and pointed at its ``index.html``.

    Args:
        year: Year whose index page (``BASE_URL`` + ``"<year>/"``) is scanned.

    Returns:
        Number of files downloaded for this year; 0 when the index page
        could not be fetched or contains no matching columns.
    """
    year_url = urljoin(BASE_URL, f"{year}/")
    print(f"\nProcessing year {year}: {year_url}")

    response = safe_get(year_url)
    if not response:
        print("  Failed to process year page")
        return 0

    page = BeautifulSoup(response.text, "html.parser")
    columns = page.find_all("div", class_=re.compile(r"column\d"))
    if not columns:
        print(f"No columns found for {year}")
        return 0

    downloaded = 0
    for column in columns:
        for anchor in column.find_all("a", href=True):
            href = anchor["href"]

            # Normalise folder-style links so they point at the issue page.
            if href.endswith(".html"):
                target = href
            elif href.endswith("/"):
                target = href + "index.html"
            else:
                target = href + "/index.html"
            issue_url = urljoin(year_url, target)

            print(f"  Found issue: {issue_url}")
            downloaded = process_issue(issue_url, year, downloaded)

    print(f"\nYear {year}: downloaded {downloaded} files")
    return downloaded

# --- MAIN ---
def main():
    """Ask for a year range, process each year in order, and print the total."""
    total = sum(process_year(year) for year in ask_year_range())
    print(f"\nAll done! Total files downloaded: {total}")

# Start the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
