#!/usr/bin/env python3
import argparse, os, sys, json, csv, html,re, datetime as dt
from pathlib import Path
from urllib.parse import urlparse, parse_qs
from telethon import TelegramClient
from telethon.errors import RPCError
from telethon.tl.types import Message, User, Chat, Channel

# Optional convenience: if python-dotenv is importable, call load_dotenv() so
# settings can come from a .env file. Any failure (package missing or loading
# error) is deliberately ignored so the script still runs with plain
# environment variables and command-line flags.
try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception:
    pass

def env(name, default=None, cast=str):
    """Read environment variable *name*, optionally converting it.

    Returns None when the variable is unset and no default was supplied.
    Otherwise the value (or the default) is passed through *cast*; a falsy
    *cast* returns the value untouched.
    """
    raw = os.environ.get(name, default)
    if raw is None:
        return None
    if not cast:
        return raw
    return cast(raw)

def parse_date(s):
    """Parse a date given as YYYY-MM-DD or YYYY-MM-DDTHH:MM.

    Returns None for an empty/None input and a naive ``datetime`` otherwise.
    Raises ValueError when the text matches neither accepted layout.
    """
    if not s:
        return None
    for layout in ("%Y-%m-%d", "%Y-%m-%dT%H:%M"):
        try:
            parsed = dt.datetime.strptime(s, layout)
        except ValueError:
            # Not this layout; try the next one.
            continue
        return parsed
    raise ValueError(f"Bad date: {s} (use YYYY-MM-DD or YYYY-MM-DDTHH:MM)")

def to_row(m: "Message", media_path: str = ""):
    """Flatten one message into a plain dict used for JSON/CSV/HTML export.

    Args:
        m: Message-like object; the attributes read are ``id``, ``date``,
            ``from_id``, ``message``, ``reply_to_msg_id``, ``views``,
            ``forwards``, ``reactions``, ``media`` and ``entities``.
        media_path: Path of the downloaded attachment, or "" when none.

    Returns:
        A dict whose keys match the CSV columns. Missing/falsy values are
        stored as "" so every column is always present.
    """
    sender = m.from_id
    sender_id = getattr(sender, "user_id", None) or getattr(sender, "channel_id", None) or ""

    reactions = getattr(m, "reactions", None)
    reaction_parts = []
    for r in (reactions.results if reactions else None) or []:
        # A reaction may be a plain string or an object carrying the emoji in
        # `.emoticon`; prefer the emoji so the export does not contain an
        # object repr, and fall back to str() for anything else.
        label = getattr(r.reaction, "emoticon", None) or str(r.reaction)
        reaction_parts.append(f"{label}: {r.count}")

    # `entities` is a collection, so report the kind of each entity rather
    # than the name of the container type (which would always be "list").
    entity_names = ",".join(type(e).__name__ for e in m.entities) if m.entities else ""

    return {
        "id": m.id,
        "date": m.date.isoformat() if m.date else "",
        "sender_id": sender_id,
        "text": m.message or "",
        "reply_to_msg_id": m.reply_to_msg_id or "",
        "views": m.views or "",
        "forwards": m.forwards or "",
        "reactions": ",".join(reaction_parts),
        "media": type(m.media).__name__ if m.media else "",
        "media_path": media_path,
        "entities": entity_names,
    }

def ensure_dir(p: str | Path):
    Path(p).mkdir(parents=True, exist_ok=True)
    return Path(p)
    
def youtube_thumb(url: str) -> str | None:
    """Return a YouTube thumbnail URL if the link is a YouTube/Shorts link, else None."""
    try:
        u = urlparse(url)
        host = (u.netloc or "").lower()
        path = u.path or ""
        vid = None
        if "youtube.com" in host:
            if path.startswith("/watch"):
                q = parse_qs(u.query or "")
                vid = (q.get("v") or [None])[0]
            elif path.startswith("/shorts/") or path.startswith("/live/"):
                parts = path.strip("/").split("/")
                if len(parts) >= 2:
                    vid = parts[1]
        elif "youtu.be" in host:
            vid = path.strip("/").split("/")[0] or None
        if vid:
            return f"https://i.ytimg.com/vi/{vid}/hqdefault.jpg"
    except Exception:
        pass
    return None


def row_to_html(r):
    """Render one exported message row (a dict as built for CSV/JSON) as an <article>.

    Message text and reaction labels are HTML-escaped. YouTube links found in
    the text get a clickable thumbnail, and a downloaded attachment is embedded
    as an image, a video, or a download link depending on its file extension.
    """
    body_text = r.get("text") or ""
    body = html.escape(body_text).replace("\n", "<br>")
    meta = []
    if r["sender_id"]: meta.append(f"from: {r['sender_id']}")
    if r["views"]: meta.append(f"views: {r['views']}")
    if r["forwards"]: meta.append(f"fwd: {r['forwards']}")
    if r["reactions"]: meta.append(f"react: {html.escape(r['reactions'])}")
    if r["reply_to_msg_id"]: meta.append(f"reply→{r['reply_to_msg_id']}")
    meta_str = " • ".join(meta)

    # YouTube previews (detect any URLs in the text)
    previews = []
    for m in re.finditer(r'https?://\S+', body_text):
        u = m.group(0)
        th = youtube_thumb(u)
        if th:
            previews.append(f'<a class="yt" href="{html.escape(u)}" target="_blank" rel="noopener"><img src="{html.escape(th)}" alt="YouTube thumbnail"></a>')
    previews_html = "".join(previews)

    # Existing media (files/photos/videos you downloaded)
    media_html = ""
    if r["media_path"]:
        rel = html.escape(r["media_path"])
        lower = rel.lower()
        if lower.endswith((".png",".jpg",".jpeg",".gif",".webp",".bmp")):
            media_html = f'<div class="media"><img src="{rel}" alt="media"></div>'
        elif lower.endswith((".mp4",".webm",".ogg")):
            media_html = f'<div class="media"><video src="{rel}" controls></video></div>'
        else:
            media_html = f'<div class="media"><a href="{rel}">Download attachment</a></div>'

    return f"""
    <article id="m{r['id']}">
      <header>
        <a class="msgid" href="#m{r['id']}">#{r['id']}</a>
        <time>{html.escape(r['date'])}</time>
        <span class="meta">{meta_str}</span>
      </header>
      <div class="text">{body}</div>
      <div class="previews">{previews_html}</div>
      {media_html}
    </article>
    """


def write_html_single(out_path: Path, chat_label: str, rows: list[dict]):
    """Write all *rows* as one self-contained, dark-themed HTML page.

    Args:
        out_path: Destination file; its parent directory is created if needed.
        chat_label: Chat name shown in the title (HTML-escaped here).
        rows: Message dicts, rendered in the given order via ``row_to_html``.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    items = "\n".join(row_to_html(r) for r in rows)
    title = f"Telegram export — {html.escape(chat_label)}"
    # Every literal CSS brace is doubled: inside an f-string a single "{...}"
    # would be evaluated as a Python expression.
    doc = f"""<!doctype html>
<html lang="en">
<meta charset="utf-8">
<title>{title}</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
body{{font:14px/1.4 system-ui,Segoe UI,Arial,sans-serif;max-width:900px;margin:2rem auto;padding:0 1rem;background:#0b0b0b;color:#eaeaea}}
h1{{font-size:1.4rem;margin:0 0 1rem}}
header.top{{display:flex;gap:1rem;align-items:baseline;justify-content:space-between}}
article{{border:1px solid #2a2a2a;border-radius:14px;padding:12px 14px;margin:12px 0;background:#141414;box-shadow:0 1px 2px rgba(0,0,0,.2)}}
article header{{display:flex;gap:.8rem;align-items:center;font-size:.85rem;color:#bdbdbd}}
article header .msgid{{text-decoration:none;color:#8ab4f8}}
article .text{{margin-top:.3rem;white-space:normal;word-wrap:break-word}}
.media img, .media video{{max-width:100%;height:auto;border-radius:10px;margin-top:.6rem}}
.previews img{{max-width:100%;height:auto;border-radius:10px;margin-top:.6rem;display:block}}

footer{{opacity:.7;font-size:.8rem;margin:2rem 0}}
</style>
<body>
<header class="top">
  <h1>{title}</h1>
  <div>{len(rows)} messages</div>
</header>
{items}
<footer>Generated by tg_fetch_default_truedelta.py</footer>
</body>
</html>
"""
    out_path.write_text(doc, encoding="utf-8")

def print_dialog_row(d, out=sys.stdout):
    """Print one dialog as a tab-separated line: id, type, forum, name, username.

    The type column is derived from the class of ``d.entity`` (User, Chat or
    Channel); anything else is reported as "unknown". The forum column is
    "yes"/"no" for groups and channels and "-" where it does not apply.
    """
    entity = d.entity
    handle = None
    forum = "-"
    if isinstance(entity, User):
        kind = "user"
        handle = getattr(entity, "username", None)
    elif isinstance(entity, Chat):
        kind = "group"
        handle = getattr(entity, "username", None)
        forum = "no"
    elif isinstance(entity, Channel):
        if entity.broadcast:
            kind = "channel"
        elif entity.megagroup:
            kind = "supergroup"
        else:
            kind = "channel"
        handle = getattr(entity, "username", None)
        forum = "yes" if getattr(entity, "forum", False) else "no"
    else:
        kind = "unknown"

    label = d.name or ""
    dialog_id = d.id
    handle = handle or ""
    print(f"{dialog_id}\t{kind}\t{forum}\t{label}\t{handle}", file=out)

def _build_arg_parser():
    """Build the argparse parser holding every command-line flag of the exporter."""
    p = argparse.ArgumentParser(description="Fetch/export Telegram messages from a chat or a forum topic.")
    # list/search
    p.add_argument("--list", action="store_true", help="List your dialogs and exit.")
    p.add_argument("--find", help="Filter list by case-insensitive substring (name or username).")

    # selection
    p.add_argument("--from", dest="chat", default="@TrueDelta", help="Target chat: @username or numeric id (-100...). Defaults to @TrueDelta.")
    p.add_argument("--topic-id", type=int, help="Forum topic id (root message id).")
    p.add_argument("--limit", type=int, default=200, help="Max messages (0 = all).")
    p.add_argument("--since", help="Start date (YYYY-MM-DD or YYYY-MM-DDTHH:MM).")
    p.add_argument("--until", help="End date (YYYY-MM-DD or YYYY-MM-DDTHH:MM).")
    p.add_argument("--query", help="Substring filter on text (case-insensitive).")
    p.add_argument("--reverse", action="store_true", help="Oldest first (default newest first).")

    # outputs
    p.add_argument("--out-json", help="Write results to JSON.")
    p.add_argument("--out-csv", help="Write results to CSV.")
    p.add_argument("--print", action="store_true", help="Print to stdout.")
    p.add_argument("--media-dir", help="Download media to this folder (optional).")
    p.add_argument("--out-html", help="Write a single combined HTML file here.")

    # auth/env
    p.add_argument("--session", default=env("SESSION","tg_session"))
    p.add_argument("--api-id", type=int, default=env("API_ID", None, int))
    p.add_argument("--api-hash", default=env("API_HASH"))
    p.add_argument("--phone", default=env("PHONE_NUMBER"))
    return p


def _as_utc(d):
    """Return *d* timezone-aware, interpreting a naive datetime as UTC (None passes through)."""
    if d is None or d.tzinfo is not None:
        return d
    return d.replace(tzinfo=dt.timezone.utc)


def _date_verdict(msg_date, since_dt, until_dt, oldest_first):
    """Classify a message date against the [since, until] window as "keep", "skip" or "stop"."""
    if since_dt is None and until_dt is None:
        return "keep"
    if msg_date is None:
        # An undated message cannot be placed in the window; drop it without
        # aborting the whole scan.
        return "skip"
    msg_date = _as_utc(msg_date)
    if since_dt is not None and msg_date < since_dt:
        # Oldest-first: newer messages are still to come, keep scanning.
        # Newest-first: everything that follows is older still, so stop.
        return "skip" if oldest_first else "stop"
    if until_dt is not None and msg_date > until_dt:
        # Mirror image of the case above.
        return "stop" if oldest_first else "skip"
    return "keep"


def main():
    """CLI entry point: list dialogs, or fetch messages and export them.

    Parses the command line, connects with the configured session, then either
    prints the dialog list (``--list``) or iterates the target chat/topic,
    applies the date and text filters, optionally downloads media, and writes
    JSON/CSV/HTML and/or prints a one-line summary per message. Exits with
    status 1 on missing credentials, failed login, or an unresolvable chat.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()

    if not args.api_id or not args.api_hash:
        print("ERROR: API_ID/API_HASH missing (.env or flags).", file=sys.stderr)
        sys.exit(1)

    # Bounds are normalised to timezone-aware UTC before being compared with
    # message dates (Telethon is expected to report those as aware UTC
    # datetimes), since comparing naive with aware raises TypeError. The same
    # timezone is what to_row() writes out via isoformat().
    try:
        since_dt = _as_utc(parse_date(args.since)) if args.since else None
        until_dt = _as_utc(parse_date(args.until)) if args.until else None
    except ValueError as e:
        # Report a malformed date as a normal usage error, not a traceback.
        parser.error(str(e))
    text_q = args.query.lower() if args.query else None
    lim = None if args.limit == 0 else args.limit

    client = TelegramClient(args.session, args.api_id, args.api_hash)

    async def run():
        # Login if needed
        if not await client.is_user_authorized():
            if not args.phone:
                print("First login requires --phone or PHONE_NUMBER in .env", file=sys.stderr)
                sys.exit(1)
            await client.send_code_request(args.phone)
            code = input("Enter the login code you received in Telegram: ").strip()
            try:
                await client.sign_in(args.phone, code)
            except RPCError as e:
                print(f"Login failed: {e}", file=sys.stderr)
                sys.exit(1)

        # LIST mode
        if args.list:
            print("id\ttype\tforum\tname\tusername")
            async for d in client.iter_dialogs():
                if args.find:
                    q = args.find.lower()
                    nm = (d.name or "").lower()
                    un = (getattr(d.entity, "username", "") or "").lower()
                    if q not in nm and q not in un:
                        continue
                print_dialog_row(d)
            return

        # Resolve chat
        try:
            entity = await client.get_entity(args.chat)
        except Exception as e:
            print(f"Could not resolve chat '{args.chat}': {e}", file=sys.stderr)
            sys.exit(1)

        results = []
        # Note: `limit` is handed to iter_messages, so it is applied before
        # the date/text filters below.
        iter_kwargs = {"entity": entity, "limit": lim, "reverse": args.reverse}
        if args.topic_id:
            iter_kwargs["reply_to"] = args.topic_id

        # Prepare media dir if requested
        media_dir = None
        if args.media_dir:
            media_dir = ensure_dir(args.media_dir)

        # Fetch
        async for m in client.iter_messages(**iter_kwargs):
            # Date filters
            verdict = _date_verdict(m.date, since_dt, until_dt, args.reverse)
            if verdict == "stop":
                break
            if verdict == "skip":
                continue

            # Text filter
            if text_q:
                body = (m.message or "")
                if text_q not in body.lower():
                    continue

            # Download media if asked
            media_path = ""
            if media_dir and m.media:
                try:
                    saved = await client.download_media(m, file=str(media_dir / ""))
                    if saved:
                        media_path = str(Path(saved))
                except Exception as e:
                    # One failed attachment must not abort the whole export.
                    print(f"Media download failed for msg {m.id}: {e}", file=sys.stderr)

            results.append(to_row(m, media_path))

        # Outputs: CSV/JSON/print
        if args.out_json:
            ensure_dir(Path(args.out_json).parent)
            with open(args.out_json, "w", encoding="utf-8") as f:
                json.dump(results, f, ensure_ascii=False, indent=2)
        if args.out_csv:
            ensure_dir(Path(args.out_csv).parent)
            with open(args.out_csv, "w", newline="", encoding="utf-8") as f:
                flds = ["id","date","sender_id","text","reply_to_msg_id","views","forwards","reactions","media","media_path","entities"]
                w = csv.DictWriter(f, fieldnames=flds)
                w.writeheader()
                w.writerows(results)

        # HTML output
        if args.out_html:
            write_html_single(Path(args.out_html), str(args.chat), results)

        # Print/log
        if args.print or (not args.out_json and not args.out_csv and not args.out_html):
            for r in results:
                # Flatten real newlines so each message stays on one line. This
                # is done outside the f-string because a backslash inside an
                # f-string expression is a syntax error before Python 3.12.
                snippet = r["text"][:120].replace("\n", " ")
                print(f"[{r['id']}] {r['date']}  {snippet}")
        print(f"Fetched {len(results)} messages.", file=sys.stderr)

    with client:
        client.loop.run_until_complete(run())

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
