#!/usr/bin/env python3
import argparse, os, sys, json, csv, html, datetime as dt
from pathlib import Path
from telethon import TelegramClient
from telethon.errors import RPCError
from telethon.tl.types import Message

# Optional convenience: if the `dotenv` package is importable, call its
# load_dotenv() so settings such as API_ID/API_HASH can come from a .env file.
# Any failure (package missing, load error) is deliberately ignored so the
# script still works with plain environment variables or CLI flags.
try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception:
    pass

def env(name, default=None, cast=str):
    """Look up environment variable ``name`` and optionally convert it.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset. It is passed through
            ``cast`` as well.
        cast: Callable applied to the value found; a falsy ``cast`` returns
            the value unchanged.

    Returns:
        ``None`` when the variable is unset and no default was given,
        otherwise the (possibly converted) value.
    """
    raw = os.getenv(name, default)
    if raw is None:
        return None
    if not cast:
        return raw
    return cast(raw)

def parse_date(s):
    """Parse a CLI date string into a timezone-aware UTC datetime.

    Accepted formats are ``YYYY-MM-DD`` and ``YYYY-MM-DDTHH:MM``. The value is
    interpreted as UTC and returned timezone-aware, so it can be compared
    with timezone-aware message dates; comparing a naive datetime with an
    aware one raises ``TypeError``.

    Args:
        s: Date string, or a falsy value (``None`` / ``""``).

    Returns:
        An aware ``datetime`` in UTC, or ``None`` when ``s`` is falsy.

    Raises:
        ValueError: If ``s`` matches none of the accepted formats.
    """
    if not s:
        return None
    for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M"):
        try:
            parsed = dt.datetime.strptime(s, fmt)
        except ValueError:
            continue  # try the next accepted format
        return parsed.replace(tzinfo=dt.timezone.utc)
    raise ValueError(f"Bad date: {s} (use YYYY-MM-DD or YYYY-MM-DDTHH:MM)")

def to_row(m: Message, media_path: str = ""):
    """Flatten a message object into a plain dict for JSON/CSV/HTML output.

    Args:
        m: Message-like object exposing ``id``, ``date``, ``from_id``,
            ``message``, ``reply_to_msg_id``, ``views``, ``forwards``,
            ``media`` and ``entities`` (``reactions`` is optional).
        media_path: Path of the downloaded media file, or ``""`` if none.

    Returns:
        A dict with the keys ``id``, ``date``, ``sender_id``, ``text``,
        ``reply_to_msg_id``, ``views``, ``forwards``, ``reactions``,
        ``media``, ``media_path`` and ``entities``. Missing or falsy values
        are stored as ``""``.
    """
    peer = m.from_id
    sender_id = getattr(peer, "user_id", None) or getattr(peer, "channel_id", None) or ""

    # ``reactions`` may be absent on some message types, hence getattr.
    reactions = getattr(m, "reactions", None)
    reaction_counts = (reactions.results or []) if reactions else []
    reaction_parts = []
    for rc in reaction_counts:
        reaction = rc.reaction
        # Prefer the emoji itself, then a custom-emoji document id, and only
        # then the object's own string form (which is also what a plain-string
        # reaction falls back to).
        label = (
            getattr(reaction, "emoticon", None)
            or getattr(reaction, "document_id", None)
            or reaction
        )
        reaction_parts.append(f"{label}: {rc.count}")

    # ``entities`` is a collection; report the type of each entity rather
    # than the type of the container (which would always read "list").
    entity_names = ",".join(type(e).__name__ for e in m.entities) if m.entities else ""

    return {
        "id": m.id,
        "date": m.date.isoformat() if m.date else "",
        "sender_id": sender_id,
        "text": m.message or "",
        "reply_to_msg_id": m.reply_to_msg_id or "",
        "views": m.views or "",
        "forwards": m.forwards or "",
        "reactions": ",".join(reaction_parts),
        "media": type(m.media).__name__ if m.media else "",
        "media_path": media_path,
        "entities": entity_names,
    }

def ensure_dir(p: str | Path):
    Path(p).mkdir(parents=True, exist_ok=True)
    return Path(p)

def write_html_single(out_path: Path, chat_label: str, rows: list[dict]):
    """Render every row into one self-contained HTML page at ``out_path``.

    Args:
        out_path: Destination file; its parent directory is created if needed.
        chat_label: Human-readable chat name, HTML-escaped into the title.
        rows: Row dicts with at least the keys ``id``, ``date``, ``text``,
            ``sender_id``, ``views``, ``forwards``, ``reactions``,
            ``reply_to_msg_id`` and ``media_path``.

    Media links are rewritten relative to the directory containing
    ``out_path`` (and percent-encoded), so the page keeps working regardless
    of the working directory the export was run from.
    """
    from urllib.parse import quote  # local import: only needed for media links

    out_path = Path(out_path)
    page_dir = out_path.parent
    page_dir.mkdir(parents=True, exist_ok=True)

    image_exts = (".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp")
    video_exts = (".mp4", ".webm", ".ogg")

    def media_url(media_path):
        """Return an attribute-safe URL for ``media_path`` as seen from the page."""
        try:
            rel = os.path.relpath(media_path, start=page_dir)
        except ValueError:
            # No relative path exists (e.g. different Windows drives):
            # fall back to an absolute file:// URI.
            return html.escape(Path(media_path).resolve().as_uri())
        return html.escape(quote(Path(rel).as_posix()))

    def media_to_html(media_path):
        """Return the embed/link markup for a media file, or "" if none."""
        if not media_path:
            return ""
        url = media_url(media_path)
        # naive embed for common types, decided on the file name's suffix
        lower = str(media_path).lower()
        if lower.endswith(image_exts):
            return f'<div class="media"><img src="{url}" alt="media"></div>'
        if lower.endswith(video_exts):
            return f'<div class="media"><video src="{url}" controls></video></div>'
        return f'<div class="media"><a href="{url}">Download attachment</a></div>'

    def row_to_html(r):
        """Render one row as an <article> element."""
        body = html.escape(r["text"]).replace("\n", "<br>")
        meta = []
        if r["sender_id"]: meta.append(f"from: {r['sender_id']}")
        if r["views"]: meta.append(f"views: {r['views']}")
        if r["forwards"]: meta.append(f"fwd: {r['forwards']}")
        if r["reactions"]: meta.append(f"react: {html.escape(r['reactions'])}")
        if r["reply_to_msg_id"]: meta.append(f"reply→{r['reply_to_msg_id']}")
        meta_str = " • ".join(meta)
        media_html = media_to_html(r["media_path"])
        return f"""
        <article id="m{r['id']}">
          <header>
            <a class="msgid" href="#m{r['id']}">#{r['id']}</a>
            <time>{html.escape(r['date'])}</time>
            <span class="meta">{meta_str}</span>
          </header>
          <div class="text">{body}</div>
          {media_html}
        </article>
        """

    items = "\n".join(row_to_html(r) for r in rows)
    title = f"Telegram export — {html.escape(chat_label)}"
    doc = f"""<!doctype html>
<html lang="en">
<meta charset="utf-8">
<title>{title}</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
body{{font:14px/1.4 system-ui,Segoe UI,Arial,sans-serif;max-width:900px;margin:2rem auto;padding:0 1rem;background:#0b0b0b;color:#eaeaea}}
h1{{font-size:1.4rem;margin:0 0 1rem}}
header.top{{display:flex;gap:1rem;align-items:baseline;justify-content:space-between}}
article{{border:1px solid #2a2a2a;border-radius:14px;padding:12px 14px;margin:12px 0;background:#141414;box-shadow:0 1px 2px rgba(0,0,0,.2)}}
article header{{display:flex;gap:.8rem;align-items:center;font-size:.85rem;color:#bdbdbd}}
article header .msgid{{text-decoration:none;color:#8ab4f8}}
article .text{{margin-top:.3rem;white-space:normal;word-wrap:break-word}}
.media img, .media video{{max-width:100%;height:auto;border-radius:10px;margin-top:.6rem}}
footer{{opacity:.7;font-size:.8rem;margin:2rem 0}}
</style>
<body>
<header class="top">
  <h1>{title}</h1>
  <div>{len(rows)} messages</div>
</header>
{items}
<footer>Generated by tg_fetch.py</footer>
</body>
</html>
"""
    out_path.write_text(doc, encoding="utf-8")

def write_html_per_message(out_dir: Path, chat_label: str, rows: list[dict]):
    """Write one HTML page per row into ``out_dir`` plus an ``index.html``.

    Args:
        out_dir: Directory that receives ``msg_<id>.html`` and ``index.html``;
            created if missing.
        chat_label: Human-readable chat name, HTML-escaped into titles.
        rows: Row dicts with at least the keys ``id``, ``date``, ``text`` and
            ``media_path``.

    A shared ``style.css`` is written to the parent of ``out_dir`` and linked
    from every page as ``../style.css``. Media links are computed relative to
    ``out_dir`` (and percent-encoded) instead of assuming a sibling ``media``
    folder, so they resolve wherever the media was actually downloaded.
    """
    from urllib.parse import quote  # local import: only needed for media links

    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    label = html.escape(chat_label)

    image_exts = (".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp")
    video_exts = (".mp4", ".webm", ".ogg")

    def media_url(media_path):
        """Return an attribute-safe URL for ``media_path`` as seen from ``out_dir``."""
        try:
            rel = os.path.relpath(media_path, start=out_dir)
        except ValueError:
            # No relative path exists (e.g. different Windows drives):
            # fall back to an absolute file:// URI.
            return html.escape(Path(media_path).resolve().as_uri())
        return html.escape(quote(Path(rel).as_posix()))

    def media_to_html(media_path):
        """Return the embed/link markup for a media file, or "" if none."""
        if not media_path:
            return ""
        url = media_url(media_path)
        lower = str(media_path).lower()
        if lower.endswith(image_exts):
            return f'<div class="media"><img src="{url}" alt="media"></div>'
        if lower.endswith(video_exts):
            return f'<div class="media"><video src="{url}" controls></video></div>'
        return f'<div class="media"><a href="{url}">Download attachment</a></div>'

    idx_items = []
    for r in rows:
        fname = f"msg_{r['id']}.html"
        text = r["text"] or ""
        # index entry: link, timestamp and an 80-character preview
        idx_items.append(
            f'<li><a href="{fname}">#{r["id"]}</a> <time>{html.escape(r["date"])}</time>'
            f' — {html.escape(text[:80])}</li>'
        )
        # page
        body = html.escape(text).replace("\n", "<br>")
        media_html = media_to_html(r["media_path"])

        page = f"""<!doctype html>
<html lang="en"><meta charset="utf-8">
<title>#{r['id']} — {label}</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="../style.css">
<body>
<a href="index.html">← back</a>
<article id="m{r['id']}">
  <header>
    <a class="msgid" href="#m{r['id']}">#{r['id']}</a>
    <time>{html.escape(r['date'])}</time>
  </header>
  <div class="text">{body}</div>
  {media_html}
</article>
</body></html>
"""
        (out_dir / fname).write_text(page, encoding="utf-8")

    # Joined outside the f-string: a backslash inside an f-string expression
    # is a SyntaxError on Python versions before 3.12.
    index_list = "\n".join(idx_items)
    index = f"""<!doctype html>
<html lang="en"><meta charset="utf-8">
<title>Telegram export — {label}</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="../style.css">
<body>
<h1>Telegram export — {label}</h1>
<p>{len(rows)} messages.</p>
<ol>
{index_list}
</ol>
</body></html>
"""
    (out_dir / "index.html").write_text(index, encoding="utf-8")

    # shared CSS, placed next to (not inside) out_dir to match the
    # "../style.css" links above
    css = """body{font:14px/1.4 system-ui,Segoe UI,Arial,sans-serif;max-width:900px;margin:2rem auto;padding:0 1rem;background:#0b0b0b;color:#eaeaea}
h1{font-size:1.4rem;margin:0 0 1rem}
article{border:1px solid #2a2a2a;border-radius:14px;padding:12px 14px;margin:12px 0;background:#141414;box-shadow:0 1px 2px rgba(0,0,0,.2)}
article header{display:flex;gap:.8rem;align-items:center;font-size:.85rem;color:#bdbdbd}
article header .msgid{text-decoration:none;color:#8ab4f8}
article .text{margin-top:.3rem;white-space:normal;word-wrap:break-word}
.media img, .media video{max-width:100%;height:auto;border-radius:10px;margin-top:.6rem}
a{color:#8ab4f8}
"""
    (out_dir.parent / "style.css").write_text(css, encoding="utf-8")

def main():
    """CLI entry point: fetch messages from a chat/topic and export them.

    Parses the command line, connects with a ``TelegramClient``, iterates the
    selected messages, applies the optional date/text filters, optionally
    downloads media, and writes the requested JSON / CSV / HTML outputs.
    When no output file is requested (or ``--print`` is given) a one-line
    summary per message is printed to stdout. Exits with status 1 on missing
    credentials, failed login or an unresolvable chat.
    """
    p = argparse.ArgumentParser(description="Fetch/export Telegram messages from a chat or a forum topic.")
    # selection
    p.add_argument("--from", dest="chat", required=True, help="Target chat: @username or numeric id (-100...).")
    p.add_argument("--topic-id", type=int, help="Forum topic id (root message id).")
    p.add_argument("--limit", type=int, default=200, help="Max messages (0 = all).")
    p.add_argument("--since", help="Start date (YYYY-MM-DD or YYYY-MM-DDTHH:MM).")
    p.add_argument("--until", help="End date (YYYY-MM-DD or YYYY-MM-DDTHH:MM).")
    p.add_argument("--query", help="Substring filter on text (case-insensitive).")
    p.add_argument("--reverse", action="store_true", help="Oldest first (default newest first).")

    # outputs
    p.add_argument("--out-json", help="Write results to JSON.")
    p.add_argument("--out-csv", help="Write results to CSV.")
    p.add_argument("--print", action="store_true", help="Print to stdout.")
    p.add_argument("--media-dir", help="Download media to this folder (optional).")
    p.add_argument("--out-html", help="Write a single combined HTML file here.")
    p.add_argument("--out-html-dir", help="Write one HTML file per message into this folder (also writes an index.html).")

    # auth/env
    p.add_argument("--session", default=env("SESSION","tg_session"))
    p.add_argument("--api-id", type=int, default=env("API_ID", None, int))
    p.add_argument("--api-hash", default=env("API_HASH"))
    p.add_argument("--phone", default=env("PHONE_NUMBER"))

    args = p.parse_args()

    if not args.api_id or not args.api_hash:
        print("ERROR: API_ID/API_HASH missing (.env or flags).", file=sys.stderr)
        sys.exit(1)

    def as_utc(value):
        """Attach UTC to a naive datetime so it compares with aware message dates."""
        if value is not None and value.tzinfo is None:
            return value.replace(tzinfo=dt.timezone.utc)
        return value

    try:
        since_dt = as_utc(parse_date(args.since)) if args.since else None
        until_dt = as_utc(parse_date(args.until)) if args.until else None
    except ValueError as exc:
        # Report a malformed date as a normal usage error instead of a traceback.
        p.error(str(exc))
    text_q = args.query.lower() if args.query else None
    lim = None if args.limit == 0 else args.limit

    client = TelegramClient(args.session, args.api_id, args.api_hash)

    async def run():
        # Login if needed
        if not await client.is_user_authorized():
            if not args.phone:
                print("First login requires --phone or PHONE_NUMBER in .env", file=sys.stderr)
                sys.exit(1)
            await client.send_code_request(args.phone)
            code = input("Enter the login code you received in Telegram: ").strip()
            try:
                await client.sign_in(args.phone, code)
            except RPCError as e:
                print(f"Login failed: {e}", file=sys.stderr)
                sys.exit(1)

        # Resolve chat
        try:
            entity = await client.get_entity(args.chat)
        except Exception as e:
            print(f"Could not resolve chat '{args.chat}': {e}", file=sys.stderr)
            sys.exit(1)

        results = []
        # NOTE: the limit is applied by iter_messages *before* the local
        # date/text filters below, so fewer than --limit rows may come back.
        iter_kwargs = {"entity": entity, "limit": lim, "reverse": args.reverse}
        if args.topic_id:
            iter_kwargs["reply_to"] = args.topic_id

        # Prepare media dir if requested
        media_dir = None
        if args.media_dir:
            media_dir = ensure_dir(args.media_dir)

        # Fetch
        async for m in client.iter_messages(**iter_kwargs):
            # Date filters
            if (since_dt or until_dt) and m.date is None:
                continue  # cannot place an undated message in the window
            if since_dt and m.date < since_dt:
                if not args.reverse:
                    break  # newest-first: everything that follows is older still
                continue  # oldest-first: skip until the window starts
            if until_dt and m.date > until_dt:
                if args.reverse:
                    break  # oldest-first: everything that follows is newer still
                continue  # newest-first: skip until the window starts

            # Text filter
            if text_q:
                body = (m.message or "")
                if text_q not in body.lower():
                    continue

            # Download media if asked
            media_path = ""
            if media_dir and m.media:
                try:
                    saved = await client.download_media(m, file=str(media_dir / ""))
                    if saved:
                        media_path = str(Path(saved))
                except Exception as e:
                    # Best effort: a failed download must not abort the export.
                    print(f"Media download failed for msg {m.id}: {e}", file=sys.stderr)

            results.append(to_row(m, media_path))

        # Outputs: CSV/JSON/print
        if args.out_json:
            ensure_dir(Path(args.out_json).parent)
            with open(args.out_json, "w", encoding="utf-8") as f:
                json.dump(results, f, ensure_ascii=False, indent=2)
        if args.out_csv:
            ensure_dir(Path(args.out_csv).parent)
            with open(args.out_csv, "w", newline="", encoding="utf-8") as f:
                flds = ["id","date","sender_id","text","reply_to_msg_id","views","forwards","reactions","media","media_path","entities"]
                w = csv.DictWriter(f, fieldnames=flds)
                w.writeheader()
                w.writerows(results)

        # HTML outputs
        chat_label = str(args.chat)
        if args.out_html:
            write_html_single(Path(args.out_html), chat_label, results)
        if args.out_html_dir:
            outdir = ensure_dir(args.out_html_dir)
            write_html_per_message(outdir, chat_label, results)

        # Print/log
        if args.print or (not args.out_json and not args.out_csv and not args.out_html and not args.out_html_dir):
            for r in results:
                # Collapse newlines so each message stays on one output line.
                # Done outside the f-string: backslashes inside f-string
                # expressions are a SyntaxError before Python 3.12.
                preview = r["text"][:120].replace("\n", " ")
                print(f"[{r['id']}] {r['date']}  {preview}")
        print(f"Fetched {len(results)} messages.", file=sys.stderr)

    # NOTE(review): entering the client context may already run Telethon's own
    # interactive start()/login flow — confirm whether the manual login branch
    # in run() (and --phone) is actually reached.
    with client:
        client.loop.run_until_complete(run())

# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
