#!/usr/bin/env python3
"""
AK-Threads-Booster: Render human-readable markdown companion files from the tracker.

Usage:
    python render_companions.py --tracker threads_daily_tracker.json
    python render_companions.py --tracker threads_daily_tracker.json --lang en
    python render_companions.py --tracker threads_daily_tracker.json --output-dir /path/to/working-dir

Outputs (default zh filenames; English aliases via --lang en):
    - 歷史貼文-按時間排序.md  / posts_by_date.md    — full post archive, newest first
    - 歷史貼文-按主題分類.md  / posts_by_topic.md   — topic-grouped index
    - 留言記錄.md             / comments.md         — flat comment log

Behavior:
    - auto-detects existing filename convention (Chinese vs English) and preserves it
    - backs up companion files that were modified AFTER tracker.last_updated
      (assumes user hand-edited them) to <name>.bak-<ISO>
    - idempotent: safe to re-run after every tracker mutation
"""

import argparse
import json
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


# Companion filenames keyed by section. resolve_filenames() prefers whichever
# naming convention (Chinese or English) already exists in the output dir.
ZH_NAMES = {
    "by_date": "歷史貼文-按時間排序.md",
    "by_topic": "歷史貼文-按主題分類.md",
    "comments": "留言記錄.md",
}
EN_NAMES = {
    "by_date": "posts_by_date.md",
    "by_topic": "posts_by_topic.md",
    "comments": "comments.md",
}

# Banner placed at the top of every generated file, warning that manual
# edits are overwritten on the next render (see backup_if_user_modified).
HEADER_NOTICE_ZH = "> 此檔由 /setup 與 /refresh 自動生成。手動編輯會在下次重新生成時覆蓋。\n\n"
HEADER_NOTICE_EN = "> Auto-generated by /setup and /refresh. Manual edits will be overwritten.\n\n"


def parse_iso(s: Optional[str]) -> Optional[datetime]:
    """Parse a timestamp string into a timezone-aware UTC datetime.

    Tries a list of explicit formats first, then falls back to
    ``datetime.fromisoformat``. Returns ``None`` for empty input or when
    nothing matches.

    Bug fix: the ``fromisoformat`` fallback could previously return a
    *naive* datetime (e.g. for "2024-01-02T03:04:05.123456", which matches
    none of the strptime formats). Callers use parse_iso results as sort
    keys alongside an aware ``datetime.min`` sentinel, and mixing naive and
    aware datetimes raises ``TypeError`` — so the fallback now normalizes
    to UTC as well.
    """
    if not s:
        return None
    s = s.strip()
    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M", "%Y-%m-%d"):
        try:
            dt = datetime.strptime(s, fmt)
        except ValueError:
            continue
        # None of the formats above carries an offset, so strptime always
        # yields a naive datetime here; treat it as UTC.
        return dt.replace(tzinfo=timezone.utc)
    try:
        dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
    except ValueError:
        return None
    # fromisoformat returns naive when the input had no offset; normalize so
    # every non-None result from this function is comparable/sortable.
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt


def fmt_dt(dt: Optional[datetime]) -> str:
    """Format *dt* as ``YYYY-MM-DD HH:MM``; literal "unknown" for ``None``."""
    return "unknown" if dt is None else dt.strftime("%Y-%m-%d %H:%M")


def resolve_filenames(output_dir: Path, lang: str) -> Dict[str, Path]:
    """Pick the companion-file path for each section.

    An existing file wins (Chinese name checked first, then English), so a
    previously established naming convention is preserved across runs; only
    brand-new files use the *lang* default.
    """
    default_names = ZH_NAMES if lang == "zh" else EN_NAMES
    chosen: Dict[str, Path] = {}
    for key in ("by_date", "by_topic", "comments"):
        candidates = (output_dir / ZH_NAMES[key], output_dir / EN_NAMES[key])
        existing = next((c for c in candidates if c.exists()), None)
        chosen[key] = existing if existing is not None else output_dir / default_names[key]
    return chosen


def backup_if_user_modified(path: Path, tracker_mtime: float) -> None:
    """Snapshot *path* to ``<name>.bak-<timestamp>`` if it looks hand-edited.

    A companion file whose mtime is more than one second newer than the
    tracker's was presumably modified by the user after the last render, so
    its bytes are copied aside before the file is regenerated.
    """
    if not path.exists():
        return
    # The 1-second slack avoids spurious backups from coarse filesystem
    # mtime resolution.
    if path.stat().st_mtime <= tracker_mtime + 1:
        return
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    path.with_name(path.name + f".bak-{stamp}").write_bytes(path.read_bytes())


def metrics_line(metrics: Dict[str, Any]) -> str:
    """Render engagement counters as a single bold markdown line.

    Only truthy counters are shown (zero and missing values are skipped on
    purpose); returns "" when there is nothing worth displaying.
    """
    if not metrics:
        return ""
    shown = []
    for key in ("views", "likes", "replies", "reposts", "shares"):
        value = metrics.get(key)
        if value:
            shown.append(f"{key}={value}")
    if not shown:
        return ""
    return "**數據:** " + " ".join(shown)


def render_by_date(posts: List[Dict], notice: str, lang: str) -> str:
    """Render the full-text post archive grouped by month.

    Posts are expected to arrive already sorted newest-first (main() sorts
    them); this function does not re-sort, it only emits a new month
    heading whenever the month changes.

    NOTE(review): the field labels (分類/數據) stay Chinese even when
    lang == "en"; only the title is localized — confirm that is intended.
    """
    title = "# Threads 歷史貼文（按時間排序）" if lang == "zh" else "# Threads posts by date"
    out: List[str] = [title, "", notice.rstrip() + "\n", f"> Total: {len(posts)} posts\n"]

    current_month = None
    for post in posts:
        created = parse_iso(post.get("created_at"))
        month = created.strftime("%Y-%m") if created else "unknown"
        if month != current_month:
            out.append(f"\n## {month}\n")
            current_month = month

        out.append(f"### {fmt_dt(created)}\n")
        topic_list = post.get("topics") or []
        # content_type falls back to the first topic, then a dash.
        kind = post.get("content_type") or (topic_list[0] if topic_list else "-")
        out.append(f"**分類:** {kind}  ")
        out.append(f"**ID:** {post.get('id', '-')}  ")
        stats = metrics_line(post.get("metrics") or {})
        if stats:
            out.append(stats + "  ")
        out.append("")
        out.append((post.get("text") or "").rstrip())
        out.append("\n---\n")
    return "\n".join(out)


def render_by_topic(posts: List[Dict], notice: str, lang: str, by_date_name: str) -> str:
    """Render a topic-grouped index of posts.

    A post with several topics appears under each of them; posts with no
    topics land in "uncategorized". Topics are ordered by descending size
    (alphabetical tiebreak); within a topic, posts are newest-first. Each
    entry references *by_date_name* for the full text.
    """
    title = "# Threads 歷史貼文（按主題分類）" if lang == "zh" else "# Threads posts by topic"
    out = [title, "", notice.rstrip() + "\n",
           f"> Indexed from {len(posts)} posts. See `{by_date_name}` for full text.\n"]

    grouped: Dict[str, List[Dict]] = {}
    for post in posts:
        for topic in (post.get("topics") or ["uncategorized"]):
            grouped.setdefault(topic, []).append(post)

    # Undated posts use an aware epoch sentinel so they sort last.
    def _created(post: Dict) -> datetime:
        return parse_iso(post.get("created_at")) or datetime.min.replace(tzinfo=timezone.utc)

    for topic in sorted(grouped, key=lambda t: (-len(grouped[t]), t)):
        members = sorted(grouped[topic], key=_created, reverse=True)
        out.append(f"\n## {topic}  ({len(members)})\n")
        for post in members:
            when = fmt_dt(parse_iso(post.get("created_at")))
            first_line = (post.get("text") or "").strip().split("\n", 1)[0][:80]
            out.append(f"- **{when}** — {first_line}  \n  ID: `{post.get('id','-')}`")
    return "\n".join(out)


def render_comments(posts: List[Dict], tracker: Dict, notice: str, lang: str) -> str:
    """Render a flat, newest-first log of all comments across posts.

    Comments that the tracker could not attach to any post
    (``tracker["unmatched_comments"]``) are appended in a trailing section,
    also newest-first.
    """
    title = "# Threads 留言記錄" if lang == "zh" else "# Threads comments log"
    out = [title, "", notice.rstrip() + "\n"]

    # Aware sentinel for undated comments: sorts last and is detectable
    # later via year == 1.
    epoch = datetime.min.replace(tzinfo=timezone.utc)

    entries: List[Tuple[datetime, str, str, str]] = []
    for post in posts:
        post_ref = f"{post.get('id', '-')} ({fmt_dt(parse_iso(post.get('created_at')))})"
        for comment in post.get("comments") or []:
            when = parse_iso(comment.get("created_at")) or epoch
            entries.append((when, post_ref, comment.get("user") or "-",
                            (comment.get("text") or "").strip()))

    entries.sort(key=lambda entry: entry[0], reverse=True)
    out.append(f"> Total: {len(entries)} comments\n")

    for when, post_ref, user, text in entries:
        # year 1 == the epoch sentinel above, i.e. the date was unparseable.
        stamp = fmt_dt(when) if when.year > 1 else "unknown"
        out.append(f"\n**[{stamp}]** on {post_ref} — @{user}")
        out.append(text)

    leftovers = tracker.get("unmatched_comments") or []
    if leftovers:
        heading = "## 未配對留言" if lang == "zh" else "## Unmatched"
        out.append(f"\n\n{heading}  ({len(leftovers)})\n")
        ordered = sorted(leftovers,
                         key=lambda c: parse_iso(c.get("created_at")) or epoch,
                         reverse=True)
        for comment in ordered:
            stamp = fmt_dt(parse_iso(comment.get("created_at")))
            out.append(f"\n**[{stamp}]** @{comment.get('user') or '-'}")
            out.append((comment.get("text") or "").strip())

    return "\n".join(out)


def main(argv: Optional[List[str]] = None) -> int:
    """CLI entry point: render the three companion files from the tracker.

    Args:
        argv: argument list for argparse (default: ``sys.argv[1:]``).

    Returns:
        0 on success, 1 for a missing/unreadable/invalid tracker file,
        2 when the tracker still uses the legacy dict shape for posts.
    """
    ap = argparse.ArgumentParser(description="Render human-readable companion markdown files from the tracker JSON.")
    ap.add_argument("--tracker", required=True, help="Path to threads_daily_tracker.json")
    ap.add_argument("--output-dir", default=None, help="Where to write companion files (default: tracker's directory)")
    ap.add_argument("--lang", choices=["zh", "en"], default="zh", help="Default filename language when companions don't yet exist (default: zh)")
    args = ap.parse_args(argv)

    tracker_path = Path(args.tracker).resolve()
    if not tracker_path.exists():
        print(f"[render_companions] tracker not found: {tracker_path}", file=sys.stderr)
        return 1

    output_dir = Path(args.output_dir).resolve() if args.output_dir else tracker_path.parent
    output_dir.mkdir(parents=True, exist_ok=True)

    # Fail with a clear one-line error instead of a traceback when the
    # tracker is unreadable or not valid JSON.
    try:
        tracker = json.loads(tracker_path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError) as exc:
        print(f"[render_companions] cannot read tracker {tracker_path}: {exc}", file=sys.stderr)
        return 1
    if not isinstance(tracker, dict):
        print(f"[render_companions] tracker root must be a JSON object, got {type(tracker).__name__}", file=sys.stderr)
        return 1

    posts_raw = tracker.get("posts") or []
    if isinstance(posts_raw, dict):
        print("[render_companions] tracker uses legacy dict shape for posts; run /setup Path E migration first.", file=sys.stderr)
        return 2

    # Newest first; undated posts use an aware epoch sentinel so they sort last.
    posts = sorted(
        posts_raw,
        key=lambda p: parse_iso(p.get("created_at")) or datetime.min.replace(tzinfo=timezone.utc),
        reverse=True,
    )

    names = resolve_filenames(output_dir, args.lang)
    notice = HEADER_NOTICE_ZH if args.lang == "zh" else HEADER_NOTICE_EN
    tracker_mtime = tracker_path.stat().st_mtime

    # Preserve files the user hand-edited after the last tracker update.
    for path in names.values():
        backup_if_user_modified(path, tracker_mtime)

    names["by_date"].write_text(render_by_date(posts, notice, args.lang), encoding="utf-8")
    names["by_topic"].write_text(render_by_topic(posts, notice, args.lang, names["by_date"].name), encoding="utf-8")
    names["comments"].write_text(render_comments(posts, tracker, notice, args.lang), encoding="utf-8")

    total_comments = sum(len(p.get("comments") or []) for p in posts)
    unmatched = len(tracker.get("unmatched_comments") or [])

    print("[render_companions] wrote:")
    for key, path in names.items():
        print(f"  {key}: {path.relative_to(output_dir) if path.is_relative_to(output_dir) else path}")
    print(f"[render_companions] posts rendered: {len(posts)}")
    print(f"[render_companions] comments rendered: {total_comments} (+ {unmatched} unmatched)")
    return 0


if __name__ == "__main__":
    # sys.exit(main()) equivalent: propagate main()'s return code as the
    # process exit status.
    raise SystemExit(main())
