#!/usr/bin/env python3
"""
The Bridge — Claude outline → DaVinci Resolve FCPXML 1.9 timeline.

Takes a documentary outline (markdown with timecodes, or SRT) and emits an
FCPXML 1.9 file that DaVinci Resolve imports cleanly via File → Import → Timeline.

The output timeline has:
  - V1 video track with named empty placeholder clips at each beat position
    (clip name = beat label, so you see "HERO SETUP", "VILLAIN PRESSURE", etc.
    sitting on your timeline like sticky notes)
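  - A marker at each beat start, tagged with its storytelling role in the marker
    name (e.g. "[HERO] Hero Setup"). Each role also has a colour assigned in
    BEAT_COLORS (red=hero, yellow=villain, green=crescendo, blue=exposition,
    purple=valley, cyan=other); the colour is not yet written into the FCPXML.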

You stop hunting for timecodes. You start filling in the gaps under labelled
placeholders that already exist on your timeline.

Usage:
  python bridge.py outline.md --fps 24 --out timeline.fcpxml
  python bridge.py outline.srt --fps 23.976
  python bridge.py outline.md --fps 25 --project "Architect Doc v1"

Input formats supported:
  1. Markdown outline with timecodes, e.g.:
       ## 00:00:10 — Hero Setup
       Architect walks through the construction site.
       The wind picks up.

       ## 00:00:24 — Villain Pressure
       Cut to the developer's office...

  2. Bracketed-timecode markdown:
       [00:00:10] HERO: Architect walks through site
       [00:00:24] VILLAIN: Developer's office

  3. SRT subtitle file (standard format)

No dependencies. Python 3.9+. Works offline.
"""
from __future__ import annotations

import argparse
import html
import re
import sys
import uuid
from dataclasses import dataclass, field
from fractions import Fraction
from pathlib import Path
from typing import Iterable


# --------------------------------------------------------------------------- #
# Beat model                                                                  #
# --------------------------------------------------------------------------- #


# Storytelling keyword → (marker tag, RGBA colour string).
# Keys are the lowercase keywords returned by classify_beat(), plus "default"
# as the fallback. build_fcpxml() upper-cases the tag into the marker name
# ("[HERO] ..."); the colour string is looked up there but not emitted.
BEAT_COLORS = {
    "hero":       ("Hero",       "1 0.4 0.4 1"),       # red
    "villain":    ("Villain",    "1 1 0.3 1"),         # yellow
    "crescendo":  ("Crescendo",  "0.4 1 0.4 1"),       # green
    "climax":     ("Crescendo",  "0.4 1 0.4 1"),       # green (alias)
    "exposition": ("Exposition", "0.4 0.6 1 1"),       # blue
    "opening":    ("Exposition", "0.4 0.6 1 1"),       # blue (alias)
    "valley":     ("Valley",     "0.8 0.4 1 1"),       # purple
    "release":    ("Valley",     "0.8 0.4 1 1"),       # purple (alias)
    # NOTE(review): "release" is tagged "Valley" but "ending" is tagged
    # "Release" — confirm whether these two aliases should share one tag.
    "ending":     ("Release",    "0.8 0.4 1 1"),       # purple (alias)
    "default":    ("Beat",       "0.4 0.9 1 1"),       # cyan
}


@dataclass
class Beat:
    """A single outline beat: where it starts, what it is called, and its notes."""

    # Position of the beat on the timeline, in seconds from 0.
    start_seconds: float
    # Short beat name as written in the outline.
    label: str
    # Free-form description attached to the beat (may be empty).
    body: str = ""
    # None until a parser or fill_durations() assigns a length in seconds.
    duration_seconds: float | None = None
    # Key into BEAT_COLORS; parsers set it from classify_beat().
    color_key: str = "default"

    @property
    def display_name(self) -> str:
        """Label with surrounding whitespace removed, or "Beat" if that is empty."""
        trimmed = self.label.strip()
        return trimmed if trimmed else "Beat"


# --------------------------------------------------------------------------- #
# Timecode helpers                                                            #
# --------------------------------------------------------------------------- #


# HH:MM:SS with an optional fractional part introduced by "." or "," (1–3 digits).
TC_PATTERN = re.compile(
    r"(\d{1,2}):(\d{1,2}):(\d{1,2})(?:[.,](\d{1,3}))?"
)
# MM:SS shorthand (e.g. "01:30") that humans write — not followed by another colon.
MM_SS_PATTERN = re.compile(r"(?<!\d:)(\d{1,2}):(\d{2})(?:[.,](\d{1,3}))?(?!:)")


def parse_timecode(raw: str) -> float | None:
    """Convert the first timecode found in *raw* to seconds.

    Accepts HH:MM:SS and the MM:SS shorthand, each with an optional
    ".mmm" / ",mmm" fraction. HH:MM:SS is tried first.

    Returns:
        The time in seconds, always as a ``float`` (so callers get one type
        whether or not a fraction was present), or ``None`` if *raw* contains
        no recognisable timecode.
    """
    m = TC_PATTERN.search(raw)
    if m:
        hh, mm, ss, ms = m.groups()
    else:
        # Fall back to MM:SS shorthand and treat the hour field as zero.
        m = MM_SS_PATTERN.search(raw)
        if not m:
            return None
        hh = "0"
        mm, ss, ms = m.groups()

    seconds = float(int(hh) * 3600 + int(mm) * 60 + int(ss))
    if ms:
        # "5" means 500 ms, "25" means 250 ms: right-pad to three digits.
        seconds += int(ms.ljust(3, "0")) / 1000
    return seconds


def classify_beat(label: str, body: str) -> str:
    """Return the storytelling keyword found in *label*, or ``"default"``.

    Only the LABEL is inspected — body text shouldn't override the explicit
    beat name (e.g. a body mentioning 'opening day' shouldn't make a Release
    beat classify as opening exposition). *body* is accepted for interface
    compatibility and is ignored.

    Keywords are tried in a fixed priority order, and a keyword only counts
    when it begins a word, so "Heroes" and "Anti-hero" match ``hero`` but
    "Pending approval" does not match ``ending``.
    """
    text = label.lower()
    for explicit in ("hero", "villain", "crescendo", "climax",
                     "exposition", "opening", "valley", "release", "ending"):
        # (?<![^\W_]) = "not preceded by a letter or digit": the keyword must
        # start a word, while suffixes (plural, -ic, -s) are still allowed.
        if re.search(rf"(?<![^\W_]){explicit}", text):
            return explicit
    return "default"


# --------------------------------------------------------------------------- #
# Parsers                                                                     #
# --------------------------------------------------------------------------- #


def parse_srt(content: str) -> list[Beat]:
    """Parse SubRip (SRT) text — each cue becomes a beat.

    Within a cue, the first line containing ``-->`` is the timing line.
    Everything after it is the cue text: its first line becomes the beat
    label and any remaining lines the body. Lines before the timing line
    (the cue index) are ignored, so numeric subtitle text such as "1984" is
    kept and a BOM-prefixed index on the first cue is not mistaken for text.

    Cues without a parsable start time or without text are skipped. The cue's
    duration is recorded only when the end time is later than the start.
    """
    beats: list[Beat] = []
    # Drop a leading BOM and normalise line endings so cue splitting is uniform.
    text = content.lstrip("\ufeff").replace("\r\n", "\n").replace("\r", "\n").strip()
    # Cues are separated by blank lines; tolerate blank lines holding spaces/tabs.
    for block in re.split(r"\n\s*\n", text):
        lines = [ln.strip() for ln in block.splitlines() if ln.strip()]
        tc_index = next((i for i, ln in enumerate(lines) if "-->" in ln), None)
        if tc_index is None:
            continue
        start_raw, _, end_raw = lines[tc_index].partition("-->")
        start = parse_timecode(start_raw)
        end = parse_timecode(end_raw)
        if start is None:
            continue
        text_lines = lines[tc_index + 1:]
        if not text_lines:
            continue
        label = text_lines[0]
        body = "\n".join(text_lines[1:])
        has_span = end is not None and end > start
        beat = Beat(
            start_seconds=start,
            label=label,
            body=body,
            duration_seconds=(end - start) if has_span else None,
        )
        beat.color_key = classify_beat(beat.label, beat.body)
        beats.append(beat)
    return beats


def parse_markdown(content: str) -> list[Beat]:
    """
    Parse markdown outlines in either form:
      A) Heading-with-timecode:
           ## 00:00:10 — Hero Setup
           body lines until next heading
      B) Bracketed-timecode line:
           [00:00:10] HERO: Architect walks
           [00:00:24] VILLAIN: Developer office

    Form B takes precedence: if any bracketed line yields a beat, only the
    bracketed lines are returned and headings are not consulted. Otherwise
    Form A is used, where lines following a timecoded heading (including
    headings without a timecode) accumulate into that beat's body.

    Returns the beats in document order; an empty list if neither form matches.
    """
    # A BOM would otherwise stop the first line from matching either pattern.
    lines = content.lstrip("\ufeff").splitlines()
    beats: list[Beat] = []

    # Form B — bracketed lines.
    # Accept HH:MM:SS or MM:SS shorthand inside brackets
    bracket_pattern = re.compile(r"^\s*\[((?:\d{1,2}:)?\d{1,2}:\d{2}(?:[.,]\d+)?)\]\s*(.*)$")
    for line in lines:
        m = bracket_pattern.match(line)
        if not m:
            continue
        start = parse_timecode(m.group(1))
        rest = m.group(2).strip()
        if start is None or not rest:
            continue
        # split label from body at the first colon/dash separator if present
        label, body = _split_label_body(rest)
        beat = Beat(start_seconds=start, label=label, body=body)
        beat.color_key = classify_beat(label, body)
        beats.append(beat)
    if beats:
        return beats

    # Form A — heading-with-timecode.
    heading_pattern = re.compile(
        r"^\s{0,3}(#{1,6})\s+"                                  # markdown heading marker
        r"(?P<tc>(?:\d{1,2}:)?\d{1,2}:\d{2}(?:[.,]\d+)?)"        # HH:MM:SS or MM:SS
        r"\s*[—–\-:]?\s*"                                        # separator
        r"(?P<label>.*?)\s*$"
    )

    def _finish(beat: Beat, body_lines: list[str]) -> Beat:
        """Attach the collected body to *beat* and classify it."""
        beat.body = "\n".join(body_lines).strip()
        beat.color_key = classify_beat(beat.label, beat.body)
        return beat

    current: Beat | None = None
    buffer: list[str] = []
    for line in lines:
        m = heading_pattern.match(line)
        if m is None:
            # Body text only counts once a timecoded heading has been seen.
            if current is not None:
                buffer.append(line)
            continue
        if current is not None:
            beats.append(_finish(current, buffer))
        # Reset before parsing the new heading so that a heading whose timecode
        # cannot be parsed never leaves the previous beat open to be emitted twice.
        current = None
        buffer = []
        start = parse_timecode(m.group("tc"))
        if start is not None:
            label = m.group("label").strip() or "Beat"
            current = Beat(start_seconds=start, label=label)
    if current is not None:
        beats.append(_finish(current, buffer))
    return beats


def _split_label_body(rest: str) -> tuple[str, str]:
    """Split 'HERO: Architect walks' or 'HERO — Architect walks' into (label, body)."""
    for sep in (": ", " — ", " – ", " - "):
        if sep in rest:
            label, _, body = rest.partition(sep)
            return label.strip(), body.strip()
    return rest.strip(), ""


def parse_outline(path: Path) -> list[Beat]:
    """Read *path* and parse it into beats.

    Files with a ``.srt`` suffix (case-insensitive) go through parse_srt();
    everything else is treated as markdown via parse_markdown().

    The file is decoded as ``utf-8-sig`` so that a UTF-8 byte-order mark is
    removed instead of ending up glued to the first line, where it would stop
    the first heading / bracket line / cue from being recognised. Files
    without a BOM decode exactly as plain UTF-8.
    """
    content = path.read_text(encoding="utf-8-sig")
    if path.suffix.lower() == ".srt":
        return parse_srt(content)
    return parse_markdown(content)


# --------------------------------------------------------------------------- #
# FCPXML generation                                                           #
# --------------------------------------------------------------------------- #


# Whitelist of FCPXML-friendly frame rates. Non-whitelisted values are rejected.
# NTSC fractional rates use the 1001 denominator convention. Frame duration
# numerators / denominators per Apple FCPXML conventions:
#   23.976 → 1001/24000s    29.97  → 1001/30000s    59.94 → 1001/60000s
# Integer rates use a 100/<rate*100>s shape so frame counts and integer math line up.
_NTSC_FRAME_DURATIONS = {
    23.976: (1001, 24000),
    29.97:  (1001, 30000),
    59.94:  (1001, 60000),
}
_SUPPORTED_FPS = (23.976, 24.0, 25.0, 29.97, 30.0, 50.0, 59.94, 60.0)


def _validate_fps(fps: float) -> float:
    """Snap user-supplied FPS to nearest supported value, or raise."""
    # Tolerate floats like 23.98 by snapping to 23.976
    for s in _SUPPORTED_FPS:
        if abs(fps - s) < 0.02:
            return s
    raise SystemExit(
        f"bridge: unsupported FPS {fps}. Supported: "
        + ", ".join(str(s) for s in _SUPPORTED_FPS)
    )


def _frame_duration_components(fps: float) -> tuple[int, int]:
    """Return (numerator, denominator) of the FCPXML frame duration for fps."""
    if abs(fps - round(fps)) < 1e-6:
        return (100, int(round(fps)) * 100)
    key = round(fps, 3)
    if key in _NTSC_FRAME_DURATIONS:
        return _NTSC_FRAME_DURATIONS[key]
    # Fallback for unknown fractional rate — best-effort
    return (1001, int(round(fps + 0.024)) * 1000)


def seconds_to_rational(seconds: float, fps: float) -> str:
    """Convert seconds to an FCPXML rational time string like '4800/2400s'.

    The value is snapped to a whole number of frames. The frame count is
    derived from the exact frame duration (e.g. 1001/24000 s for 23.976)
    rather than from the decimal *fps*, because 23.976 is only an
    approximation of 24000/1001 and the difference accumulates to a
    frame-boundary error on long timelines.

    Returns:
        ``"<frames * numerator>/<denominator>s"``, not reduced.
    """
    num, den = _frame_duration_components(fps)
    # One frame lasts num/den seconds, so frames = seconds / (num/den).
    frames = round(seconds * den / num)
    return f"{frames * num}/{den}s"


def frame_duration_rational(fps: float) -> str:
    """Format the duration of one frame at *fps* as an FCPXML time, e.g. '100/2400s'."""
    components = _frame_duration_components(fps)
    return "{}/{}s".format(*components)


def format_name(fps: float, width: int = 1920, height: int = 1080) -> str:
    """Best-effort match for the FFVideoFormat preset name.

    1280x720 and 1920x1080 use the short ``FFVideoFormat<height>p<rate>``
    form. Every other frame size uses
    ``FFVideoFormat<width>x<height>p<rate>`` (e.g.
    ``FFVideoFormat3840x2160p2398``), so the width is no longer dropped for
    UHD/DCI sizes.

    Rates outside the known table fall back to the rounded integer rate.
    """
    fps_label = {
        23.976: "2398",
        24.0: "24",
        25.0: "25",
        29.97: "2997",
        30.0: "30",
        50.0: "50",
        59.94: "5994",
        60.0: "60",
    }.get(round(fps, 3), str(int(round(fps))))
    if (width, height) in ((1280, 720), (1920, 1080)):
        size_label = str(height)
    else:
        size_label = f"{width}x{height}"
    return f"FFVideoFormat{size_label}p{fps_label}"


def fill_durations(beats: list[Beat], fps: float, tail_seconds: float = 5.0) -> int:
    """Give every beat a duration, mutating *beats* in place.

    Each beat is compared with the start of the beat that follows it in the
    list (callers are expected to pass beats ordered by start time):

    * A beat with a positive duration keeps it, unless it would run past the
      next beat's start; then it is cut back to the gap, with a 0.1 s floor.
    * A beat without a duration lasts until the next beat starts, with a
      0.5 s floor; the final beat gets *tail_seconds*.

    *fps* is accepted but not used.

    Returns:
        How many pre-existing durations were cut back.
    """
    cut_back = 0
    upcoming_starts = [b.start_seconds for b in beats[1:]] + [None]
    for beat, upcoming in zip(beats, upcoming_starts):
        existing = beat.duration_seconds
        if existing and existing > 0:
            runs_over = (
                upcoming is not None
                and beat.start_seconds + existing > upcoming + 1e-6
            )
            if runs_over:
                beat.duration_seconds = max(0.1, upcoming - beat.start_seconds)
                cut_back += 1
        elif upcoming is None:
            beat.duration_seconds = tail_seconds
        else:
            beat.duration_seconds = max(0.5, upcoming - beat.start_seconds)
    return cut_back


def build_fcpxml(
    beats: list[Beat],
    fps: float,
    project_name: str,
    event_name: str,
    width: int = 1920,
    height: int = 1080,
) -> str:
    """Render *beats* as an FCPXML 1.9 document with one placeholder gap per beat.

    Beats are sorted by start time and given durations via fill_durations()
    (which mutates the Beat objects). Each beat becomes a named ``<gap>`` on
    the spine carrying a one-frame ``<marker>`` at its head; unnamed filler
    gaps cover any hole before a beat.

    The spine is laid out in whole FRAMES with a running cursor. Offsets and
    durations are therefore always consistent with each other: each item
    starts exactly where the previous one ended. If a beat would start before
    the previous beat has ended (beats closer together than the minimum
    placeholder length), it is pushed back to the cursor. Every beat is at
    least one frame long.

    Args:
        beats: Parsed beats; must be non-empty.
        fps: A supported frame rate (see _validate_fps()).
        project_name: Name for the ``<project>`` element.
        event_name: Name for the ``<event>`` element.
        width: Frame width written to the ``<format>`` element.
        height: Frame height written to the ``<format>`` element.

    Returns:
        The FCPXML document as a string.

    Raises:
        ValueError: if *beats* is empty.
    """
    if not beats:
        raise ValueError("No beats parsed from outline — check your input format.")

    beats = sorted(beats, key=lambda b: b.start_seconds)
    clamped = fill_durations(beats, fps)
    if clamped:
        print(f"bridge: warning — clamped {clamped} overlapping beat duration(s) "
              "(SRT cues that ran past the next cue's start).", file=sys.stderr)

    fmt_id = "r1"
    fmt_name = format_name(fps, width, height)
    num, den = _frame_duration_components(fps)
    frame_dur = f"{num}/{den}s"

    def to_frames(seconds: float) -> int:
        """Nearest whole frame for a time in seconds (one frame = num/den s)."""
        return round(seconds * den / num)

    def rational(frames: int) -> str:
        """FCPXML time string for a whole number of frames."""
        return f"{frames * num}/{den}s"

    # Build spine entries — sequential placeholder gap clips with names + own marker.
    # Each <gap> embeds beat description in its name (truncated) so the clip is
    # self-documenting in Resolve's clip name column. Marker provides storytelling tag.
    # Markers are children of the gap, with start = offset into that gap.
    spine_parts: list[str] = []
    cursor = 0  # first free frame on the spine
    for beat in beats:
        duration_s = beat.duration_seconds or 5.0
        # Never start before the previous item ended.
        start_frame = max(to_frames(beat.start_seconds), cursor)
        # Derive the length from the rounded END position rather than rounding
        # the duration on its own; independent rounding is what lets adjacent
        # items drift apart by a frame at fractional rates.
        end_frame = to_frames(beat.start_seconds + duration_s)
        length = max(1, end_frame - start_frame)

        # Insert leading filler gap if there's a hole before this beat
        if start_frame > cursor:
            spine_parts.append(
                f'      <gap offset="{rational(cursor)}" '
                f'start="0s" duration="{rational(start_frame - cursor)}"/>'
            )

        body_summary = (beat.body or "").replace("\n", " ").strip()
        if body_summary:
            # truncate body to a one-liner so the gap name carries the gist
            if len(body_summary) > 90:
                body_summary = body_summary[:87].rstrip() + "..."
            display = f"{beat.display_name} — {body_summary}"
        else:
            display = beat.display_name
        clip_name = html.escape(display)
        # The colour half of the table entry is looked up but not emitted.
        kind, _color_rgba = BEAT_COLORS.get(beat.color_key, BEAT_COLORS["default"])
        marker_name = html.escape(f"[{kind.upper()}] {beat.display_name}")
        spine_parts.append(
            f'      <gap name="{clip_name}" offset="{rational(start_frame)}" start="0s" '
            f'duration="{rational(length)}">\n'
            f'        <marker start="0s" duration="{frame_dur}" '
            f'value="{marker_name}"/>\n'
            f'      </gap>'
        )
        cursor = start_frame + length

    spine_inner = "\n".join(spine_parts)

    project_name_esc = html.escape(project_name)
    event_name_esc = html.escape(event_name)
    # The sequence ends exactly where the last spine item ends.
    full_seq_duration = rational(cursor)

    fcpxml = f"""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE fcpxml>
<fcpxml version="1.9">
  <resources>
    <format id="{fmt_id}" name="{fmt_name}" frameDuration="{frame_dur}" width="{width}" height="{height}" colorSpace="1-1-1 (Rec. 709)"/>
  </resources>
  <library>
    <event name="{event_name_esc}">
      <project name="{project_name_esc}">
        <sequence format="{fmt_id}" duration="{full_seq_duration}" tcStart="0s" tcFormat="NDF" audioLayout="stereo" audioRate="48k">
          <spine>
{spine_inner}
          </spine>
        </sequence>
      </project>
    </event>
  </library>
</fcpxml>
"""
    return fcpxml


# --------------------------------------------------------------------------- #
# CLI                                                                         #
# --------------------------------------------------------------------------- #


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse an outline and write an FCPXML timeline.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit status:
        0 — timeline written (or beats listed with ``--list``);
        1 — the input was read but no beats could be parsed;
        2 — the input is missing/unreadable or the output could not be written;
        3 — the generated XML failed the post-build parse check.

    Raises:
        SystemExit: from argparse on bad arguments, or from _validate_fps()
            on an unsupported frame rate.
    """
    # Imported up front so the name is bound before the except clause needs it.
    import xml.etree.ElementTree as ET

    parser = argparse.ArgumentParser(
        prog="bridge",
        description=("Convert a documentary outline (markdown or SRT) into a "
                     "DaVinci Resolve-importable FCPXML timeline."),
    )
    parser.add_argument("input", type=Path, help="Outline file (.md, .markdown, .srt, .txt)")
    parser.add_argument("--fps", type=float, default=24.0,
                        help="Frame rate. Supported: 23.976, 24, 25, 29.97, 30, 50, 59.94, 60. Default 24.")
    parser.add_argument("--out", type=Path, default=None,
                        help="Output FCPXML path. Default: <input>.fcpxml")
    parser.add_argument("--project", default="Bridge Output",
                        help="Project name shown in DaVinci. Default 'Bridge Output'.")
    parser.add_argument("--event", default="Documentary Outline",
                        help="Event name. Default 'Documentary Outline'.")
    parser.add_argument("--width", type=int, default=1920, help="Frame width. Default 1920.")
    parser.add_argument("--height", type=int, default=1080, help="Frame height. Default 1080.")
    parser.add_argument("--list", action="store_true",
                        help="Just list parsed beats and exit (debug).")
    args = parser.parse_args(argv)
    args.fps = _validate_fps(args.fps)

    if not args.input.exists():
        print(f"bridge: input not found: {args.input}", file=sys.stderr)
        return 2

    # A directory, a permissions problem or a non-UTF-8 file should produce a
    # one-line error, not a traceback.
    try:
        raw = args.input.read_text(encoding="utf-8")
        beats = parse_outline(args.input)
    except (OSError, UnicodeDecodeError) as exc:
        print(f"bridge: could not read {args.input}: {exc}", file=sys.stderr)
        return 2

    if not beats:
        # Diagnose the most likely cause to save the user from format hunting.
        suffix = args.input.suffix.lower()
        hints = []
        if re.search(r"\d{1,2}:\d{1,2}(?!:)", raw) and not TC_PATTERN.search(raw):
            # MM:SS itself is accepted, so the problem is where it appears.
            hints.append("  → Found MM:SS-style timecodes, but not in a supported position. "
                         "Put them in a heading (## 01:30 — Name) or in brackets ([01:30] NAME: text).")
        if suffix in {".md", ".markdown"} and not re.search(r"^\s{0,3}#{1,6}\s", raw, re.MULTILINE):
            if not re.search(r"^\s*\[", raw, re.MULTILINE):
                hints.append("  → No markdown headings or bracketed-timecode lines found in your .md file.")
        if suffix == ".srt" and "-->" not in raw:
            hints.append("  → SRT file is missing '-->' timecode lines.")
        if not hints:
            # --list cannot help here: it is only reached once beats exist.
            hints.append("  → No recognised timecodes found. Compare your file with the supported formats below.")
        print("bridge: no beats parsed.", file=sys.stderr)
        for h in hints:
            print(h, file=sys.stderr)
        # Show the first line that actually has content; a blank head tells nothing.
        first_line = next(
            (ln for ln in raw.lstrip("\ufeff").splitlines() if ln.strip()),
            "(empty file)",
        )
        print(f"  Sample of input head:\n    {first_line}", file=sys.stderr)
        print("  Supported formats:", file=sys.stderr)
        print("    A) ## HH:MM:SS — Beat name", file=sys.stderr)
        print("    B) [HH:MM:SS] BEAT — description", file=sys.stderr)
        print("    C) standard SRT", file=sys.stderr)
        return 1

    if args.list:
        for b in beats:
            mm = int(b.start_seconds // 60)
            ss = b.start_seconds - mm * 60
            print(f"  {mm:02d}:{ss:05.2f}  [{b.color_key}]  {b.label}")
        return 0

    # Several beats that all start at zero almost always mean the timecodes
    # were misread. Beats are still in document order here, so every beat has
    # to be checked, not just the last one.
    if len(beats) > 1 and all(b.start_seconds == 0 for b in beats):
        print("bridge: warning — all beats parsed with start=0. Check your timecode format.", file=sys.stderr)

    fcpxml = build_fcpxml(
        beats, fps=args.fps,
        project_name=args.project, event_name=args.event,
        width=args.width, height=args.height,
    )

    # Post-build structural validation — catches XML errors before they hit Resolve.
    try:
        ET.fromstring(fcpxml)
    except ET.ParseError as e:
        print(f"bridge: ERROR — generated XML failed parse check: {e}", file=sys.stderr)
        print("  This is a bug in Bridge, not in your outline. Please report.", file=sys.stderr)
        return 3

    out_path = args.out or args.input.with_suffix(".fcpxml")
    try:
        out_path.write_text(fcpxml, encoding="utf-8")
    except OSError as exc:
        print(f"bridge: could not write {out_path}: {exc}", file=sys.stderr)
        return 2
    print(f"bridge: wrote {out_path}  ({len(beats)} beats, {args.fps} fps)")
    print("  → In DaVinci: File → Import → Timeline → pick this file.")
    return 0


# Script entry point: run the CLI and hand its status code to the interpreter.
if __name__ == "__main__":
    raise SystemExit(main())