#!/usr/bin/env python3
"""
ingest.py — card backup + checksum (the wedding-day safety net).

Copies every file from a source (camera card / SD / SSD) to a destination,
generates an MD5 manifest, and verifies the copy reads back byte-for-byte.

Then optionally mirrors to a second destination (the "never lose the card"
rule). Exits 0 only if every file is verified on every destination.

**Do not format the card until this exits 0.**

Usage:
  python3 ingest.py /Volumes/A_CAM_SD ./02_CARD_BACKUPS/A_CAM
  python3 ingest.py /Volumes/A_CAM_SD ./02_CARD_BACKUPS/A_CAM --mirror /Volumes/BACKUP/A_CAM
  python3 ingest.py /Volumes/A_CAM_SD ./02_CARD_BACKUPS/A_CAM --verify  # only verify existing

Output:
  <destination>/ingest-manifest.md   — markdown manifest
  <destination>/ingest-manifest.csv  — path,bytes,md5,error rows

Python 3.9+. Standard library only. Uses MD5 (not cryptographic — fast
fingerprint suitable for ingest verification).
"""
from __future__ import annotations

import argparse
import csv
import hashlib
import shutil
import sys
import time
from dataclasses import dataclass
from pathlib import Path


CHUNK = 4 * 1024 * 1024  # read size in bytes (4 MiB) used when streaming a file through the hash


@dataclass
class FileResult:
    """Outcome of copying and verifying (or re-verifying) a single file."""
    # File path relative to the source/destination root, as a string.
    rel_path: str
    # Number of bytes hashed; 0 when the file was missing or processing raised.
    bytes: int
    # Hex MD5 digest; empty when no digest could be computed.
    md5: str
    # Seconds spent on this file (0.0 for verify-only results).
    elapsed: float
    # Failure description; the empty string means the file verified cleanly.
    error: str = ""


def md5_of(path: Path) -> tuple[str, int]:
    h = hashlib.md5()
    total = 0
    with path.open("rb") as f:
        while True:
            chunk = f.read(CHUNK)
            if not chunk:
                break
            h.update(chunk)
            total += len(chunk)
    return h.hexdigest(), total


def human_bytes(n: int) -> str:
    """Format a byte count with one decimal place and a binary-scaled unit.

    The value is divided by 1024 until it drops below 1024 or the units up
    to TB are exhausted; anything larger is reported in PB.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    size = n
    idx = 0
    while idx < len(units):
        if size < 1024:
            return f"{size:.1f} {units[idx]}"
        size = size / 1024
        idx += 1
    return f"{size:.1f} PB"


def collect(source: Path) -> list[Path]:
    """Return every regular file below *source*, sorted by path.

    Files whose name starts with ``._`` are left out.
    """
    found = []
    for entry in source.rglob("*"):
        if not entry.is_file():
            continue
        if entry.name.startswith("._"):
            continue
        found.append(entry)
    found.sort()
    return found


def copy_one(src: Path, dest: Path) -> None:
    """Copy *src* to *dest*, creating the destination's parent folders first.

    The copy is made with ``shutil.copy2``; an existing *dest* is overwritten.
    """
    target_dir = dest.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dest)


def ingest_one(src_file: Path, source_root: Path, dest_root: Path) -> FileResult:
    """Copy one source file under *dest_root* and verify the copy.

    The destination path mirrors the file's location relative to
    *source_root*. After copying, the destination and the source are each
    re-read, and their MD5 digests and byte counts are compared.

    Args:
        src_file: File to copy; must be located under *source_root*.
        source_root: Root folder the relative path is computed against.
        dest_root: Root folder the relative path is recreated under.

    Returns:
        A ``FileResult`` whose ``error`` is empty on success. On a mismatch
        the result carries the destination's digest and size plus a
        description of the mismatch. If copying or hashing raised, ``bytes``
        is 0, ``md5`` is empty and ``error`` describes the exception.

    Raises:
        ValueError: if *src_file* is not located under *source_root*.
    """
    rel = src_file.relative_to(source_root)
    out = dest_root / rel
    t0 = time.monotonic()
    try:
        copy_one(src_file, out)
        # Hash both sides: the destination to prove what was written, the
        # source to have something to compare it against.
        out_md5, out_bytes = md5_of(out)
        src_md5, src_bytes = md5_of(src_file)
    except Exception as e:
        # Deliberate per-file boundary: one bad file must not abort the rest
        # of the card. Exceptions raised without a message stringify to "",
        # which every caller reads as success, so fall back to repr().
        return FileResult(str(rel), 0, "", time.monotonic() - t0, str(e) or repr(e))

    error = ""
    if out_md5 != src_md5:
        error = f"CHECKSUM MISMATCH src={src_md5} dest={out_md5}"
    elif out_bytes != src_bytes:
        error = f"BYTE COUNT MISMATCH src={src_bytes} dest={out_bytes}"
    return FileResult(str(rel), out_bytes, out_md5, time.monotonic() - t0, error)


def verify_only(dest_root: Path) -> list[FileResult]:
    """Re-hash every file listed in the existing manifest under *dest_root*.

    Reads ``ingest-manifest.csv`` and produces one ``FileResult`` per row.
    A row is reported as an error when:

    * the file no longer exists (``MISSING``),
    * the manifest itself recorded a failure for it at ingest time
      (``FAILED AT INGEST``), or
    * its current MD5 differs from the recorded one (``CHECKSUM DRIFT``).

    Exits the process with status 1 if the manifest is missing or lists no
    files, because in either case nothing can be verified.

    Returns:
        The per-file results, in manifest order.
    """
    manifest_csv = dest_root / "ingest-manifest.csv"
    if not manifest_csv.is_file():
        print(f"ingest: no manifest at {manifest_csv}", file=sys.stderr)
        sys.exit(1)
    results: list[FileResult] = []
    # newline="" lets the csv module do its own line-ending handling, which
    # it needs to parse quoted fields containing newlines correctly.
    with manifest_csv.open(newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            rel = row["path"]
            expected_md5 = row["md5"]
            recorded_error = (row.get("error") or "").strip()
            f_path = dest_root / rel
            if not f_path.is_file():
                results.append(FileResult(rel, 0, "", 0.0, "MISSING"))
                continue
            if recorded_error:
                # A row that failed at ingest stores the digest of the bad
                # copy, so re-hashing would "match". Surface the original
                # failure instead of reporting the file as verified.
                results.append(FileResult(rel, 0, "", 0.0,
                                          f"FAILED AT INGEST: {recorded_error}"))
                continue
            actual, n = md5_of(f_path)
            error = "" if actual == expected_md5 else f"CHECKSUM DRIFT was={expected_md5} now={actual}"
            results.append(FileResult(rel, n, actual, 0.0, error))
    if not results:
        # A normal ingest never writes a manifest with zero rows, so an empty
        # one is truncated or corrupt and must not pass as "all verified".
        print(f"ingest: manifest lists no files: {manifest_csv}", file=sys.stderr)
        sys.exit(1)
    return results


def write_manifest(dest_root: Path, results: list[FileResult], source: Path, mirror: Path | None) -> None:
    """Write the CSV and markdown manifests for one destination.

    ``ingest-manifest.csv`` gets one row per result, failures included.
    ``ingest-manifest.md`` gets a summary header, an error section when any
    result failed, and a table of the files that verified cleanly.

    Args:
        dest_root: Folder both manifest files are written into.
        results: Per-file outcomes to record.
        source: Source folder, shown in the markdown header.
        mirror: Other destination to mention in the header, if any.
    """
    # Machine-readable manifest: every result, in order.
    with (dest_root / "ingest-manifest.csv").open("w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow(["path", "bytes", "md5", "error"])
        writer.writerows([r.rel_path, r.bytes, r.md5, r.error] for r in results)

    failed = [r for r in results if r.error]
    passed = [r for r in results if not r.error]
    verified_bytes = sum(r.bytes for r in passed)

    # Human-readable manifest, assembled line by line.
    lines = [
        "# Ingest manifest",
        "",
        f"- Source: `{source}`",
        f"- Destination: `{dest_root}`",
    ]
    if mirror:
        lines.append(f"- Mirror: `{mirror}`")
    lines.extend([
        f"- Files: {len(results)}",
        f"- Total: {human_bytes(verified_bytes)}",
        f"- Errors: {len(failed)}",
        f"- Verified at: {time.strftime('%Y-%m-%d %H:%M:%S')}",
        "",
    ])
    if failed:
        lines.extend(["## ⚠ ERRORS — do not format the card", ""])
        lines.extend(f"- `{r.rel_path}` — {r.error}" for r in failed)
        lines.append("")
    lines.extend(["## Files (verified)", "", "| File | Size | MD5 |", "|---|---|---|"])
    lines.extend(
        f"| `{r.rel_path}` | {human_bytes(r.bytes)} | `{r.md5[:12]}…` |" for r in passed
    )
    (dest_root / "ingest-manifest.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


def _verify_destination(dest_root: Path, label: str = "") -> bool:
    """Re-check one destination against its manifest; return True if clean."""
    results = verify_only(dest_root)
    errors = [r for r in results if r.error]
    ok = len(results) - len(errors)
    print(f"ingest verify{label}: {ok}/{len(results)} files match manifest")
    if not results:
        # "0/0 files match" proves nothing; never report it as success.
        print(f"ingest: manifest in {dest_root} lists no files; nothing verified",
              file=sys.stderr)
        return False
    if errors:
        print("ERRORS:", file=sys.stderr)
        for r in errors:
            print(f"  {r.rel_path}: {r.error}", file=sys.stderr)
        return False
    return True


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: copy + verify a card, or re-verify a backup.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        0 when every file verified on every destination; 1 when there was
        nothing to ingest or any file failed; 2 when the arguments are
        unusable (source is not a directory, or the mirror is the same
        folder as the primary destination).
    """
    # __doc__ is None under ``python -OO``; fall back to an empty description.
    p = argparse.ArgumentParser(prog="ingest", description=(__doc__ or "").split("\n\n")[0])
    p.add_argument("source", type=Path, help="Source folder (camera card / SD)")
    p.add_argument("dest", type=Path, help="Primary destination folder")
    p.add_argument("--mirror", type=Path, default=None,
                   help="Second destination (recommended for cards before format)")
    p.add_argument("--verify", action="store_true",
                   help="Don't copy — re-hash dest against existing manifest")
    args = p.parse_args(argv)

    if args.verify:
        verified = _verify_destination(args.dest)
        if args.mirror:
            # Check the mirror even if the primary failed, so one run
            # reports every problem on every destination.
            verified = _verify_destination(args.mirror, " (mirror)") and verified
        return 0 if verified else 1

    if not args.source.is_dir():
        print(f"ingest: source not a directory: {args.source}", file=sys.stderr)
        return 2

    # A mirror that resolves to the primary folder is not a second copy.
    if args.mirror and args.mirror.resolve() == args.dest.resolve():
        print(f"ingest: mirror must differ from primary destination: {args.mirror}",
              file=sys.stderr)
        return 2

    files = collect(args.source)
    if not files:
        print(f"ingest: no files found in {args.source}", file=sys.stderr)
        return 1

    total_bytes = sum(f.stat().st_size for f in files)
    print(f"ingest: {len(files)} file(s), {human_bytes(total_bytes)} from {args.source}")
    print(f"  → primary: {args.dest}")
    if args.mirror:
        print(f"  → mirror:  {args.mirror}")

    args.dest.mkdir(parents=True, exist_ok=True)
    if args.mirror:
        args.mirror.mkdir(parents=True, exist_ok=True)

    primary_results: list[FileResult] = []
    mirror_results: list[FileResult] = []
    for i, src_file in enumerate(files, 1):
        r = ingest_one(src_file, args.source, args.dest)
        primary_results.append(r)
        tag = "FAIL" if r.error else "ok"
        # elapsed covers the copy plus both verification reads.
        rate = (r.bytes / r.elapsed / 1024 / 1024) if r.elapsed > 0 else 0
        print(f"  [{i}/{len(files)}] {tag:>4} {r.rel_path}  ({human_bytes(r.bytes)} @ {rate:.0f} MB/s)")
        # The mirror copy is only attempted for files whose primary copy verified.
        if args.mirror and not r.error:
            mr = ingest_one(src_file, args.source, args.mirror)
            mirror_results.append(mr)
            mtag = "FAIL" if mr.error else "ok"
            print(f"            mirror {mtag} {mr.rel_path}")

    # Manifests are written even when there were failures, so they are on record.
    write_manifest(args.dest, primary_results, args.source, args.mirror)
    if args.mirror:
        write_manifest(args.mirror, mirror_results, args.source, args.dest)

    errors = [r for r in primary_results if r.error] + [r for r in mirror_results if r.error]
    if errors:
        print(f"\ningest: {len(errors)} error(s). DO NOT FORMAT THE CARD.", file=sys.stderr)
        return 1

    print(f"\ningest: all {len(files)} file(s) verified on all destinations.")
    print(f"  Manifests: {args.dest}/ingest-manifest.md")
    if args.mirror:
        print(f"             {args.mirror}/ingest-manifest.md")
    print("  ✓ Safe to reformat the card.")
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
