#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
SIREP · Directory Mapper
- Escanea un directorio y genera:
  - dir_map.json (árbol + metadata)
  - dir_map.md   (vista humana)
Opcional:
  - --hash   calcula sha256 para archivos pequeños/medianos (cap configurable)
  - --exts   filtra por extensiones (ej: .php,.js,.html)
  - --exclude patrones (glob) separados por coma
"""
import os, json, argparse, fnmatch, hashlib, time
from pathlib import Path

# Glob patterns (fnmatch syntax) that serve as the default value of the
# --exclude option: VCS metadata, dependency and virtualenv directories,
# build output, bytecode, logs/temp files and OS-generated clutter.
DEFAULT_EXCLUDE = [
    ".git*",
    "node_modules*",
    "__pycache__*",
    "*.pyc",
    "*.pyo",
    ".venv*",
    "venv*",
    "dist*",
    "build*",
    "*.log",
    "*.tmp",
    ".DS_Store",
    "Thumbs.db",
]

def sha256_file(path: Path, max_bytes: int) -> str | None:
    try:
        size = path.stat().st_size
        if size > max_bytes:
            return None
        h = hashlib.sha256()
        with path.open("rb") as f:
            while True:
                b = f.read(1024 * 1024)
                if not b: break
                h.update(b)
        return h.hexdigest()
    except Exception:
        return None

def should_exclude(rel: str, patterns: list[str]) -> bool:
    """Tell whether a relative path matches any of the exclusion globs.

    Backslashes in *rel* are converted to forward slashes first. Each
    pattern is stripped of surrounding whitespace (blank patterns are
    ignored) and tested with ``fnmatch`` against both the whole normalized
    path and its final component.

    Args:
        rel: Path relative to the scan root.
        patterns: Glob patterns in ``fnmatch`` syntax.

    Returns:
        ``True`` if at least one pattern matches, otherwise ``False``.
    """
    normalized = rel.replace("\\", "/")
    basename = Path(normalized).name
    candidates = (raw.strip() for raw in patterns)
    return any(
        fnmatch.fnmatch(normalized, pat) or fnmatch.fnmatch(basename, pat)
        for pat in candidates
        if pat
    )

def scan(root: Path, exclude: list[str], exts: set[str] | None, do_hash: bool, hash_cap: int):
    """Walk *root* and collect metadata for every file that passes the filters.

    Args:
        root: Directory to scan; it is resolved to an absolute path first.
        exclude: Glob patterns handed to ``should_exclude``. A matching
            directory is pruned (never descended into); a matching file is
            skipped.
        exts: Lower-cased suffixes (with leading dot) to keep. ``None`` or an
            empty set keeps every file.
        do_hash: When true, each item's ``sha256`` is computed with
            ``sha256_file``; otherwise it is ``None``.
        hash_cap: Size cap in bytes forwarded to ``sha256_file``.

    Returns:
        A ``(items, tree)`` tuple. ``items`` is a flat list of dicts with the
        keys ``path`` (relative, ``/``-separated), ``name``, ``ext``,
        ``size``, ``mtime`` (whole seconds) and ``sha256``. ``tree`` nests the
        same entries: directory nodes carry ``type``, ``name`` and a
        ``children`` dict keyed by entry name; file nodes are the item dict
        plus ``"type": "file"``. Only directories that contain at least one
        kept file (directly or below) appear in the tree.
    """
    root = root.resolve()
    items = []

    for dirpath, dirnames, filenames in os.walk(root):
        rel_dir = os.path.relpath(dirpath, root)
        if rel_dir == ".":
            rel_dir = ""

        # Assigning to the slice edits the list os.walk itself iterates, so
        # excluded directories are never entered. Sorting makes the traversal
        # (and therefore the order of `items`) deterministic across runs.
        # os.path.join("", d) == d, so no special case is needed at the root.
        dirnames[:] = sorted(
            d for d in dirnames
            if not should_exclude(os.path.join(rel_dir, d), exclude)
        )

        for name in sorted(filenames):
            rel = os.path.join(rel_dir, name)
            if should_exclude(rel, exclude):
                continue
            p = Path(dirpath) / name
            ext = p.suffix.lower()
            if exts and ext not in exts:
                continue
            try:
                st = p.stat()
            except OSError:
                # Broken symlink, permission problem or a file removed while
                # walking: fall back to the entry's own metadata, and skip the
                # entry if even that is unavailable, rather than aborting the
                # whole scan.
                try:
                    st = p.lstat()
                except OSError:
                    continue
            items.append({
                "path": rel.replace("\\", "/"),
                "name": name,
                "ext": ext,
                "size": st.st_size,
                "mtime": int(st.st_mtime),
                "sha256": sha256_file(p, hash_cap) if do_hash else None,
            })

    # Build the nested tree from the flat list of relative paths.
    tree = {"name": root.name, "path": ".", "type": "dir", "children": {}}
    for it in items:
        *parents, leaf = it["path"].split("/")
        cur = tree["children"]
        for part in parents:
            # Create intermediate directory nodes on first sight.
            if part not in cur:
                cur[part] = {"type": "dir", "name": part, "children": {}}
            cur = cur[part]["children"]
        cur[leaf] = {"type": "file", **it}

    return items, tree

def tree_to_md(node, indent=0):
    """Render a tree node as a list of Markdown bullet lines.

    Args:
        node: A dict node. A node whose ``type`` is ``"dir"`` is rendered
            with its ``name`` followed by its ``children`` (sorted by key),
            each one level deeper; any other node is rendered as a file
            with its ``name`` and ``size`` (0 when absent).
        indent: Nesting depth; every level adds two spaces of indentation.

    Returns:
        The lines for this node and, for directories, all its descendants.
    """
    prefix = "  " * indent

    # Anything that is not a directory is rendered as a single file line.
    if node.get("type") != "dir":
        return [f"{prefix}- 📄 {node.get('name')} ({node.get('size',0)} bytes)"]

    lines = [f"{prefix}- 📁 {node.get('name', '')}"]
    kids = node.get("children", {})
    for key in sorted(kids):
        lines += tree_to_md(kids[key], indent + 1)
    return lines

def main():
    """Parse the command line, scan the root directory and write both maps.

    Writes ``dir_map.json`` (flat item list, tree and run metadata) and
    ``dir_map.md`` (indented bullet view of the tree) into the ``--out``
    directory, creating it if needed, then prints a one-line summary.

    Exits through ``argparse``'s error path (usage message on stderr, exit
    status 2) when ``--root`` is not an existing directory, instead of
    silently producing an empty map.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--root", required=True, help="Directorio raíz a mapear")
    ap.add_argument("--out", default="./out", help="Directorio de salida")
    ap.add_argument("--exclude", default=",".join(DEFAULT_EXCLUDE), help="Patrones glob separados por coma")
    ap.add_argument("--exts", default="", help="Filtrar extensiones: .php,.js,.html (vacío = todo)")
    ap.add_argument("--hash", action="store_true", help="Calcular sha256 (cap por archivo)")
    ap.add_argument("--hash-cap", type=int, default=20*1024*1024, help="Máximo bytes por archivo para hash (default 20MB)")
    args = ap.parse_args()

    root = Path(args.root)
    # os.walk yields nothing for a missing path, so validate up front and
    # before creating the output directory.
    if not root.is_dir():
        ap.error(f"--root no existe o no es un directorio: {root}")

    out_dir = Path(args.out)
    out_dir.mkdir(parents=True, exist_ok=True)

    exclude = [x.strip() for x in args.exclude.split(",") if x.strip()]

    # Normalize extensions to lower case with a leading dot; an empty
    # --exts leaves `exts` as None, which means "no extension filter".
    exts = None
    if args.exts.strip():
        cleaned = (raw.strip().lower() for raw in args.exts.split(","))
        exts = {e if e.startswith(".") else "." + e for e in cleaned if e}

    items, tree = scan(root, exclude, exts, args.hash, args.hash_cap)

    payload = {
        "generated_at": int(time.time()),
        "root": str(root.resolve()),
        "exclude": exclude,
        "exts": sorted(exts) if exts else None,
        "count": len(items),
        "items": items,
        "tree": tree
    }

    (out_dir/"dir_map.json").write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    (out_dir/"dir_map.md").write_text("\n".join(tree_to_md(tree)), encoding="utf-8")

    print(f"[OK] items={len(items)} -> {out_dir/'dir_map.json'}")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
