#!/usr/bin/env python3
"""
DirMapper v0 — inventario de discos / carpetas para Monark
- Escanea rutas
- Guarda metadatos (ruta, tamaño, mtime)
- (opcional) hash rápido por bloques para detectar duplicados
Salida: CSV

Uso:
  python dir_mapper.py /mnt /media /data --out inventory.csv
  python dir_mapper.py / --max-files 200000 --out inventory.csv
"""
import argparse, os, csv, hashlib, time
from pathlib import Path

def fast_hash(path: Path, block_size=1024*1024, blocks=2):
    """Return a quick SHA-256 fingerprint of a file for duplicate detection.

    Files of at most ``block_size * blocks`` bytes are hashed in full.
    Larger files are only sampled: the first ``block_size`` bytes and the
    last ``block_size`` bytes are hashed, so two large files that differ
    only in the middle produce the same digest. Treat equal digests as
    duplicate *candidates*, not as proof.

    Args:
        path: File to fingerprint.
        block_size: Number of bytes per read / per sampled block.
        blocks: Size threshold, expressed in blocks, above which the file
            is sampled (head + tail) instead of hashed completely.

    Returns:
        The hex digest; ``"0" * 64`` for an empty file; ``""`` when the
        file cannot be stat'ed or read (best effort, never raises for
        I/O problems).
    """
    try:
        size = path.stat().st_size
        if size == 0:
            # Sentinel for empty files; avoids opening them at all.
            return "0" * 64
        digest = hashlib.sha256()
        with path.open("rb") as f:
            if size <= block_size * blocks:
                # Small file: hash every byte. Hashing only the first block
                # here would ignore the rest of the file and make files that
                # differ after the first block look like duplicates.
                for chunk in iter(lambda: f.read(block_size), b""):
                    digest.update(chunk)
            else:
                # Large file: sample the head and the tail block only.
                digest.update(f.read(block_size))
                f.seek(max(0, size - block_size))
                digest.update(f.read(block_size))
        return digest.hexdigest()
    except (OSError, ValueError):
        # Unreadable, vanished, or otherwise inaccessible file.
        return ""

def walk(paths, follow_symlinks=False, max_files=None):
    """Yield one metadata record per file found under the given roots.

    Each root is traversed recursively with ``os.walk``. Roots that do not
    exist are skipped, and files that cannot be stat'ed (for example broken
    symlinks or permission errors) are silently left out.

    Args:
        paths: Iterable of root directories to scan.
        follow_symlinks: Passed to ``os.walk`` as ``followlinks``; when true,
            symlinked directories are descended into.
        max_files: Stop after this many records in total across all roots.
            ``None`` or ``0`` means no limit.

    Yields:
        dict with keys ``"path"`` (str), ``"size"`` (int, bytes) and
        ``"mtime"`` (int, seconds since the epoch).
    """
    count = 0
    for root in paths:
        if not Path(root).exists():
            continue
        for dirpath, _dirnames, filenames in os.walk(root, followlinks=follow_symlinks):
            base = Path(dirpath)
            for fn in filenames:
                p = base / fn
                # Keep the try body to the one call that can fail, so that
                # errors raised by the consumer at the yield point are not
                # swallowed by this handler.
                try:
                    st = p.stat()
                except OSError:
                    continue
                yield {
                    "path": str(p),
                    "size": int(st.st_size),
                    "mtime": int(st.st_mtime),
                }
                count += 1
                if max_files and count >= max_files:
                    return

def main():
    """Command-line entry point: scan the given paths and write a CSV inventory.

    Parses the command line, walks every requested root, and writes one CSV
    row per file with the columns ``path``, ``size``, ``mtime`` and
    ``fast_hash`` (empty unless ``--hash`` is given). Prints a one-line
    summary with the elapsed time when done.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("paths", nargs="+")
    ap.add_argument("--out", default="inventory.csv")
    ap.add_argument("--hash", action="store_true", help="hash rápido (más lento)")
    ap.add_argument("--max-files", type=int, default=None)
    ap.add_argument("--follow-symlinks", action="store_true")
    args = ap.parse_args()

    # Monotonic clock: the elapsed time is unaffected by system clock changes.
    t0 = time.monotonic()
    # errors="backslashreplace": filenames that are not valid Unicode reach
    # us as strings containing lone surrogates, which strict UTF-8 encoding
    # rejects. Escaping them keeps the CSV valid UTF-8 and prevents a single
    # odd filename from aborting the whole inventory.
    with open(args.out, "w", newline="", encoding="utf-8",
              errors="backslashreplace") as f:
        w = csv.DictWriter(f, fieldnames=["path", "size", "mtime", "fast_hash"])
        w.writeheader()
        rows = walk(
            args.paths,
            follow_symlinks=args.follow_symlinks,
            max_files=args.max_files,
        )
        for row in rows:
            row["fast_hash"] = fast_hash(Path(row["path"])) if args.hash else ""
            w.writerow(row)
    print(f"OK: {args.out}  (secs={time.monotonic()-t0:.1f})")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__=="__main__":
    main()
