"""generate_subproceso_pdfs.py — Genera 1 PDF por sub-proceso del flow escalado_formatos
================================================================================

Output: 6 PDFs en `_pdfs/`:
  PDF_DESCARGAR.pdf
  PDF_ANALIZAR.pdf
  PDF_INTERPRETAR.pdf
  PDF_BUSCAR.pdf
  PDF_COMPARADOR.pdf
  PDF_TRELLO_EDITOR.pdf

Fuente (no inventa nada):
  1. `.claude/agents/escalado_formatos/<agent>.md` — spec canónica de la skill
  2. `factory_metadata.json` — operacional + verifier_l1 + supervisor_l2_skill
  3. `_documentacion/PDF_MAESTRO_ESCALADO_VIDEO_v16.pdf` (master flow logic · Flow D; v17 no aplica a este flow)

Cada PDF contiene literalmente lo que la skill hace, para que:
  - El auditor humano (Fer) lo lea y verifique cumplimiento
  - El supervisor de cada estación lo use como audit base
  - El Master de Control de Calidad cruce los 6 para detectar patrones sistémicos

msg 4723 Fer 2026-05-09 verbatim: "cada sub-proceso si puedes crear un PDF donde
defino literalmente lo que tiene que hacer, que es lo que hace la skill, pero
para que el control de calidad lo tenga más claro".

Ejecutar: python generate_subproceso_pdfs.py
"""
from __future__ import annotations
import json
import re
from pathlib import Path
from datetime import datetime
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import cm
from reportlab.lib import colors
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, PageBreak, Preformatted, Table, TableStyle
)

# Directory containing this script; the metadata input and the PDF output
# directory are both resolved relative to it.
ROOT = Path(__file__).parent
# Directory holding the agent .md specs that generate_one() reads.
# NOTE(review): absolute path tied to one Windows user profile — confirm
# whether this should be configurable before running on another machine.
CLAUDE_AGENTS = Path(r'C:\Users\ferna\.claude\agents\escalado_formatos')
META_PATH = ROOT / 'factory_metadata.json'
OUT_DIR = ROOT / '_pdfs'
# Import-time side effect: the output directory is created as soon as this
# module is loaded (no error if it already exists).
OUT_DIR.mkdir(exist_ok=True)

# (station_id, agent .md filename, PDF label) triples, in flow order.
STATION_AGENT_MAP = [
    ('descargar',       'descargador-video-escalado.md',      'DESCARGAR'),
    ('analizar',        'analizador-combo-completo.md',       'ANALIZAR'),
    ('interpretar',     'interpretador-estrategia-busqueda.md','INTERPRETAR'),
    ('buscar',          'buscador-paralelo-fb-gethookd.md',   'BUSCAR'),
    ('comparador',      'comparador-finalistas.md',           'COMPARADOR'),
    ('trello_editor',   'creador-trello-editor-videos.md',    'TRELLO EDITOR'),
]


def parse_md(md_text: str) -> tuple[dict[str, str], list[tuple[str, int, str]]]:
    """Parse a small Markdown subset into front matter plus a flat item list.

    Args:
        md_text: Full text of a Markdown document, optionally starting with a
            ``---`` delimited front-matter block of ``key: value`` lines.

    Returns:
        ``(frontmatter, items)`` where ``frontmatter`` maps each key to its
        value with surrounding whitespace and quotes stripped, and ``items``
        is a list of ``(kind, level, text)`` tuples in document order.

        kind is one of ``'h'``, ``'p'``, ``'code'``, ``'list_item'``,
        ``'blank'``. ``level`` is the heading depth (1-6) for ``'h'`` and 0
        for every other kind.
    """
    heading_re = re.compile(r'^(#{1,6})\s+(.*)')
    bullet_re = re.compile(r'^\s*[-*]\s+')

    frontmatter: dict[str, str] = {}
    body_lines = md_text.splitlines()
    if body_lines and body_lines[0].strip() == '---':
        # Index of the closing '---'; -1 means the block is never closed, in
        # which case the text is treated as having no front matter at all.
        end = next(
            (i for i in range(1, len(body_lines)) if body_lines[i].strip() == '---'),
            -1,
        )
        if end > 0:
            for line in body_lines[1:end]:
                if ':' in line:
                    key, value = line.split(':', 1)
                    frontmatter[key.strip()] = value.strip().strip('"\'')
            body_lines = body_lines[end + 1:]

    items: list[tuple[str, int, str]] = []
    in_code = False
    code_buf: list[str] = []
    for line in body_lines:
        # Fences may be indented (e.g. nested under a list item). Matching
        # only at column 0 would leak the fenced lines into the Markdown
        # parser, turning shell comments such as '# foo' into headings.
        if line.lstrip().startswith('```'):
            if in_code:
                items.append(('code', 0, '\n'.join(code_buf)))
                code_buf = []
            in_code = not in_code
            continue
        if in_code:
            code_buf.append(line)
            continue
        heading = heading_re.match(line)
        if heading:
            items.append(('h', len(heading.group(1)), heading.group(2).strip()))
            continue
        bullet = bullet_re.match(line)
        if bullet:
            items.append(('list_item', 0, line[bullet.end():].strip()))
            continue
        if line.strip() == '':
            items.append(('blank', 0, ''))
            continue
        items.append(('p', 0, line.rstrip()))
    # An unterminated fence still yields whatever code was collected.
    if in_code and code_buf:
        items.append(('code', 0, '\n'.join(code_buf)))
    return frontmatter, items


def build_styles():
    """Return the named ParagraphStyle objects used by every generated PDF.

    Returns:
        dict mapping a style key (``'title'``, ``'subtitle'``, ``'h1'``,
        ``'h2'``, ``'h3'``, ``'body'``, ``'list'``, ``'code'``, ``'meta'``,
        ``'audit_title'``, ``'audit_item'``) to a ParagraphStyle derived from
        ReportLab's sample stylesheet.
    """
    base = getSampleStyleSheet()
    # Colours are written as full six-digit hex. HexColor parses the digits as
    # one integer, so CSS-style shorthand such as '#666' would come out as
    # 0x000666 (dark navy) instead of the intended grey.
    styles = {
        'title': ParagraphStyle('title', parent=base['Title'], fontSize=22, spaceAfter=12, textColor=colors.HexColor('#1a3060')),
        'subtitle': ParagraphStyle('subtitle', parent=base['Heading2'], fontSize=14, spaceAfter=8, textColor=colors.HexColor('#1a3060')),
        'h1': ParagraphStyle('h1', parent=base['Heading1'], fontSize=16, spaceBefore=10, spaceAfter=6, textColor=colors.HexColor('#1a3060')),
        'h2': ParagraphStyle('h2', parent=base['Heading2'], fontSize=13, spaceBefore=8, spaceAfter=4, textColor=colors.HexColor('#2a5090')),
        'h3': ParagraphStyle('h3', parent=base['Heading3'], fontSize=11.5, spaceBefore=6, spaceAfter=3, textColor=colors.HexColor('#2a5090')),
        'body': ParagraphStyle('body', parent=base['BodyText'], fontSize=10, leading=13, spaceAfter=4),
        'list': ParagraphStyle('list', parent=base['BodyText'], fontSize=10, leading=13, leftIndent=18, spaceAfter=2, bulletIndent=6),
        'code': ParagraphStyle('code', parent=base['Code'], fontSize=8.5, leading=11, leftIndent=10, backColor=colors.HexColor('#f1f3f6'), textColor=colors.HexColor('#222222')),
        'meta': ParagraphStyle('meta', parent=base['BodyText'], fontSize=9, leading=11, textColor=colors.HexColor('#666666')),
        'audit_title': ParagraphStyle('audit_title', parent=base['Heading2'], fontSize=14, textColor=colors.HexColor('#a52a2a'), spaceBefore=12, spaceAfter=6),
        'audit_item': ParagraphStyle('audit_item', parent=base['BodyText'], fontSize=10, leading=13, leftIndent=18, spaceAfter=2),
    }
    return styles


def md_inline_to_html(s: str) -> str:
    """Convert inline Markdown to the XML markup used by ReportLab Paragraph.

    Handles ``**bold**``, ``*italic*`` and `` `code` `` spans after escaping
    ``&``, ``<`` and ``>``. Text inside a code span is emitted literally:
    asterisks there are not treated as emphasis markers, so inputs such as
    `` `**kwargs` `` or `` `*.md` `` cannot produce mis-nested tags.

    Args:
        s: One line of Markdown text.

    Returns:
        The line as Paragraph-compatible markup.
    """
    # NUL delimits the placeholders below, so it must not survive from input.
    s = s.replace('\x00', '')
    s = s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    code_spans: list[str] = []

    def _stash(match: re.Match) -> str:
        # Park the rendered code span and leave an asterisk-free placeholder,
        # so the emphasis passes cannot reach into it.
        code_spans.append(f'<font face="Courier" size="9">{match.group(1)}</font>')
        return f'\x00{len(code_spans) - 1}\x00'

    s = re.sub(r'`(.+?)`', _stash, s)
    s = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', s)
    s = re.sub(r'\*(.+?)\*', r'<i>\1</i>', s)
    # Emphasis may still wrap a whole code span (e.g. **`name`**), because the
    # placeholder sits inside the matched text and is restored afterwards.
    return re.sub(r'\x00(\d+)\x00', lambda m: code_spans[int(m.group(1))], s)


def render_md_to_flowables(items: list[tuple[str, int, str]], styles: dict) -> list:
    """Turn parsed Markdown items into a list of ReportLab flowables.

    Args:
        items: ``(kind, level, text)`` tuples as produced by ``parse_md``.
        styles: Style dict providing the keys ``'h1'``, ``'h2'``, ``'h3'``,
            ``'body'``, ``'list'`` and ``'code'``.

    Returns:
        Flowables in document order. Items of an unrecognised kind are
        skipped.
    """
    heading_styles = {1: 'h1', 2: 'h2', 3: 'h3'}
    flowables = []
    for kind, level, text in items:
        if kind == 'blank':
            flowables.append(Spacer(1, 3))
        elif kind == 'code':
            # Code keeps its whitespace verbatim and gets a small gap after it.
            flowables += [Preformatted(text, styles['code']), Spacer(1, 4)]
        elif kind == 'list_item':
            bullet_markup = '• ' + md_inline_to_html(text)
            flowables.append(Paragraph(bullet_markup, styles['list']))
        elif kind in ('h', 'p'):
            # Headings deeper than level 3 reuse the 'h3' style.
            style_name = heading_styles.get(level, 'h3') if kind == 'h' else 'body'
            flowables.append(Paragraph(md_inline_to_html(text), styles[style_name]))
    return flowables


def build_audit_checklist(station_id: str, station_meta: dict, agent_meta: dict) -> list:
    """Build the checklist lines for the final audit section of a station PDF.

    Args:
        station_id: Station identifier, interpolated into the first check.
        station_meta: Station-level metadata; its ``supervisor_l2_skill``
            takes precedence when truthy.
        agent_meta: Agent-level metadata; consulted for ``verifier_l1``,
            ``cache_lookup_policy`` and the fallback ``supervisor_l2_skill``.

    Returns:
        List of Paragraph-markup strings, one per check, in display order.
    """
    supervisor_skill = (
        station_meta.get('supervisor_l2_skill')
        or agent_meta.get('supervisor_l2_skill')
    )

    checklist = [f"El output existe en la sección caja.{station_id} y NO está vacío."]

    # Optional checks: each appears only when the metadata declares it.
    if 'verifier_l1' in agent_meta:
        verifier = agent_meta['verifier_l1']
        checklist.append(
            f"Verifier L1 determinista pasa: <font face='Courier' size='9'>{verifier}</font>."
        )
    if supervisor_skill:
        checklist.append(f"Supervisor L2 olfato (Haiku): {supervisor_skill}.")
    if 'cache_lookup_policy' in agent_meta:
        checklist.append(
            "Cache lookup ejecutado ANTES del trabajo pesado (REGLA cache, ahorro tokens)."
        )

    # Checks that apply to every station.
    checklist += [
        "REGLAs duras del agente .md NO violadas (ver sección 'Restricciones' del agente).",
        "Si paralelo: el shared_state del cluster fue consultado antes de actuar y actualizado después.",
        "Default Opus 4.7 obligatorio (msg 4658) — sin fallback Haiku para razonamiento.",
        "Si BREAK → escalación L3 Opus + escribir caso al CBR del supervisor.",
    ]
    return checklist


def generate_one(station_id: str, agent_md_filename: str, label: str, master_pdf_ref: str, full_metadata: dict):
    """Build the PDF for one sub-process station and return its path.

    The PDF contains a header with a metadata table, the agent's .md spec
    rendered as flowables, an audit checklist on a new page, the escalation
    cascade, and a note referring to the master PDF.

    Args:
        station_id: Station identifier looked up in ``full_metadata['stations']``.
        agent_md_filename: File name of the agent spec inside ``CLAUDE_AGENTS``.
        label: Human-readable station label; spaces become underscores in the
            output file name ``PDF_<label>.pdf``.
        master_pdf_ref: Text identifying the master flow PDF, shown verbatim.
        full_metadata: Parsed ``factory_metadata.json``; must contain a
            ``'stations'`` list of dicts with an ``'id'`` key.

    Returns:
        Path of the written PDF inside ``OUT_DIR``.

    Raises:
        OSError: If the agent .md file cannot be read.
        KeyError: If ``full_metadata`` has no ``'stations'`` key.
    """
    # Local import: only needed to escape metadata values placed in Paragraphs.
    from xml.sax.saxutils import escape

    md_path = CLAUDE_AGENTS / agent_md_filename
    md_text = md_path.read_text(encoding='utf-8')
    _, items = parse_md(md_text)

    # Find the station in the metadata; an unknown station degrades to empty
    # dicts so the PDF is still produced with '—' placeholders.
    station = next((s for s in full_metadata['stations'] if s['id'] == station_id), {})
    agent_meta = (station.get('agents') or [{}])[0]
    # Same precedence as build_audit_checklist: station-level value first,
    # then the agent-level one.
    supervisor_l2 = (
        station.get('supervisor_l2_skill')
        or agent_meta.get('supervisor_l2_skill')
        or '—'
    )

    out_pdf = OUT_DIR / f'PDF_{label.replace(" ", "_")}.pdf'
    doc = SimpleDocTemplate(
        str(out_pdf),
        pagesize=A4,
        leftMargin=2*cm, rightMargin=2*cm, topMargin=2*cm, bottomMargin=2*cm,
        title=f'PDF Sub-Proceso · {label} · Factory v4',
        author='Factory v4 · escalado_formatos',
    )
    styles = build_styles()
    flow = []

    # Header block
    flow.append(Paragraph(f'PDF SUB-PROCESO · {label}', styles['title']))
    flow.append(Paragraph('Flow: <b>Escalado Formatos · 5 similares + 1 arriesgado</b> (factory_v4.html)', styles['subtitle']))
    flow.append(Spacer(1, 6))

    meta_rows = [
        ['Station ID', station_id],
        ['Skill canónica', agent_meta.get('canonical_agent_name', '—')],
        ['Agent .md', agent_meta.get('canonical_agent_md', f'.claude/agents/escalado_formatos/{agent_md_filename}')],
        ['Verifier L1', agent_meta.get('verifier_l1', '—')],
        ['Supervisor L2 skill', supervisor_l2],
        ['Modelo default', 'claude-opus-4-7 (msg 4658)'],
        ['Master PDF flow', master_pdf_ref],
        ['Generado', datetime.now().strftime('%Y-%m-%d %H:%M')],
    ]
    # Plain-string cells are drawn on a single line and overflow the column;
    # wrapping the value column in Paragraphs lets long paths and the master
    # reference flow inside the 11 cm cell.
    cell_style = ParagraphStyle('meta_cell', parent=styles['body'], fontSize=9, leading=11, spaceAfter=0)
    table_data = [
        [name, Paragraph(escape(str(value)), cell_style)]
        for name, value in meta_rows
    ]
    t = Table(table_data, colWidths=[5*cm, 11*cm])
    t.setStyle(TableStyle([
        ('FONTSIZE', (0,0), (-1,-1), 9),
        ('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#cccccc')),
        ('BACKGROUND', (0,0), (0,-1), colors.HexColor('#f1f3f6')),
        ('TEXTCOLOR', (0,0), (0,-1), colors.HexColor('#1a3060')),
        ('VALIGN', (0,0), (-1,-1), 'TOP'),
    ]))
    flow.append(t)
    flow.append(Spacer(1, 12))

    # Body: the agent's canonical spec
    flow.append(Paragraph('Especificación canónica de la skill (verbatim del agente .md)', styles['h1']))
    flow.append(Paragraph(
        'Esto es lo que el agente DEBE hacer literalmente. El supervisor de control de calidad valida cada output contra esta spec.',
        styles['meta']))
    flow.append(Spacer(1, 6))
    flow.extend(render_md_to_flowables(items, styles))

    # Audit block
    flow.append(PageBreak())
    flow.append(Paragraph('CHECKLIST DE AUDITORÍA · Supervisor de Control de Calidad', styles['audit_title']))
    flow.append(Paragraph(
        'El supervisor de este sub-proceso recorre estos checks por cada ejecución. Si CUALQUIERA falla → BREAK con razón concreta y el agente reintenta con el feedback inline (REGLA #66, max_retries=5).',
        styles['meta']))
    flow.append(Spacer(1, 6))
    # Pass the station dict (not agent_meta twice) so a station-level
    # supervisor_l2_skill is honoured by the checklist.
    for i, item in enumerate(build_audit_checklist(station_id, station, agent_meta), 1):
        flow.append(Paragraph(f'<b>{i}.</b> {item}', styles['audit_item']))

    # Escalation block
    flow.append(Spacer(1, 12))
    flow.append(Paragraph('Cascada de escalación (Andon Tier · REGLA #146)', styles['h2']))
    chain = [
        'L1 verifier determinista (Python · 0 tokens) — runs SIEMPRE.',
        'L2 supervisor Haiku olfato — runs SIEMPRE en paralelo, captura lo nuevo.',
        'L3 supervisor sub-proceso Opus — runs si L1 FAIL o L2 FLAG. Resuelve y añade al CBR del sub-proceso.',
        'L4 Master Mejora Continua Opus — runs si ≥3 L3 escaladas en 24h o cron diario. Cross-pattern detection.',
        'L5 Fer humano — runs solo si L4 propone REGLA nueva o caso sin precedente.',
    ]
    for c in chain:
        flow.append(Paragraph('• ' + c, styles['list']))

    # Reference to the master PDF
    flow.append(Spacer(1, 12))
    flow.append(Paragraph('Referencia al Master PDF', styles['h2']))
    flow.append(Paragraph(
        f'Este sub-proceso opera dentro del flow definido en <b>{master_pdf_ref}</b>. Cualquier conflicto entre este PDF y el master → manda el master.',
        styles['body']))
    flow.append(Paragraph(
        'Si Fer modifica el master (ej: añade flag "saturated", cambia "5+1" a "4+2", etc.), el cambio se propaga a este PDF en la próxima generación. Este PDF NO es source-of-truth — es destilado del agent.md + factory_metadata.json + master.',
        styles['body']))

    doc.build(flow)
    return out_pdf


def main():
    """Generate one PDF per entry in STATION_AGENT_MAP and print a summary.

    Returns:
        List of ``(label, pdf_path, size_in_bytes)`` tuples, one per PDF
        written, in flow order. Any failure is reported on stdout and then
        re-raised, aborting the run.
    """
    metadata = json.loads(META_PATH.read_text(encoding='utf-8'))
    # msg 4743 Fer 2026-05-09: factory_v4 runs "escalado de formato" = Flow D v16
    # ("winning concept, 5 similar + 1 out of the box").
    # v17 rewrites ONLY Flow D-body — it does NOT apply to this flow.
    master_pdf_ref = '_documentacion/PDF_MAESTRO_ESCALADO_VIDEO_v16.pdf · Flow D "Concepto ganador - 5 similares + 1 fuera de caja"'
    separator = '-' * 60

    print(f'Generating 6 sub-proceso PDFs to {OUT_DIR}')
    print(f'Master flow PDF: {master_pdf_ref}')
    print(separator)

    results = []
    for station_id, agent_md, label in STATION_AGENT_MAP:
        try:
            pdf_path = generate_one(station_id, agent_md, label, master_pdf_ref, metadata)
            size_bytes = pdf_path.stat().st_size
            print(f'  ✓ {pdf_path.name} · {size_bytes//1024}KB')
            results.append((label, pdf_path, size_bytes))
        except Exception as exc:
            # Name the failing station, then let the error propagate.
            print(f'  ✗ {label}: {exc}')
            raise

    print(separator)
    total_kb = sum(size for _, _, size in results) // 1024
    print(f'Total: {len(results)} PDFs · {total_kb}KB')
    return results


# Script entry point.
if __name__ == '__main__':
    import sys
    # Best effort: switch stdout to UTF-8 before main() prints. Presumably this
    # is so the non-ASCII status characters (✓, ✗, ·) can be written on
    # consoles whose default encoding is not UTF-8 — TODO confirm.
    # Any failure is deliberately ignored and stdout is left as it was.
    try:
        sys.stdout.reconfigure(encoding='utf-8')
    except Exception:
        pass
    main()