import sys, json
from faster_whisper import WhisperModel

# Default input: the recording of one specific pipeline run. Used when no
# path is supplied on the command line.
DEFAULT_VIDEO = r"C:\Users\ferna\proyecto cero\_factory_3d_demo\_runs\flow_2026-05-09T22-22-17-495Z_Comfortsleep\video.mp4"


def build_transcript(segments, language, duration):
    """Collapse transcription segments into the JSON-ready result dict.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and ``text``
            attributes. It is consumed in a single pass, so a one-shot
            iterator is fine.
        language: Language value to report under ``"language"``.
        duration: Duration value to report under ``"duration"``.

    Returns:
        A dict with the keys ``language``, ``duration``, ``text`` (all segment
        texts stripped and joined by single spaces), ``word_count``
        (whitespace-separated tokens in ``text``) and ``segments`` (a list of
        ``{"start", "end", "text"}`` dicts, times rounded to 2 decimals).
    """
    seg_records = []
    texts = []
    for seg in segments:
        # Strip once and reuse for both the per-segment record and the
        # full-text join.
        text = seg.text.strip()
        seg_records.append(
            {"start": round(seg.start, 2), "end": round(seg.end, 2), "text": text}
        )
        texts.append(text)

    # Build the joined text a single time; the word count is derived from it.
    full_text = " ".join(texts).strip()
    return {
        "language": language,
        "duration": duration,
        "text": full_text,
        "word_count": len(full_text.split()),
        "segments": seg_records,
    }


def main(argv=None):
    """Transcribe a video in Spanish and print the result as one JSON line.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. The first argument, when present, is the path of
            the video to transcribe; otherwise ``DEFAULT_VIDEO`` is used.
    """
    args = sys.argv[1:] if argv is None else argv
    video = args[0] if args else DEFAULT_VIDEO

    model = WhisperModel("medium", device="cpu", compute_type="int8")
    segments, info = model.transcribe(
        video, language="es", beam_size=5, vad_filter=True
    )
    out = build_transcript(segments, info.language, info.duration)

    # The JSON is emitted with ensure_ascii=False, so non-ASCII characters go
    # to stdout verbatim. Force UTF-8 so that the output does not depend on the
    # platform's default stdout encoding (which can mis-encode accented text or
    # raise UnicodeEncodeError on characters it cannot represent).
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(encoding="utf-8")
    print(json.dumps(out, ensure_ascii=False))


if __name__ == "__main__":
    main()
