ejercicios/utilities/subtitle_generation_vtt.py

48 lines
1.3 KiB
Python

import os.path
import torch
import whisper
def cuda_works():
    """Print whether CUDA is available and, if so, the name of device 0.

    Diagnostic helper. The first printed line is the boolean reported by
    ``torch.cuda.is_available()``; the name of GPU 0 is printed on a second
    line only when that boolean is true.
    """
    has_cuda = torch.cuda.is_available()
    print(has_cuda)
    if not has_cuda:
        return
    print(torch.cuda.get_device_name(0))
def format_time_vtt(seconds):
    """Format a time offset in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

    Args:
        seconds: Offset from the start of the media, in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        The timestamp string, with hours zero-padded to at least two digits
        and milliseconds to exactly three.
    """
    # Round to whole milliseconds first and do the rest in integers.
    # Truncating the float fraction instead loses a millisecond on values
    # such as 4.56 (-> 559) and cannot carry 0.9996 s into the next second.
    total_ms = max(0, round(seconds * 1000))
    total_s, ms = divmod(total_ms, 1000)
    total_min, s = divmod(total_s, 60)
    h, m = divmod(total_min, 60)
    # WebVTT uses a dot before the milliseconds.
    return f"{h:02d}:{m:02d}:{s:02d}.{ms:03d}"
def generate_subtitles_vtt(
    input_path,
    output_path,
    *,
    model_name="medium",
    language="es",
):
    """Transcribe a media file with Whisper and write ``subtitles.vtt``.

    Args:
        input_path: Path of the audio/video file passed to Whisper.
        output_path: Directory that receives ``subtitles.vtt``. It is created
            if missing; an empty string means the current working directory.
        model_name: Name of the Whisper model to load. Defaults to
            ``"medium"``.
        language: Language code passed to ``transcribe``. Defaults to
            ``"es"``.
    """
    # Prepare the destination before the expensive transcription so that a
    # bad output location fails fast instead of after the model has run.
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    master_output = os.path.join(output_path, "subtitles.vtt")

    model = whisper.load_model(model_name)
    result = model.transcribe(input_path, language=language)

    with open(master_output, "w", encoding="utf-8") as vtt_file:
        vtt_file.write("WEBVTT\n\n")  # mandatory WebVTT header
        for segment in result["segments"]:
            start = format_time_vtt(segment["start"])
            end = format_time_vtt(segment["end"])
            # Trim surrounding whitespace from the segment text and defuse
            # "-->", which is not allowed inside WebVTT cue text.
            text = segment["text"].strip().replace("-->", "->")
            vtt_file.write(f"{start} --> {end}\n{text}\n\n")

    print(f"Subtítulo VTT generado en: {master_output}")
if __name__ == "__main__":
    # Fill in both values before running: the media file to transcribe and
    # the directory in which subtitles.vtt is written.
    video_path = r""
    vtt_dir = r""

    # Optional GPU sanity check before transcribing: cuda_works()
    generate_subtitles_vtt(input_path=video_path, output_path=vtt_dir)