"""Save each caption segment of a YouTube video to its own text file.

Usage:
    python transcribe_yt_video.py [VIDEO_ID]

The transcript of the video is fetched with ``youtube_transcript_api`` and
every segment's text is written to ``captions/cc<N>.txt`` (numbered from 1).
A progress line is printed for each file, followed by a short summary.
"""

import os
import sys

# Used when no video ID is given on the command line.
VIDEO_ID = "VIDEO_ID_HERE"  # e.g., 'dQw4w9WgXcQ'
OUTPUT_DIR = "captions"


def fetch_transcript(video_id: str):
    """Return the transcript segments for *video_id*.

    The result is whatever ``YouTubeTranscriptApi.get_transcript`` returns;
    the rest of this script only relies on each segment having a ``'text'``
    entry.
    """
    # Imported here rather than at module level so that write_segments()
    # stays usable without the third-party package installed.
    from youtube_transcript_api import YouTubeTranscriptApi

    # NOTE(review): newer youtube-transcript-api releases appear to replace
    # the static get_transcript() with an instance-level fetch() -- confirm
    # against the version this project pins before upgrading.
    return YouTubeTranscriptApi.get_transcript(video_id)


def write_segments(transcript, output_dir: str = OUTPUT_DIR) -> int:
    """Write each caption segment to its own numbered file.

    Args:
        transcript: Iterable of segments, each indexable by ``'text'``.
        output_dir: Directory that receives ``cc1.txt``, ``cc2.txt``, ...;
            it is created if it does not exist yet.

    Returns:
        The number of files written (0 for an empty transcript).
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first and calling makedirs() afterwards.
    os.makedirs(output_dir, exist_ok=True)

    count = 0
    for count, entry in enumerate(transcript, 1):
        filename = f"cc{count}.txt"
        filepath = os.path.join(output_dir, filename)

        with open(filepath, "w", encoding="utf-8") as f:
            f.write(entry['text'])

        # Only the first 50 characters are echoed to keep the log readable.
        print(f"Written: {filename} - {entry['text'][:50]}...")

    return count


def main(argv=None) -> None:
    """Fetch a transcript and save its segments under ``OUTPUT_DIR``.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument, if present, is the video
            ID; otherwise ``VIDEO_ID`` is used.
    """
    args = sys.argv[1:] if argv is None else argv
    video_id = args[0] if args else VIDEO_ID

    transcript = fetch_transcript(video_id)
    total = write_segments(transcript, OUTPUT_DIR)

    print(f"\nTotal segments: {total}")
    print(f"Files saved in: {OUTPUT_DIR}/")


if __name__ == "__main__":
    main()