feat: transcribe_yt_video.py
This commit is contained in:
parent
9983b1dd0a
commit
467e213b2e
24
youtube/transcribe_yt_video.py
Normal file
24
youtube/transcribe_yt_video.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
from youtube_transcript_api import YouTubeTranscriptApi
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Fetch the transcript of one YouTube video and write every caption segment
# to its own numbered text file (cc1.txt, cc2.txt, ...) under `output_dir`.

video_id = "VIDEO_ID_HERE"  # e.g., 'dQw4w9WgXcQ'

# NOTE(review): recent youtube-transcript-api releases appear to replace the
# static get_transcript() with an instance-level fetch() -- confirm against
# the installed version before upgrading the dependency.
transcript = YouTubeTranscriptApi.get_transcript(video_id)

# Create the output directory if it doesn't exist. exist_ok=True avoids the
# check-then-create race of `if not os.path.exists(...): os.makedirs(...)`.
output_dir = "captions"
os.makedirs(output_dir, exist_ok=True)

# Write each caption segment to a separate numbered file (numbering starts at 1).
for i, entry in enumerate(transcript, 1):
    filename = f"cc{i}.txt"
    filepath = os.path.join(output_dir, filename)

    # Explicit UTF-8 so non-ASCII captions are written the same way on every
    # platform, regardless of the locale's default encoding.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(entry['text'])

    # Progress line shows only the first 50 characters of the segment text.
    print(f"Written: {filename} - {entry['text'][:50]}...")

print(f"\nTotal segments: {len(transcript)}")
print(f"Files saved in: {output_dir}/")
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user