"""Fetch the Korean transcript of one YouTube video and save it under ``captions/``.

Each caption segment is written to ``captions/chunks/cc<N>.txt`` (numbered
from 1), and all segments are also combined, one per line, into
``captions/<video_id>_complete_transcript.txt``.
"""

import os

from youtube_transcript_api import YouTubeTranscriptApi

video_id = "c7bbO_KSLPI"  # Video ID from the provided URL

# Create API instance and list the transcripts available for this video
ytt_api = YouTubeTranscriptApi()
transcript_list = ytt_api.list(video_id)

# Try to get the Korean transcript; if that fails for any reason, retry with
# the auto-generated Korean track specifically. If the retry fails as well,
# its exception propagates and the script stops before touching the disk.
try:
    transcript = transcript_list.find_transcript(['ko']).fetch()
    print("✅ Found Korean transcript")
except Exception as err:
    # `Exception` instead of a bare `except:` so Ctrl-C / SystemExit are not
    # swallowed; the failure reason is reported rather than silently dropped.
    print(f"Korean transcript lookup failed ({type(err).__name__}); "
          "trying auto-generated Korean transcript")
    transcript = transcript_list.find_generated_transcript(['ko']).fetch()
    print("✅ Found Korean auto-generated transcript")

# Keep the segment texts in memory so the combined file can be written
# without re-reading every chunk file from disk.
segment_texts = [entry.text for entry in transcript]

# Create output directory and chunks subdirectory if they don't exist
# (makedirs creates the parent "captions" directory as well).
output_dir = "captions"
chunks_dir = os.path.join(output_dir, "chunks")
os.makedirs(chunks_dir, exist_ok=True)

# Clear chunk files left over from a previous run so stale segments with
# higher numbers than the current transcript do not linger.
for filename in os.listdir(chunks_dir):
    if filename.startswith("cc") and filename.endswith(".txt"):
        os.remove(os.path.join(chunks_dir, filename))

# Write each caption segment to a separate numbered file in the chunks folder
for i, text in enumerate(segment_texts, 1):
    filename = f"cc{i}.txt"
    filepath = os.path.join(chunks_dir, filename)
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(text)
    print(f"Written: {filename} - {text[:50]}...")

# Create complete transcript file with YouTube ID in filename
complete_filename = f"{video_id}_complete_transcript.txt"
complete_filepath = os.path.join(output_dir, complete_filename)

# Combine all segments into a single file, one segment per line; writing them
# back-to-back with no separator would fuse the last word of one caption with
# the first word of the next.
with open(complete_filepath, "w", encoding="utf-8") as f:
    f.write("\n".join(segment_texts))

print(f"\nTotal segments: {len(segment_texts)}")
print(f"Individual files saved in: {chunks_dir}/")
print(f"Complete transcript saved as: {complete_filename}")