bytelyst-devops-tools/youtube/summarize_with_perplexity.py
Saravana Dhandapani d462ed26c9 Add YouTube transcript summarization with Perplexity API
- Add summarize_with_perplexity.py script for generating summaries using Perplexity API
- Add config.json for configurable API settings, model parameters, and file paths
- Support for custom prompts and transcript files
- Configurable logging and output formatting
- Integration with existing YouTube transcription workflow
2026-02-14 15:04:32 -08:00

202 lines
7.4 KiB
Python
Executable File

#!/usr/bin/env python3
"""
YouTube Transcript Summarizer using Perplexity API
Generates comprehensive summaries of YouTube transcripts using custom prompts
"""
import os
import sys
import json
import requests
from pathlib import Path
class PerplexitySummarizer:
    """Generate transcript summaries through the Perplexity API.

    All settings (API key, endpoint URL, model, generation parameters,
    logging verbosity and default file names) are read from a JSON config
    file located in the same directory as this script.
    """

    def __init__(self, config_file="config.json"):
        """Load the configuration and prepare the HTTP request headers.

        Args:
            config_file: Name of the JSON config file, resolved relative to
                the directory that contains this script.
        """
        self.config = self.load_config(config_file)
        self.api_key = self.config["api"]["key"]
        self.base_url = self.config["api"]["base_url"]
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def load_config(self, config_file):
        """Load configuration from a JSON file next to this script.

        Args:
            config_file: File name (or relative path) of the config file.

        Returns:
            The parsed JSON document.

        Exits the process with status 1 (after printing an error) when the
        file is missing, is not valid JSON, or cannot be read.
        """
        try:
            config_path = Path(__file__).parent / config_file
            with open(config_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            # Report the resolved path: the file is looked up beside the
            # script, not in the current working directory.
            print(f"❌ Error: Config file not found: {config_path}")
            sys.exit(1)
        except json.JSONDecodeError as e:
            print(f"❌ Error: Invalid JSON in config file: {e}")
            sys.exit(1)
        except Exception as e:
            print(f"❌ Error loading config: {e}")
            sys.exit(1)

    @staticmethod
    def _read_text_file(path, label):
        """Return the stripped UTF-8 text of *path*, or None on failure.

        Args:
            path: File to read.
            label: Capitalized kind of file (e.g. "Prompt"), used only to
                build the error messages.
        """
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return f.read().strip()
        except FileNotFoundError:
            print(f"❌ Error: {label} file not found: {path}")
            return None
        except Exception as e:
            # Best effort: any other read/decoding problem is reported and
            # signalled to the caller with None instead of a traceback.
            print(f"❌ Error reading {label.lower()} file: {e}")
            return None

    def load_prompt(self, prompt_file):
        """Load the prompt template from file.

        Returns:
            The file content with surrounding whitespace stripped, or None
            (after printing an error) if the file cannot be read.
        """
        return self._read_text_file(prompt_file, "Prompt")

    def load_transcript(self, transcript_file):
        """Load the transcript content from file.

        Returns:
            The file content with surrounding whitespace stripped, or None
            (after printing an error) if the file cannot be read.
        """
        return self._read_text_file(transcript_file, "Transcript")

    def _build_payload(self, prompt_template, transcript_content):
        """Build the JSON request body for one summarization request."""
        generation = self.config["generation"]
        # The user message is the prompt template followed by the transcript.
        full_prompt = f"{prompt_template}\n\nTRANSCRIPT TO ANALYZE:\n{transcript_content}"
        return {
            "model": self.config["api"]["model"],
            "messages": [
                {"role": "system", "content": self.config["system_prompt"]},
                {"role": "user", "content": full_prompt},
            ],
            "max_tokens": generation["max_tokens"],
            "temperature": generation["temperature"],
            "top_p": generation["top_p"],
            # The reply is parsed as one JSON document, which a streamed
            # reply is not, so streaming is always disabled.
            "stream": False,
        }

    @staticmethod
    def _extract_content(result):
        """Return the first choice's message content, or None if absent.

        Any structural mismatch in *result* (wrong type, no choices, missing
        "message" or "content") yields None rather than an exception.
        """
        try:
            return result["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            return None

    def generate_summary(self, prompt_template, transcript_content):
        """Generate a summary using the Perplexity API.

        Args:
            prompt_template: Instructions placed before the transcript.
            transcript_content: Transcript text to be summarized.

        Returns:
            The summary text from the first returned choice, or None (after
            printing an error) on a non-200 status, a network failure, a
            timeout, or a reply that does not contain any content.
        """
        payload = self._build_payload(prompt_template, transcript_content)
        if self.config["generation"].get("stream"):
            print("⚠️ Warning: streaming responses are not supported; sending stream=false")
        try:
            if self.config["logging"]["verbose"]:
                print("🔄 Sending request to Perplexity API...")
            response = requests.post(
                self.base_url,
                headers=self.headers,
                json=payload,
                timeout=self.config["api"]["timeout"],
            )
            if response.status_code != 200:
                print(f"❌ Error: API request failed with status {response.status_code}")
                print(f"Response: {response.text}")
                return None
            try:
                result = response.json()
            except ValueError as e:
                # A 200 reply whose body is not JSON (e.g. a proxy page).
                print(f"❌ Error: API returned a non-JSON response: {e}")
                return None
            content = self._extract_content(result)
            if content is None:
                print("❌ Error: No response content from API")
            return content
        except requests.exceptions.Timeout:
            print("❌ Error: Request timed out")
            return None
        except requests.exceptions.RequestException as e:
            print(f"❌ Error: Request failed: {e}")
            return None
        except Exception as e:
            print(f"❌ Error: Unexpected error: {e}")
            return None

    def save_summary(self, summary_content, output_file):
        """Save the generated summary to file.

        Args:
            summary_content: Text to write (UTF-8).
            output_file: Destination path; an existing file is overwritten.

        Returns:
            True when the file was written, False (after printing an error)
            otherwise.
        """
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(summary_content)
            print(f"✅ Summary saved to: {output_file}")
            return True
        except Exception as e:
            print(f"❌ Error saving summary: {e}")
            return False
def main():
    """Command-line entry point: summarize one transcript and save the result.

    Usage: ``summarize_with_perplexity.py [transcript_file [prompt_file]]``

    Arguments that are omitted fall back to the ``files`` section of the
    config. Both paths are resolved against the directory containing this
    script, and the summary is written to that directory as
    ``<transcript stem><output_suffix>``. Exits with status 1 on any failure.
    """
    # Optional positional arguments; None means "use the config default".
    cli_args = sys.argv[1:]
    transcript_file = cli_args[0] if len(cli_args) >= 1 else None
    prompt_file = cli_args[1] if len(cli_args) >= 2 else None

    # Initialize summarizer (loads config)
    summarizer = PerplexitySummarizer()
    files_config = summarizer.config["files"]
    if not transcript_file:
        transcript_file = files_config["default_transcript"]
    if not prompt_file:
        prompt_file = files_config["default_prompt"]

    # Resolve against the script directory (absolute inputs stay as given).
    script_dir = Path(__file__).parent
    transcript_path = script_dir / transcript_file
    prompt_path = script_dir / prompt_file

    # Output name is derived from the transcript file name.
    transcript_name = Path(transcript_file).stem
    output_file = script_dir / f"{transcript_name}{files_config['output_suffix']}"

    verbose = summarizer.config["logging"]["verbose"]
    if verbose:
        print("🎯 YouTube Transcript Summarizer")
        print("=" * 50)
        print(f"📄 Transcript: {transcript_path}")
        print(f"📝 Prompt: {prompt_path}")
        print(f"💾 Output: {output_file}")
        print("=" * 50)

    # Load prompt template
    if verbose:
        print("📖 Loading prompt template...")
    prompt_template = summarizer.load_prompt(prompt_path)
    if prompt_template is None:
        # The loader has already printed the reason.
        sys.exit(1)
    if not prompt_template:
        # An empty file loads as "", so report it instead of exiting silently.
        print(f"❌ Error: Prompt file is empty: {prompt_path}")
        sys.exit(1)

    # Load transcript content
    if verbose:
        print("📖 Loading transcript content...")
    transcript_content = summarizer.load_transcript(transcript_path)
    if transcript_content is None:
        # The loader has already printed the reason.
        sys.exit(1)
    if not transcript_content:
        print(f"❌ Error: Transcript file is empty: {transcript_path}")
        sys.exit(1)
    if verbose:
        print(f"📊 Transcript length: {len(transcript_content)} characters")

    # Generate summary
    if verbose:
        print("🤖 Generating summary with Perplexity API...")
    summary = summarizer.generate_summary(prompt_template, transcript_content)
    if not summary:
        print("❌ Failed to generate summary")
        sys.exit(1)
    if verbose:
        print("✅ Summary generated successfully!")
        print(f"📊 Summary length: {len(summary)} characters")

    # Save summary (save_summary prints its own error on failure)
    if not summarizer.save_summary(summary, output_file):
        sys.exit(1)
    if verbose:
        print("\n🎉 Process completed successfully!")
        print(f"📁 Summary saved as: {output_file.name}")
# Run the command-line workflow only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()