Add YouTube transcript summarization with Perplexity API
- Add summarize_with_perplexity.py script for generating summaries using the Perplexity API
- Add config.json for configurable API settings, model parameters, and file paths
- Support custom prompts and transcript files
- Add configurable logging and output formatting
- Integrate with the existing YouTube transcription workflow
This commit is contained in:
parent
fd58016586
commit
d462ed26c9
24
youtube/config.json
Normal file
24
youtube/config.json
Normal file
@ -0,0 +1,24 @@
|
||||
{
|
||||
"api": {
|
||||
"key": "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",
|
||||
"base_url": "https://api.perplexity.ai/chat/completions",
|
||||
"model": "llama-3.1-sonar-small-128k-online",
|
||||
"timeout": 60
|
||||
},
|
||||
"generation": {
|
||||
"max_tokens": 4000,
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.9,
|
||||
"stream": false
|
||||
},
|
||||
"system_prompt": "You are an expert astrological analyst and educator specializing in Vedic astrology and ALP methodology. You provide comprehensive, accurate, and culturally authentic analysis in both Tamil and English languages.",
|
||||
"files": {
|
||||
"default_transcript": "captions/c7bbO_KSLPI_complete_transcript.txt",
|
||||
"default_prompt": "prompts/summarize_in_tamil.txt",
|
||||
"output_suffix": "_summary.md"
|
||||
},
|
||||
"logging": {
|
||||
"verbose": true,
|
||||
"show_progress": true
|
||||
}
|
||||
}
|
||||
201
youtube/summarize_with_perplexity.py
Executable file
201
youtube/summarize_with_perplexity.py
Executable file
@ -0,0 +1,201 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
YouTube Transcript Summarizer using Perplexity API
|
||||
Generates comprehensive summaries of YouTube transcripts using custom prompts
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
class PerplexitySummarizer:
    """Summarize a transcript by posting it to the Perplexity API.

    Settings are read from a JSON config file. The code reads these keys:
    ``api`` (``key``, ``base_url``, ``model``, ``timeout``), ``generation``
    (``max_tokens``, ``temperature``, ``top_p``, ``stream``),
    ``system_prompt`` and ``logging`` (``verbose``).

    Attributes:
        config: The parsed config file.
        api_key: Bearer token sent with every request.
        base_url: URL the chat request is POSTed to.
        headers: HTTP headers (authorization and content type) for requests.
    """

    # When this environment variable holds a non-empty value it takes
    # precedence over ``api.key`` in the config file, so the secret does not
    # have to be stored in (or committed with) the config file.
    API_KEY_ENV_VAR = "PERPLEXITY_API_KEY"

    def __init__(self, config_file="config.json"):
        """Load the config and prepare the request headers.

        Args:
            config_file: Config path, resolved relative to this script's
                directory (an absolute path is used as-is).

        Exits the process with status 1 if the config cannot be loaded or a
        required ``api`` setting is missing.
        """
        self.config = self.load_config(config_file)
        try:
            api_settings = self.config["api"]
            self.base_url = api_settings["base_url"]
            # ``or`` (not a default argument) so an empty environment value
            # falls back to the config file as well.
            self.api_key = (
                os.environ.get(self.API_KEY_ENV_VAR) or api_settings["key"]
            )
        except (KeyError, TypeError) as e:
            # Same failure style as load_config: report and stop instead of
            # surfacing a raw traceback for a configuration mistake.
            print(f"❌ Error: Missing required config setting: {e}")
            sys.exit(1)

        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def load_config(self, config_file):
        """Load configuration from a JSON file.

        Args:
            config_file: Path joined onto this script's directory.

        Returns:
            The parsed JSON document.

        Exits the process with status 1 if the file is missing, unreadable,
        or not valid JSON.
        """
        config_path = Path(__file__).parent / config_file
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            # Show the resolved path: the file is looked up next to the
            # script, not in the current working directory.
            print(f"❌ Error: Config file not found: {config_path}")
            sys.exit(1)
        except json.JSONDecodeError as e:
            print(f"❌ Error: Invalid JSON in config file: {e}")
            sys.exit(1)
        except (OSError, ValueError) as e:
            print(f"❌ Error loading config: {e}")
            sys.exit(1)

    def _read_text_file(self, file_path, label):
        """Return the stripped UTF-8 text of ``file_path``, or None on error.

        ``label`` ("prompt" / "transcript") only customizes the error text.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read().strip()
        except FileNotFoundError:
            print(f"❌ Error: {label.capitalize()} file not found: {file_path}")
            return None
        except (OSError, ValueError) as e:
            print(f"❌ Error reading {label} file: {e}")
            return None

    def load_prompt(self, prompt_file):
        """Load the prompt template from file.

        Returns:
            The file's text with surrounding whitespace stripped, or None if
            the file is missing or cannot be read (an error is printed).
        """
        return self._read_text_file(prompt_file, "prompt")

    def load_transcript(self, transcript_file):
        """Load the transcript content from file.

        Returns:
            The file's text with surrounding whitespace stripped, or None if
            the file is missing or cannot be read (an error is printed).
        """
        return self._read_text_file(transcript_file, "transcript")

    def generate_summary(self, prompt_template, transcript_content):
        """Generate a summary using the Perplexity API.

        The user message is the prompt template followed by the transcript;
        the system message comes from ``system_prompt`` in the config.

        Args:
            prompt_template: Instruction text placed before the transcript.
            transcript_content: Transcript text to analyze.

        Returns:
            The text at ``choices[0].message.content`` of the JSON response,
            or None if the request fails, times out, returns a non-200
            status, or has no choices (an error is printed in each case).

        Raises:
            KeyError: If a ``generation``/``api``/``system_prompt`` setting
                needed to build the payload is missing from the config.
        """
        full_prompt = (
            f"{prompt_template}\n\nTRANSCRIPT TO ANALYZE:\n{transcript_content}"
        )
        generation = self.config["generation"]

        # NOTE(review): the response below is parsed with response.json();
        # presumably a streamed response would not parse that way — confirm
        # before enabling "stream" in the config.
        payload = {
            "model": self.config["api"]["model"],
            "messages": [
                {"role": "system", "content": self.config["system_prompt"]},
                {"role": "user", "content": full_prompt},
            ],
            "max_tokens": generation["max_tokens"],
            "temperature": generation["temperature"],
            "top_p": generation["top_p"],
            "stream": generation["stream"],
        }

        try:
            if self.config["logging"]["verbose"]:
                print("🔄 Sending request to Perplexity API...")

            response = requests.post(
                self.base_url,
                headers=self.headers,
                json=payload,
                timeout=self.config["api"]["timeout"],
            )

            if response.status_code != 200:
                print(f"❌ Error: API request failed with status {response.status_code}")
                print(f"Response: {response.text}")
                return None

            result = response.json()
            # Treat a missing, null, or empty "choices" uniformly as "no
            # content" rather than letting it fall through as an opaque
            # unexpected error.
            choices = result.get("choices") if isinstance(result, dict) else None
            if not choices:
                print("❌ Error: No response content from API")
                return None

            return choices[0]["message"]["content"]

        except requests.exceptions.Timeout:
            print("❌ Error: Request timed out")
            return None
        except requests.exceptions.RequestException as e:
            print(f"❌ Error: Request failed: {e}")
            return None
        except Exception as e:
            # Deliberate catch-all at the API boundary: callers rely on a
            # None return (never an exception) for any request failure,
            # including a response whose shape is not what is read above.
            print(f"❌ Error: Unexpected error: {e}")
            return None

    def save_summary(self, summary_content, output_file):
        """Save the generated summary to file.

        Args:
            summary_content: Text to write.
            output_file: Destination path; an existing file is overwritten.

        Returns:
            True if the file was written, False otherwise (an error is
            printed).
        """
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                f.write(summary_content)
        except (OSError, TypeError, ValueError) as e:
            print(f"❌ Error saving summary: {e}")
            return False

        print(f"✅ Summary saved to: {output_file}")
        return True
|
||||
|
||||
def main():
    """Run the summarizer from the command line.

    Usage: ``summarize_with_perplexity.py [transcript_file] [prompt_file]``.
    Arguments that are omitted (or empty) fall back to
    ``files.default_transcript`` / ``files.default_prompt`` from the config.
    Both paths are joined onto this script's directory, and the summary is
    written into that directory as ``<transcript stem><files.output_suffix>``.

    Exits with status 1 if the prompt or transcript cannot be loaded or is
    empty, if no summary is generated, or if the summary cannot be saved.
    """
    cli_args = sys.argv[1:]

    # Initialize summarizer (loads config)
    summarizer = PerplexitySummarizer()
    file_settings = summarizer.config["files"]
    verbose = summarizer.config["logging"]["verbose"]

    # Command-line values win; a missing or empty argument uses the default.
    transcript_file = (cli_args[0] if len(cli_args) >= 1 else None) or file_settings[
        "default_transcript"
    ]
    prompt_file = (cli_args[1] if len(cli_args) >= 2 else None) or file_settings[
        "default_prompt"
    ]

    # Resolve against the script's directory (absolute inputs stay absolute).
    script_dir = Path(__file__).parent
    transcript_path = script_dir / transcript_file
    prompt_path = script_dir / prompt_file

    # Generate output filename based on transcript filename
    transcript_name = Path(transcript_file).stem
    output_file = script_dir / f"{transcript_name}{file_settings['output_suffix']}"

    if verbose:
        print("🎯 YouTube Transcript Summarizer")
        print("=" * 50)
        print(f"📄 Transcript: {transcript_path}")
        print(f"📝 Prompt: {prompt_path}")
        print(f"💾 Output: {output_file}")
        print("=" * 50)

    # Load prompt template
    if verbose:
        print("📖 Loading prompt template...")
    prompt_template = summarizer.load_prompt(prompt_path)
    if prompt_template is None:
        # The loader already printed why it failed.
        sys.exit(1)
    if not prompt_template:
        # The loader returns "" for a blank file without printing anything,
        # so report it here instead of exiting silently.
        print(f"❌ Error: Prompt file is empty: {prompt_path}")
        sys.exit(1)

    # Load transcript content
    if verbose:
        print("📖 Loading transcript content...")
    transcript_content = summarizer.load_transcript(transcript_path)
    if transcript_content is None:
        sys.exit(1)
    if not transcript_content:
        print(f"❌ Error: Transcript file is empty: {transcript_path}")
        sys.exit(1)

    if verbose:
        print(f"📊 Transcript length: {len(transcript_content)} characters")
        print("🤖 Generating summary with Perplexity API...")

    # Generate summary
    summary = summarizer.generate_summary(prompt_template, transcript_content)
    if not summary:
        print("❌ Failed to generate summary")
        sys.exit(1)

    if verbose:
        print("✅ Summary generated successfully!")
        print(f"📊 Summary length: {len(summary)} characters")

    # Save summary
    if not summarizer.save_summary(summary, output_file):
        sys.exit(1)

    if verbose:
        print("\n🎉 Process completed successfully!")
        print(f"📁 Summary saved as: {output_file.name}")
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue
Block a user