From d462ed26c9d86ad15b558480d1ab99c5a67966c3 Mon Sep 17 00:00:00 2001
From: Saravana Dhandapani
Date: Tue, 16 Sep 2025 22:29:38 -0700
Subject: [PATCH] Add YouTube transcript summarization with Perplexity API

- Add summarize_with_perplexity.py script for generating summaries using Perplexity API
- Add config.json for configurable API settings, model parameters, and file paths
- Support for custom prompts and transcript files
- Configurable logging and output formatting
- Integration with existing YouTube transcription workflow
- Read the API key from PERPLEXITY_API_KEY so that config.json carries no secret
---
Reviewer notes (free text in this position is not part of the commit):
- Line structure and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; runs of spaces inside string literals may have been lost.
- The author e-mail is absent from the From: header above; restore it before
  feeding this patch to `git am`.
- The blob ids on the "index" lines belong to the original revision and do not
  match the edited contents below.
- The API key that was previously committed in config.json should be revoked;
  it is replaced here by an empty placeholder.

 youtube/config.json                  |  24 ++++
 youtube/summarize_with_perplexity.py | 231 +++++++++++++++++++++++++++
 2 files changed, 255 insertions(+)
 create mode 100644 youtube/config.json
 create mode 100755 youtube/summarize_with_perplexity.py

diff --git a/youtube/config.json b/youtube/config.json
new file mode 100644
index 0000000..a4d1f17
--- /dev/null
+++ b/youtube/config.json
@@ -0,0 +1,24 @@
+{
+    "api": {
+        "key": "",
+        "base_url": "https://api.perplexity.ai/chat/completions",
+        "model": "llama-3.1-sonar-small-128k-online",
+        "timeout": 60
+    },
+    "generation": {
+        "max_tokens": 4000,
+        "temperature": 0.7,
+        "top_p": 0.9,
+        "stream": false
+    },
+    "system_prompt": "You are an expert astrological analyst and educator specializing in Vedic astrology and ALP methodology. You provide comprehensive, accurate, and culturally authentic analysis in both Tamil and English languages.",
+    "files": {
+        "default_transcript": "captions/c7bbO_KSLPI_complete_transcript.txt",
+        "default_prompt": "prompts/summarize_in_tamil.txt",
+        "output_suffix": "_summary.md"
+    },
+    "logging": {
+        "verbose": true,
+        "show_progress": true
+    }
+}
diff --git a/youtube/summarize_with_perplexity.py b/youtube/summarize_with_perplexity.py
new file mode 100755
index 0000000..4f750f6
--- /dev/null
+++ b/youtube/summarize_with_perplexity.py
@@ -0,0 +1,231 @@
+#!/usr/bin/env python3
+"""
+YouTube Transcript Summarizer using Perplexity API
+Generates comprehensive summaries of YouTube transcripts using custom prompts
+
+The API key is read from the PERPLEXITY_API_KEY environment variable; the
+"api.key" entry of config.json is only a fallback, so no secret needs to be
+committed to the repository.
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+
+import requests
+
+# Environment variable that takes precedence over config["api"]["key"].
+API_KEY_ENV_VAR = "PERPLEXITY_API_KEY"
+
+
+class PerplexitySummarizer:
+    """Summarize transcripts through the Perplexity chat completions API."""
+
+    def __init__(self, config_file="config.json"):
+        """Load *config_file* and prepare the request headers.
+
+        Exits with status 1 if the config cannot be loaded or no API key is
+        available from the environment or the config.
+        """
+        self.config = self.load_config(config_file)
+        # Prefer the environment so the key never has to live in the repo.
+        env_key = os.environ.get(API_KEY_ENV_VAR)
+        self.api_key = env_key or self.config["api"].get("key")
+        if not self.api_key:
+            print(f"❌ Error: No API key; set {API_KEY_ENV_VAR} or api.key in the config")
+            sys.exit(1)
+        self.base_url = self.config["api"]["base_url"]
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+        }
+
+    def load_config(self, config_file):
+        """Load configuration from a JSON file located next to this script.
+
+        Exits with status 1 if the file is missing, unreadable or not JSON.
+        """
+        config_path = Path(__file__).parent / config_file
+        try:
+            with open(config_path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        except FileNotFoundError:
+            print(f"❌ Error: Config file not found: {config_path}")
+            sys.exit(1)
+        except json.JSONDecodeError as e:
+            print(f"❌ Error: Invalid JSON in config file: {e}")
+            sys.exit(1)
+        except (OSError, UnicodeDecodeError) as e:
+            print(f"❌ Error loading config: {e}")
+            sys.exit(1)
+
+    def _read_text_file(self, path, label):
+        """Return the stripped text of *path*, or None after printing an error.
+
+        *label* names the kind of file ("Prompt", "Transcript") in messages.
+        """
+        try:
+            with open(path, 'r', encoding='utf-8') as f:
+                return f.read().strip()
+        except FileNotFoundError:
+            print(f"❌ Error: {label} file not found: {path}")
+            return None
+        except (OSError, UnicodeDecodeError) as e:
+            print(f"❌ Error reading {label.lower()} file: {e}")
+            return None
+
+    def load_prompt(self, prompt_file):
+        """Load the prompt template from file (None on failure)"""
+        return self._read_text_file(prompt_file, "Prompt")
+
+    def load_transcript(self, transcript_file):
+        """Load the transcript content from file (None on failure)"""
+        return self._read_text_file(transcript_file, "Transcript")
+
+    def generate_summary(self, prompt_template, transcript_content):
+        """Generate a summary using the Perplexity API.
+
+        Returns the content of the first choice, or None after printing an
+        error when the request fails or the response has no usable content.
+        """
+        api_cfg = self.config["api"]
+        gen_cfg = self.config["generation"]
+        # Combine prompt template with transcript content
+        full_prompt = f"{prompt_template}\n\nTRANSCRIPT TO ANALYZE:\n{transcript_content}"
+        payload = {
+            "model": api_cfg["model"],
+            "messages": [
+                {"role": "system", "content": self.config["system_prompt"]},
+                {"role": "user", "content": full_prompt},
+            ],
+            "max_tokens": gen_cfg["max_tokens"],
+            "temperature": gen_cfg["temperature"],
+            "top_p": gen_cfg["top_p"],
+            "stream": gen_cfg["stream"],
+        }
+
+        if self.config.get("logging", {}).get("verbose"):
+            print("🔄 Sending request to Perplexity API...")
+        try:
+            response = requests.post(
+                self.base_url,
+                headers=self.headers,
+                json=payload,
+                timeout=api_cfg["timeout"],
+            )
+        except requests.exceptions.Timeout:
+            print("❌ Error: Request timed out")
+            return None
+        except requests.exceptions.RequestException as e:
+            print(f"❌ Error: Request failed: {e}")
+            return None
+
+        if response.status_code != 200:
+            print(f"❌ Error: API request failed with status {response.status_code}")
+            print(f"Response: {response.text}")
+            return None
+
+        # A 200 reply can still carry a body that is not the expected JSON shape.
+        try:
+            choices = response.json().get("choices")
+            content = choices[0]["message"]["content"] if choices else None
+        except (ValueError, AttributeError, LookupError, TypeError) as e:
+            print(f"❌ Error: Unexpected error: {e}")
+            return None
+        if not content:
+            print("❌ Error: No response content from API")
+            return None
+        return content
+
+    def save_summary(self, summary_content, output_file):
+        """Save the generated summary to file; return True on success"""
+        try:
+            with open(output_file, 'w', encoding='utf-8') as f:
+                f.write(summary_content)
+        except (OSError, UnicodeError) as e:
+            print(f"❌ Error saving summary: {e}")
+            return False
+        print(f"✅ Summary saved to: {output_file}")
+        return True
+
+
+def main():
+    """Command-line entry point.
+
+    Usage: summarize_with_perplexity.py [TRANSCRIPT_FILE [PROMPT_FILE]]
+
+    Omitted arguments fall back to the "files" defaults of the config.
+    Relative paths are resolved against this script's directory, which is
+    also where the summary is written.
+    """
+    # Initialize summarizer (loads config)
+    summarizer = PerplexitySummarizer()
+    files_cfg = summarizer.config["files"]
+    verbose = summarizer.config["logging"]["verbose"]
+
+    # Command-line arguments take precedence over the config defaults
+    cli_args = sys.argv[1:]
+    transcript_file = (cli_args[0] if cli_args else None) or files_cfg["default_transcript"]
+    prompt_file = (cli_args[1] if len(cli_args) > 1 else None) or files_cfg["default_prompt"]
+
+    # Anchor relative paths at the script directory (absolute paths win)
+    script_dir = Path(__file__).parent
+    transcript_path = script_dir / transcript_file
+    prompt_path = script_dir / prompt_file
+
+    # Generate output filename based on transcript filename
+    transcript_name = Path(transcript_file).stem
+    output_file = script_dir / f"{transcript_name}{files_cfg['output_suffix']}"
+
+    if verbose:
+        print("🎯 YouTube Transcript Summarizer")
+        print("=" * 50)
+        print(f"📄 Transcript: {transcript_path}")
+        print(f"📝 Prompt: {prompt_path}")
+        print(f"💾 Output: {output_file}")
+        print("=" * 50)
+
+    # Load prompt template (the loader has already reported any read error)
+    if verbose:
+        print("📖 Loading prompt template...")
+    prompt_template = summarizer.load_prompt(prompt_path)
+    if prompt_template is None:
+        sys.exit(1)
+    if not prompt_template:
+        print(f"❌ Error: Prompt file is empty: {prompt_path}")
+        sys.exit(1)
+
+    # Load transcript content
+    if verbose:
+        print("📖 Loading transcript content...")
+    transcript_content = summarizer.load_transcript(transcript_path)
+    if transcript_content is None:
+        sys.exit(1)
+    if not transcript_content:
+        print(f"❌ Error: Transcript file is empty: {transcript_path}")
+        sys.exit(1)
+    if verbose:
+        print(f"📊 Transcript length: {len(transcript_content)} characters")
+
+    # Generate summary
+    if verbose:
+        print("🤖 Generating summary with Perplexity API...")
+    summary = summarizer.generate_summary(prompt_template, transcript_content)
+    if not summary:
+        print("❌ Failed to generate summary")
+        sys.exit(1)
+    if verbose:
+        print("✅ Summary generated successfully!")
+        print(f"📊 Summary length: {len(summary)} characters")
+
+    # Save summary
+    if not summarizer.save_summary(summary, output_file):
+        sys.exit(1)
+    if verbose:
+        print("\n🎉 Process completed successfully!")
+        print(f"📁 Summary saved as: {output_file.name}")
+
+
+if __name__ == "__main__":
+    main()