From d462ed26c9d86ad15b558480d1ab99c5a67966c3 Mon Sep 17 00:00:00 2001
From: Saravana Dhandapani <saravanakumardb@gmail.com>
Date: Tue, 16 Sep 2025 22:29:38 -0700
Subject: [PATCH] Add YouTube transcript summarization with Perplexity API

- Add summarize_with_perplexity.py script for generating summaries using Perplexity API
- Add config.json for configurable API settings, model parameters, and file paths
- Support for custom prompts and transcript files
- Configurable logging and output formatting
- Integration with existing YouTube transcription workflow
---
 youtube/config.json                  |  24 ++++
 youtube/summarize_with_perplexity.py | 201 +++++++++++++++++++++++++++
 2 files changed, 225 insertions(+)
 create mode 100644 youtube/config.json
 create mode 100755 youtube/summarize_with_perplexity.py

diff --git a/youtube/config.json b/youtube/config.json
new file mode 100644
index 0000000..a4d1f17
--- /dev/null
+++ b/youtube/config.json
@@ -0,0 +1,24 @@
+{
+    "api": {
+        "key": "YOUR_PERPLEXITY_API_KEY",
+        "base_url": "https://api.perplexity.ai/chat/completions",
+        "model": "llama-3.1-sonar-small-128k-online",
+        "timeout": 60
+    },
+    "generation": {
+        "max_tokens": 4000,
+        "temperature": 0.7,
+        "top_p": 0.9,
+        "stream": false
+    },
+    "system_prompt": "You are an expert astrological analyst and educator specializing in Vedic astrology and ALP methodology. You provide comprehensive, accurate, and culturally authentic analysis in both Tamil and English languages.",
+    "files": {
+        "default_transcript": "captions/c7bbO_KSLPI_complete_transcript.txt",
+        "default_prompt": "prompts/summarize_in_tamil.txt",
+        "output_suffix": "_summary.md"
+    },
+    "logging": {
+        "verbose": true,
+        "show_progress": true
+    }
+}
\ No newline at end of file
diff --git a/youtube/summarize_with_perplexity.py b/youtube/summarize_with_perplexity.py
new file mode 100755
index 0000000..4f750f6
--- /dev/null
+++ b/youtube/summarize_with_perplexity.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+"""
+YouTube Transcript Summarizer using Perplexity API
+Generates comprehensive summaries of YouTube transcripts using custom prompts
+"""
+
+import os
+import sys
+import json
+import requests
+from pathlib import Path
+
+class PerplexitySummarizer:
+    """Summarize transcript text through the Perplexity chat-completions API.
+
+    Settings are read from a JSON config file located next to this script.
+    The API key may be supplied through the ``PERPLEXITY_API_KEY`` environment
+    variable, which takes precedence over ``api.key`` in the config file so
+    that the secret does not have to be stored in a tracked file.
+    """
+
+    def __init__(self, config_file="config.json"):
+        """Load ``config_file`` and prepare the request URL and headers."""
+        self.config = self.load_config(config_file)
+        api_cfg = self.config["api"]
+        # Prefer the environment so credentials can stay out of the repository.
+        self.api_key = os.environ.get("PERPLEXITY_API_KEY") or api_cfg.get("key", "")
+        self.base_url = api_cfg["base_url"]
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+        }
+
+    def load_config(self, config_file):
+        """Return the parsed config (path relative to this script); exit(1) on error."""
+        config_path = Path(__file__).parent / config_file
+        try:
+            with open(config_path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        except FileNotFoundError:
+            print(f"❌ Error: Config file not found: {config_path}")
+        except json.JSONDecodeError as e:
+            print(f"❌ Error: Invalid JSON in config file: {e}")
+        except (OSError, UnicodeDecodeError) as e:
+            print(f"❌ Error loading config: {e}")
+        sys.exit(1)
+
+    def _read_text(self, path, label):
+        """Return the stripped UTF-8 text of ``path``, or None after printing an error.
+
+        ``label`` names the kind of file ("Prompt", "Transcript") in messages.
+        """
+        try:
+            with open(path, 'r', encoding='utf-8') as f:
+                return f.read().strip()
+        except FileNotFoundError:
+            print(f"❌ Error: {label} file not found: {path}")
+        except (OSError, UnicodeDecodeError) as e:
+            print(f"❌ Error reading {label.lower()} file: {e}")
+        return None
+
+    def load_prompt(self, prompt_file):
+        """Load the prompt template; returns None if the file cannot be read."""
+        return self._read_text(prompt_file, "Prompt")
+
+    def load_transcript(self, transcript_file):
+        """Load the transcript content; returns None if the file cannot be read."""
+        return self._read_text(transcript_file, "Transcript")
+
+    def generate_summary(self, prompt_template, transcript_content):
+        """Request a summary from the API and return its text, or None on failure.
+
+        The prompt template and the transcript are sent together as one user
+        message after the configured system prompt. Every failure (HTTP error,
+        timeout, malformed or empty response) is reported on stdout.
+        """
+        full_prompt = f"{prompt_template}\n\nTRANSCRIPT TO ANALYZE:\n{transcript_content}"
+        gen_cfg = self.config["generation"]
+        payload = {
+            "model": self.config["api"]["model"],
+            "messages": [
+                {"role": "system", "content": self.config["system_prompt"]},
+                {"role": "user", "content": full_prompt},
+            ],
+            "max_tokens": gen_cfg["max_tokens"],
+            "temperature": gen_cfg["temperature"],
+            "top_p": gen_cfg["top_p"],
+            "stream": gen_cfg["stream"],
+        }
+
+        try:
+            if self.config["logging"]["verbose"]:
+                print("🔄 Sending request to Perplexity API...")
+            response = requests.post(self.base_url, headers=self.headers, json=payload, timeout=self.config["api"]["timeout"])
+            if response.status_code != 200:
+                print(f"❌ Error: API request failed with status {response.status_code}")
+                print(f"Response: {response.text}")
+                return None
+            choices = response.json().get("choices") or []
+            # Treat a missing or empty message the same as no choices at all.
+            content = choices[0]["message"]["content"] if choices else None
+            if not content:
+                print("❌ Error: No response content from API")
+            return content or None
+        except requests.exceptions.Timeout:
+            print("❌ Error: Request timed out")
+        except requests.exceptions.RequestException as e:
+            print(f"❌ Error: Request failed: {e}")
+        except Exception as e:
+            print(f"❌ Error: Unexpected error: {e}")
+        return None
+
+    def save_summary(self, summary_content, output_file):
+        """Write the summary to ``output_file`` as UTF-8; return True on success."""
+        try:
+            with open(output_file, 'w', encoding='utf-8') as f:
+                f.write(summary_content)
+        except (OSError, TypeError, UnicodeError) as e:
+            print(f"❌ Error saving summary: {e}")
+            return False
+        print(f"✅ Summary saved to: {output_file}")
+        return True
+
+def main():
+    """Summarize one transcript and write the result next to this script.
+
+    Usage: summarize_with_perplexity.py [transcript_file] [prompt_file]
+
+    Omitted arguments fall back to the ``files`` defaults in the config.
+    Relative paths are resolved against this script's directory. The process
+    exits with status 1 when an input is missing or empty, or when the API
+    call or the final write fails.
+    """
+    # Optional positional arguments; None means "use the config default".
+    transcript_file = sys.argv[1] if len(sys.argv) >= 2 else None
+    prompt_file = sys.argv[2] if len(sys.argv) >= 3 else None
+
+    # Initialize summarizer (loads config)
+    summarizer = PerplexitySummarizer()
+    files_cfg = summarizer.config["files"]
+    verbose = summarizer.config["logging"]["verbose"]
+
+    if not transcript_file:
+        transcript_file = files_cfg["default_transcript"]
+    if not prompt_file:
+        prompt_file = files_cfg["default_prompt"]
+
+    # Joining keeps absolute arguments as-is and anchors relative ones here.
+    script_dir = Path(__file__).parent
+    transcript_path = script_dir / transcript_file
+    prompt_path = script_dir / prompt_file
+
+    # Output name is the transcript's stem plus the configured suffix.
+    output_file = script_dir / f"{Path(transcript_file).stem}{files_cfg['output_suffix']}"
+
+    if verbose:
+        print("🎯 YouTube Transcript Summarizer")
+        print("=" * 50)
+        print(f"📄 Transcript: {transcript_path}")
+        print(f"📝 Prompt: {prompt_path}")
+        print(f"💾 Output: {output_file}")
+        print("=" * 50)
+        print("📖 Loading prompt template...")
+
+    prompt_template = summarizer.load_prompt(prompt_path)
+    if prompt_template is None:
+        sys.exit(1)  # load_prompt already reported the problem
+    if not prompt_template:  # empty file: say so instead of exiting silently
+        print(f"❌ Error: Prompt file is empty: {prompt_path}")
+        sys.exit(1)
+
+    if verbose:
+        print("📖 Loading transcript content...")
+    transcript_content = summarizer.load_transcript(transcript_path)
+    if transcript_content is None:
+        sys.exit(1)  # load_transcript already reported the problem
+    if not transcript_content:
+        print(f"❌ Error: Transcript file is empty: {transcript_path}")
+        sys.exit(1)
+
+    if verbose:
+        print(f"📊 Transcript length: {len(transcript_content)} characters")
+        print("🤖 Generating summary with Perplexity API...")
+    summary = summarizer.generate_summary(prompt_template, transcript_content)
+    if not summary:
+        print("❌ Failed to generate summary")
+        sys.exit(1)
+
+    if verbose:
+        print("✅ Summary generated successfully!")
+        print(f"📊 Summary length: {len(summary)} characters")
+
+    if not summarizer.save_summary(summary, output_file):
+        sys.exit(1)
+
+    if verbose:
+        print("\n🎉 Process completed successfully!")
+        print(f"📁 Summary saved as: {output_file.name}")
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()