From fe7d01fff775d5f29f6fbec2a9fab180152068f6 Mon Sep 17 00:00:00 2001
From: Cod-e-Codes <codylmarsengill@gmail.com>
Date: Mon, 29 Sep 2025 17:42:50 -0400
Subject: [PATCH] feat: add optional transcription with multi-provider support

- Add transcription.go with whisper.cpp, Vosk, OpenAI API, and Python script providers
- Integrate transcription into main app with Ctrl+T keybinding
- Add transcription settings to UI with provider status indicators
- Show transcription status in memo list and include in search
- Auto-transcribe option for new recordings
- Complete setup documentation in README
---
 README.md        |  88 ++++++-
 main.go          | 295 ++++++++++++++++++++---
 transcription.go | 603 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 945 insertions(+), 41 deletions(-)
 create mode 100644 transcription.go

diff --git a/README.md b/README.md
index e7532ae..02d68b1 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,7 @@ Audio configuration interface displaying hardware/audio settings, available devi
 - Add tags for organization
 - Delete memos
 - Export memos to Downloads folder
+- **Optional transcription** with multiple provider support
 
 ### User Interface
 - Terminal user interface using Bubble Tea
@@ -115,16 +116,18 @@ go build -o voicelog main.go
 | `ctrl+x` | Stop playback |
 | `?` | Show help |
 | `ctrl+s` | Settings |
-| `ctrl+t` | Generate test file |
+| `ctrl+t` | Transcribe selected memo |
+| `F5` | Generate test file |
 | `ESC/q` | Quit |
 
 ### Basic Operations
 
 1. **Recording**: Press `SPACE` to start/stop recording
 2. **Playback**: Select a memo and press `ENTER` to play
-3. **Settings**: Press `ctrl+s` to configure audio devices
-4. **Test File**: Press `ctrl+t` to generate a 5-second 440Hz test tone
-5. **Export**: Press `e` to export selected memo to Downloads folder
+3. **Transcription**: Press `ctrl+t` to transcribe selected memo (optional)
+4. **Settings**: Press `ctrl+s` to configure audio devices and transcription
+5. **Test File**: Press `F5` to generate a 5-second 440Hz test tone
+6. **Export**: Press `ctrl+e` to export selected memo to Downloads folder
 
 ### Audio Processing Features
 
@@ -146,12 +149,86 @@ VoiceLog includes advanced audio processing capabilities:
 - **Compact Mode**: Memo list becomes compact when audio visualizer is active
 - **Real-Time Updates**: Waveform and meters update in real-time during operation
 
+### Transcription (Optional)
+
+VoiceLog supports optional voice-to-text transcription through a flexible plugin system. Transcription is **completely optional** - the application works perfectly without it.
+
+#### Supported Transcription Providers
+
+1. **whisper.cpp (Recommended - Local & Private)**
+   - High accuracy, supports many languages
+   - Runs entirely offline - no internet required
+   - Complete privacy - audio never leaves your machine
+   - Installation: [github.com/ggerganov/whisper.cpp](https://github.com/ggerganov/whisper.cpp)
+
+2. **OpenAI Whisper API (Cloud-based - Highest Accuracy)**
+   - Highest accuracy available
+   - Requires internet connection and API key
+   - Install: `pip install openai`
+   - Set `OPENAI_API_KEY` environment variable
+
+3. **Vosk (Lightweight & Fast)**
+   - Smaller models, faster processing
+   - Good for real-time applications
+   - Installation: [alphacephei.com/vosk](https://alphacephei.com/vosk/)
+
+4. **Custom Python Script**
+   - Use any transcription API (AssemblyAI, Rev.ai, etc.)
+   - Write your own integration script
+   - Full flexibility for custom workflows
+
+#### Quick Setup Examples
+
+**whisper.cpp Setup (Linux/macOS):**
+```bash
+# Clone and build whisper.cpp
+git clone https://github.com/ggerganov/whisper.cpp
+cd whisper.cpp && make
+
+# Download a model (base.en for English, base for multilingual)
+./models/download-ggml-model.sh base.en
+
+# VoiceLog auto-detects an executable named whisper on your PATH or in the current directory
+```
+
+**OpenAI Whisper API Setup:**
+```bash
+# Install the OpenAI library
+pip install openai
+
+# Set your API key (get one from https://platform.openai.com)
+export OPENAI_API_KEY="your-api-key-here"
+```
+
+#### Using Transcription
+
+1. **Enable in Settings**: Press `ctrl+s` → Navigate to "Transcription:" → Toggle to ON
+2. **Select Provider**: Navigate to "Default Provider:" → Choose your installed provider
+3. **Transcribe**: Press `ctrl+t` on any memo to transcribe it
+4. **Auto-Transcribe**: Enable "Auto Transcribe:" to automatically transcribe new recordings
+
+#### Transcription Features
+
+- **Visual Indicators**: Transcribed memos show a 📝 icon in the memo list
+- **Search Integration**: Search through transcribed text using the built-in filter
+- **Provider Status**: Settings show ✓/✗ status for each provider's availability
+- **Flexible Configuration**: Each provider can be configured independently
+- **Auto-Detection**: VoiceLog automatically detects available transcription tools
+
+#### Privacy & Performance
+
+- **Local Options**: whisper.cpp and Vosk run entirely on your machine
+- **Cloud Options**: OpenAI Whisper API provides highest accuracy but requires internet
+- **No Telemetry**: VoiceLog never sends any data anywhere (except when using API providers)
+- **Storage**: Transcriptions are stored locally alongside memo metadata
+
 ## Configuration
 
 Configuration is stored in `~/.voicelog/config.json` and includes:
 - Audio device settings
 - Sample rate and format preferences
 - Audio processing settings (normalization, silence trimming, clipping detection)
+- Transcription settings (optional)
 - Memo storage path
 - Keybindings
 
@@ -159,8 +236,9 @@ Configuration is stored in `~/.voicelog/config.json` and includes:
 ```
 ~/.voicelog/
 ├── config.json          # Application configuration
+├── transcription.json   # Transcription settings (if enabled)
 ├── memos/               # Voice memo storage
-│   ├── metadata.json    # Memo metadata
+│   ├── metadata.json    # Memo metadata (includes transcriptions)
 │   └── memo_*.wav       # Audio files
 └── voicelog.log         # Application logs
 ```
diff --git a/main.go b/main.go
index cda2eff..a7dae1b 100644
--- a/main.go
+++ b/main.go
@@ -106,14 +106,15 @@ func (f AudioFormat) Extension() string {
 
 // Memo represents a voice memo with metadata
 type Memo struct {
-	ID       string    `json:"id"`
-	Filename string    `json:"filename"`
-	Name     string    `json:"title"` // Changed from Title to Name to avoid conflict
-	Duration float64   `json:"duration"`
-	Created  time.Time `json:"created"`
-	Size     int64     `json:"size"`
-	Tags     []string  `json:"tags"`
-	Format   string    `json:"format"`
+	ID            string               `json:"id"`
+	Filename      string               `json:"filename"`
+	Name          string               `json:"title"` // Changed from Title to Name to avoid conflict
+	Duration      float64              `json:"duration"`
+	Created       time.Time            `json:"created"`
+	Size          int64                `json:"size"`
+	Tags          []string             `json:"tags"`
+	Format        string               `json:"format"`
+	Transcription *TranscriptionResult `json:"transcription,omitempty"`
 }
 
 // Implement list.Item interface
@@ -128,16 +129,27 @@ func (m Memo) Description() string {
 	if len(m.Tags) > 0 {
 		// Truncate tags if they're too long
 		tagString := strings.Join(m.Tags, ", ")
-		if len(tagString) > 20 {
-			tagString = tagString[:17] + "..."
+		if len(tagString) > 15 { // Reduced to make room for transcription indicator
+			tagString = tagString[:12] + "..."
 		}
 		tags = " [" + tagString + "]"
 	}
-	return fmt.Sprintf("%s, %s%s", duration, size, tags)
+
+	// Add transcription indicator
+	transcriptionStatus := ""
+	if m.Transcription != nil {
+		transcriptionStatus = " 📝"
+	}
+
+	return fmt.Sprintf("%s, %s%s%s", duration, size, tags, transcriptionStatus)
 }
 
 func (m Memo) FilterValue() string {
-	return m.Name + " " + strings.Join(m.Tags, " ")
+	searchText := m.Name + " " + strings.Join(m.Tags, " ")
+	if m.Transcription != nil {
+		searchText += " " + m.Transcription.Text
+	}
+	return searchText
 }
 
 // Truncate text to specified length
@@ -443,6 +455,9 @@ type Model struct {
 	isClipping         bool      // Current clipping status
 	peakLevel          float32   // Current peak level
 
+	// Transcription
+	transcriptionManager *TranscriptionManager
+
 	// UI components
 	textInput textinput.Model
 	help      help.Model
@@ -467,23 +482,24 @@ type Model struct {
 
 // Key bindings
 type keyMap struct {
-	Record   key.Binding
-	Play     key.Binding
-	Stop     key.Binding
-	Delete   key.Binding
-	Rename   key.Binding
-	Tag      key.Binding
-	Export   key.Binding
-	Help     key.Binding
-	Settings key.Binding
-	TestFile key.Binding
-	Quit     key.Binding
-	Up       key.Binding
-	Down     key.Binding
-	Enter    key.Binding
-	Escape   key.Binding
-	Left     key.Binding
-	Right    key.Binding
+	Record     key.Binding
+	Play       key.Binding
+	Stop       key.Binding
+	Delete     key.Binding
+	Rename     key.Binding
+	Tag        key.Binding
+	Export     key.Binding
+	Help       key.Binding
+	Settings   key.Binding
+	TestFile   key.Binding
+	Transcribe key.Binding
+	Quit       key.Binding
+	Up         key.Binding
+	Down       key.Binding
+	Enter      key.Binding
+	Escape     key.Binding
+	Left       key.Binding
+	Right      key.Binding
 }
 
 // ShortHelp returns keybindings to be shown in the mini help view
@@ -494,9 +510,9 @@ func (k keyMap) ShortHelp() []key.Binding {
 // FullHelp returns keybindings for the expanded help view
 func (k keyMap) FullHelp() [][]key.Binding {
 	return [][]key.Binding{
-		{k.Record, k.Play, k.Stop, k.Up, k.Down}, // Core controls
-		{k.Rename, k.Tag, k.Delete, k.Export},    // Management
-		{k.Settings, k.TestFile, k.Help, k.Quit}, // Other
+		{k.Record, k.Play, k.Stop, k.Up, k.Down},               // Core controls
+		{k.Rename, k.Tag, k.Delete, k.Export},                  // Management
+		{k.Transcribe, k.Settings, k.TestFile, k.Help, k.Quit}, // Other
 	}
 }
 
@@ -538,8 +554,12 @@ var keys = keyMap{
 		key.WithHelp("ctrl+s", "settings"),
 	),
 	TestFile: key.NewBinding(
+		key.WithKeys("f5"),
+		key.WithHelp("f5", "test file"),
+	),
+	Transcribe: key.NewBinding(
 		key.WithKeys("ctrl+t"),
-		key.WithHelp("ctrl+t", "test file"),
+		key.WithHelp("ctrl+t", "transcribe"),
 	),
 	Quit: key.NewBinding(
 		key.WithKeys("q", "ctrl+c"),
@@ -677,6 +697,10 @@ func initialModel() Model {
 	audioAnalyzer := NewAudioAnalyzer(config.SampleRate, config.WaveformSampleRate)
 	clippingDetector := NewClippingDetector(config.ClippingThreshold)
 
+	// Initialize transcription manager
+	homeDir, _ := os.UserHomeDir()
+	transcriptionManager := NewTranscriptionManager(filepath.Join(homeDir, ConfigDir))
+
 	return Model{
 		state:               StateViewing,
 		config:              config,
@@ -693,6 +717,9 @@ func initialModel() Model {
 		audioAnalyzer:    audioAnalyzer,
 		clippingDetector: clippingDetector,
 		realtimeWaveform: make([]float32, config.WaveformSampleRate),
+
+		// Transcription
+		transcriptionManager: transcriptionManager,
 	}
 }
 
@@ -1033,11 +1060,19 @@ func (m Model) handleSettingsKeys(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 	case key.Matches(msg, keys.Up):
 		if m.settingsSelectedIdx > 0 {
 			m.settingsSelectedIdx--
+			// Skip separator line
+			if m.settingsSelectedIdx == 12 && m.settingsSelectedIdx > 0 {
+				m.settingsSelectedIdx--
+			}
 		}
 
 	case key.Matches(msg, keys.Down):
-		if m.settingsSelectedIdx < 11 { // 12 settings items (0-11)
+		if m.settingsSelectedIdx < 15 { // 16 settings items (0-15) - added transcription settings
 			m.settingsSelectedIdx++
+			// Skip separator line
+			if m.settingsSelectedIdx == 12 && m.settingsSelectedIdx < 15 {
+				m.settingsSelectedIdx++
+			}
 		}
 
 	case key.Matches(msg, keys.Left):
@@ -1232,6 +1267,37 @@ func (m *Model) adjustSetting(delta int) {
 		if m.clippingDetector != nil {
 			m.clippingDetector.threshold = m.config.ClippingThreshold
 		}
+	case 13: // Transcription Enabled
+		enabled := !m.transcriptionManager.GetConfig().Enabled
+		if err := m.transcriptionManager.SetEnabled(enabled); err != nil {
+			log.Printf("Error setting transcription enabled: %v", err)
+		}
+	case 14: // Default Provider
+		providers := m.transcriptionManager.GetAllProviders()
+		currentProvider := m.transcriptionManager.GetConfig().DefaultProvider
+		currentIdx := -1
+		for i, provider := range providers {
+			if provider == currentProvider {
+				currentIdx = i
+				break
+			}
+		}
+		if currentIdx >= 0 {
+			nextIdx := (currentIdx + delta + len(providers)) % len(providers)
+			if err := m.transcriptionManager.SetDefaultProvider(providers[nextIdx]); err != nil {
+				log.Printf("Error setting default provider: %v", err)
+			}
+		} else if len(providers) > 0 {
+			// Set first provider if no current provider
+			if err := m.transcriptionManager.SetDefaultProvider(providers[0]); err != nil {
+				log.Printf("Error setting default provider: %v", err)
+			}
+		}
+	case 15: // Auto Transcribe
+		auto := !m.transcriptionManager.GetConfig().AutoTranscribe
+		if err := m.transcriptionManager.SetAutoTranscribe(auto); err != nil {
+			log.Printf("Error setting auto-transcribe: %v", err)
+		}
 	}
 }
 
@@ -1336,6 +1402,11 @@ func (m Model) handleMainKeys(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 	case key.Matches(msg, keys.TestFile):
 		m.loadTestFile()
 
+	case key.Matches(msg, keys.Transcribe):
+		if len(m.memos) > 0 {
+			m.transcribeMemo()
+		}
+
 	case key.Matches(msg, keys.Record):
 		if m.recording {
 			m.stopRecording()
@@ -1726,6 +1797,10 @@ func (m *Model) stopRecording() {
 
 		// Add to memos list
 		m.memos = append([]Memo{memo}, m.memos...)
+
+		// Auto-transcribe if enabled
+		m.autoTranscribeMemo(&memo)
+
 		// Refresh list items to include the new memo
 		m.memoList.SetItems(convertMemosToListItems(m.memos))
 
@@ -2056,8 +2131,13 @@ func (m Model) renderSettings() string {
 		"Auto Trim Silence:",
 		"Silence Threshold:",
 		"Clipping Threshold:",
+		"", // Separator
+		"Transcription:",
+		"Default Provider:",
+		"Auto Transcribe:",
 	}
 
+	transcriptionConfig := m.transcriptionManager.GetConfig()
 	values := []string{
 		m.getDeviceName(m.config.InputDevice),
 		m.getDeviceName(m.config.OutputDevice),
@@ -2071,10 +2151,20 @@ func (m Model) renderSettings() string {
 		boolToString(m.config.AutoTrimSilence),
 		fmt.Sprintf("%.1f%%", m.config.SilenceThreshold*100),
 		fmt.Sprintf("%.0f%%", m.config.ClippingThreshold*100),
+		"", // Separator value
+		boolToString(transcriptionConfig.Enabled),
+		m.getTranscriptionProviderDisplay(transcriptionConfig.DefaultProvider),
+		boolToString(transcriptionConfig.AutoTranscribe),
 	}
 
 	var lines []string
 	for i, setting := range settings {
+		// Skip empty separator settings
+		if setting == "" && i == 12 {
+			lines = append(lines, "")
+			continue
+		}
+
 		var line string
 		if i == m.settingsSelectedIdx {
 			line += selectedStyle.Render("▶ ")
@@ -2086,8 +2176,8 @@ func (m Model) renderSettings() string {
 		line += " "
 		line += successStyle.Render(values[i])
 
-		// Add arrows for navigation
-		if i == m.settingsSelectedIdx {
+		// Add arrows for navigation (skip separator)
+		if i == m.settingsSelectedIdx && i != 12 {
 			line += " " + mutedStyle.Render("← →")
 		}
 
@@ -2132,6 +2222,50 @@ func (m Model) getDeviceName(deviceID string) string {
 	return fmt.Sprintf("Unknown Device (ID: %s)", deviceID)
 }
 
+// Get transcription provider display name
+func (m Model) getTranscriptionProviderDisplay(providerName string) string {
+	if providerName == "" {
+		return "None"
+	}
+
+	available := m.transcriptionManager.GetAvailableProviders()
+	isAvailable := false
+	for _, name := range available {
+		if name == providerName {
+			isAvailable = true
+			break
+		}
+	}
+
+	if isAvailable {
+		switch providerName {
+		case "whisper.cpp":
+			return "Whisper.cpp ✓"
+		case "vosk":
+			return "Vosk ✓"
+		case "openai_whisper":
+			return "OpenAI Whisper ✓"
+		case "python_script":
+			return "Custom Script ✓"
+		default:
+			return providerName + " ✓"
+		}
+	} else {
+		switch providerName {
+		case "whisper.cpp":
+			return "Whisper.cpp ✗"
+		case "vosk":
+			return "Vosk ✗"
+		case "openai_whisper":
+			return "OpenAI Whisper ✗"
+		case "python_script":
+			return "Custom Script ✗"
+		default:
+			return providerName + " ✗"
+		}
+	}
+}
+
 // Get system audio info
 func getSystemAudioInfo() string {
 	// Avoid initializing PortAudio here to prevent strict init/term cycles on some platforms
@@ -2943,6 +3077,95 @@ func renderPeakBar(level float32, width int) string {
 	return bar
 }
 
+// transcribeMemo transcribes the currently selected memo with the default
+// provider, stores the result on the memo and saves the memo metadata.
+// Outcomes are reported through notifications. The provider call is
+// synchronous, so this method does not return until transcription finishes.
+func (m *Model) transcribeMemo() {
+	// The bounds check also covers the empty list; a stale selection index
+	// must not panic when it is used to index m.memos below.
+	if m.selectedIdx < 0 || m.selectedIdx >= len(m.memos) {
+		m.showNotification("No memo selected")
+		return
+	}
+
+	memo := &m.memos[m.selectedIdx]
+	if memo.Transcription != nil {
+		m.showNotification("Memo already transcribed")
+		return
+	}
+
+	if !m.transcriptionManager.GetConfig().Enabled {
+		m.showNotification("Transcription is disabled - enable in settings")
+		return
+	}
+
+	filePath := filepath.Join(m.config.MemosPath, memo.Filename)
+
+	// Check if file exists
+	if _, err := os.Stat(filePath); os.IsNotExist(err) {
+		m.showNotification("Audio file not found")
+		return
+	}
+
+	// NOTE(review): the call below blocks, so this notice is presumably replaced
+	// by the final one before the UI redraws - confirm, or move the work off-thread.
+	m.showNotification("Transcribing...")
+
+	// An empty provider name selects the configured default provider.
+	result, err := m.transcriptionManager.Transcribe(filePath, "")
+	if err != nil {
+		m.showNotification(fmt.Sprintf("Transcription failed: %v", err))
+		return
+	}
+
+	// memo points into m.memos, so assigning here updates the stored entry;
+	// no second pass over the slice is needed.
+	result.MemoID = memo.ID
+	memo.Transcription = result
+
+	// Save metadata
+	if err := saveMemos(m.memos, m.config.MemosPath); err != nil {
+		log.Printf("Error saving memos metadata: %v", err)
+		m.showNotification("Error saving transcription")
+	} else {
+		m.showNotification("Transcription completed!")
+	}
+
+	// Refresh list items
+	m.memoList.SetItems(convertMemosToListItems(m.memos))
+}
+
+// autoTranscribeMemo transcribes memo in the background when transcription and
+// auto-transcribe are both enabled and the memo has no transcription yet.
+func (m *Model) autoTranscribeMemo(memo *Memo) {
+	cfg := m.transcriptionManager.GetConfig()
+	if !cfg.Enabled || !cfg.AutoTranscribe || memo.Transcription != nil {
+		return
+	}
+	// Capture by value: memo may be a caller-local copy rather than an m.memos entry.
+	memoID := memo.ID
+	filePath := filepath.Join(m.config.MemosPath, memo.Filename)
+
+	// NOTE(review): the goroutine reads and writes m.memos without synchronization.
+	go func() {
+		result, err := m.transcriptionManager.Transcribe(filePath, "")
+		if err != nil {
+			log.Printf("Auto-transcription failed: %v", err)
+			return
+		}
+		result.MemoID = memoID
+		for i := range m.memos {
+			if m.memos[i].ID == memoID {
+				m.memos[i].Transcription = result // stored entry, so saveMemos persists it
+			}
+		}
+		if err := saveMemos(m.memos, m.config.MemosPath); err != nil {
+			log.Printf("Error saving auto-transcription: %v", err)
+		}
+	}()
+}
+
 // Main function
 func main() {
 	setupLogging()
diff --git a/transcription.go b/transcription.go
new file mode 100644
index 0000000..b48ae05
--- /dev/null
+++ b/transcription.go
@@ -0,0 +1,603 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"time"
+)
+
+// TranscriptionProvider represents a plugin interface for transcription services
+type TranscriptionProvider interface {
+	Name() string
+	IsAvailable() bool
+	Transcribe(audioPath string) (string, error)
+	Configure(config map[string]string) error
+}
+
+// TranscriptionConfig holds transcription settings
+type TranscriptionConfig struct {
+	Enabled         bool                         `json:"enabled"`
+	DefaultProvider string                       `json:"default_provider"`
+	AutoTranscribe  bool                         `json:"auto_transcribe"`
+	ProviderConfigs map[string]map[string]string `json:"provider_configs"`
+}
+
+// TranscriptionResult holds the transcription output
+type TranscriptionResult struct {
+	MemoID        string  `json:"memo_id"`
+	Text          string  `json:"text"`
+	Provider      string  `json:"provider"`
+	Confidence    float64 `json:"confidence,omitempty"`
+	Language      string  `json:"language,omitempty"`
+	TranscribedAt string  `json:"transcribed_at"`
+}
+
+// ============================================================================
+// WHISPER.CPP PROVIDER (External Command)
+// ============================================================================
+
+type WhisperCppProvider struct {
+	execPath  string
+	modelPath string
+	language  string
+}
+
+func NewWhisperCppProvider() *WhisperCppProvider {
+	return &WhisperCppProvider{
+		language: "en",
+	}
+}
+
+func (w *WhisperCppProvider) Name() string {
+	return "whisper.cpp"
+}
+
+func (w *WhisperCppProvider) IsAvailable() bool {
+	// Check if whisper executable exists in PATH or configured location
+	if w.execPath != "" {
+		if _, err := os.Stat(w.execPath); err == nil {
+			return true
+		}
+	}
+
+	// Check common locations
+	commonPaths := []string{
+		"whisper",
+		"./whisper",
+		"/usr/local/bin/whisper",
+		"/usr/bin/whisper",
+		"whisper.exe", // Windows
+		"./whisper.exe",
+	}
+
+	for _, path := range commonPaths {
+		if _, err := exec.LookPath(path); err == nil {
+			w.execPath = path
+			return true
+		}
+	}
+
+	return false
+}
+
+func (w *WhisperCppProvider) Configure(config map[string]string) error {
+	if path, ok := config["exec_path"]; ok {
+		w.execPath = path
+	}
+	if path, ok := config["model_path"]; ok {
+		w.modelPath = path
+	}
+	if lang, ok := config["language"]; ok {
+		w.language = lang
+	}
+	return nil
+}
+
+// Transcribe runs the whisper.cpp executable on audioPath and returns the
+// trimmed contents of the text file it produces. The intermediate text file
+// is removed before returning.
+func (w *WhisperCppProvider) Transcribe(audioPath string) (string, error) {
+	if !w.IsAvailable() {
+		return "", fmt.Errorf("whisper.cpp not found in PATH")
+	}
+
+	// By default whisper.cpp appends .txt to the full input name (memo.wav.txt);
+	// -of supplies an extension-less base path, so pin the location explicitly.
+	outBase := strings.TrimSuffix(audioPath, filepath.Ext(audioPath))
+	txtFile := outBase + ".txt"
+
+	args := []string{"-f", audioPath}
+	// Add model path if configured
+	if w.modelPath != "" {
+		args = append(args, "-m", w.modelPath)
+	}
+	args = append(args, "-l", w.language)
+	args = append(args, "-otxt", "-of", outBase)
+
+	cmd := exec.Command(w.execPath, args...)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("whisper.cpp failed: %w\nOutput: %s", err, output)
+	}
+
+	// Clean up the intermediate file whether or not it can be read.
+	defer os.Remove(txtFile)
+	text, err := os.ReadFile(txtFile)
+	if err != nil {
+		return "", fmt.Errorf("failed to read transcription: %w", err)
+	}
+
+	return strings.TrimSpace(string(text)), nil
+}
+
+// ============================================================================
+// VOSK PROVIDER (External Command)
+// ============================================================================
+
+type VoskProvider struct {
+	execPath  string
+	modelPath string
+}
+
+func NewVoskProvider() *VoskProvider {
+	return &VoskProvider{}
+}
+
+func (v *VoskProvider) Name() string {
+	return "vosk"
+}
+
+func (v *VoskProvider) IsAvailable() bool {
+	if v.execPath != "" {
+		if _, err := os.Stat(v.execPath); err == nil {
+			return true
+		}
+	}
+
+	// Check for vosk-transcriber or similar
+	commonPaths := []string{
+		"vosk-transcriber",
+		"vosk",
+		"./vosk-transcriber",
+		"vosk-transcriber.exe", // Windows
+	}
+
+	for _, path := range commonPaths {
+		if _, err := exec.LookPath(path); err == nil {
+			v.execPath = path
+			return true
+		}
+	}
+
+	return false
+}
+
+func (v *VoskProvider) Configure(config map[string]string) error {
+	if path, ok := config["exec_path"]; ok {
+		v.execPath = path
+	}
+	if path, ok := config["model_path"]; ok {
+		v.modelPath = path
+	}
+	return nil
+}
+
+// Transcribe runs the Vosk executable on audioPath and returns its combined
+// stdout/stderr, trimmed. On failure the tool's output is included in the error.
+func (v *VoskProvider) Transcribe(audioPath string) (string, error) {
+	if !v.IsAvailable() {
+		return "", fmt.Errorf("vosk not found")
+	}
+	args := []string{audioPath}
+	if v.modelPath != "" {
+		args = append([]string{"-m", v.modelPath}, args...)
+	}
+
+	cmd := exec.Command(v.execPath, args...)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("vosk failed: %w\nOutput: %s", err, output)
+	}
+	return strings.TrimSpace(string(output)), nil
+}
+
+// ============================================================================
+// PYTHON SCRIPT PROVIDER (for users with custom scripts)
+// ============================================================================
+
+type PythonScriptProvider struct {
+	scriptPath string
+}
+
+func NewPythonScriptProvider() *PythonScriptProvider {
+	return &PythonScriptProvider{}
+}
+
+func (p *PythonScriptProvider) Name() string {
+	return "python_script"
+}
+
+func (p *PythonScriptProvider) IsAvailable() bool {
+	if p.scriptPath == "" {
+		return false
+	}
+
+	if _, err := os.Stat(p.scriptPath); err != nil {
+		return false
+	}
+
+	// Check if python is available
+	for _, pythonCmd := range []string{"python3", "python", "py"} {
+		if _, err := exec.LookPath(pythonCmd); err == nil {
+			return true
+		}
+	}
+
+	return false
+}
+
+func (p *PythonScriptProvider) Configure(config map[string]string) error {
+	if path, ok := config["script_path"]; ok {
+		p.scriptPath = path
+	}
+	return nil
+}
+
+func (p *PythonScriptProvider) Transcribe(audioPath string) (string, error) {
+	if !p.IsAvailable() {
+		return "", fmt.Errorf("python script not configured or python not found")
+	}
+
+	// Try python commands in order of preference
+	var pythonCmd string
+	for _, cmd := range []string{"python3", "python", "py"} {
+		if _, err := exec.LookPath(cmd); err == nil {
+			pythonCmd = cmd
+			break
+		}
+	}
+
+	if pythonCmd == "" {
+		return "", fmt.Errorf("no python interpreter found")
+	}
+
+	cmd := exec.Command(pythonCmd, p.scriptPath, audioPath)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("python script failed: %v\nOutput: %s", err, output)
+	}
+
+	return strings.TrimSpace(string(output)), nil
+}
+
+// ============================================================================
+// OPENAI WHISPER API PROVIDER (via Python)
+// ============================================================================
+
+type OpenAIWhisperProvider struct {
+	apiKey     string
+	pythonPath string
+}
+
+func NewOpenAIWhisperProvider() *OpenAIWhisperProvider {
+	return &OpenAIWhisperProvider{}
+}
+
+func (o *OpenAIWhisperProvider) Name() string {
+	return "openai_whisper"
+}
+
+func (o *OpenAIWhisperProvider) IsAvailable() bool {
+	// Check if API key is configured
+	if o.apiKey == "" {
+		if envKey := os.Getenv("OPENAI_API_KEY"); envKey == "" {
+			return false
+		}
+	}
+
+	// Check if python is available
+	for _, pythonCmd := range []string{"python3", "python", "py"} {
+		if _, err := exec.LookPath(pythonCmd); err == nil {
+			return true
+		}
+	}
+
+	return false
+}
+
+func (o *OpenAIWhisperProvider) Configure(config map[string]string) error {
+	if key, ok := config["api_key"]; ok {
+		o.apiKey = key
+	}
+	if path, ok := config["python_path"]; ok {
+		o.pythonPath = path
+	}
+	return nil
+}
+
+// Transcribe sends audioPath to the OpenAI transcription API through an inline
+// Python script and returns the trimmed text the script prints on stdout.
+func (o *OpenAIWhisperProvider) Transcribe(audioPath string) (string, error) {
+	if !o.IsAvailable() {
+		return "", fmt.Errorf("OpenAI Whisper API not configured")
+	}
+
+	// The script runs via "python -c": nothing is written to disk, and the key is
+	// read from the environment instead of being spliced into the source text.
+	script := `
+import openai
+import sys
+import os
+
+api_key = os.getenv('OPENAI_API_KEY')
+if not api_key:
+	sys.exit("Error: OPENAI_API_KEY is not set")
+
+client = openai.OpenAI(api_key=api_key)
+
+# Transcribe audio file
+try:
+	with open(sys.argv[1], 'rb') as audio_file:
+		transcript = client.audio.transcriptions.create(
+			model="whisper-1",
+			file=audio_file
+		)
+	print(transcript.text)
+except Exception as e:
+	print(f"Error: {e}", file=sys.stderr)
+	sys.exit(1)
+`
+
+	// Prefer the configured interpreter, otherwise the first one found on PATH.
+	pythonCmd := o.pythonPath
+	for _, name := range []string{"python3", "python", "py"} {
+		if pythonCmd != "" {
+			break
+		}
+		if _, err := exec.LookPath(name); err == nil {
+			pythonCmd = name
+		}
+	}
+
+	// A configured key overrides an inherited one (the last duplicate in Env wins).
+	env := os.Environ()
+	if o.apiKey != "" {
+		env = append(env, "OPENAI_API_KEY="+o.apiKey)
+	}
+
+	// Capture stderr separately so warnings never leak into the transcript.
+	var stderr strings.Builder
+	cmd := exec.Command(pythonCmd, "-c", script, audioPath)
+	cmd.Env = env
+	cmd.Stderr = &stderr
+	output, err := cmd.Output()
+	if err != nil {
+		return "", fmt.Errorf("OpenAI Whisper API failed: %w\nOutput: %s", err, stderr.String())
+	}
+
+	return strings.TrimSpace(string(output)), nil
+}
+
+// ============================================================================
+// TRANSCRIPTION MANAGER
+// ============================================================================
+
+type TranscriptionManager struct {
+	providers map[string]TranscriptionProvider
+	config    TranscriptionConfig
+	configDir string
+}
+
+func NewTranscriptionManager(configDir string) *TranscriptionManager {
+	tm := &TranscriptionManager{
+		providers: make(map[string]TranscriptionProvider),
+		configDir: configDir,
+		config: TranscriptionConfig{
+			Enabled:         false,
+			DefaultProvider: "",
+			AutoTranscribe:  false,
+			ProviderConfigs: make(map[string]map[string]string),
+		},
+	}
+
+	// Register available providers
+	tm.RegisterProvider(NewWhisperCppProvider())
+	tm.RegisterProvider(NewVoskProvider())
+	tm.RegisterProvider(NewPythonScriptProvider())
+	tm.RegisterProvider(NewOpenAIWhisperProvider())
+
+	// Load config
+	tm.LoadConfig()
+
+	// Configure providers from saved config
+	for name, provider := range tm.providers {
+		if providerConfig, ok := tm.config.ProviderConfigs[name]; ok {
+			provider.Configure(providerConfig)
+		}
+	}
+
+	return tm
+}
+
+func (tm *TranscriptionManager) RegisterProvider(provider TranscriptionProvider) {
+	tm.providers[provider.Name()] = provider
+}
+
+func (tm *TranscriptionManager) GetAvailableProviders() []string {
+	var available []string
+	for name, provider := range tm.providers {
+		if provider.IsAvailable() {
+			available = append(available, name)
+		}
+	}
+	return available
+}
+
+func (tm *TranscriptionManager) GetAllProviders() []string {
+	var all []string
+	for name := range tm.providers {
+		all = append(all, name)
+	}
+	return all
+}
+
+func (tm *TranscriptionManager) IsProviderAvailable(name string) bool {
+	if provider, ok := tm.providers[name]; ok {
+		return provider.IsAvailable()
+	}
+	return false
+}
+
+func (tm *TranscriptionManager) Transcribe(audioPath string, providerName string) (*TranscriptionResult, error) {
+	if !tm.config.Enabled {
+		return nil, fmt.Errorf("transcription is disabled")
+	}
+
+	// Use default provider if none specified
+	if providerName == "" {
+		providerName = tm.config.DefaultProvider
+	}
+
+	if providerName == "" {
+		return nil, fmt.Errorf("no default provider configured")
+	}
+
+	provider, ok := tm.providers[providerName]
+	if !ok {
+		return nil, fmt.Errorf("provider not found: %s", providerName)
+	}
+
+	if !provider.IsAvailable() {
+		return nil, fmt.Errorf("provider not available: %s", providerName)
+	}
+
+	text, err := provider.Transcribe(audioPath)
+	if err != nil {
+		return nil, err
+	}
+
+	result := &TranscriptionResult{
+		Text:          text,
+		Provider:      providerName,
+		TranscribedAt: time.Now().Format(time.RFC3339),
+	}
+
+	return result, nil
+}
+
+func (tm *TranscriptionManager) LoadConfig() error {
+	configPath := filepath.Join(tm.configDir, "transcription.json")
+
+	data, err := os.ReadFile(configPath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil // Use defaults
+		}
+		return err
+	}
+
+	return json.Unmarshal(data, &tm.config)
+}
+
+// SaveConfig writes the transcription settings to transcription.json in configDir.
+func (tm *TranscriptionManager) SaveConfig() error {
+	data, err := json.MarshalIndent(tm.config, "", "  ")
+	if err != nil {
+		return err
+	}
+	// Owner-only mode on creation: provider configs may hold secrets such as api_key.
+	configPath := filepath.Join(tm.configDir, "transcription.json")
+	return os.WriteFile(configPath, data, 0600)
+}
+
+// ConfigureProvider updates a provider's configuration
+func (tm *TranscriptionManager) ConfigureProvider(providerName string, config map[string]string) error {
+	provider, ok := tm.providers[providerName]
+	if !ok {
+		return fmt.Errorf("provider not found: %s", providerName)
+	}
+
+	if err := provider.Configure(config); err != nil {
+		return err
+	}
+
+	// Save to config
+	if tm.config.ProviderConfigs == nil {
+		tm.config.ProviderConfigs = make(map[string]map[string]string)
+	}
+	tm.config.ProviderConfigs[providerName] = config
+	return tm.SaveConfig()
+}
+
+// SetEnabled enables or disables transcription
+func (tm *TranscriptionManager) SetEnabled(enabled bool) error {
+	tm.config.Enabled = enabled
+	return tm.SaveConfig()
+}
+
+// SetDefaultProvider sets the default transcription provider
+func (tm *TranscriptionManager) SetDefaultProvider(providerName string) error {
+	if _, ok := tm.providers[providerName]; !ok {
+		return fmt.Errorf("provider not found: %s", providerName)
+	}
+	tm.config.DefaultProvider = providerName
+	return tm.SaveConfig()
+}
+
+// SetAutoTranscribe enables or disables auto-transcription
+func (tm *TranscriptionManager) SetAutoTranscribe(auto bool) error {
+	tm.config.AutoTranscribe = auto
+	return tm.SaveConfig()
+}
+
+// GetConfig returns the current transcription configuration
+func (tm *TranscriptionManager) GetConfig() TranscriptionConfig {
+	return tm.config
+}
+
+// ShowTranscriptionSetupInstructions prints setup instructions for transcription providers
+func ShowTranscriptionSetupInstructions() {
+	fmt.Println("=== VoiceLog Transcription Setup ===")
+	fmt.Println()
+	fmt.Println("VoiceLog supports optional transcription through external tools.")
+	fmt.Println("No installation required - configure only if you want transcription.")
+	fmt.Println()
+	fmt.Println("Supported transcription engines:")
+	fmt.Println()
+	fmt.Println("1. whisper.cpp (Recommended - Local, Private)")
+	fmt.Println("   - High accuracy, supports many languages")
+	fmt.Println("   - Installation: https://github.com/ggerganov/whisper.cpp")
+	fmt.Println("   - Download model: https://huggingface.co/ggerganov/whisper.cpp")
+	fmt.Println("   - Quick start:")
+	fmt.Println("     git clone https://github.com/ggerganov/whisper.cpp")
+	fmt.Println("     cd whisper.cpp && make")
+	fmt.Println("     ./models/download-ggml-model.sh base.en")
+	fmt.Println()
+	fmt.Println("2. Vosk (Lightweight, Offline)")
+	fmt.Println("   - Fast, good for real-time transcription")
+	fmt.Println("   - Installation: https://alphacephei.com/vosk/")
+	fmt.Println("   - Download models from: https://alphacephei.com/vosk/models")
+	fmt.Println()
+	fmt.Println("3. OpenAI Whisper API (Cloud-based)")
+	fmt.Println("   - Highest accuracy, requires internet & API key")
+	fmt.Println("   - Set OPENAI_API_KEY environment variable")
+	fmt.Println("   - Install: pip install openai")
+	fmt.Println()
+	fmt.Println("4. Custom Python Script")
+	fmt.Println("   - Use your own script with any API (AssemblyAI, Rev.ai, etc.)")
+	fmt.Println("   - Script should accept audio file path and output text")
+	fmt.Println("   - Example template available in documentation")
+	fmt.Println()
+	fmt.Println("Configuration:")
+	fmt.Println("   Press Ctrl+S -> Navigate to 'Transcription Settings'")
+	fmt.Println("   Enable transcription and select your provider")
+	fmt.Println()
+	fmt.Println("Usage:")
+	fmt.Println("   Press Ctrl+T to transcribe the selected memo")
+	fmt.Println("   Enable auto-transcribe to automatically transcribe new recordings")
+	fmt.Println()
+}