From bb7f5efbe0976b0406f8f4d547dbe713635aa266 Mon Sep 17 00:00:00 2001 From: Tyler Stapler Date: Fri, 24 Apr 2026 08:47:56 -0700 Subject: [PATCH 1/6] feat(android/voice): on-device STT and LLM via SpeechRecognizer + Gemini Nano MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix Android microphone runtime permission — was silently denied with no prompt; now uses ActivityResultContracts.RequestPermission wired through MainActivity - Add AndroidSpeechRecognizerProvider: wraps android.speech.SpeechRecognizer as a DirectSpeechProvider (combined record+transcribe, no audio upload, works offline) with EXTRA_PREFER_OFFLINE=true and RMS amplitude for waveform animation - Add MlKitLlmFormatterProvider: on-device LLM formatting via ML Kit Prompt API (Gemini Nano through AICore); handles AVAILABLE/DOWNLOADABLE/DOWNLOADING states; returns user-friendly error while model downloads rather than blocking - Add DirectSpeechProvider interface to commonMain to support integrated pipelines that bypass the two-step record→STT path - Both on-device options are configurable via Settings → Voice Capture toggles; toggles only appear on devices that report availability - Pipeline priority: device LLM > Anthropic Claude > OpenAI > no-op - Bump minSdk 24→26 (required by com.google.mlkit:genai-prompt:1.0.0-beta2) - Add genai-prompt dependency to kmp androidMain Co-Authored-By: Claude Sonnet 4.6 --- androidApp/build.gradle.kts | 3 +- .../dev/stapler/stelekit/MainActivity.kt | 48 ++++++- kmp/build.gradle.kts | 3 + .../stelekit/voice/AndroidAudioRecorder.kt | 10 +- .../voice/AndroidSpeechRecognizerProvider.kt | 114 ++++++++++++++++ .../voice/MlKitLlmFormatterProvider.kt | 81 +++++++++++ .../kotlin/dev/stapler/stelekit/ui/App.kt | 14 +- .../ui/components/settings/SettingsDialog.kt | 4 + .../settings/VoiceCaptureSettings.kt | 126 +++++++++++++----- .../stelekit/voice/DirectSpeechProvider.kt | 15 +++ .../stelekit/voice/VoiceCaptureViewModel.kt | 85 
++++++------ .../stelekit/voice/VoicePipelineConfig.kt | 7 +- .../stelekit/voice/VoicePipelineFactory.kt | 16 ++- .../stapler/stelekit/voice/VoiceSettings.kt | 14 ++ 14 files changed, 457 insertions(+), 83 deletions(-) create mode 100644 kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt create mode 100644 kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/MlKitLlmFormatterProvider.kt create mode 100644 kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/DirectSpeechProvider.kt diff --git a/androidApp/build.gradle.kts b/androidApp/build.gradle.kts index ead54dc..9a33d92 100644 --- a/androidApp/build.gradle.kts +++ b/androidApp/build.gradle.kts @@ -13,7 +13,7 @@ android { defaultConfig { applicationId = "dev.stapler.stelekit" - minSdk = 24 + minSdk = 26 targetSdk = 36 versionCode = 1 versionName = (findProperty("appVersion") as? String ?: "0.1.0") @@ -66,4 +66,5 @@ dependencies { implementation(platform("androidx.compose:compose-bom:2024.09.02")) implementation("androidx.compose.ui:ui") implementation("androidx.compose.material3:material3") + } diff --git a/androidApp/src/main/kotlin/dev/stapler/stelekit/MainActivity.kt b/androidApp/src/main/kotlin/dev/stapler/stelekit/MainActivity.kt index babf894..d1e7a60 100644 --- a/androidApp/src/main/kotlin/dev/stapler/stelekit/MainActivity.kt +++ b/androidApp/src/main/kotlin/dev/stapler/stelekit/MainActivity.kt @@ -1,6 +1,8 @@ package dev.stapler.stelekit +import android.Manifest import android.content.Intent +import android.content.pm.PackageManager import android.net.Uri import android.os.Bundle import android.util.Log @@ -8,6 +10,7 @@ import androidx.activity.ComponentActivity import androidx.activity.compose.setContent import androidx.activity.enableEdgeToEdge import androidx.activity.result.contract.ActivityResultContracts +import androidx.core.content.ContextCompat import androidx.compose.runtime.getValue import androidx.compose.runtime.mutableStateOf import 
androidx.compose.runtime.remember @@ -17,7 +20,11 @@ import dev.stapler.stelekit.domain.UrlFetcherAndroid import dev.stapler.stelekit.platform.SteleKitContext import dev.stapler.stelekit.platform.PlatformFileSystem import dev.stapler.stelekit.ui.StelekitApp +import android.speech.SpeechRecognizer +import androidx.compose.runtime.LaunchedEffect import dev.stapler.stelekit.voice.AndroidAudioRecorder +import dev.stapler.stelekit.voice.AndroidSpeechRecognizerProvider +import dev.stapler.stelekit.voice.MlKitLlmFormatterProvider import dev.stapler.stelekit.voice.VoiceSettings import dev.stapler.stelekit.voice.buildVoicePipeline import dev.stapler.stelekit.platform.PlatformSettings @@ -26,6 +33,14 @@ import kotlinx.coroutines.CompletableDeferred class MainActivity : ComponentActivity() { private var pendingFolderPick: CompletableDeferred? = null + private var pendingMicPermission: CompletableDeferred? = null + + private val micPermissionLauncher = registerForActivityResult( + ActivityResultContracts.RequestPermission() + ) { granted -> + pendingMicPermission?.complete(granted) + pendingMicPermission = null + } private val folderPickerLauncher = registerForActivityResult( ActivityResultContracts.OpenDocumentTree() @@ -106,20 +121,47 @@ class MainActivity : ComponentActivity() { } } } - val audioRecorder = remember { AndroidAudioRecorder(this@MainActivity.applicationContext) } + val audioRecorder = remember { AndroidAudioRecorder(this@MainActivity.applicationContext, this@MainActivity::requestMicrophonePermission) } val voiceSettings = remember { VoiceSettings(PlatformSettings()) } - var voicePipeline by remember { mutableStateOf(buildVoicePipeline(audioRecorder, voiceSettings)) } + val deviceSttAvailable = remember { AndroidSpeechRecognizerProvider.isAvailable(this@MainActivity.applicationContext) } + val deviceSttProvider = remember { + if (deviceSttAvailable) AndroidSpeechRecognizerProvider(this@MainActivity.applicationContext) else null + } + val mlKitProvider = 
remember { MlKitLlmFormatterProvider.create() } + var deviceLlmAvailable by remember { mutableStateOf(false) } + LaunchedEffect(Unit) { + deviceLlmAvailable = mlKitProvider?.checkEligible() ?: false + } + fun buildPipeline() = buildVoicePipeline( + audioRecorder, + voiceSettings, + if (deviceSttAvailable && voiceSettings.getUseDeviceStt()) deviceSttProvider else null, + if (deviceLlmAvailable && voiceSettings.getUseDeviceLlm()) mlKitProvider else null, + ) + var voicePipeline by remember { mutableStateOf(buildPipeline()) } StelekitApp( fileSystem = fileSystem, graphPath = fileSystem.getDefaultGraphPath(), urlFetcher = UrlFetcherAndroid(), voicePipeline = voicePipeline, voiceSettings = voiceSettings, - onRebuildVoicePipeline = { voicePipeline = buildVoicePipeline(audioRecorder, voiceSettings) }, + onRebuildVoicePipeline = { voicePipeline = buildPipeline() }, + deviceSttAvailable = deviceSttAvailable, + deviceLlmAvailable = deviceLlmAvailable, ) } } + private suspend fun requestMicrophonePermission(): Boolean { + if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) + == PackageManager.PERMISSION_GRANTED + ) return true + val deferred = CompletableDeferred() + pendingMicPermission = deferred + runOnUiThread { micPermissionLauncher.launch(Manifest.permission.RECORD_AUDIO) } + return deferred.await() + } + companion object { private const val TAG = "MainActivity" /** Authority for AOSP ExternalStorageProvider — the only provider supported in v1. 
*/ diff --git a/kmp/build.gradle.kts b/kmp/build.gradle.kts index e2f57f6..695f466 100644 --- a/kmp/build.gradle.kts +++ b/kmp/build.gradle.kts @@ -175,6 +175,9 @@ kotlin { // Encrypted SharedPreferences for API key storage implementation("androidx.security:security-crypto:1.1.0-alpha06") + + // On-device LLM via Gemini Nano (Pixel 9+ and AICore-enabled OEM flagships) + implementation("com.google.mlkit:genai-prompt:1.0.0-beta2") } } diff --git a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidAudioRecorder.kt b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidAudioRecorder.kt index 3c17caf..d82ab22 100644 --- a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidAudioRecorder.kt +++ b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidAudioRecorder.kt @@ -25,7 +25,11 @@ import kotlin.math.sqrt private const val TAG = "AndroidAudioRecorder" -class AndroidAudioRecorder(private val context: Context) : AudioRecorder { +class AndroidAudioRecorder( + private val context: Context, + /** Called before recording starts; must return true if RECORD_AUDIO permission is granted. */ + private val requestMicPermission: (suspend () -> Boolean)? 
= null, +) : AudioRecorder { companion object { private const val SAMPLE_RATE = 16_000 @@ -51,6 +55,10 @@ class AndroidAudioRecorder(private val context: Context) : AudioRecorder { @Volatile private var pauseRequested = false override suspend fun startRecording(): PlatformAudioFile = withContext(Dispatchers.IO) { + if (requestMicPermission != null && !requestMicPermission()) { + return@withContext PlatformAudioFile("") + } + stopRequested = false pauseRequested = false diff --git a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt new file mode 100644 index 0000000..159a91a --- /dev/null +++ b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt @@ -0,0 +1,114 @@ +// Copyright (c) 2026 Tyler Stapler +// SPDX-License-Identifier: Elastic-2.0 +package dev.stapler.stelekit.voice + +import android.content.Context +import android.content.Intent +import android.os.Bundle +import android.os.Handler +import android.os.Looper +import android.speech.RecognitionListener +import android.speech.RecognizerIntent +import android.speech.SpeechRecognizer +import android.util.Log +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.asStateFlow +import kotlinx.coroutines.suspendCancellableCoroutine +import kotlinx.coroutines.withContext +import kotlin.coroutines.resume + +private const val TAG = "AndroidSpeechRecognizer" + +class AndroidSpeechRecognizerProvider(private val context: Context) : DirectSpeechProvider { + + companion object { + fun isAvailable(context: Context): Boolean = + SpeechRecognizer.isRecognitionAvailable(context) + } + + private val _amplitudeFlow = MutableStateFlow(0f) + override val amplitudeFlow: Flow = _amplitudeFlow.asStateFlow() + + @Volatile private var activeRecognizer: SpeechRecognizer? 
= null + private val mainHandler = Handler(Looper.getMainLooper()) + + override suspend fun listen(): TranscriptResult = suspendCancellableCoroutine { cont -> + mainHandler.post { + val recognizer = SpeechRecognizer.createSpeechRecognizer(context) + activeRecognizer = recognizer + + recognizer.setRecognitionListener(object : RecognitionListener { + override fun onReadyForSpeech(params: Bundle?) {} + override fun onBeginningOfSpeech() {} + override fun onBufferReceived(buffer: ByteArray?) {} + override fun onEndOfSpeech() {} + override fun onEvent(eventType: Int, params: Bundle?) {} + override fun onPartialResults(partialResults: Bundle?) {} + + override fun onRmsChanged(rmsdB: Float) { + // Map roughly -2..10 dB → 0..1 + _amplitudeFlow.value = ((rmsdB + 2f) / 12f).coerceIn(0f, 1f) + } + + override fun onResults(results: Bundle?) { + _amplitudeFlow.value = 0f + activeRecognizer = null + recognizer.destroy() + if (!cont.isActive) return + val text = results + ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION) + ?.firstOrNull() + Log.d(TAG, "onResults: text=${text?.take(80)}") + if (text.isNullOrBlank()) cont.resume(TranscriptResult.Empty) + else cont.resume(TranscriptResult.Success(text)) + } + + override fun onError(error: Int) { + _amplitudeFlow.value = 0f + activeRecognizer = null + recognizer.destroy() + if (!cont.isActive) return + Log.w(TAG, "onError: code=$error") + cont.resume(mapError(error)) + } + }) + + val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply { + putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) + putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, true) + putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 3_000L) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 1_500L) + } + recognizer.startListening(intent) + + cont.invokeOnCancellation { + mainHandler.post { + 
activeRecognizer?.let { + it.cancel() + it.destroy() + activeRecognizer = null + } + _amplitudeFlow.value = 0f + } + } + } + } + + override suspend fun stopListening() { + withContext(Dispatchers.Main) { + activeRecognizer?.stopListening() + } + } + + private fun mapError(error: Int): TranscriptResult = when (error) { + SpeechRecognizer.ERROR_NO_MATCH, + SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> TranscriptResult.Empty + SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> TranscriptResult.Failure.PermissionDenied + SpeechRecognizer.ERROR_NETWORK, + SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> TranscriptResult.Failure.NetworkError + else -> TranscriptResult.Failure.ApiError(error, "Speech recognition error (code $error)") + } +} diff --git a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/MlKitLlmFormatterProvider.kt b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/MlKitLlmFormatterProvider.kt new file mode 100644 index 0000000..2181931 --- /dev/null +++ b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/MlKitLlmFormatterProvider.kt @@ -0,0 +1,81 @@ +// Copyright (c) 2026 Tyler Stapler +// SPDX-License-Identifier: Elastic-2.0 +package dev.stapler.stelekit.voice + +import android.util.Log +import com.google.mlkit.genai.common.FeatureStatus +import com.google.mlkit.genai.prompt.Generation +import com.google.mlkit.genai.prompt.GenerativeModel +import kotlinx.coroutines.CancellationException + +private const val TAG = "MlKitLlmFormatter" + +/** + * On-device LLM formatter backed by ML Kit Prompt API (Gemini Nano via AICore). + * + * Supported devices: Pixel 9+ and major OEM flagships with AICore (Samsung S25, etc.). + * Output hard-capped at 256 tokens by the on-device model — suitable for short voice notes. + * API status: beta (com.google.mlkit:genai-prompt:1.0.0-beta2). 
+ */ +class MlKitLlmFormatterProvider private constructor( + private val model: GenerativeModel, +) : LlmFormatterProvider { + + companion object { + /** Creates the provider; returns null if the ML Kit library fails to initialise. */ + fun create(): MlKitLlmFormatterProvider? = runCatching { + MlKitLlmFormatterProvider(Generation.getClient()) + }.getOrElse { e -> + Log.w(TAG, "Failed to create GenerativeModel", e) + null + } + } + + /** Returns true when the device supports on-device inference (model available or will download). */ + suspend fun checkEligible(): Boolean = runCatching { + when (model.checkStatus()) { + FeatureStatus.AVAILABLE, + FeatureStatus.DOWNLOADABLE, + FeatureStatus.DOWNLOADING -> true + else -> false + } + }.getOrElse { e -> + Log.w(TAG, "checkStatus failed", e) + false + } + + override suspend fun format(transcript: String, systemPrompt: String): LlmResult { + return try { + when (model.checkStatus()) { + FeatureStatus.AVAILABLE -> { + Log.d(TAG, "Running on-device inference (${transcript.length} chars input)") + val response = model.generateContent(systemPrompt) + val text = response.candidates.firstOrNull()?.text?.trim() + if (text.isNullOrBlank()) { + LlmResult.Failure.ApiError(-1, "Empty response from on-device model") + } else { + Log.d(TAG, "On-device inference complete (${text.length} chars output)") + LlmResult.Success(text, LlmProviderSupport.detectTruncation(text)) + } + } + FeatureStatus.DOWNLOADABLE, + FeatureStatus.DOWNLOADING -> { + // AICore downloads the model in the background automatically. + // Blocking here would take several minutes — return a friendly retry message. 
+ LlmResult.Failure.ApiError( + -1, + "On-device model is downloading — try again in a few minutes" + ) + } + else -> { + LlmResult.Failure.ApiError(-1, "On-device LLM not supported on this device") + } + } + } catch (e: CancellationException) { + throw e + } catch (e: Exception) { + Log.e(TAG, "On-device inference error", e) + LlmResult.Failure.ApiError(-1, "On-device LLM error: ${e.message}") + } + } +} diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt index c3dfdf4..ce22dba 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt @@ -110,6 +110,8 @@ fun StelekitApp( voicePipeline: VoicePipelineConfig = remember { VoicePipelineConfig() }, voiceSettings: VoiceSettings? = null, onRebuildVoicePipeline: (() -> Unit)? = null, + deviceSttAvailable: Boolean = false, + deviceLlmAvailable: Boolean = false, spanRecorder: SpanRecorder = NoOpSpanRecorder, ) { val platformSettings = remember { PlatformSettings() } @@ -229,6 +231,8 @@ fun StelekitApp( voicePipeline = voicePipeline, voiceSettings = voiceSettings, onRebuildVoicePipeline = onRebuildVoicePipeline, + deviceSttAvailable = deviceSttAvailable, + deviceLlmAvailable = deviceLlmAvailable, spanRecorder = spanRecorder, ) } @@ -254,6 +258,8 @@ private fun GraphContent( voicePipeline: VoicePipelineConfig = VoicePipelineConfig(), voiceSettings: VoiceSettings? = null, onRebuildVoicePipeline: (() -> Unit)? 
= null, + deviceSttAvailable: Boolean = false, + deviceLlmAvailable: Boolean = false, spanRecorder: SpanRecorder = NoOpSpanRecorder, ) { CompositionLocalProvider(LocalSpanRecorder provides spanRecorder) { @@ -647,7 +653,7 @@ private fun GraphContent( onTap = { voiceCaptureViewModel.onMicTapped() }, onDismissError = { voiceCaptureViewModel.dismissError() }, onAutoReset = { voiceCaptureViewModel.resetToIdle() }, - amplitudeFlow = voicePipeline.audioRecorder.amplitudeFlow, + amplitudeFlow = voicePipeline.effectiveAmplitudeFlow, ) }, ) @@ -662,6 +668,8 @@ private fun GraphContent( fileSystem = fileSystem, voiceSettings = voiceSettings, onRebuildVoicePipeline = onRebuildVoicePipeline, + deviceSttAvailable = deviceSttAvailable, + deviceLlmAvailable = deviceLlmAvailable, frameMetric = frameMetricState, debugState = debugMenuState, onDebugStateChange = { newState -> @@ -863,6 +871,8 @@ private fun GraphDialogLayer( fileSystem: FileSystem, voiceSettings: VoiceSettings? = null, onRebuildVoicePipeline: (() -> Unit)? 
= null, + deviceSttAvailable: Boolean = false, + deviceLlmAvailable: Boolean = false, frameMetric: kotlinx.coroutines.flow.StateFlow, debugState: DebugMenuState = DebugMenuState(), onDebugStateChange: (DebugMenuState) -> Unit = {}, @@ -900,6 +910,8 @@ private fun GraphDialogLayer( onLeftHandedChange = { viewModel.setLeftHanded(it) }, voiceSettings = voiceSettings, onRebuildVoicePipeline = onRebuildVoicePipeline, + deviceSttAvailable = deviceSttAvailable, + deviceLlmAvailable = deviceLlmAvailable, ) appState.diskConflict?.let { conflict -> diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/SettingsDialog.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/SettingsDialog.kt index 0639e04..70785ae 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/SettingsDialog.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/SettingsDialog.kt @@ -32,6 +32,8 @@ fun SettingsDialog( onLeftHandedChange: (Boolean) -> Unit = {}, voiceSettings: VoiceSettings? = null, onRebuildVoicePipeline: (() -> Unit)? 
= null, + deviceSttAvailable: Boolean = false, + deviceLlmAvailable: Boolean = false, ) { if (visible) { Dialog( @@ -118,6 +120,8 @@ fun SettingsDialog( VoiceCaptureSettings( voiceSettings = voiceSettings, onRebuildPipeline = onRebuildVoicePipeline, + deviceSttAvailable = deviceSttAvailable, + deviceLlmAvailable = deviceLlmAvailable, ) } } diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt index 7d26066..a98b682 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt @@ -27,34 +27,60 @@ import dev.stapler.stelekit.voice.VoiceSettings fun VoiceCaptureSettings( voiceSettings: VoiceSettings, onRebuildPipeline: () -> Unit, + deviceSttAvailable: Boolean = false, + deviceLlmAvailable: Boolean = false, ) { var whisperKey by remember { mutableStateOf(voiceSettings.getWhisperApiKey() ?: "") } var anthropicKey by remember { mutableStateOf(voiceSettings.getAnthropicKey() ?: "") } var openAiKey by remember { mutableStateOf(voiceSettings.getOpenAiKey() ?: "") } var llmEnabled by remember { mutableStateOf(voiceSettings.getLlmEnabled()) } + var useDeviceStt by remember { mutableStateOf(voiceSettings.getUseDeviceStt()) } + var useDeviceLlm by remember { mutableStateOf(voiceSettings.getUseDeviceLlm()) } var saved by remember { mutableStateOf(false) } SettingsSection("Transcription (Speech-to-Text)") { - Text( - "Whisper API key — used for speech transcription (~\$0.003/min).", - style = MaterialTheme.typography.bodySmall, - color = MaterialTheme.colorScheme.onSurfaceVariant, - modifier = Modifier.padding(bottom = 8.dp), - ) - OutlinedTextField( - value = whisperKey, - onValueChange = { whisperKey = it; saved = false }, - label = { Text("OpenAI / Whisper API key") }, - 
visualTransformation = PasswordVisualTransformation(), - singleLine = true, - modifier = Modifier.fillMaxWidth(), - ) + if (deviceSttAvailable) { + Row( + modifier = Modifier.fillMaxWidth().padding(bottom = 8.dp), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically, + ) { + Text("Use on-device speech recognition", style = MaterialTheme.typography.bodyMedium) + Switch( + checked = useDeviceStt, + onCheckedChange = { useDeviceStt = it; saved = false }, + ) + } + if (useDeviceStt) { + Text( + "Transcription happens on-device — no API key or network required.", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + modifier = Modifier.padding(bottom = 8.dp), + ) + } + } + if (!deviceSttAvailable || !useDeviceStt) { + Text( + "Whisper API key — used for speech transcription (~\$0.003/min).", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + modifier = Modifier.padding(bottom = 8.dp), + ) + OutlinedTextField( + value = whisperKey, + onValueChange = { whisperKey = it; saved = false }, + label = { Text("OpenAI / Whisper API key") }, + visualTransformation = PasswordVisualTransformation(), + singleLine = true, + modifier = Modifier.fillMaxWidth(), + ) + } } SettingsSection("LLM Formatting") { Text( - "Formats the raw transcript into Logseq outliner syntax with bullet points and [[wikilinks]]. 
" + - "Provide one key — Anthropic is used if both are set.", + "Formats the raw transcript into Logseq outliner syntax with bullet points and [[wikilinks]].", style = MaterialTheme.typography.bodySmall, color = MaterialTheme.colorScheme.onSurfaceVariant, modifier = Modifier.padding(bottom = 8.dp), @@ -71,24 +97,54 @@ fun VoiceCaptureSettings( ) } if (llmEnabled) { - OutlinedTextField( - value = anthropicKey, - onValueChange = { anthropicKey = it; saved = false }, - label = { Text("Anthropic (Claude) API key") }, - visualTransformation = PasswordVisualTransformation(), - singleLine = true, - modifier = Modifier.fillMaxWidth(), - ) - OutlinedTextField( - value = openAiKey, - onValueChange = { openAiKey = it; saved = false }, - label = { Text("OpenAI / compatible API key") }, - visualTransformation = PasswordVisualTransformation(), - singleLine = true, - modifier = Modifier - .fillMaxWidth() - .padding(top = 8.dp), - ) + if (deviceLlmAvailable) { + Row( + modifier = Modifier.fillMaxWidth().padding(bottom = 8.dp), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically, + ) { + Text("Use on-device LLM (Gemini Nano)", style = MaterialTheme.typography.bodyMedium) + Switch( + checked = useDeviceLlm, + onCheckedChange = { useDeviceLlm = it; saved = false }, + ) + } + if (useDeviceLlm) { + Text( + "Formatting runs on-device — no API key or network required. 
" + + "256-token output limit; longer notes may be truncated.", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + modifier = Modifier.padding(bottom = 8.dp), + ) + } + } + if (!deviceLlmAvailable || !useDeviceLlm) { + Text( + "Provide one key — Anthropic is used if both are set.", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + modifier = Modifier.padding(bottom = 8.dp), + ) + OutlinedTextField( + value = anthropicKey, + onValueChange = { anthropicKey = it; saved = false }, + label = { Text("Anthropic (Claude) API key") }, + visualTransformation = PasswordVisualTransformation(), + singleLine = true, + modifier = Modifier.fillMaxWidth(), + ) + OutlinedTextField( + value = openAiKey, + onValueChange = { openAiKey = it; saved = false }, + label = { Text("OpenAI / compatible API key") }, + visualTransformation = PasswordVisualTransformation(), + singleLine = true, + modifier = Modifier + .fillMaxWidth() + .padding(top = 8.dp), + ) + } } } @@ -104,6 +160,8 @@ fun VoiceCaptureSettings( voiceSettings.setAnthropicKey(anthropicKey) voiceSettings.setOpenAiKey(openAiKey) voiceSettings.setLlmEnabled(llmEnabled) + voiceSettings.setUseDeviceStt(useDeviceStt) + voiceSettings.setUseDeviceLlm(useDeviceLlm) saved = true onRebuildPipeline() }, diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/DirectSpeechProvider.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/DirectSpeechProvider.kt new file mode 100644 index 0000000..8c19dad --- /dev/null +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/DirectSpeechProvider.kt @@ -0,0 +1,15 @@ +// Copyright (c) 2026 Tyler Stapler +// SPDX-License-Identifier: Elastic-2.0 +package dev.stapler.stelekit.voice + +import kotlinx.coroutines.flow.Flow + +/** Combines recording and transcription in a single step (e.g. Android SpeechRecognizer). 
*/ +interface DirectSpeechProvider { + /** Records and transcribes; suspends until the user stops or silence is detected. */ + suspend fun listen(): TranscriptResult + /** Signals an in-progress listen to stop and return results. */ + suspend fun stopListening() {} + /** Optional RMS amplitude stream for animated feedback. */ + val amplitudeFlow: Flow? get() = null +} diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt index 6f9e76d..e8e0ba7 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt @@ -35,7 +35,8 @@ class VoiceCaptureViewModel( when (_state.value) { is VoiceCaptureState.Idle -> startPipeline() is VoiceCaptureState.Recording -> scope.launch { - pipeline.audioRecorder.stopRecording() + pipeline.directSpeechProvider?.stopListening() + ?: pipeline.audioRecorder.stopRecording() } else -> Unit } @@ -57,51 +58,55 @@ class VoiceCaptureViewModel( private fun startPipeline() { pipelineJob = scope.launch { - var file: PlatformAudioFile? 
= null - try { - _state.value = VoiceCaptureState.Recording - val result = pipeline.audioRecorder.startRecording() - file = result - - if (result.isEmpty) { + _state.value = VoiceCaptureState.Recording + val transcriptResult = if (pipeline.directSpeechProvider != null) { + pipeline.directSpeechProvider.listen() + } else { + recordAndTranscribe() + } + when (transcriptResult) { + null -> return@launch // error already set inside recordAndTranscribe + TranscriptResult.Empty -> { + _state.value = VoiceCaptureState.Error( + PipelineStage.TRANSCRIBING, "Nothing was captured — try again" + ) + } + is TranscriptResult.Failure.ApiError -> { + _state.value = VoiceCaptureState.Error( + PipelineStage.TRANSCRIBING, transcriptResult.message + ) + } + TranscriptResult.Failure.NetworkError -> { + _state.value = VoiceCaptureState.Error( + PipelineStage.TRANSCRIBING, "Network error — check your connection" + ) + } + TranscriptResult.Failure.PermissionDenied -> { _state.value = VoiceCaptureState.Error( PipelineStage.RECORDING, "Microphone permission denied" ) - return@launch } + is TranscriptResult.Success -> processTranscript(transcriptResult.text.trim()) + } + } + } - _state.value = VoiceCaptureState.Transcribing - val audioData = pipeline.audioRecorder.readBytes(result) - when (val sttResult = pipeline.sttProvider.transcribe(audioData)) { - TranscriptResult.Empty -> { - _state.value = VoiceCaptureState.Error( - PipelineStage.TRANSCRIBING, "Nothing was captured — try again" - ) - return@launch - } - is TranscriptResult.Failure.ApiError -> { - _state.value = VoiceCaptureState.Error( - PipelineStage.TRANSCRIBING, sttResult.message - ) - return@launch - } - TranscriptResult.Failure.NetworkError -> { - _state.value = VoiceCaptureState.Error( - PipelineStage.TRANSCRIBING, "Network error — check your connection" - ) - return@launch - } - TranscriptResult.Failure.PermissionDenied -> { - _state.value = VoiceCaptureState.Error( - PipelineStage.RECORDING, "Microphone permission denied" - ) 
- return@launch - } - is TranscriptResult.Success -> processTranscript(sttResult.text.trim()) - } - } finally { - file?.takeIf { !it.isEmpty }?.let { pipeline.audioRecorder.deleteRecording(it) } + /** Records via [AudioRecorder] then transcribes; returns null and sets error state on failure. */ + private suspend fun recordAndTranscribe(): TranscriptResult? { + var file: PlatformAudioFile? = null + return try { + val result = pipeline.audioRecorder.startRecording() + file = result + if (result.isEmpty) { + _state.value = VoiceCaptureState.Error( + PipelineStage.RECORDING, "Microphone permission denied" + ) + return null } + _state.value = VoiceCaptureState.Transcribing + pipeline.sttProvider.transcribe(pipeline.audioRecorder.readBytes(result)) + } finally { + file?.takeIf { !it.isEmpty }?.let { pipeline.audioRecorder.deleteRecording(it) } } } diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt index 961d47e..a485db1 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt @@ -20,4 +20,9 @@ class VoicePipelineConfig( val llmProvider: LlmFormatterProvider = NoOpLlmFormatterProvider(), val systemPrompt: String = DEFAULT_VOICE_SYSTEM_PROMPT, val minWordCount: Int = 10, -) + /** When set, replaces the (record → STT) two-step path with a single integrated listen. */ + val directSpeechProvider: DirectSpeechProvider? = null, +) { + /** Amplitude flow for waveform animation: prefers directSpeechProvider, falls back to audioRecorder. 
*/ + val effectiveAmplitudeFlow get() = directSpeechProvider?.amplitudeFlow ?: audioRecorder.amplitudeFlow +} diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineFactory.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineFactory.kt index 6d98f04..288d45f 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineFactory.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineFactory.kt @@ -2,7 +2,12 @@ // SPDX-License-Identifier: Elastic-2.0 package dev.stapler.stelekit.voice -fun buildVoicePipeline(audioRecorder: AudioRecorder, settings: VoiceSettings): VoicePipelineConfig { +fun buildVoicePipeline( + audioRecorder: AudioRecorder, + settings: VoiceSettings, + directSpeechProvider: DirectSpeechProvider? = null, + deviceLlmProvider: LlmFormatterProvider? = null, +): VoicePipelineConfig { val sttProvider: SpeechToTextProvider = settings.getWhisperApiKey() ?.let { WhisperSpeechToTextProvider.withDefaults(it) } ?: SpeechToTextProvider { _ -> @@ -13,10 +18,17 @@ fun buildVoicePipeline(audioRecorder: AudioRecorder, settings: VoiceSettings): V } val llmProvider: LlmFormatterProvider = if (!settings.getLlmEnabled()) { NoOpLlmFormatterProvider() + } else if (deviceLlmProvider != null && settings.getUseDeviceLlm()) { + deviceLlmProvider } else { settings.getAnthropicKey()?.let { ClaudeLlmFormatterProvider.withDefaults(it) } ?: settings.getOpenAiKey()?.let { OpenAiLlmFormatterProvider.withDefaults(it) } ?: NoOpLlmFormatterProvider() } - return VoicePipelineConfig(audioRecorder = audioRecorder, sttProvider = sttProvider, llmProvider = llmProvider) + return VoicePipelineConfig( + audioRecorder = audioRecorder, + sttProvider = sttProvider, + llmProvider = llmProvider, + directSpeechProvider = directSpeechProvider, + ) } diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt index ada7a42..e127bbd 
100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt @@ -30,10 +30,24 @@ class VoiceSettings(private val platformSettings: PlatformSettings) { fun setLlmEnabled(enabled: Boolean) = platformSettings.putBoolean(KEY_LLM_ENABLED, enabled) + fun getUseDeviceStt(): Boolean = + platformSettings.getBoolean(KEY_USE_DEVICE_STT, false) + + fun setUseDeviceStt(enabled: Boolean) = + platformSettings.putBoolean(KEY_USE_DEVICE_STT, enabled) + + fun getUseDeviceLlm(): Boolean = + platformSettings.getBoolean(KEY_USE_DEVICE_LLM, false) + + fun setUseDeviceLlm(enabled: Boolean) = + platformSettings.putBoolean(KEY_USE_DEVICE_LLM, enabled) + companion object { private const val KEY_WHISPER = "voice.whisper_key" private const val KEY_ANTHROPIC = "voice.anthropic_key" private const val KEY_OPENAI = "voice.openai_key" private const val KEY_LLM_ENABLED = "voice.llm_enabled" + private const val KEY_USE_DEVICE_STT = "voice.use_device_stt" + private const val KEY_USE_DEVICE_LLM = "voice.use_device_llm" } } From b72c42045f7fa86e667c24a9a5393738007e0a9f Mon Sep 17 00:00:00 2001 From: Tyler Stapler Date: Fri, 24 Apr 2026 11:16:18 -0700 Subject: [PATCH 2/6] fix(android): bump kmp minSdk to 26 for genai-prompt library genai-prompt:1.0.0-beta2 declares minSdkVersion 26 in its manifest. The kmp library module had its own android { defaultConfig { minSdk } } block at 24, causing processDebugUnitTestManifest to fail. 
Co-Authored-By: Claude Sonnet 4.6 --- kmp/build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kmp/build.gradle.kts b/kmp/build.gradle.kts index 695f466..c6c7ab2 100644 --- a/kmp/build.gradle.kts +++ b/kmp/build.gradle.kts @@ -629,7 +629,7 @@ android { namespace = "dev.stapler.stelekit" defaultConfig { - minSdk = 24 + minSdk = 26 } compileOptions { From dc66ebeaed21d53721a2a9231388ab754f6e3bbe Mon Sep 17 00:00:00 2001 From: Tyler Stapler Date: Fri, 24 Apr 2026 11:25:48 -0700 Subject: [PATCH 3/6] fix(ios): replace Dispatchers.IO with PlatformDispatcher.DB in commonMain PerformanceDashboard.kt used Dispatchers.IO directly in commonMain, which is a JVM-only symbol. compileCommonMainKotlinMetadata fails when it encounters it because Dispatchers.IO is absent from the multiplatform metadata API surface. queryPercentiles() is a blocking SQLite call so PlatformDispatcher.DB is the correct dispatcher per the project's own dispatcher matrix. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/ci-ios.yml | 21 +++++++++---------- .../ui/components/PerformanceDashboard.kt | 4 ++-- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci-ios.yml b/.github/workflows/ci-ios.yml index c91e685..2f06905 100644 --- a/.github/workflows/ci-ios.yml +++ b/.github/workflows/ci-ios.yml @@ -27,17 +27,16 @@ jobs: ios-framework: name: iOS Framework Link Check runs-on: macos-latest - # Two pre-existing blockers prevent iOS compilation from passing: - # 1. Gradle issue #17559 — classloader mismatch: in a multi-project build where :kmp uses - # kotlin-multiplatform and :androidApp uses AGP, KotlinNativeBundleBuildService is loaded - # by different classloaders. The KGP sets the service on tasks via - # `Property.value(provider)` which Gradle 8.8+ rejects - # when the property and provider types are the same class from different loaders. Fix - # requires JetBrains to annotate with @ServiceReference or use Property upstream. 
- # No Kotlin version (2.1.x, 2.2.x, 2.3.x) contains this fix. Affects all iOS tasks. - # 2. commonMain contains JVM-specific symbols (java.*, System, Dispatchers.IO, OpenTelemetry) - # that fail metadata compilation against the full multiplatform API surface. - # Neither issue is introduced by this PR. Mark the job non-blocking until they are fixed. + # One pre-existing blocker prevents full iOS compilation from passing: + # Gradle issue #17559 — classloader mismatch: in a multi-project build where :kmp uses + # kotlin-multiplatform and :androidApp uses AGP, KotlinNativeBundleBuildService is loaded + # by different classloaders. The KGP sets the service on tasks via + # `Property.value(provider)` which Gradle 8.8+ rejects + # when the property and provider types are the same class from different loaders. Fix + # requires JetBrains to annotate with @ServiceReference or use Property upstream. + # No Kotlin version (2.1.x, 2.2.x, 2.3.x) contains this fix. Affects compileKotlinIos* + # tasks but NOT compileCommonMainKotlinMetadata (which is what we run here). + # Keep non-blocking until we can verify compileCommonMainKotlinMetadata passes consistently. 
continue-on-error: true if: github.event.pull_request.draft == false diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/PerformanceDashboard.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/PerformanceDashboard.kt index 9a6987a..0cfa3c6 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/PerformanceDashboard.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/PerformanceDashboard.kt @@ -36,7 +36,7 @@ import dev.stapler.stelekit.performance.RingBufferSpanExporter import dev.stapler.stelekit.performance.SerializedSpan import dev.stapler.stelekit.performance.SpanRepository import dev.stapler.stelekit.performance.TraceEvent -import kotlinx.coroutines.Dispatchers +import dev.stapler.stelekit.coroutines.PlatformDispatcher import kotlinx.coroutines.delay import kotlinx.coroutines.launch import kotlinx.coroutines.withContext @@ -82,7 +82,7 @@ private fun HistogramsTab(histogramWriter: HistogramWriter?) { val summaries by produceState>(emptyMap(), histogramWriter) { while (true) { if (histogramWriter != null) { - val result = withContext(Dispatchers.IO) { + val result = withContext(PlatformDispatcher.DB) { operations .mapNotNull { op -> histogramWriter.queryPercentiles(op)?.let { op to it } } .toMap() From fd74bb3a42118a1eab106b78e437d2e530932df3 Mon Sep 17 00:00:00 2001 From: Tyler Stapler Date: Fri, 24 Apr 2026 12:51:50 -0700 Subject: [PATCH 4/6] fix(ci): exclude Kotlin IC state from Gradle home cache in Android job Kotlin incremental compilation stores per-project metadata in ~/.gradle/caches/kotlin-build-*/. When a previous CI run compiled PlatformSettings.kt with `: Settings` (commit dc1b51be9) the IC state was saved in the Gradle home cache restored by setup-gradle. 
Subsequent runs restore that stale metadata even though the source no longer has the supertype, causing a spurious expect/actual mismatch: expect: PlatformSettings : Settings actual: PlatformSettings : Any Excluding caches/kotlin-build-* forces a clean IC state per run, eliminating cross-branch metadata pollution. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ba19e8..a4b5300 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,6 +75,9 @@ jobs: with: gradle-home-cache-cleanup: true cache-encryption-key: ${{ secrets.GRADLE_ENCRYPTION_KEY }} + # Exclude Kotlin IC state — stale metadata across branches causes spurious + # expect/actual mismatch errors (e.g. PlatformSettings supertype mismatch). + gradle-home-cache-excludes: caches/kotlin-build-* - name: Run Android unit tests run: ./gradlew :kmp:testDebugUnitTest --no-daemon --build-cache From edbf46ce81800367486b452b49aae1c70dba301e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 25 Apr 2026 00:12:33 +0000 Subject: [PATCH 5/6] fix: address all PR review comments - MainActivity: rebuild voice pipeline automatically when deviceLlmAvailable flips via LaunchedEffect(deviceLlmAvailable) - AndroidAudioRecorder: move flag resets before permission check and add stopRequested guard after permission grant to prevent spurious recordings - AndroidSpeechRecognizerProvider: move invokeOnCancellation before mainHandler.post and wrap recognizer creation/start in try/catch so thrown exceptions don't leave the coroutine stuck forever - VoiceCaptureViewModelTest: add 4 tests covering DirectSpeechProvider path (success, PermissionDenied, Empty, cancel) - kmp/build.gradle.kts: minSdk=26 already aligned (no change needed) Agent-Logs-Url: https://github.com/tstapler/stelekit/sessions/c6a0afad-d76c-48da-90b0-3565a04aeb3f 
Co-authored-by: tstapler <3860386+tstapler@users.noreply.github.com> --- .../dev/stapler/stelekit/MainActivity.kt | 3 + .../stelekit/voice/AndroidAudioRecorder.kt | 8 +- .../voice/AndroidSpeechRecognizerProvider.kt | 111 ++++++++++-------- .../voice/VoiceCaptureViewModelTest.kt | 76 ++++++++++++ 4 files changed, 146 insertions(+), 52 deletions(-) diff --git a/androidApp/src/main/kotlin/dev/stapler/stelekit/MainActivity.kt b/androidApp/src/main/kotlin/dev/stapler/stelekit/MainActivity.kt index d1e7a60..52332d2 100644 --- a/androidApp/src/main/kotlin/dev/stapler/stelekit/MainActivity.kt +++ b/androidApp/src/main/kotlin/dev/stapler/stelekit/MainActivity.kt @@ -139,6 +139,9 @@ class MainActivity : ComponentActivity() { if (deviceLlmAvailable && voiceSettings.getUseDeviceLlm()) mlKitProvider else null, ) var voicePipeline by remember { mutableStateOf(buildPipeline()) } + LaunchedEffect(deviceLlmAvailable) { + voicePipeline = buildPipeline() + } StelekitApp( fileSystem = fileSystem, graphPath = fileSystem.getDefaultGraphPath(), diff --git a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidAudioRecorder.kt b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidAudioRecorder.kt index d82ab22..a2bd1e9 100644 --- a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidAudioRecorder.kt +++ b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidAudioRecorder.kt @@ -55,12 +55,16 @@ class AndroidAudioRecorder( @Volatile private var pauseRequested = false override suspend fun startRecording(): PlatformAudioFile = withContext(Dispatchers.IO) { + stopRequested = false + pauseRequested = false + if (requestMicPermission != null && !requestMicPermission()) { return@withContext PlatformAudioFile("") } - stopRequested = false - pauseRequested = false + if (stopRequested) { + return@withContext PlatformAudioFile("") + } val outputFile = File(context.cacheDir, "voice_${System.currentTimeMillis()}.m4a") val audioManager = 
context.getSystemService(Context.AUDIO_SERVICE) as AudioManager diff --git a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt index 159a91a..62e8bf8 100644 --- a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt +++ b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt @@ -35,63 +35,74 @@ class AndroidSpeechRecognizerProvider(private val context: Context) : DirectSpee private val mainHandler = Handler(Looper.getMainLooper()) override suspend fun listen(): TranscriptResult = suspendCancellableCoroutine { cont -> - mainHandler.post { - val recognizer = SpeechRecognizer.createSpeechRecognizer(context) - activeRecognizer = recognizer - - recognizer.setRecognitionListener(object : RecognitionListener { - override fun onReadyForSpeech(params: Bundle?) {} - override fun onBeginningOfSpeech() {} - override fun onBufferReceived(buffer: ByteArray?) {} - override fun onEndOfSpeech() {} - override fun onEvent(eventType: Int, params: Bundle?) {} - override fun onPartialResults(partialResults: Bundle?) {} - - override fun onRmsChanged(rmsdB: Float) { - // Map roughly -2..10 dB → 0..1 - _amplitudeFlow.value = ((rmsdB + 2f) / 12f).coerceIn(0f, 1f) - } - - override fun onResults(results: Bundle?) 
{ - _amplitudeFlow.value = 0f + cont.invokeOnCancellation { + mainHandler.post { + activeRecognizer?.let { + it.cancel() + it.destroy() activeRecognizer = null - recognizer.destroy() - if (!cont.isActive) return - val text = results - ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION) - ?.firstOrNull() - Log.d(TAG, "onResults: text=${text?.take(80)}") - if (text.isNullOrBlank()) cont.resume(TranscriptResult.Empty) - else cont.resume(TranscriptResult.Success(text)) } + _amplitudeFlow.value = 0f + } + } - override fun onError(error: Int) { - _amplitudeFlow.value = 0f - activeRecognizer = null - recognizer.destroy() - if (!cont.isActive) return - Log.w(TAG, "onError: code=$error") - cont.resume(mapError(error)) - } - }) + mainHandler.post { + var recognizer: SpeechRecognizer? = null + try { + recognizer = SpeechRecognizer.createSpeechRecognizer(context) + activeRecognizer = recognizer - val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply { - putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) - putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, true) - putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1) - putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 3_000L) - putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 1_500L) - } - recognizer.startListening(intent) + recognizer.setRecognitionListener(object : RecognitionListener { + override fun onReadyForSpeech(params: Bundle?) {} + override fun onBeginningOfSpeech() {} + override fun onBufferReceived(buffer: ByteArray?) {} + override fun onEndOfSpeech() {} + override fun onEvent(eventType: Int, params: Bundle?) {} + override fun onPartialResults(partialResults: Bundle?) 
{} - cont.invokeOnCancellation { - mainHandler.post { - activeRecognizer?.let { - it.cancel() - it.destroy() + override fun onRmsChanged(rmsdB: Float) { + // Map roughly -2..10 dB → 0..1 + _amplitudeFlow.value = ((rmsdB + 2f) / 12f).coerceIn(0f, 1f) + } + + override fun onResults(results: Bundle?) { + _amplitudeFlow.value = 0f activeRecognizer = null + recognizer.destroy() + if (!cont.isActive) return + val text = results + ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION) + ?.firstOrNull() + Log.d(TAG, "onResults: text=${text?.take(80)}") + if (text.isNullOrBlank()) cont.resume(TranscriptResult.Empty) + else cont.resume(TranscriptResult.Success(text)) } - _amplitudeFlow.value = 0f + + override fun onError(error: Int) { + _amplitudeFlow.value = 0f + activeRecognizer = null + recognizer.destroy() + if (!cont.isActive) return + Log.w(TAG, "onError: code=$error") + cont.resume(mapError(error)) + } + }) + + val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply { + putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) + putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, true) + putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 3_000L) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 1_500L) + } + recognizer.startListening(intent) + } catch (t: Throwable) { + _amplitudeFlow.value = 0f + activeRecognizer = null + recognizer?.destroy() + Log.w(TAG, "Failed to start speech recognition", t) + if (cont.isActive) { + cont.resume(mapError(SpeechRecognizer.ERROR_CLIENT)) } } } diff --git a/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt b/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt index 0803022..60723b4 100644 --- a/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt +++ 
b/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt @@ -413,6 +413,82 @@ class VoiceCaptureViewModelTest { assertIs(vm.state.first()) } + // --- DirectSpeechProvider path --- + + @Test + fun `directSpeechProvider success path reaches Done state`() = runTest { + val transcript = "this is a test transcript with more than ten words total here" + val fakeDirectProvider = object : DirectSpeechProvider { + override suspend fun listen(): TranscriptResult = TranscriptResult.Success(transcript) + } + val vm = VoiceCaptureViewModel( + VoicePipelineConfig(directSpeechProvider = fakeDirectProvider), + makeJournalService(), this, + ) + + vm.onMicTapped() + advanceUntilIdle() + + assertIs(vm.state.first()) + } + + @Test + fun `directSpeechProvider PermissionDenied emits Error at RECORDING`() = runTest { + val fakeDirectProvider = object : DirectSpeechProvider { + override suspend fun listen(): TranscriptResult = TranscriptResult.Failure.PermissionDenied + } + val vm = VoiceCaptureViewModel( + VoicePipelineConfig(directSpeechProvider = fakeDirectProvider), + makeJournalService(), this, + ) + + vm.onMicTapped() + advanceUntilIdle() + + val state = vm.state.first() + assertIs(state) + assertEquals(PipelineStage.RECORDING, state.stage) + } + + @Test + fun `directSpeechProvider Empty result emits Error at TRANSCRIBING`() = runTest { + val fakeDirectProvider = object : DirectSpeechProvider { + override suspend fun listen(): TranscriptResult = TranscriptResult.Empty + } + val vm = VoiceCaptureViewModel( + VoicePipelineConfig(directSpeechProvider = fakeDirectProvider), + makeJournalService(), this, + ) + + vm.onMicTapped() + advanceUntilIdle() + + val state = vm.state.first() + assertIs(state) + assertEquals(PipelineStage.TRANSCRIBING, state.stage) + } + + @Test + fun `directSpeechProvider cancel during Recording resets to Idle`() = runTest { + val fakeDirectProvider = object : DirectSpeechProvider { + override suspend fun listen(): TranscriptResult { + 
delay(10_000) + return TranscriptResult.Empty + } + } + val vm = VoiceCaptureViewModel( + VoicePipelineConfig(directSpeechProvider = fakeDirectProvider), + makeJournalService(), this, + ) + + vm.onMicTapped() + delay(1) + assertIs(vm.state.first()) + + vm.cancel() + assertIs(vm.state.first()) + } + @Test fun `transcript over 10000 chars is truncated before LLM`() = runTest { val longTranscript = "word ".repeat(2_500) // 12,500 chars From 9c0825ec56c142afcbd04b11ed5a45fb94517d40 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 25 Apr 2026 00:14:06 +0000 Subject: [PATCH 6/6] fix: guard against early cancellation before SpeechRecognizer is created Add an isActive check inside mainHandler.post after setting activeRecognizer so that if cancellation fires before the post runs (cleanup was a no-op), the recognizer is destroyed immediately rather than starting a zombie session. Agent-Logs-Url: https://github.com/tstapler/stelekit/sessions/c6a0afad-d76c-48da-90b0-3565a04aeb3f Co-authored-by: tstapler <3860386+tstapler@users.noreply.github.com> --- .../stelekit/voice/AndroidSpeechRecognizerProvider.kt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt index 62e8bf8..84e556c 100644 --- a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt +++ b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt @@ -52,6 +52,13 @@ class AndroidSpeechRecognizerProvider(private val context: Context) : DirectSpee recognizer = SpeechRecognizer.createSpeechRecognizer(context) activeRecognizer = recognizer + + // Guard against cancellation that fired before this post ran + if (!cont.isActive) { + recognizer.destroy() + activeRecognizer = null + return@post + } + 
recognizer.setRecognitionListener(object : RecognitionListener { override fun onReadyForSpeech(params: Bundle?) {} override fun onBeginningOfSpeech() {}