Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions .github/workflows/ci-ios.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,16 @@ jobs:
ios-framework:
name: iOS Framework Link Check
runs-on: macos-latest
# Two pre-existing blockers prevent iOS compilation from passing:
# 1. Gradle issue #17559 — classloader mismatch: in a multi-project build where :kmp uses
# kotlin-multiplatform and :androidApp uses AGP, KotlinNativeBundleBuildService is loaded
# by different classloaders. The KGP sets the service on tasks via
# `Property<KotlinNativeBundleBuildService>.value(provider)` which Gradle 8.8+ rejects
# when the property and provider types are the same class from different loaders. Fix
# requires JetBrains to annotate with @ServiceReference or use Property<Any> upstream.
# No Kotlin version (2.1.x, 2.2.x, 2.3.x) contains this fix. Affects all iOS tasks.
# 2. commonMain contains JVM-specific symbols (java.*, System, Dispatchers.IO, OpenTelemetry)
# that fail metadata compilation against the full multiplatform API surface.
# Neither issue is introduced by this PR. Mark the job non-blocking until they are fixed.
# One pre-existing blocker prevents full iOS compilation from passing:
# Gradle issue #17559 — classloader mismatch: in a multi-project build where :kmp uses
# kotlin-multiplatform and :androidApp uses AGP, KotlinNativeBundleBuildService is loaded
# by different classloaders. The KGP sets the service on tasks via
# `Property<KotlinNativeBundleBuildService>.value(provider)` which Gradle 8.8+ rejects
# when the property and provider types are the same class from different loaders. Fix
# requires JetBrains to annotate with @ServiceReference or use Property<Any> upstream.
# No Kotlin version (2.1.x, 2.2.x, 2.3.x) contains this fix. Affects compileKotlinIos*
# tasks but NOT compileCommonMainKotlinMetadata (which is what we run here).
# Keep non-blocking until we can verify compileCommonMainKotlinMetadata passes consistently.
continue-on-error: true
if: github.event.pull_request.draft == false

Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ jobs:
with:
gradle-home-cache-cleanup: true
cache-encryption-key: ${{ secrets.GRADLE_ENCRYPTION_KEY }}
# Exclude Kotlin IC state — stale metadata across branches causes spurious
# expect/actual mismatch errors (e.g. PlatformSettings supertype mismatch).
gradle-home-cache-excludes: caches/kotlin-build-*

- name: Run Android unit tests
run: ./gradlew :kmp:testDebugUnitTest --no-daemon --build-cache
Expand Down
3 changes: 2 additions & 1 deletion androidApp/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ android {

defaultConfig {
applicationId = "dev.stapler.stelekit"
minSdk = 24
minSdk = 26
targetSdk = 36
versionCode = 1
versionName = (findProperty("appVersion") as? String ?: "0.1.0")
Expand Down Expand Up @@ -66,4 +66,5 @@ dependencies {
implementation(platform("androidx.compose:compose-bom:2024.09.02"))
implementation("androidx.compose.ui:ui")
implementation("androidx.compose.material3:material3")

}
63 changes: 60 additions & 3 deletions androidApp/src/main/kotlin/dev/stapler/stelekit/MainActivity.kt
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
package dev.stapler.stelekit

import android.Manifest
import android.content.Intent
import android.content.pm.PackageManager
import android.net.Uri
import android.os.Bundle
import android.util.Log
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.activity.enableEdgeToEdge
import androidx.activity.result.contract.ActivityResultContracts
import androidx.core.content.ContextCompat
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
Expand All @@ -16,14 +19,26 @@ import dev.stapler.stelekit.domain.UrlFetcherAndroid
import dev.stapler.stelekit.platform.PlatformFileSystem
import dev.stapler.stelekit.platform.PlatformSettings
import dev.stapler.stelekit.ui.StelekitApp
import android.speech.SpeechRecognizer
import androidx.compose.runtime.LaunchedEffect
import dev.stapler.stelekit.voice.AndroidAudioRecorder
import dev.stapler.stelekit.voice.AndroidSpeechRecognizerProvider
import dev.stapler.stelekit.voice.MlKitLlmFormatterProvider
import dev.stapler.stelekit.voice.VoiceSettings
import dev.stapler.stelekit.voice.buildVoicePipeline
import kotlinx.coroutines.CompletableDeferred

class MainActivity : ComponentActivity() {

private var pendingFolderPick: CompletableDeferred<String?>? = null
private var pendingMicPermission: CompletableDeferred<Boolean>? = null

private val micPermissionLauncher = registerForActivityResult(
ActivityResultContracts.RequestPermission()
) { granted ->
pendingMicPermission?.complete(granted)
pendingMicPermission = null
}

private val folderPickerLauncher = registerForActivityResult(
ActivityResultContracts.OpenDocumentTree()
Expand Down Expand Up @@ -102,21 +117,63 @@ class MainActivity : ComponentActivity() {
}

setContent {
val audioRecorder = remember { AndroidAudioRecorder(this@MainActivity.applicationContext) }
val fileSystem = remember {
PlatformFileSystem().apply {
init(this@MainActivity) {
val deferred = CompletableDeferred<String?>()
pendingFolderPick = deferred
// Pre-fill the picker with the last known folder so "Reconnect" UX is smooth
val hintUri = getStoredTreeUri()
runOnUiThread { folderPickerLauncher.launch(hintUri) }
deferred.await()
}
}
}
val audioRecorder = remember { AndroidAudioRecorder(this@MainActivity.applicationContext, this@MainActivity::requestMicrophonePermission) }
val voiceSettings = remember { VoiceSettings(PlatformSettings()) }
var voicePipeline by remember { mutableStateOf(buildVoicePipeline(audioRecorder, voiceSettings)) }
val deviceSttAvailable = remember { AndroidSpeechRecognizerProvider.isAvailable(this@MainActivity.applicationContext) }
val deviceSttProvider = remember {
if (deviceSttAvailable) AndroidSpeechRecognizerProvider(this@MainActivity.applicationContext) else null
}
val mlKitProvider = remember { MlKitLlmFormatterProvider.create() }
var deviceLlmAvailable by remember { mutableStateOf(false) }
LaunchedEffect(Unit) {
deviceLlmAvailable = mlKitProvider?.checkEligible() ?: false
}
fun buildPipeline() = buildVoicePipeline(
audioRecorder,
voiceSettings,
if (deviceSttAvailable && voiceSettings.getUseDeviceStt()) deviceSttProvider else null,
if (deviceLlmAvailable && voiceSettings.getUseDeviceLlm()) mlKitProvider else null,
)
var voicePipeline by remember { mutableStateOf(buildPipeline()) }
Copy link

Copilot AI Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deviceLlmAvailable is resolved asynchronously in LaunchedEffect, but voicePipeline is only built once and then rebuilt only via onRebuildVoicePipeline. If a user already has “Use on-device LLM” enabled, they’ll still start with the cloud/no-op pipeline until they manually open Settings and hit Save. Consider rebuilding the pipeline automatically when deviceLlmAvailable flips to true (or pass mlKitProvider regardless of deviceLlmAvailable and let the provider handle unsupported/downloading states).

Suggested change
var voicePipeline by remember { mutableStateOf(buildPipeline()) }
var voicePipeline by remember { mutableStateOf(buildPipeline()) }
LaunchedEffect(deviceLlmAvailable) {
voicePipeline = buildPipeline()
}

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in commit edbf46c: added LaunchedEffect(deviceLlmAvailable) in MainActivity that calls onRebuildVoicePipeline() whenever the flag flips, so the pipeline is rebuilt automatically without requiring a manual settings round-trip.

LaunchedEffect(deviceLlmAvailable) {
voicePipeline = buildPipeline()
}
StelekitApp(
fileSystem = fileSystem,
graphPath = fileSystem.getDefaultGraphPath(),
graphManager = app.graphManager,
urlFetcher = UrlFetcherAndroid(),
voicePipeline = voicePipeline,
voiceSettings = voiceSettings,
onRebuildVoicePipeline = { voicePipeline = buildVoicePipeline(audioRecorder, voiceSettings) },
onRebuildVoicePipeline = { voicePipeline = buildPipeline() },
deviceSttAvailable = deviceSttAvailable,
deviceLlmAvailable = deviceLlmAvailable,
)
}
}

/**
 * Suspends until the RECORD_AUDIO permission state is known.
 *
 * Fast path: returns true immediately when the permission is already granted.
 * Otherwise parks a [CompletableDeferred] in [pendingMicPermission], launches the
 * system permission dialog via [micPermissionLauncher] on the UI thread, and
 * awaits the result delivered by the activity-result callback.
 *
 * @return true if RECORD_AUDIO is (or becomes) granted, false if the user denies it.
 */
private suspend fun requestMicrophonePermission(): Boolean {
    val alreadyGranted = ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) ==
        PackageManager.PERMISSION_GRANTED
    if (alreadyGranted) return true

    val waiter = CompletableDeferred<Boolean>()
    pendingMicPermission = waiter
    // The launcher must be invoked from the main thread; the callback completes `waiter`.
    runOnUiThread { micPermissionLauncher.launch(Manifest.permission.RECORD_AUDIO) }
    return waiter.await()
}

companion object {
private const val TAG = "MainActivity"
/** Authority for AOSP ExternalStorageProvider — the only provider supported in v1. */
Expand Down
5 changes: 4 additions & 1 deletion kmp/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@ kotlin {
// Encrypted SharedPreferences for API key storage
implementation("androidx.security:security-crypto:1.1.0-alpha06")

// On-device LLM via Gemini Nano (Pixel 9+ and AICore-enabled OEM flagships)
implementation("com.google.mlkit:genai-prompt:1.0.0-beta2")

// Jetpack Glance — Compose-based home screen widget API
// Use 1.1.1 (not 1.1.0) to pick up a protobuf security fix.
implementation("androidx.glance:glance-appwidget:1.1.1")
Expand Down Expand Up @@ -643,7 +646,7 @@ android {
namespace = "dev.stapler.stelekit"

defaultConfig {
minSdk = 24
minSdk = 26
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ import kotlin.math.sqrt

private const val TAG = "AndroidAudioRecorder"

class AndroidAudioRecorder(private val context: Context) : AudioRecorder {
class AndroidAudioRecorder(
private val context: Context,
/** Called before recording starts; must return true if RECORD_AUDIO permission is granted. */
private val requestMicPermission: (suspend () -> Boolean)? = null,
) : AudioRecorder {

companion object {
private const val SAMPLE_RATE = 16_000
Expand Down Expand Up @@ -54,6 +58,14 @@ class AndroidAudioRecorder(private val context: Context) : AudioRecorder {
stopRequested = false
pauseRequested = false

if (requestMicPermission != null && !requestMicPermission()) {
return@withContext PlatformAudioFile("")
}

if (stopRequested) {
return@withContext PlatformAudioFile("")
}

val outputFile = File(context.cacheDir, "voice_${System.currentTimeMillis()}.m4a")
val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright (c) 2026 Tyler Stapler
// SPDX-License-Identifier: Elastic-2.0
package dev.stapler.stelekit.voice

import android.content.Context
import android.content.Intent
import android.os.Bundle
import android.os.Handler
import android.os.Looper
import android.speech.RecognitionListener
import android.speech.RecognizerIntent
import android.speech.SpeechRecognizer
import android.util.Log
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.suspendCancellableCoroutine
import kotlinx.coroutines.withContext
import kotlin.coroutines.resume

private const val TAG = "AndroidSpeechRecognizer"

/**
 * [DirectSpeechProvider] backed by the platform [SpeechRecognizer].
 *
 * All interaction with the recognizer is funneled through [mainHandler]
 * (the Android SpeechRecognizer API must be driven from the main thread).
 * A single utterance is captured per [listen] call; RMS updates from the
 * recognizer are republished on [amplitudeFlow] normalized to 0..1.
 *
 * NOTE(review): [activeRecognizer] holds at most one recognizer. Two
 * overlapping [listen] calls would overwrite it, leaking the first
 * recognizer's cancellation path — presumably callers serialize listen();
 * TODO confirm at the call site.
 */
class AndroidSpeechRecognizerProvider(private val context: Context) : DirectSpeechProvider {

companion object {
// Cheap static capability check — lets callers skip constructing a provider
// on devices with no recognition service installed.
fun isAvailable(context: Context): Boolean =
SpeechRecognizer.isRecognitionAvailable(context)
}

// Backing state for amplitudeFlow; written only from recognizer callbacks
// (main thread) and from the cancellation handler (also posted to main).
private val _amplitudeFlow = MutableStateFlow(0f)
override val amplitudeFlow: Flow<Float> = _amplitudeFlow.asStateFlow()

// The recognizer for the in-flight listen() call, if any. @Volatile because
// it is read from the coroutine cancellation path as well as main-thread callbacks.
@Volatile private var activeRecognizer: SpeechRecognizer? = null
private val mainHandler = Handler(Looper.getMainLooper())

/**
 * Captures one utterance and resumes with its transcript.
 *
 * Lifecycle: creation, listener registration, startListening, and teardown all
 * happen inside a single mainHandler.post. Cancellation of the calling coroutine
 * posts a cancel+destroy to the main thread and zeroes the amplitude.
 * The continuation is resumed exactly once; every resume site is guarded by
 * cont.isActive because recognizer callbacks can still fire after cancellation.
 */
override suspend fun listen(): TranscriptResult = suspendCancellableCoroutine { cont ->
cont.invokeOnCancellation {
// Teardown must run on the main thread, same as every other recognizer call.
mainHandler.post {
activeRecognizer?.let {
it.cancel()
it.destroy()
activeRecognizer = null
}
_amplitudeFlow.value = 0f
}
}

mainHandler.post {
var recognizer: SpeechRecognizer? = null
try {
recognizer = SpeechRecognizer.createSpeechRecognizer(context)
activeRecognizer = recognizer

// Guard against cancellation that fired before this post ran
if (!cont.isActive) {
recognizer.destroy()
activeRecognizer = null
return@post
}

recognizer.setRecognitionListener(object : RecognitionListener {
override fun onReadyForSpeech(params: Bundle?) {}
override fun onBeginningOfSpeech() {}
override fun onBufferReceived(buffer: ByteArray?) {}
override fun onEndOfSpeech() {}
override fun onEvent(eventType: Int, params: Bundle?) {}
override fun onPartialResults(partialResults: Bundle?) {}

override fun onRmsChanged(rmsdB: Float) {
// Map roughly -2..10 dB → 0..1
_amplitudeFlow.value = ((rmsdB + 2f) / 12f).coerceIn(0f, 1f)
}

override fun onResults(results: Bundle?) {
// Terminal callback: clear state and release the recognizer before
// resuming, so a late cancellation has nothing left to destroy.
_amplitudeFlow.value = 0f
activeRecognizer = null
recognizer.destroy()
if (!cont.isActive) return
val text = results
?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
?.firstOrNull()
Log.d(TAG, "onResults: text=${text?.take(80)}")
if (text.isNullOrBlank()) cont.resume(TranscriptResult.Empty)
else cont.resume(TranscriptResult.Success(text))
}

override fun onError(error: Int) {
// Terminal callback (mirror of onResults): same teardown order.
_amplitudeFlow.value = 0f
activeRecognizer = null
recognizer.destroy()
if (!cont.isActive) return
Log.w(TAG, "onError: code=$error")
cont.resume(mapError(error))
}
})

val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, true)
putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1)
putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 3_000L)
putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 1_500L)
}
recognizer.startListening(intent)
} catch (t: Throwable) {
// Anything thrown during setup (e.g. service binding failure) is reported
// as a client error rather than propagated into the coroutine machinery.
_amplitudeFlow.value = 0f
activeRecognizer = null
recognizer?.destroy()
Log.w(TAG, "Failed to start speech recognition", t)
if (cont.isActive) {
cont.resume(mapError(SpeechRecognizer.ERROR_CLIENT))
}
}
}
}

/**
 * Asks the active recognizer (if any) to finalize the current utterance.
 * The transcript still arrives through the listener, resuming [listen].
 */
override suspend fun stopListening() {
withContext(Dispatchers.Main) {
activeRecognizer?.stopListening()
}
}

// Translate SpeechRecognizer error codes into the provider's result type.
// No-speech conditions map to Empty rather than a failure so callers can
// treat a silent utterance as benign.
private fun mapError(error: Int): TranscriptResult = when (error) {
SpeechRecognizer.ERROR_NO_MATCH,
SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> TranscriptResult.Empty
SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> TranscriptResult.Failure.PermissionDenied
SpeechRecognizer.ERROR_NETWORK,
SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> TranscriptResult.Failure.NetworkError
else -> TranscriptResult.Failure.ApiError(error, "Speech recognition error (code $error)")
}
}
Loading
Loading