diff --git a/openless-all/app/src-tauri/Info.plist b/openless-all/app/src-tauri/Info.plist index b1711f9e..36833691 100644 --- a/openless-all/app/src-tauri/Info.plist +++ b/openless-all/app/src-tauri/Info.plist @@ -8,5 +8,7 @@ OpenLess 需要辅助功能权限来监听全局快捷键并把识别结果粘贴到当前光标位置。 NSAppleEventsUsageDescription OpenLess 需要发送按键事件,把识别结果粘贴到当前光标位置。 + NSSpeechRecognitionUsageDescription + OpenLess 使用 macOS 系统语音识别来在本地把你的语音转成文字(无需联网、无需凭据)。 diff --git a/openless-all/app/src-tauri/build.rs b/openless-all/app/src-tauri/build.rs index fef616ba..fe64d59f 100644 --- a/openless-all/app/src-tauri/build.rs +++ b/openless-all/app/src-tauri/build.rs @@ -77,4 +77,8 @@ fn build_qwen_asr_macos() { // BLAS = Accelerate println!("cargo:rustc-link-lib=framework=Accelerate"); + + // Apple Speech 本地 ASR(issue #574):apple_speech_provider 用 + // SFSpeechRecognizer / SFSpeechURLRecognitionRequest,符号在 Speech.framework。 + println!("cargo:rustc-link-lib=framework=Speech"); } diff --git a/openless-all/app/src-tauri/src/asr/local/apple_speech_provider.rs b/openless-all/app/src-tauri/src/asr/local/apple_speech_provider.rs new file mode 100644 index 00000000..04d18545 --- /dev/null +++ b/openless-all/app/src-tauri/src/asr/local/apple_speech_provider.rs @@ -0,0 +1,433 @@ +//! Apple Speech 本地 ASR 适配器(macOS,issue #574)。 +//! +//! 把 Apple 的 `SFSpeechRecognizer` 当作第 4 个本地 provider,接入链路与 +//! `LocalQwenAsr` 完全同形:实现 `crate::recorder::AudioConsumer` 把 PCM +//! 累进缓冲,`transcribe()` 返回 `RawTranscript{text, duration_ms}`。 +//! +//! **首版批处理**:把缓冲的 16k/mono/16-bit PCM 用 `encode_wav_16k_mono` +//! 写成临时 wav,喂给 `SFSpeechURLRecognitionRequest`。这样避开 +//! `AVAudioPCMBuffer` / `AVAudioFormat` 的 objc2 桥接,换取实现确定性。 +//! 实时 partial 流式列为后续增量,不在本次范围。 +//! +//! 权限走 `SFSpeechRecognizer.requestAuthorization:`(completion handler +//! block),范式照抄 `permissions.rs` 的 `requestAccessForMediaType:`。 +//! 未授权时 `transcribe()` 返回清晰错误。 +//! +//! 非 macOS 平台不编译本模块(见 `mod.rs` 的 cfg 门控)。 + +#![cfg(target_os = "macos")] + +use std::sync::mpsc; +use std::time::Duration; + +use anyhow::{anyhow, bail, Context, Result}; +use block2::RcBlock; +use objc2::msg_send; +use objc2::runtime::{AnyClass, AnyObject, Bool}; +use parking_lot::Mutex; + +use crate::asr::wav::encode_wav_16k_mono; +use crate::asr::RawTranscript; + +/// `SFSpeechRecognizerAuthorizationStatus`(NS_ENUM(NSInteger))。 +const SF_AUTH_NOT_DETERMINED: i64 = 0; +const SF_AUTH_DENIED: i64 = 1; +const SF_AUTH_RESTRICTED: i64 = 2; +const SF_AUTH_AUTHORIZED: i64 = 3; + +/// 等待识别 / 授权回调的兜底超时。识别本身另有 coordinator 侧动态超时; +/// 这里只防 block 永不回调导致线程永久阻塞。 +const RECOGNITION_WAIT: Duration = Duration::from_secs(60); +const AUTHORIZATION_WAIT: Duration = Duration::from_secs(30); + +pub struct AppleSpeechAsr { + /// 16-bit LE PCM 字节缓冲(recorder 推什么我们存什么)。与 LocalQwenAsr 同形。 + buffer: Mutex>, +} + +impl AppleSpeechAsr { + pub fn new() -> Self { + Self { + buffer: Mutex::new(Vec::new()), + } + } + + /// 当前缓冲音频时长(毫秒)。与 LocalQwenAsr::buffer_duration_ms 对齐, + /// coordinator 用它给本地 provider 计算动态超时。不消费缓冲。 + pub fn buffer_duration_ms(&self) -> u64 { + (self.buffer.lock().len() as u64 / 2) * 1000 / 16_000 + } + + /// stop 时调用:把缓冲编码成临时 wav,喂给 `SFSpeechURLRecognitionRequest`, + /// 把异步结果同步化后返回。 + /// + /// 失败时**保留** buffer(与 WhisperBatchASR / LocalQwenAsr 一致):凭据无关, + /// 但权限被拒 / 识别失败时不该把用户录音直接丢掉。仅成功路径清缓冲。 + pub async fn transcribe(&self) -> Result { + // clone 而非 take:会话末调用一次,几 MB 可接受;失败时缓冲仍在。 + let pcm = self.buffer.lock().clone(); + if pcm.is_empty() { + return Ok(RawTranscript { + text: String::new(), + duration_ms: 0, + }); + } + let duration_ms = (pcm.len() as u64 / 2) * 1000 / 16_000; + + // SFSpeechRecognizer 是阻塞且基于 objc runloop 的同步桥接;放到 + // spawn_blocking 不占 tokio runtime。与 LocalQwenAsr 走同一个 Tauri + // 持有的 runtime handle。 + let result = + tauri::async_runtime::spawn_blocking(move || transcribe_pcm_blocking(&pcm, duration_ms)) + .await + .context("apple-speech transcribe spawn_blocking join 失败")?; + + if result.is_ok() { + self.buffer.lock().clear(); + } + result + } + + pub fn cancel(&self) { + self.buffer.lock().clear(); + } +} + +impl Default for AppleSpeechAsr { + fn default() -> Self { + Self::new() + } +} + +impl crate::recorder::AudioConsumer for AppleSpeechAsr { + fn consume_pcm_chunk(&self, pcm: &[u8]) { + self.buffer.lock().extend_from_slice(pcm); + } +} + +/// 把 PCM 写成临时 wav,确保授权,跑批处理识别,删临时文件,返回结果。 +/// 在 spawn_blocking 线程内同步执行。 +fn transcribe_pcm_blocking(pcm: &[u8], duration_ms: u64) -> Result { + ensure_authorized()?; + + let samples: Vec = pcm + .chunks_exact(2) + .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]])) + .collect(); + let wav = encode_wav_16k_mono(&samples); + + // 临时 wav:唯一文件名避免并发会话碰撞;用完即删(RAII guard)。 + let path = std::env::temp_dir().join(format!( + "openless-apple-speech-{}-{}.wav", + std::process::id(), + unique_suffix() + )); + std::fs::write(&path, &wav).with_context(|| format!("写临时 wav 失败: {}", path.display()))?; + let _cleanup = TempFileGuard(&path); + + let path_str = path + .to_str() + .ok_or_else(|| anyhow!("临时 wav 路径含非 UTF-8 字符: {}", path.display()))?; + let text = recognize_file(path_str)?; + + Ok(RawTranscript { text, duration_ms }) +} + +/// 当前授权未确定时弹系统授权框并等待;最终非 authorized 一律返回清晰错误。 +fn ensure_authorized() -> Result<()> { + let cls = speech_recognizer_class()?; + + // SFSpeechRecognizer.authorizationStatus(类方法)。 + // SAFETY: `cls` 是已查到的 `SFSpeechRecognizer` 类对象;`authorizationStatus` + // 是无参类方法,返回 NSInteger(i64)。 + let status: i64 = unsafe { msg_send![cls, authorizationStatus] }; + if status == SF_AUTH_AUTHORIZED { + return Ok(()); + } + if status == SF_AUTH_DENIED { + bail!("语音识别权限被拒绝,请在 系统设置 → 隐私与安全性 → 语音识别 中允许 OpenLess"); + } + if status == SF_AUTH_RESTRICTED { + bail!("此设备的语音识别功能受限(可能由家长控制或 MDM 策略禁用)"); + } + if status != SF_AUTH_NOT_DETERMINED { + bail!("语音识别授权状态未知: {status}"); + } + + // NotDetermined:弹系统授权框并同步等待回调。block 范式照抄 permissions.rs。 + let (tx, rx) = mpsc::channel(); + let block = RcBlock::new(move |granted_status: i64| { + let _ = tx.send(granted_status); + }); + log::info!("[apple-speech] requesting SFSpeechRecognizer authorization"); + // SAFETY: `requestAuthorization:` 接收一个 `void(^)(SFSpeechRecognizerAuthorizationStatus)` + // block,回调参数是 NSInteger(i64)。`&*block` 是 block2 的稳定指针,block 本体 + // 由 `block` 持有到本作用域结束 —— 回调在系统弹框被用户应答后触发,发生在 + // `rx.recv_timeout` 返回之前,因此 block 生命周期足够覆盖回调。 + let _: () = unsafe { msg_send![cls, requestAuthorization: &*block] }; + + let granted = match rx.recv_timeout(AUTHORIZATION_WAIT) { + Ok(s) => s, + Err(err) => bail!("等待语音识别授权超时或失败: {err}"), + }; + match granted { + SF_AUTH_AUTHORIZED => Ok(()), + SF_AUTH_DENIED => { + bail!("语音识别权限被拒绝,请在 系统设置 → 隐私与安全性 → 语音识别 中允许 OpenLess") + } + SF_AUTH_RESTRICTED => bail!("此设备的语音识别功能受限"), + other => bail!("语音识别未获授权(状态 {other})"), + } +} + +/// 用 `SFSpeechURLRecognitionRequest` 对给定 wav 文件做一次批处理识别, +/// 把 `recognitionTaskWithRequest:resultHandler:` 的异步回调同步化。 +fn recognize_file(wav_path: &str) -> Result { + let recognizer = create_recognizer()?; + + // recognizer.isAvailable —— 识别引擎当前是否可用(首次可能在下载语言资源)。 + // SAFETY: `recognizer` 是有效的 `SFSpeechRecognizer` 实例;`isAvailable` 无参,返回 BOOL。 + let available: Bool = unsafe { msg_send![recognizer, isAvailable] }; + if !available.as_bool() { + bail!("当前语言的语音识别暂不可用(系统可能正在准备识别资源,请稍后重试)"); + } + + let url = file_url(wav_path)?; + let request = create_url_request(url)?; + + let (tx, rx) = mpsc::channel::(); + // resultHandler: void(^)(SFSpeechRecognitionResult *result, NSError *error) + let block = RcBlock::new(move |result: *mut AnyObject, error: *mut AnyObject| { + let outcome = build_outcome(result, error); + // 只取第一个 final(或第一个 error)。后续重复回调忽略。 + if outcome.is_terminal() { + let _ = tx.send(outcome); + } + }); + + log::info!("[apple-speech] starting recognitionTaskWithRequest"); + // SAFETY: `recognizer` 有效;`request` 是有效的 `SFSpeechURLRecognitionRequest`; + // `&*block` 是稳定 block 指针,block 本体被 `block` 持有至本作用域结束。 + // 返回的 `SFSpeechRecognitionTask` 我们不持有(自身被 recognizer 强引用直到完成)。 + let _task: *mut AnyObject = unsafe { + msg_send![ + recognizer, + recognitionTaskWithRequest: request, + resultHandler: &*block + ] + }; + + match rx.recv_timeout(RECOGNITION_WAIT) { + Ok(RecognitionOutcome::Final(text)) => Ok(text), + Ok(RecognitionOutcome::Failed(message)) => bail!("语音识别失败: {message}"), + Ok(RecognitionOutcome::Pending) => unreachable!("Pending 不会被发送"), + Err(err) => bail!("等待语音识别结果超时或失败: {err}"), + } +} + +/// 识别回调的归一化结果。 +enum RecognitionOutcome { + /// 还没拿到 final(partial),不发送。 + Pending, + Final(String), + Failed(String), +} + +impl RecognitionOutcome { + fn is_terminal(&self) -> bool { + !matches!(self, RecognitionOutcome::Pending) + } +} + +/// 从 `(result, error)` 回调参数提取最终文本或错误。 +fn build_outcome(result: *mut AnyObject, error: *mut AnyObject) -> RecognitionOutcome { + if !error.is_null() { + return RecognitionOutcome::Failed(ns_error_description(error)); + } + if result.is_null() { + return RecognitionOutcome::Failed("识别返回空结果".to_string()); + } + // result.isFinal —— 只有 final 才取文本;partial 让上层继续等。 + // SAFETY: `result` 非空,是 `SFSpeechRecognitionResult`;`isFinal` 无参返回 BOOL。 + let is_final: Bool = unsafe { msg_send![result, isFinal] }; + if !is_final.as_bool() { + return RecognitionOutcome::Pending; + } + // result.bestTranscription.formattedString → NSString → Rust String。 + // SAFETY: `result` 是 final 的 `SFSpeechRecognitionResult`,`bestTranscription` + // 非空(final 结果保证有 transcription);`formattedString` 返回 NSString。 + let transcription: *mut AnyObject = unsafe { msg_send![result, bestTranscription] }; + if transcription.is_null() { + return RecognitionOutcome::Final(String::new()); + } + let formatted: *mut AnyObject = unsafe { msg_send![transcription, formattedString] }; + RecognitionOutcome::Final(ns_string_to_rust(formatted)) +} + +fn speech_recognizer_class() -> Result<&'static AnyClass> { + AnyClass::get("SFSpeechRecognizer") + .ok_or_else(|| anyhow!("SFSpeechRecognizer 类不可用(需要 macOS 10.15+ 并链接 Speech.framework)")) +} + +/// `[[SFSpeechRecognizer alloc] init]` —— 用系统当前 locale。 +fn create_recognizer() -> Result<*mut AnyObject> { + let cls = speech_recognizer_class()?; + // SAFETY: `cls` 是 `SFSpeechRecognizer` 类;`alloc` 返回未初始化实例, + // `init` 对其初始化,返回的实例由本函数所有权移交调用方(随后被 ARC 管理)。 + let recognizer: *mut AnyObject = unsafe { + let alloc: *mut AnyObject = msg_send![cls, alloc]; + msg_send![alloc, init] + }; + if recognizer.is_null() { + bail!("无法创建 SFSpeechRecognizer(当前系统语言可能不支持语音识别)"); + } + Ok(recognizer) +} + +/// `[NSURL fileURLWithPath:]`。 +fn file_url(path: &str) -> Result<*mut AnyObject> { + let ns_path = ns_string_from_str(path)?; + let cls = AnyClass::get("NSURL").ok_or_else(|| anyhow!("NSURL 类不可用"))?; + // SAFETY: `cls` 是 NSURL;`fileURLWithPath:` 接收 NSString(`ns_path` 有效), + // 返回 autoreleased NSURL(在 spawn_blocking 线程的隐式 autorelease 池存活)。 + let url: *mut AnyObject = unsafe { msg_send![cls, fileURLWithPath: ns_path] }; + if url.is_null() { + bail!("构造文件 URL 失败: {path}"); + } + Ok(url) +} + +/// `[[SFSpeechURLRecognitionRequest alloc] initWithURL:]`。 +fn create_url_request(url: *mut AnyObject) -> Result<*mut AnyObject> { + let cls = AnyClass::get("SFSpeechURLRecognitionRequest") + .ok_or_else(|| anyhow!("SFSpeechURLRecognitionRequest 类不可用"))?; + // SAFETY: `cls` 是请求类;`alloc`+`initWithURL:` 用有效 `url` 初始化请求实例。 + let request: *mut AnyObject = unsafe { + let alloc: *mut AnyObject = msg_send![cls, alloc]; + msg_send![alloc, initWithURL: url] + }; + if request.is_null() { + bail!("构造 SFSpeechURLRecognitionRequest 失败"); + } + Ok(request) +} + +/// `[NSString stringWithUTF8String:]`。`s` 不能含内部 NUL。 +fn ns_string_from_str(s: &str) -> Result<*mut AnyObject> { + let c = std::ffi::CString::new(s).context("字符串含 NUL,无法构造 NSString")?; + let cls = AnyClass::get("NSString").ok_or_else(|| anyhow!("NSString 类不可用"))?; + // SAFETY: `cls` 是 NSString;`stringWithUTF8String:` 接收以 NUL 结尾的 C 字符串 + // (`c.as_ptr()` 在 `c` 存活期间有效,本调用同步完成,NSString 会拷贝内容)。 + let ns: *mut AnyObject = unsafe { msg_send![cls, stringWithUTF8String: c.as_ptr()] }; + if ns.is_null() { + bail!("stringWithUTF8String 返回 nil"); + } + Ok(ns) +} + +/// NSString → Rust String(经 `UTF8String`)。nil 返回空串。 +fn ns_string_to_rust(ns: *mut AnyObject) -> String { + if ns.is_null() { + return String::new(); + } + // SAFETY: `ns` 非空,是 NSString;`UTF8String` 返回指向 NSString 内部、以 NUL + // 结尾的 UTF-8 缓冲,在自动释放池存活期间有效。立即拷贝成 owned String。 + let ptr: *const std::os::raw::c_char = unsafe { msg_send![ns, UTF8String] }; + if ptr.is_null() { + return String::new(); + } + // SAFETY: `ptr` 是有效、以 NUL 结尾的 C 字符串(来自 NSString.UTF8String)。 + unsafe { std::ffi::CStr::from_ptr(ptr) } + .to_string_lossy() + .into_owned() +} + +/// NSError → 可读字符串(`localizedDescription`)。 +fn ns_error_description(error: *mut AnyObject) -> String { + if error.is_null() { + return "未知错误".to_string(); + } + // SAFETY: `error` 非空,是 NSError;`localizedDescription` 返回 NSString。 + let desc: *mut AnyObject = unsafe { msg_send![error, localizedDescription] }; + let message = ns_string_to_rust(desc); + if message.is_empty() { + "未知错误".to_string() + } else { + message + } +} + +/// 进程内单调递增后缀,避免同进程内并发临时 wav 文件名碰撞。 +fn unique_suffix() -> u64 { + use std::sync::atomic::{AtomicU64, Ordering}; + static COUNTER: AtomicU64 = AtomicU64::new(0); + COUNTER.fetch_add(1, Ordering::Relaxed) +} + +/// 临时文件 RAII 清理:transcribe 返回(成功或失败)时删除 wav。 +struct TempFileGuard<'a>(&'a std::path::Path); + +impl Drop for TempFileGuard<'_> { + fn drop(&mut self) { + if let Err(err) = std::fs::remove_file(self.0) { + log::warn!( + "[apple-speech] 删除临时 wav 失败 {}: {err}", + self.0.display() + ); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::recorder::AudioConsumer; + + #[test] + fn buffer_duration_tracks_consumed_pcm() { + let asr = AppleSpeechAsr::new(); + assert_eq!(asr.buffer_duration_ms(), 0); + // 16k * 2 bytes/sample * 1s = 32000 bytes。 + asr.consume_pcm_chunk(&vec![0u8; 32_000]); + assert_eq!(asr.buffer_duration_ms(), 1_000); + asr.consume_pcm_chunk(&vec![0u8; 16_000]); + assert_eq!(asr.buffer_duration_ms(), 1_500); + } + + #[test] + fn cancel_clears_buffer() { + let asr = AppleSpeechAsr::new(); + asr.consume_pcm_chunk(&vec![0u8; 32_000]); + asr.cancel(); + assert_eq!(asr.buffer_duration_ms(), 0); + } + + #[tokio::test] + async fn transcribe_empty_buffer_returns_empty() { + let asr = AppleSpeechAsr::new(); + let transcript = asr.transcribe().await.unwrap(); + assert_eq!(transcript.text, ""); + assert_eq!(transcript.duration_ms, 0); + } + + #[test] + fn temp_file_guard_removes_file_on_drop() { + let path = std::env::temp_dir().join(format!( + "openless-apple-speech-test-{}.wav", + unique_suffix() + )); + std::fs::write(&path, b"x").unwrap(); + assert!(path.exists()); + { + let _guard = TempFileGuard(&path); + } + assert!(!path.exists()); + } + + #[test] + fn unique_suffix_is_monotonic() { + let a = unique_suffix(); + let b = unique_suffix(); + assert!(b > a); + } +} diff --git a/openless-all/app/src-tauri/src/asr/local/mod.rs b/openless-all/app/src-tauri/src/asr/local/mod.rs index ec75ed62..627f3ecf 100644 --- a/openless-all/app/src-tauri/src/asr/local/mod.rs +++ b/openless-all/app/src-tauri/src/asr/local/mod.rs @@ -29,11 +29,16 @@ pub use sherpa_provider::SherpaOnnxAsr; #[allow(unused_imports)] pub use sherpa_runtime::SherpaOnnxRuntime; +#[cfg(target_os = "macos")] +mod apple_speech_provider; #[cfg(target_os = "macos")] mod qwen_engine; #[cfg(target_os = "macos")] mod qwen_ffi; +#[cfg(target_os = "macos")] +#[allow(unused_imports)] +pub use apple_speech_provider::AppleSpeechAsr; #[cfg(target_os = "macos")] pub use local_provider::LocalQwenAsr; #[cfg(target_os = "macos")] @@ -48,3 +53,14 @@ pub const PROVIDER_ID: &str = "local-qwen3"; pub fn is_local_qwen3(id: &str) -> bool { id == PROVIDER_ID } + +/// Apple Speech(SFSpeechRecognizer)本地 ASR 的 provider id;与前端 +/// ASR_PRESETS 的 id 对齐(issue #574)。该字符串在所有平台都可被识别, +/// 但 provider 实现只在 macOS 编译;非 macOS 上由上层判为 not-configured / +/// 不可用(见 commands / coordinator 的平台门控)。 +pub const APPLE_SPEECH_PROVIDER_ID: &str = "apple-speech"; + +#[allow(dead_code)] +pub fn is_apple_speech(id: &str) -> bool { + id == APPLE_SPEECH_PROVIDER_ID +} diff --git a/openless-all/app/src-tauri/src/commands/credentials.rs b/openless-all/app/src-tauri/src/commands/credentials.rs index 350d1e3e..1778199e 100644 --- a/openless-all/app/src-tauri/src/commands/credentials.rs +++ b/openless-all/app/src-tauri/src/commands/credentials.rs @@ -29,6 +29,7 @@ pub(crate) fn asr_configured_for_provider(provider: &str, snap: &CredentialsSnap return volcengine_configured(snap); } if provider == crate::asr::local::PROVIDER_ID + || active_apple_speech_asr_is_supported(provider) || active_foundry_asr_is_supported(provider) || active_sherpa_asr_is_supported(provider) { @@ -168,6 +169,11 @@ pub async fn set_active_asr_provider( { return Err("sherpa-onnx local ASR is only available on Windows".to_string()); } + if provider == crate::asr::local::APPLE_SPEECH_PROVIDER_ID + && !active_apple_speech_asr_is_supported(&provider) + { + return Err("Apple Speech recognition is only available on macOS".to_string()); + } if CredentialsVault::get_active_asr() == provider { return Ok(()); } diff --git a/openless-all/app/src-tauri/src/commands/providers.rs b/openless-all/app/src-tauri/src/commands/providers.rs index 9fed8852..2f302e8e 100644 --- a/openless-all/app/src-tauri/src/commands/providers.rs +++ b/openless-all/app/src-tauri/src/commands/providers.rs @@ -246,10 +246,23 @@ async fn validate_bailian_asr_provider() -> Result<(), String> { pub(crate) fn active_asr_is_keyless_for_validation(provider: &str) -> bool { provider == crate::asr::local::PROVIDER_ID + || active_apple_speech_asr_is_supported(provider) || active_foundry_asr_is_supported(provider) || active_sherpa_asr_is_supported(provider) } +pub(crate) fn active_apple_speech_asr_is_supported(provider: &str) -> bool { + #[cfg(target_os = "macos")] + { + provider == crate::asr::local::APPLE_SPEECH_PROVIDER_ID + } + #[cfg(not(target_os = "macos"))] + { + let _ = provider; + false + } +} + pub(crate) fn active_foundry_asr_is_supported(provider: &str) -> bool { #[cfg(target_os = "windows")] { diff --git a/openless-all/app/src-tauri/src/coordinator.rs b/openless-all/app/src-tauri/src/coordinator.rs index 1579dc3f..926a260b 100644 --- a/openless-all/app/src-tauri/src/coordinator.rs +++ b/openless-all/app/src-tauri/src/coordinator.rs @@ -119,6 +119,10 @@ pub(crate) enum ActiveAsr { /// 本地 Qwen3-ASR;只在 macOS + 模型已下载时可达。 #[cfg(target_os = "macos")] Local(Arc), + /// Apple Speech(SFSpeechRecognizer)系统本地 ASR;只在 macOS 可达。 + /// 无模型下载、无凭据,首次使用弹系统授权(issue #574)。 + #[cfg(target_os = "macos")] + AppleSpeech(Arc), } fn asr_transcribe_uses_global_timeout(asr: &ActiveAsr) -> bool { diff --git a/openless-all/app/src-tauri/src/coordinator/asr_setup.rs b/openless-all/app/src-tauri/src/coordinator/asr_setup.rs index f1b47e1b..326a5036 100644 --- a/openless-all/app/src-tauri/src/coordinator/asr_setup.rs +++ b/openless-all/app/src-tauri/src/coordinator/asr_setup.rs @@ -75,6 +75,19 @@ pub(crate) fn ensure_asr_credentials() -> Result<(), String> { } } + // Apple Speech 没有"凭据"也没有要下载的模型,只需:macOS 平台。 + // 系统语音识别资源由 OS 管理,首次使用时弹授权框(见 apple_speech_provider)。 + if crate::asr::local::is_apple_speech(&active_asr) { + #[cfg(not(target_os = "macos"))] + { + return Err("Apple Speech 语音识别仅支持 macOS".to_string()); + } + #[cfg(target_os = "macos")] + { + return Ok(()); + } + } + if crate::asr::local::foundry::is_foundry_local_whisper(&active_asr) { #[cfg(not(target_os = "windows"))] { @@ -124,6 +137,10 @@ pub(crate) fn is_keyless_local_asr_provider(id: &str) -> bool { if crate::asr::local::is_local_qwen3(id) { return true; } + #[cfg(target_os = "macos")] + if crate::asr::local::is_apple_speech(id) { + return true; + } #[cfg(target_os = "windows")] { crate::asr::local::foundry::is_foundry_local_whisper(id) @@ -261,6 +278,13 @@ pub(crate) async fn build_local_qwen3( Ok(Arc::new(crate::asr::local::LocalQwenAsr::new(app, engine))) } +/// 构建 Apple Speech provider。与 build_local_qwen3 不同:无模型、无缓存、无 +/// AppHandle 依赖,授权/识别由 provider 内部按需处理(首次弹系统授权框)。 +#[cfg(target_os = "macos")] +pub(crate) fn build_apple_speech() -> Arc { + Arc::new(crate::asr::local::AppleSpeechAsr::new()) +} + pub(crate) enum QaAsrStart { Volcengine { asr: Arc, @@ -390,6 +414,14 @@ pub(crate) async fn build_qa_asr_start( return Ok(QaAsrStart::Ready { active, consumer }); } + #[cfg(target_os = "macos")] + if crate::asr::local::is_apple_speech(active_asr) { + let local = build_apple_speech(); + let active = ActiveAsr::AppleSpeech(Arc::clone(&local)); + let consumer: Arc = local; + return Ok(QaAsrStart::Ready { active, consumer }); + } + match active_asr_provider_kind(active_asr) { ActiveAsrProviderKind::Bailian => Ok(QaAsrStart::Bailian { asr: Arc::new(BailianRealtimeASR::new(read_bailian_credentials())), diff --git a/openless-all/app/src-tauri/src/coordinator/dictation_end.rs b/openless-all/app/src-tauri/src/coordinator/dictation_end.rs index 09439d9b..25978fdb 100644 --- a/openless-all/app/src-tauri/src/coordinator/dictation_end.rs +++ b/openless-all/app/src-tauri/src/coordinator/dictation_end.rs @@ -359,6 +359,65 @@ pub(crate) async fn end_session(inner: &Arc) -> Result<(), String> { } } } + // Apple Speech:系统语音识别,无模型加载耗时。批处理 transcribe 受音频 + // 长度影响,沿用 local_qwen_transcribe_timeout 的动态超时公式(基础 15s + // 兜短录音,长录音按音频 0.6 倍 + 10s 余量),coordinator 侧再加一层防线。 + #[cfg(target_os = "macos")] + ActiveAsr::AppleSpeech(local) => { + debug_assert!(uses_global_timeout); + let audio_secs = (local.buffer_duration_ms() as f64) / 1000.0; + let timeout_duration = local_qwen_transcribe_timeout(audio_secs); + log::info!( + "[coord] Apple Speech transcribe: audio={:.2}s timeout={}s", + audio_secs, + timeout_duration.as_secs() + ); + match tokio::time::timeout(timeout_duration, local.transcribe()).await { + Ok(Ok(r)) => r, + Ok(Err(e)) => { + if inner.state.lock().cancelled { + log::info!( + "[coord] Apple Speech transcribe cancelled — discarding transcript" + ); + restore_prepared_windows_ime_session(inner, current_session_id); + set_phase_idle_if_session_matches(inner, current_session_id); + return Ok(()); + } + log::error!("[coord] Apple Speech transcribe failed: {e:#}"); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some(format!("本地识别失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + Err(_) => { + log::error!( + "[coord] Apple Speech 动态超时 {}s(音频 {:.2}s)", + timeout_duration.as_secs(), + audio_secs + ); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some("识别超时".to_string()), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err("apple-speech global timeout".to_string()); + } + } + } }; // ASR 完成后 cancel 检查:用户在 transcribe 进行中按 Esc 时,这里就会命中。 diff --git a/openless-all/app/src-tauri/src/coordinator/dictation_session.rs b/openless-all/app/src-tauri/src/coordinator/dictation_session.rs index 480883af..3ab2589c 100644 --- a/openless-all/app/src-tauri/src/coordinator/dictation_session.rs +++ b/openless-all/app/src-tauri/src/coordinator/dictation_session.rs @@ -209,6 +209,22 @@ pub(crate) async fn begin_session(inner: &Arc) -> Result<(), String> { return Ok(()); } + // Apple Speech:无模型加载,构建即用;停止录音后整段批处理识别,再复用 + // 现有 polish / insert / history 收尾路径(与 local-qwen3 同形)。 + #[cfg(target_os = "macos")] + if crate::asr::local::is_apple_speech(&active_asr) { + let local = build_apple_speech(); + store_asr_for_session( + inner, + current_session_id, + ActiveAsr::AppleSpeech(Arc::clone(&local)), + ); + let consumer: Arc = local; + start_recorder_and_enter_listening(inner, current_session_id, &active_asr, consumer) + .await?; + return Ok(()); + } + if is_bailian_provider(&active_asr) { let asr = Arc::new(BailianRealtimeASR::new(read_bailian_credentials())); let bridge = Arc::new(DeferredAsrBridge::new()); diff --git a/openless-all/app/src-tauri/src/coordinator/qa_session.rs b/openless-all/app/src-tauri/src/coordinator/qa_session.rs index bd439ede..7afa3db9 100644 --- a/openless-all/app/src-tauri/src/coordinator/qa_session.rs +++ b/openless-all/app/src-tauri/src/coordinator/qa_session.rs @@ -455,6 +455,40 @@ pub(crate) async fn end_qa_session(inner: &Arc) -> Result<(), String> { } } } + #[cfg(target_os = "macos")] + ActiveAsr::AppleSpeech(local) => { + debug_assert!(uses_global_timeout); + let audio_secs = (local.buffer_duration_ms() as f64) / 1000.0; + let timeout_duration = local_qwen_transcribe_timeout(audio_secs); + log::info!( + "[coord] QA Apple Speech transcribe: audio={:.2}s timeout={}s", + audio_secs, + timeout_duration.as_secs() + ); + match tokio::time::timeout(timeout_duration, local.transcribe()).await { + Ok(Ok(r)) => r, + Ok(Err(e)) => { + if inner.qa_state.lock().cancelled { + log::info!( + "[coord] QA Apple Speech transcribe cancelled — discarding transcript" + ); + finish_qa_idle_silently(inner); + return Ok(()); + } + log::error!("[coord] QA Apple Speech transcribe failed: {e:#}"); + finish_qa_with_error(inner, format!("本地识别失败: {e}")); + return Err(e.to_string()); + } + Err(_) => { + log::error!( + "[coord] QA Apple Speech transcribe timeout after {}s", + timeout_duration.as_secs() + ); + finish_qa_with_error(inner, "本地识别超时".to_string()); + return Err("apple-speech transcribe timeout".to_string()); + } + } + } }; // cancel race:用户在 transcribe 中按 Esc / dismiss → 静默退出。 diff --git a/openless-all/app/src-tauri/src/coordinator/resources.rs b/openless-all/app/src-tauri/src/coordinator/resources.rs index 72b91a99..71183687 100644 --- a/openless-all/app/src-tauri/src/coordinator/resources.rs +++ b/openless-all/app/src-tauri/src/coordinator/resources.rs @@ -75,6 +75,8 @@ pub(super) fn cancel_active_asr(asr: ActiveAsr) { ActiveAsr::SherpaOnnxLocal(local) => local.cancel(), #[cfg(target_os = "macos")] ActiveAsr::Local(local) => local.cancel(), + #[cfg(target_os = "macos")] + ActiveAsr::AppleSpeech(local) => local.cancel(), } } diff --git a/openless-all/app/src/i18n/en.ts b/openless-all/app/src/i18n/en.ts index 326e2c07..be4a7f62 100644 --- a/openless-all/app/src/i18n/en.ts +++ b/openless-all/app/src/i18n/en.ts @@ -698,6 +698,7 @@ export const en: typeof zhCN = { asrSherpaOnnxLocal: 'Local sherpa-onnx (experimental)', asrFoundryLocalWhisper: 'Local Whisper (Foundry Local)', asrLocalQwen3: 'Local Qwen3-ASR', + asrAppleSpeech: 'Apple Speech (macOS)', }, volcengineAppKeyLabel: 'APP ID', volcengineAccessKeyLabel: 'Access Token', @@ -1000,6 +1001,9 @@ export const en: typeof zhCN = { modelDir: 'Model directory', revealDir: 'Open directory', deleteConfirm: 'Delete local model files for {{name}}? This cannot be undone.', + appleSpeechTitle: 'Apple Speech recognition (macOS)', + appleSpeechDesc: "Transcribe speech locally using macOS's built-in speech recognition: no model download, no API key, no network. A zero-credential local fallback when your cloud ASR is unreliable. macOS will prompt for speech recognition permission on first use.", + appleSpeechUse: 'Use Apple Speech', qwenTitle: 'Qwen3-ASR model manager', qwenExperimentalBadge: 'Experimental', engineUnavailable: 'The Qwen3-ASR inference engine is not bundled on this platform. You can still download models, but Qwen3-ASR cannot be activated here yet.', diff --git a/openless-all/app/src/i18n/ja.ts b/openless-all/app/src/i18n/ja.ts index c3d695c0..bc2ce0ed 100644 --- a/openless-all/app/src/i18n/ja.ts +++ b/openless-all/app/src/i18n/ja.ts @@ -700,6 +700,7 @@ export const ja: typeof zhCN = { asrSherpaOnnxLocal: 'ローカル sherpa-onnx(実験的)', asrFoundryLocalWhisper: 'ローカル Whisper(Foundry Local)', asrLocalQwen3: 'ローカル Qwen3-ASR', + asrAppleSpeech: 'Apple 音声認識 (macOS)', }, volcengineAppKeyLabel: 'APP ID', volcengineAccessKeyLabel: 'Access Token', @@ -1002,6 +1003,9 @@ export const ja: typeof zhCN = { modelDir: 'モデルフォルダ', revealDir: 'フォルダを開く', deleteConfirm: '{{name}} のローカルモデルファイルを削除しますか?この操作は取り消せません。', + appleSpeechTitle: 'Apple 音声認識(macOS)', + appleSpeechDesc: 'macOS 標準の音声認識を使ってローカルで文字起こしします。モデルのダウンロード・API キー・ネットワークは不要。クラウド ASR が不安定なときの認証情報不要なローカルフォールバックです。初回利用時に音声認識の許可ダイアログが表示されます。', + appleSpeechUse: 'Apple 音声認識を使う', qwenTitle: 'Qwen3-ASR モデル管理', qwenExperimentalBadge: '実験的', engineUnavailable: '現在のプラットフォームには Qwen3-ASR 推論エンジンが同梱されていません。モデルのダウンロードは可能ですが、ここではまだ Qwen3-ASR を有効化できません。', diff --git a/openless-all/app/src/i18n/ko.ts b/openless-all/app/src/i18n/ko.ts index 0633be29..974d6948 100644 --- a/openless-all/app/src/i18n/ko.ts +++ b/openless-all/app/src/i18n/ko.ts @@ -700,6 +700,7 @@ export const ko: typeof zhCN = { asrSherpaOnnxLocal: '로컬 sherpa-onnx(실험적)', asrFoundryLocalWhisper: '로컬 Whisper(Foundry Local)', asrLocalQwen3: '로컬 Qwen3-ASR', + asrAppleSpeech: 'Apple 음성 (macOS)', }, volcengineAppKeyLabel: 'APP ID', volcengineAccessKeyLabel: 'Access Token', @@ -1002,6 +1003,9 @@ export const ko: typeof zhCN = { modelDir: '모델 폴더', revealDir: '폴더 열기', deleteConfirm: '{{name}} 로컬 모델 파일을 삭제할까요? 되돌릴 수 없습니다.', + appleSpeechTitle: 'Apple 음성 인식(macOS)', + appleSpeechDesc: 'macOS 기본 음성 인식을 사용해 로컬에서 음성을 텍스트로 변환합니다. 모델 다운로드, API 키, 네트워크가 모두 필요 없습니다. 클라우드 ASR이 불안정할 때 자격 증명이 필요 없는 로컬 폴백입니다. 처음 사용할 때 음성 인식 권한 요청이 표시됩니다.', + appleSpeechUse: 'Apple 음성 사용', qwenTitle: 'Qwen3-ASR 모델 관리', qwenExperimentalBadge: '실험적', engineUnavailable: '현재 플랫폼에는 Qwen3-ASR 추론 엔진이 포함되어 있지 않습니다. 모델은 다운로드할 수 있지만 여기서는 아직 Qwen3-ASR 을 활성화할 수 없습니다.', diff --git a/openless-all/app/src/i18n/zh-CN.ts b/openless-all/app/src/i18n/zh-CN.ts index d7844014..2e8860fd 100644 --- a/openless-all/app/src/i18n/zh-CN.ts +++ b/openless-all/app/src/i18n/zh-CN.ts @@ -696,6 +696,7 @@ export const zhCN = { asrSherpaOnnxLocal: '本地 sherpa-onnx(实验性)', asrFoundryLocalWhisper: '本地 Whisper(Foundry Local)', asrLocalQwen3: '本地 Qwen3-ASR', + asrAppleSpeech: 'Apple 语音 (macOS)', }, volcengineAppKeyLabel: 'APP ID', volcengineAccessKeyLabel: 'Access Token', @@ -998,6 +999,9 @@ export const zhCN = { modelDir: '模型目录', revealDir: '打开目录', deleteConfirm: '确定删除 {{name}} 的本地模型文件吗?此操作无法撤销。', + appleSpeechTitle: 'Apple 语音识别(macOS)', + appleSpeechDesc: '使用 macOS 系统自带的语音识别在本地把语音转成文字:无需下载模型、无需 API Key、无需联网。云端 ASR 网络不稳时的零凭据本地兜底。首次使用会弹出系统语音识别授权。', + appleSpeechUse: '使用 Apple 语音', qwenTitle: 'Qwen3-ASR 模型管理', qwenExperimentalBadge: '实验性', engineUnavailable: '当前平台暂未集成 Qwen3-ASR 推理引擎。可下载模型,但暂时无法启用 Qwen3-ASR。', diff --git a/openless-all/app/src/i18n/zh-TW.ts b/openless-all/app/src/i18n/zh-TW.ts index f01e6be2..1342f200 100644 --- a/openless-all/app/src/i18n/zh-TW.ts +++ b/openless-all/app/src/i18n/zh-TW.ts @@ -698,6 +698,7 @@ export const zhTW: typeof zhCN = { asrSherpaOnnxLocal: '本地 sherpa-onnx(實驗性)', asrFoundryLocalWhisper: '本地 Whisper(Foundry Local)', asrLocalQwen3: '本地 Qwen3-ASR', + asrAppleSpeech: 'Apple 語音 (macOS)', }, volcengineAppKeyLabel: 'APP ID', volcengineAccessKeyLabel: 'Access Token', @@ -1000,6 +1001,9 @@ export const zhTW: typeof zhCN = { modelDir: '模型目錄', revealDir: '開啟目錄', deleteConfirm: '確定刪除 {{name}} 的本地模型檔案嗎?此操作無法復原。', + appleSpeechTitle: 'Apple 語音辨識(macOS)', + appleSpeechDesc: '使用 macOS 系統內建的語音辨識在本地將語音轉成文字:無需下載模型、無需 API Key、無需連網。雲端 ASR 網路不穩時的零憑證本地後備。首次使用會跳出系統語音辨識授權。', + appleSpeechUse: '使用 Apple 語音', qwenTitle: 'Qwen3-ASR 模型管理', qwenExperimentalBadge: '實驗性', engineUnavailable: '當前平臺暫未集成 Qwen3-ASR 推理引擎。可下載模型,但暫時無法啟用 Qwen3-ASR。', diff --git a/openless-all/app/src/pages/LocalAsr.tsx b/openless-all/app/src/pages/LocalAsr.tsx index 000bd83a..5c5624a8 100644 --- a/openless-all/app/src/pages/LocalAsr.tsx +++ b/openless-all/app/src/pages/LocalAsr.tsx @@ -741,6 +741,23 @@ export function LocalAsr({ embedded = false }: LocalAsrProps = {}) { } } + // Apple Speech(macOS 系统语音识别):无模型下载、无凭据,只需把 active + // provider 切到 "apple-speech"。复用 setActiveAsrProvider IPC(后端持久化), + // 再 updatePrefs 同步本地受控状态。 + const handleUseAppleSpeech = async () => { + try { + setError(null) + await setActiveAsrProvider("apple-speech") + await updatePrefs((current) => + current.activeAsrProvider === "apple-speech" + ? current + : { ...current, activeAsrProvider: "apple-speech" }, + ) + } catch (e) { + setError(e instanceof Error ? e.message : String(e)) + } + } + const applyModelsBaseDir = async (modelsBaseDir: string | null) => { setStorageBusy(true) try { @@ -1428,6 +1445,7 @@ export function LocalAsr({ embedded = false }: LocalAsrProps = {}) { sherpaStatus?.available === true || (foundryPlatformAvailable && sherpaStatus?.available !== false) const sherpaDefault = prefs?.activeAsrProvider === "sherpa-onnx-local" + const appleSpeechActive = prefs?.activeAsrProvider === "apple-speech" const selectedSherpaModel = SHERPA_ONNX_ASR_MODELS.find( (model) => model.alias === selectedSherpaAlias, @@ -2800,6 +2818,66 @@ export function LocalAsr({ embedded = false }: LocalAsrProps = {}) { ))} )} + + {/* Apple Speech(macOS 系统语音识别):无下载、无凭据,零网络兜底。 + issue #574。和 Qwen3 模型行平级摆一张卡片即可。 */} + {IS_MAC && ( + +
+
+
+
+ {t("localAsr.appleSpeechTitle")} +
+ {appleSpeechActive && ( + + {t("localAsr.activeBadge")} + + )} +
+
+ {t("localAsr.appleSpeechDesc")} +
+
+ void handleUseAppleSpeech()} + > + {appleSpeechActive + ? t("localAsr.activeBadge") + : t("localAsr.appleSpeechUse")} + +
+
+ )} ) } diff --git a/openless-all/app/src/pages/Overview.tsx b/openless-all/app/src/pages/Overview.tsx index 118ef703..84be5e92 100644 --- a/openless-all/app/src/pages/Overview.tsx +++ b/openless-all/app/src/pages/Overview.tsx @@ -35,6 +35,7 @@ const ASR_NAME_KEY_BY_ID: Record = { 'foundry-local-whisper': 'asrFoundryLocalWhisper', 'sherpa-onnx-local': 'asrSherpaOnnxLocal', 'local-qwen3': 'asrLocalQwen3', + 'apple-speech': 'asrAppleSpeech', }; const LLM_NAME_KEY_BY_ID: Record = { diff --git a/openless-all/app/src/pages/settings/ProvidersSection.tsx b/openless-all/app/src/pages/settings/ProvidersSection.tsx index e9d453a2..cd691585 100644 --- a/openless-all/app/src/pages/settings/ProvidersSection.tsx +++ b/openless-all/app/src/pages/settings/ProvidersSection.tsx @@ -166,6 +166,8 @@ const ASR_PRESETS: ReadonlyArray<{ id: AsrPresetId; nameKey: string; baseUrl: st // 模型在「高级 → 本地模型」里下载与切换。 { id: 'sherpa-onnx-local', nameKey: 'asrSherpaOnnxLocal', baseUrl: '', model: '' }, { id: 'local-qwen3', nameKey: 'asrLocalQwen3', baseUrl: '', model: '' }, + // Apple 系统语音识别(macOS):无 baseUrl/model、无下载、无凭据。 + { id: 'apple-speech', nameKey: 'asrAppleSpeech', baseUrl: '', model: '' }, ]; export function ProvidersSection() { @@ -188,11 +190,12 @@ export function ProvidersSection() { const [asrModelRevision, setAsrModelRevision] = useState(0); const os = detectOS(); // 主 ASR 下拉只列云端选项;本地推理(local-qwen3 / foundry-local-whisper / - // sherpa-onnx-local)移到「高级 → 本地模型」,防止新手误开 CPU 推理。 + // sherpa-onnx-local / apple-speech)移到「高级 → 本地模型」,防止新手误开 CPU 推理。 const visibleAsrPresets = ASR_PRESETS.filter( p => p.id !== 'foundry-local-whisper' && p.id !== 'local-qwen3' - && p.id !== 'sherpa-onnx-local', + && p.id !== 'sherpa-onnx-local' + && p.id !== 'apple-speech', ); useEffect(() => { @@ -381,7 +384,8 @@ export function ProvidersSection() { const isLocked = committedAsrProvider === 'local-qwen3' || committedAsrProvider === 'foundry-local-whisper' || - committedAsrProvider === 'sherpa-onnx-local'; + committedAsrProvider === 'sherpa-onnx-local' || + committedAsrProvider === 'apple-speech'; const selectedValue: AsrPresetId = isLocked ? committedAsrProvider : asrProvider; // 跨机器同步异常兜底:committed 是本地但不在 visibleAsrPresets 里时,受控 // select 会回退到首项造成假象 —— 补一个 disabled option 让 select 找到当前值。 @@ -395,7 +399,9 @@ export function ProvidersSection() { ? 'asrFoundryLocalWhisper' : anomalousLocal === 'sherpa-onnx-local' ? 'asrSherpaOnnxLocal' - : null; + : anomalousLocal === 'apple-speech' + ? 'asrAppleSpeech' + : null; return (
- ) : committedAsrProvider === 'local-qwen3' || committedAsrProvider === 'foundry-local-whisper' || committedAsrProvider === 'sherpa-onnx-local' ? ( + ) : committedAsrProvider === 'local-qwen3' || committedAsrProvider === 'foundry-local-whisper' || committedAsrProvider === 'sherpa-onnx-local' || committedAsrProvider === 'apple-speech' ? ( // 用户已经在用本地 ASR——dropdown 行的 asrProviderTakenOver 已经把 // "在高级中切换或禁用"讲清楚了,body 不再重复。 // 模型管理 UI 唯一入口在「高级 → 本地模型」里的 。 diff --git a/openless-all/app/src/pages/settings/shared.tsx b/openless-all/app/src/pages/settings/shared.tsx index b6abe4aa..83192d7c 100644 --- a/openless-all/app/src/pages/settings/shared.tsx +++ b/openless-all/app/src/pages/settings/shared.tsx @@ -153,3 +153,4 @@ export type AsrPresetId = | "foundry-local-whisper" | "sherpa-onnx-local" | "local-qwen3" + | "apple-speech"