Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions openless-all/app/src-tauri/src/asr/mimo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ impl MimoBatchASR {
result
}

/// #613: Transcribe externally supplied PCM data directly, without going through the
/// internal buffer. Used by the history re-transcription flow.
///
/// Forwards `pcm` to `transcribe_inner` and returns its result unchanged.
pub async fn transcribe_pcm(&self, pcm: &[u8]) -> Result<RawTranscript> {
self.transcribe_inner(pcm).await
}

async fn transcribe_inner(&self, pcm: &[u8]) -> Result<RawTranscript> {
if self.api_key.trim().is_empty() {
anyhow::bail!("MiMo API key missing");
Expand Down
58 changes: 58 additions & 0 deletions openless-all/app/src-tauri/src/asr/wav.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,63 @@
//! WAV helpers for ASR providers that accept complete audio files.

/// Decode a RIFF WAV file into 16-bit PCM samples.
///
/// Used by retranscribe to extract raw PCM from archived recording files.
///
/// The `fmt ` chunk must be the first chunk after the RIFF/WAVE header. Its declared
/// size is honoured, so files written with an 18-byte or 40-byte (extensible-style)
/// `fmt ` body are parsed correctly, and any chunks between `fmt ` and `data`
/// (for example `LIST`) are skipped.
///
/// # Errors
///
/// Returns `Err` with a human-readable message when:
/// - the input is shorter than 44 bytes,
/// - the RIFF/WAVE magic or the leading `fmt ` chunk is missing,
/// - the `fmt ` chunk declares fewer than 16 bytes,
/// - the audio format is not PCM (`1`), or the stream is not mono 16-bit,
/// - no `data` chunk is found.
///
/// An unusual sample rate is NOT an error: it is only logged as a warning.
///
/// A `data` chunk whose declared size runs past the end of the input is clamped to the
/// bytes actually present, and a trailing odd byte is ignored.
pub fn decode_wav_to_pcm_i16(wav_bytes: &[u8]) -> Result<Vec<i16>, String> {
    // 12-byte RIFF header + 8-byte fmt header + 16-byte fmt body + 8-byte data header.
    if wav_bytes.len() < 44 {
        return Err("wav too short for valid header".into());
    }
    if &wav_bytes[0..4] != b"RIFF" || &wav_bytes[8..12] != b"WAVE" {
        return Err("not a valid RIFF WAV file".into());
    }
    if &wav_bytes[12..16] != b"fmt " {
        return Err("missing fmt chunk".into());
    }
    // The fmt body is 16 bytes for plain PCM but may legally be 18 or 40 bytes; the
    // fields read below all live in the first 16 bytes.
    let fmt_size = u32::from_le_bytes([
        wav_bytes[16],
        wav_bytes[17],
        wav_bytes[18],
        wav_bytes[19],
    ]) as usize;
    if fmt_size < 16 {
        return Err(format!(
            "fmt chunk too small ({fmt_size} bytes, expected at least 16)"
        ));
    }
    let audio_format = u16::from_le_bytes([wav_bytes[20], wav_bytes[21]]);
    if audio_format != 1 {
        return Err(format!("unsupported audio format {audio_format} (expected PCM=1)"));
    }
    let num_channels = u16::from_le_bytes([wav_bytes[22], wav_bytes[23]]);
    let sample_rate =
        u32::from_le_bytes([wav_bytes[24], wav_bytes[25], wav_bytes[26], wav_bytes[27]]);
    let bits_per_sample = u16::from_le_bytes([wav_bytes[34], wav_bytes[35]]);
    if num_channels != 1 || bits_per_sample != 16 {
        return Err(format!(
            "expected mono 16-bit PCM, got {num_channels}ch {bits_per_sample}-bit"
        ));
    }
    // 8k / 16k / 44.1k / 48k are the expected rates; anything else is only flagged,
    // not rejected, because no resampling is done here.
    if !matches!(sample_rate, 8000 | 16_000 | 44_100 | 48_000) {
        log::warn!("[wav] unusual sample rate {sample_rate} Hz — ASR may reject");
    }

    // Walk the chunk list starting right after the fmt chunk (8-byte chunk header at
    // offset 12, body of `fmt_size` bytes, padded to an even length). Saturating
    // arithmetic keeps hostile chunk sizes from overflowing on 32-bit targets.
    let mut offset = 20usize
        .saturating_add(fmt_size)
        .saturating_add(fmt_size % 2);
    while offset.saturating_add(8) <= wav_bytes.len() {
        let chunk_id = &wav_bytes[offset..offset + 4];
        let chunk_size = u32::from_le_bytes([
            wav_bytes[offset + 4],
            wav_bytes[offset + 5],
            wav_bytes[offset + 6],
            wav_bytes[offset + 7],
        ]) as usize;
        if chunk_id == b"data" {
            let data_start = offset + 8;
            // Clamp so a truncated file still yields the samples that are present.
            let data_end = data_start.saturating_add(chunk_size).min(wav_bytes.len());
            let samples = wav_bytes[data_start..data_end]
                .chunks_exact(2)
                .map(|pair| i16::from_le_bytes([pair[0], pair[1]]))
                .collect();
            return Ok(samples);
        }
        // Skip this chunk's header and body; chunks are aligned to 2-byte boundaries.
        offset = offset
            .saturating_add(8)
            .saturating_add(chunk_size)
            .saturating_add(chunk_size % 2);
    }
    Err("no data chunk found in WAV".into())
}

/// Encode 16 kHz / mono / 16-bit little-endian PCM samples as a RIFF WAV file.
pub fn encode_wav_16k_mono(samples: &[i16]) -> Vec<u8> {
let sample_rate: u32 = 16_000;
Expand Down
5 changes: 5 additions & 0 deletions openless-all/app/src-tauri/src/asr/whisper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@ impl WhisperBatchASR {
result
}

/// #613: Transcribe externally supplied PCM data directly, without going through the
/// internal buffer. Used by the history re-transcription flow.
///
/// Forwards `pcm` to `transcribe_inner` and returns its result unchanged.
pub async fn transcribe_pcm(&self, pcm: &[u8]) -> Result<RawTranscript> {
self.transcribe_inner(pcm).await
}

async fn transcribe_inner(&self, pcm: &[u8]) -> Result<RawTranscript> {
if self.api_key.is_empty() {
anyhow::bail!("Whisper API key missing");
Expand Down
97 changes: 97 additions & 0 deletions openless-all/app/src-tauri/src/commands/history.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,103 @@ pub fn clear_history(coord: CoordinatorState<'_>) -> Result<(), String> {
coord.history().clear().map_err(|e| e.to_string())
}

/// #613: Re-transcribe a history entry whose ASR transcription failed, using the
/// archived WAV file for that session.
///
/// Workflow:
/// 1. Validate `session_id` with `is_valid_session_id`.
/// 2. Read the archived WAV and decode it to 16-bit little-endian PCM bytes.
/// 3. Confirm the history entry exists, so a missing entry fails before any provider
///    call is made.
/// 4. Build the ASR selected by the current `active_asr_provider` preference and
///    transcribe the PCM.
/// 5. On success, re-read the entry, replace `raw_transcript`, clear `error_code`,
///    persist it and return the updated entry. On failure, return the error string;
///    the stored entry is not modified.
///
/// Currently supported ASR providers:
/// - Whisper / MiMo (HTTP batch): the PCM is transcribed directly.
/// - Anything else (Volcengine / Bailian / local models): an "unsupported provider"
///   style error is returned for now.
///
/// # Errors
///
/// Every failure is reported as a `String`: invalid session id, missing or unreadable
/// recording, WAV decode failure, preference/history store failure, missing history
/// entry, unsupported provider, or a transcription error.
#[tauri::command]
pub async fn retranscribe_history_entry(
    session_id: String,
) -> Result<DictationSession, String> {
    if !is_valid_session_id(&session_id) {
        return Err("invalid session id".into());
    }

    // Read the WAV file via the standalone persistence helper (no CoordinatorState).
    // There is deliberately no separate `exists()` probe: it would be a blocking
    // filesystem call inside an async command and is racy anyway; a missing file is
    // reported through the `NotFound` mapping below.
    let wav_path = crate::persistence::recording_path_for_session(&session_id)
        .map_err(|e| e.to_string())?;
    let wav_bytes = tokio::fs::read(&wav_path).await.map_err(|e| {
        if e.kind() == std::io::ErrorKind::NotFound {
            String::from("recording not found")
        } else {
            format!("read wav failed: {e}")
        }
    })?;

    // Decode WAV -> PCM bytes (16-bit little-endian; the decoder only accepts mono).
    let pcm_samples = crate::asr::wav::decode_wav_to_pcm_i16(&wav_bytes)?;
    let pcm_bytes: Vec<u8> = pcm_samples
        .iter()
        .flat_map(|s| s.to_le_bytes())
        .collect();

    // Fail fast when there is nothing to update, before paying for a provider call.
    // The store is scoped to this block so it is dropped before the `.await` below.
    {
        let history = crate::persistence::HistoryStore::new().map_err(|e| e.to_string())?;
        if history
            .find_entry(&session_id)
            .map_err(|e| e.to_string())?
            .is_none()
        {
            return Err("history entry not found".into());
        }
    }

    // Determine the ASR provider from the stored preferences.
    let prefs = crate::persistence::PreferencesStore::new()
        .map_err(|e| e.to_string())?
        .get();
    let provider = &prefs.active_asr_provider;

    let raw = transcribe_pcm_from_wav(&pcm_bytes, provider).await?;

    // Re-read the entry after transcription so the update is applied to the current
    // stored state rather than to a copy taken before the provider call.
    let history = crate::persistence::HistoryStore::new().map_err(|e| e.to_string())?;
    let Some(mut entry) = history.find_entry(&session_id).map_err(|e| e.to_string())? else {
        return Err("history entry not found".into());
    };
    entry.raw_transcript = raw.text;
    entry.error_code = None;
    history
        .update_entry(&session_id, entry.clone())
        .map_err(|e| e.to_string())?;

    Ok(entry)
}

/// Route a batch transcription of raw PCM to the ASR engine named by `provider`.
///
/// `provider` is the `active_asr_provider` string. `"whisper"` and `"mimo"` each build
/// their batch client from the credentials returned by the coordinator and call its
/// `transcribe_pcm`; any other value produces an error message asking the user to
/// switch provider. Engine errors are converted to their string form.
async fn transcribe_pcm_from_wav(
    pcm: &[u8],
    provider: &str,
) -> Result<crate::asr::RawTranscript, String> {
    let outcome = match provider {
        "whisper" => {
            let credentials = crate::coordinator::read_whisper_credentials();
            let engine = crate::asr::WhisperBatchASR::new(
                credentials.0,
                credentials.1,
                credentials.2,
                None,  // prompt: no hotword context is available when re-transcribing
                None,  // no chunk limit
                false, // verbose_json: off for re-transcription
            );
            engine.transcribe_pcm(pcm).await
        }
        "mimo" => {
            let credentials = crate::coordinator::read_mimo_credentials();
            let engine =
                crate::asr::MimoBatchASR::new(credentials.0, credentials.1, credentials.2);
            engine.transcribe_pcm(pcm).await
        }
        // Every other provider is unsupported for file-based re-transcription for now.
        // Per the issue #613 discussion: Volcengine/Bailian use WebSocket streaming, and
        // local models need runtime access that this standalone command does not have.
        _ => {
            return Err(format!(
                "当前 ASR 提供商 \"{provider}\" 不支持文件重转录。请切换到 Whisper 或 MiMo 后重试。"
            ));
        }
    };
    outcome.map_err(|e| e.to_string())
}

/// 读取某次会话的原始麦克风 wav 字节流。仅当用户开过
/// `prefs.record_audio_for_debug` 并且这条 session 是开关打开后录的,才会有文件。
/// 文件名规约:`<data_dir>/recordings/<session_id>.wav`,与 DictationSession.id 同名。
Expand Down
2 changes: 2 additions & 0 deletions openless-all/app/src-tauri/src/coordinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ pub(crate) use capsule::*;
pub(crate) use dictation_end::*;
pub(crate) use dictation_session::*;
pub(crate) use dictation_streaming::*;
// #613: re-export credential readers for retranscribe_history_entry IPC command.
pub(crate) use llm_pipeline::{read_bailian_credentials, read_mimo_credentials, read_volc_credentials, read_whisper_credentials};
pub(crate) use dictation_voice_agent::*;
pub(crate) use hotkey_supervisors::*;
pub(crate) use ime_insertion::*;
Expand Down
Loading
Loading