-
Notifications
You must be signed in to change notification settings - Fork 291
Add Nemotron-ASR streaming inference to Rust SDK #613
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
rui-ren
wants to merge
11
commits into
main
Choose a base branch
from
ruiren/live-audio-stream-rust
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
fd22350
Add live audio transcription streaming support to Rust SDK
ab41774
Add live audio transcription E2E sample for Rust SDK
2d6eb8c
Add real microphone support to live audio transcription sample
5862384
Fix FFI null pointer and native session leak in Drop
1b23343
Improve API parity with C# LiveAudioTranscription
d8459b2
Update codex-feedback.md: mark parity gaps as resolved
0f8ae7a
Fix CI: update download callback to f64 and apply cargo fmt
d358aa6
Fix CI: remove unused setup_audio_client from live_audio_test
60c6353
Address PR review feedback
d3d4334
Fix clippy needless_return for Rust 1.94
36f34f8
Fix cargo fmt: collapse single-arm match block
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| [package] | ||
| name = "live-audio-transcription-example" | ||
| version = "0.1.0" | ||
| edition = "2024" | ||
| description = "Live audio transcription (streaming) example using the Foundry Local Rust SDK" | ||
|
|
||
| [dependencies] | ||
| foundry-local-sdk = { path = "../../../sdk/rust" } | ||
| tokio = { version = "1", features = ["rt-multi-thread", "macros"] } | ||
| tokio-stream = "0.1" | ||
| cpal = "0.15" |
292 changes: 292 additions & 0 deletions
292
samples/rust/live-audio-transcription-example/src/main.rs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,292 @@ | ||
| // Live Audio Transcription — Foundry Local Rust SDK Example | ||
| // | ||
| // Demonstrates real-time microphone-to-text using: | ||
| // Microphone (cpal) → SDK → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor) | ||
| // | ||
| // Usage: | ||
| // cargo run # Live microphone transcription (press ENTER to stop) | ||
| // cargo run -- --synth # Use synthetic 440Hz sine wave instead of microphone | ||
|
|
||
| use std::env; | ||
| use std::io::{self, Write}; | ||
| use std::sync::Arc; | ||
|
|
||
| use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; | ||
| use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; | ||
| use tokio_stream::StreamExt; | ||
|
|
||
| const ALIAS: &str = "nemotron"; | ||
|
|
||
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // `--synth` replaces the microphone with a generated sine wave so the
    // pipeline can be exercised without audio hardware.
    let use_synth = env::args().any(|a| a == "--synth");

    println!("===========================================================");
    println!(" Foundry Local -- Live Audio Transcription Demo (Rust)");
    println!("===========================================================");
    println!();

    // ── 1. Resolve e2e-test-pkgs path ────────────────────────────────────
    // Prefer the repo-local `e2e-test-pkgs` directory (next to the crate);
    // fall back to the executable's own directory for packaged runs.
    let exe_dir = env::current_exe()?.parent().unwrap().to_path_buf();

    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let e2e_pkgs = std::path::PathBuf::from(manifest_dir)
        .join("..")
        .join("e2e-test-pkgs");

    let (core_path, model_cache_dir) = if e2e_pkgs.exists() {
        let core = e2e_pkgs
            .canonicalize()
            .expect("Failed to canonicalize e2e-test-pkgs path");
        let models = core.join("models");
        println!("Using e2e-test-pkgs:");
        println!(" Core DLLs: {}", core.display());
        println!(" Models: {}", models.display());
        (
            core.to_string_lossy().into_owned(),
            models.to_string_lossy().into_owned(),
        )
    } else {
        println!("Using default paths (exe directory)");
        (
            exe_dir.to_string_lossy().into_owned(),
            exe_dir.join("models").to_string_lossy().into_owned(),
        )
    };

    // ── 2. Initialise the manager ──────────────────────────────────────
    let config = FoundryLocalConfig::new("foundry_local_samples")
        .library_path(&core_path)
        .model_cache_dir(&model_cache_dir)
        .additional_setting("Bootstrap", "false");

    let manager = FoundryLocalManager::create(config)?;
    println!("✓ FoundryLocalManager initialized\n");

    // ── 3. Get the nemotron model ──────────────────────────────────────
    let model = manager.catalog().get_model(ALIAS).await?;
    println!("Model: {} (id: {})", model.alias(), model.id());

    if !model.is_cached().await? {
        println!("Downloading model...");
        model
            .download(Some(|progress: f64| {
                // `\r` rewrites the same console line as progress advances.
                print!("\r {progress:.1}%");
                io::stdout().flush().ok();
            }))
            .await?;
        println!();
    }

    println!("Loading model...");
    model.load().await?;
    println!("✓ Model loaded\n");

    // ── 4. Create live transcription session ───────────────────────────
    // The session is Arc-wrapped because it is shared between this task and
    // the microphone forwarding task spawned below.
    let audio_client = model.create_audio_client();
    let session = Arc::new(audio_client.create_live_transcription_session());

    println!("Starting live transcription session...");
    session.start(None).await?;
    println!("✓ Session started\n");

    // ── 5. Start reading transcription results in background ───────────
    // Results arrive while audio is still being pushed, so they are drained
    // concurrently; the task returns the total result count when the stream
    // ends (after `session.stop`).
    let mut stream = session.get_transcription_stream().await?;
    let read_task = tokio::spawn(async move {
        let mut count = 0usize;
        while let Some(result) = stream.next().await {
            match result {
                Ok(r) => {
                    let text = &r.content[0].text;
                    if r.is_final {
                        // Final hypotheses get their own line; partials are
                        // appended inline as they stream in.
                        println!();
                        println!(" [FINAL] {text}");
                        io::stdout().flush().ok();
                    } else if !text.is_empty() {
                        print!("{text}");
                        io::stdout().flush().ok();
                    }
                    // NOTE: counts both partial and final results.
                    count += 1;
                }
                Err(e) => {
                    eprintln!("\n [ERROR] Stream error: {e}");
                    break;
                }
            }
        }
        count
    });

    if use_synth {
        // ── 6a. Synthetic audio mode ───────────────────────────────────
        println!("Generating synthetic PCM audio (440Hz sine wave, 3 seconds)...\n");

        println!("===========================================================");
        println!(" PUSHING AUDIO → SDK → Core → onnxruntime-genai");
        println!("===========================================================\n");

        let pcm_data = generate_sine_wave_pcm(16000, 3, 440.0);
        // 16 kHz mono 16-bit: 1600 samples × 2 bytes per 100 ms chunk.
        let chunk_size = 16000 / 10 * 2; // 100ms chunks
        let mut chunks_pushed = 0;
        for offset in (0..pcm_data.len()).step_by(chunk_size) {
            let end = std::cmp::min(offset + chunk_size, pcm_data.len());
            session.append(&pcm_data[offset..end], None).await?;
            chunks_pushed += 1;
        }
        println!("Pushed {chunks_pushed} chunks ({} bytes)", pcm_data.len());
    } else {
        // ── 6b. Live microphone mode ───────────────────────────────────
        let host = cpal::default_host();
        let device = host
            .default_input_device()
            .expect("No input audio device available");
        println!("Microphone: {}", device.name().unwrap_or_default());

        let default_config = device.default_input_config()?;
        println!(
            "Device default: {} Hz, {} ch, {:?}",
            default_config.sample_rate().0,
            default_config.channels(),
            default_config.sample_format()
        );

        // Capture rate/channel count before `default_config` is consumed;
        // the callback needs them to convert to 16 kHz mono PCM.
        let device_rate = default_config.sample_rate().0;
        let device_channels = default_config.channels();
        // BufferSize::Default lets the OS/driver choose the optimal buffer
        // size for the device, typically ~10ms worth of samples.
        let mic_config: cpal::StreamConfig = default_config.into();

        // Use a sync channel to forward audio from the cpal callback thread
        // to the async runtime. This avoids Arc-cloning the session and
        // spawning a tokio task per mic callback.
        let (audio_tx, mut audio_rx) = tokio::sync::mpsc::channel::<Vec<u8>>(100);

        let input_stream = device.build_input_stream(
            &mic_config,
            move |data: &[f32], _: &cpal::InputCallbackInfo| {
                let bytes = convert_audio(data, device_channels, device_rate);
                if !bytes.is_empty() {
                    // try_send never blocks the realtime audio thread; if the
                    // channel is full the chunk is dropped rather than
                    // stalling capture.
                    let _ = audio_tx.try_send(bytes);
                }
            },
            |err| eprintln!("Microphone stream error: {err}"),
            None,
        )?;

        input_stream.play()?;

        println!();
        println!("===========================================================");
        println!(" LIVE TRANSCRIPTION ACTIVE");
        println!(" Speak into your microphone.");
        println!(" Transcription appears in real-time.");
        println!(" Press ENTER to stop recording.");
        println!("===========================================================");
        println!();

        // Forward audio from channel to the SDK session in a background task
        let session_for_forward = Arc::clone(&session);
        let forward_task = tokio::spawn(async move {
            while let Some(bytes) = audio_rx.recv().await {
                if let Err(e) = session_for_forward.append(&bytes, None).await {
                    eprintln!("Append error: {e}");
                    break;
                }
            }
        });

        // Block until user presses ENTER
        let mut line = String::new();
        io::stdin().read_line(&mut line)?;

        drop(input_stream);
        // Close the channel so forward_task exits
        // (input_stream drop closes cpal → callback stops → audio_tx dropped)
        forward_task.await?;
        println!("Microphone stopped.");
    }

    // ── 7. Stop session and wait for results ───────────────────────────
    // stop() flushes buffered audio, which ends the transcription stream and
    // lets read_task complete with the final count.
    println!("\nStopping session (flushing remaining audio)...");
    session.stop(None).await?;
    println!("✓ Session stopped\n");

    let result_count = read_task.await?;

    println!("===========================================================");
    println!(" Total transcription results: {result_count}");
    println!("===========================================================");

    // ── 8. Cleanup ─────────────────────────────────────────────────────
    println!("\nUnloading model...");
    model.unload().await?;
    println!("Done.");

    Ok(())
}
|
|
||
| /// Convert raw f32 audio samples to 16kHz/mono/16-bit PCM bytes. | ||
| /// | ||
| /// Handles stereo-to-mono mixing and sample rate conversion. | ||
| fn convert_audio(data: &[f32], channels: u16, sample_rate: u32) -> Vec<u8> { | ||
| // Mix to mono if multi-channel | ||
| let mono: Vec<f32> = if channels > 1 { | ||
| data.chunks(channels as usize) | ||
| .map(|frame| frame.iter().sum::<f32>() / channels as f32) | ||
| .collect() | ||
| } else { | ||
| data.to_vec() | ||
| }; | ||
|
|
||
| // Resample to 16kHz if needed | ||
| let resampled = if sample_rate != 16000 { | ||
| resample(&mono, sample_rate, 16000) | ||
| } else { | ||
| mono | ||
| }; | ||
|
|
||
| // Convert f32 → 16-bit signed little-endian bytes | ||
| let mut bytes = Vec::with_capacity(resampled.len() * 2); | ||
| for &s in &resampled { | ||
| let clamped = s.clamp(-1.0, 1.0); | ||
| let sample = (clamped * i16::MAX as f32) as i16; | ||
| bytes.extend_from_slice(&sample.to_le_bytes()); | ||
| } | ||
| bytes | ||
| } | ||
|
|
||
| /// Generate synthetic PCM audio (sine wave, 16kHz, 16-bit signed little-endian, mono). | ||
/// Generate synthetic PCM audio (sine wave, 16kHz-style, 16-bit signed
/// little-endian, mono) at half full-scale amplitude.
fn generate_sine_wave_pcm(sample_rate: i32, duration_seconds: i32, frequency: f64) -> Vec<u8> {
    let total_samples = (sample_rate * duration_seconds) as usize;
    // Angular frequency in radians per second.
    let omega = 2.0 * std::f64::consts::PI * frequency;
    let mut pcm_bytes = Vec::with_capacity(total_samples * 2);

    for i in 0..total_samples {
        let t = i as f64 / sample_rate as f64;
        // 0.5 amplitude keeps ample headroom below full scale.
        let sample = (i16::MAX as f64 * 0.5 * (omega * t).sin()) as i16;
        pcm_bytes.extend_from_slice(&sample.to_le_bytes());
    }

    pcm_bytes
}
|
|
||
| /// Simple linear-interpolation resampler (e.g. 48kHz → 16kHz). | ||
/// Simple linear-interpolation resampler (e.g. 48kHz → 16kHz).
///
/// Maps each output index back to a fractional source position and blends
/// the two neighbouring input samples; indices are clamped at the tail so
/// the last output never reads past the end of `input`.
fn resample(input: &[f32], from_rate: u32, to_rate: u32) -> Vec<f32> {
    if from_rate == to_rate {
        return input.to_vec();
    }
    let step = from_rate as f64 / to_rate as f64;
    let out_len = (input.len() as f64 / step).ceil() as usize;
    let last = input.len().saturating_sub(1);
    (0..out_len)
        .map(|i| {
            let pos = i as f64 * step;
            let base = pos as usize;
            let blend = (pos - base as f64) as f32;
            let a = input[base.min(last)];
            let b = input[(base + 1).min(last)];
            a + (b - a) * blend
        })
        .collect()
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is default?