Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 189 additions & 0 deletions samples/js/live-audio-transcription-example/app.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Live Audio Transcription Example — Foundry Local JS SDK
//
// Demonstrates real-time microphone-to-text using the JS SDK.
// Requires: npm install foundry-local-sdk naudiodon2
//
// Usage: node app.js

import { FoundryLocalManager } from 'foundry-local-sdk';

// Startup banner, then bring up the SDK client. `manager` is the handle the
// rest of the script uses to reach the local catalog and service.
const bannerLines = [
  '╔══════════════════════════════════════════════════════════╗',
  '║ Foundry Local — Live Audio Transcription (JS SDK) ║',
  '╚══════════════════════════════════════════════════════════╝',
];
for (const line of bannerLines) {
  console.log(line);
}
console.log();

console.log('Initializing Foundry Local SDK...');
const manager = FoundryLocalManager.create({ appName: 'foundry_local_live_audio', logLevel: 'info' });
console.log('✓ SDK initialized');

// Resolve the model alias against the local catalog, then make sure the
// model is downloaded and loaded before any audio is streamed at it.
const modelAlias = 'nemotron';
const model = await manager.catalog.getModel(modelAlias);
if (!model) {
  console.error(`ERROR: Model "${modelAlias}" not found in catalog.`);
  process.exit(1);
}

console.log(`Found model: ${model.id}`);
console.log('Downloading model (if needed)...');
// Progress callback receives a percentage; rewrite the same console line.
const onDownloadProgress = (progress) => {
  process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
};
await model.download(onDownloadProgress);
console.log('\n✓ Model downloaded');

console.log('Loading model...');
await model.load();
console.log('✓ Model loaded');

// Create live transcription session (same pattern as C# sample).
const audioClient = model.createAudioClient();
const session = audioClient.createLiveTranscriptionSession();

// PCM format the session expects from `session.append()`.
Object.assign(session.settings, {
  sampleRate: 16000, // default is 16000; set explicitly for clarity
  channels: 1,
  bitsPerSample: 16,
  language: 'en',
});

console.log('Starting streaming session...');
await session.start();
console.log('✓ Session started');

// Consume transcription events in the background. The promise is kept so
// shutdown can await the reader before unloading the model.
const readPromise = (async () => {
  try {
    for await (const event of session.getTranscriptionStream()) {
      const text = event.content?.[0]?.text;
      if (!text) {
        continue;
      }
      // `is_final` is a transcript-state marker only — it flags a finalized
      // segment, it does not end the stream or stop the app.
      const output = event.is_final ? `\n [FINAL] ${text}\n` : text;
      process.stdout.write(output);
    }
  } catch (err) {
    // AbortError is the expected outcome of session.stop(); anything else
    // is a genuine stream failure worth reporting.
    if (err.name !== 'AbortError') {
      console.error('Stream error:', err.message);
    }
  }
})();

// --- Microphone capture ---
// This example uses naudiodon2 for cross-platform audio capture.
// Install with: npm install naudiodon2
//
// If you prefer a different audio library, just push PCM bytes
// (16-bit signed LE, mono, 16kHz) via session.append().

let audioInput;
try {
  const { default: portAudio } = await import('naudiodon2');

  audioInput = portAudio.AudioIO({
    inOptions: {
      channelCount: session.settings.channels,
      sampleFormat: session.settings.bitsPerSample === 16
        ? portAudio.SampleFormat16Bit
        : portAudio.SampleFormat32Bit,
      sampleRate: session.settings.sampleRate,
      // Larger chunk size lowers callback frequency and reduces overflow risk.
      framesPerBuffer: 3200,
      // Allow deeper native queue during occasional event-loop stalls.
      maxQueue: 64
    }
  });

  // Captured chunks are queued here; pumpAudio() drains them so that the
  // async session.append() calls are serialized (never interleaved).
  const appendQueue = [];
  let pumping = false;          // true while a drain loop is running
  let warnedQueueDrop = false;  // emit the overflow warning only once

  // Drain appendQueue into session.append() one chunk at a time. The
  // `pumping` flag guarantees a single drain loop; the restart in `finally`
  // covers chunks that arrive between loop exit and the flag reset.
  const pumpAudio = async () => {
    if (pumping) return;
    pumping = true;
    try {
      while (appendQueue.length > 0) {
        const pcm = appendQueue.shift();
        await session.append(pcm);
      }
    } catch (err) {
      console.error('append error:', err.message);
    } finally {
      pumping = false;
      // Handle race where new data arrived after loop exit.
      if (appendQueue.length > 0) {
        void pumpAudio();
      }
    }
  };

  audioInput.on('data', (buffer) => {
    // Single copy: slice the underlying ArrayBuffer to get an independent Uint8Array
    // (the driver may reuse `buffer` after this callback returns).
    const copy = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength).slice();

    // Keep a bounded queue to avoid unbounded memory growth: drop the oldest
    // chunk rather than stalling or growing without limit.
    if (appendQueue.length >= 100) {
      appendQueue.shift();
      if (!warnedQueueDrop) {
        warnedQueueDrop = true;
        console.warn('Audio append queue overflow; dropping oldest chunk to keep stream alive.');
      }
    }

    appendQueue.push(copy);
    void pumpAudio();
  });

  console.log();
  console.log('════════════════════════════════════════════════════════════');
  console.log(' LIVE TRANSCRIPTION ACTIVE');
  console.log(' Speak into your microphone.');
  console.log(' Press Ctrl+C to stop.');
  console.log('════════════════════════════════════════════════════════════');
  console.log();

  audioInput.start();
} catch (err) {
  console.warn('⚠ Could not initialize microphone (naudiodon2 may not be installed).');
  // Surface the underlying failure (previously swallowed) so users can tell
  // a missing module apart from a device/permission problem.
  console.warn(`  Reason: ${err.message}`);
  console.warn(' Install with: npm install naudiodon2');
  console.warn(' Falling back to synthetic audio test...');
  console.warn();

  // Fallback: push 2 seconds of synthetic PCM (440Hz sine wave)
  const sampleRate = session.settings.sampleRate;
  const duration = 2;
  const totalSamples = sampleRate * duration;
  const pcmBytes = new Uint8Array(totalSamples * 2);
  for (let i = 0; i < totalSamples; i++) {
    const t = i / sampleRate;
    // Half-amplitude sine; 32767 is the max positive value of signed 16-bit.
    const sample = Math.round(32767 * 0.5 * Math.sin(2 * Math.PI * 440 * t));
    // Encode little-endian: low byte first (bitwise ops handle negatives via
    // two's complement).
    pcmBytes[i * 2] = sample & 0xFF;
    pcmBytes[i * 2 + 1] = (sample >> 8) & 0xFF;
  }

  // Push in 100ms chunks (sampleRate/10 samples * 2 bytes per sample).
  const chunkSize = (sampleRate / 10) * 2;
  for (let offset = 0; offset < pcmBytes.length; offset += chunkSize) {
    const len = Math.min(chunkSize, pcmBytes.length - offset);
    await session.append(pcmBytes.slice(offset, offset + len));
  }

  console.log('✓ Synthetic audio pushed');
}

// Handle graceful shutdown on Ctrl+C: stop capture, close the session, wait
// for the background reader, then unload the model. The try/catch is
// essential — without it, a rejection from stop()/unload() would become an
// unhandled rejection and process.exit() would never run, hanging the app.
process.on('SIGINT', async () => {
  console.log('\n\nStopping...');
  try {
    if (audioInput) {
      audioInput.quit();
    }
    await session.stop();   // also aborts the transcription stream
    await readPromise;      // let the reader drain before unloading
    await model.unload();
    console.log('✓ Done');
    process.exit(0);
  } catch (err) {
    console.error('Shutdown error:', err.message);
    process.exit(1);
  }
});
Loading