Files
clientsflow/frontend/server/utils/whisper.ts
Ruslan Bakiev fe4bd59248 refactor(whisper): use shared whisper service instead of local model
Replace @xenova/transformers in-process model with HTTP calls to shared
whisper-asr-webservice container (http://whisper:9000). Converts PCM16
samples to WAV and sends to /asr endpoint. Env: WHISPER_URL.
2026-03-07 10:51:40 +07:00

70 lines
2.2 KiB
TypeScript

/** Input accepted by `transcribeWithWhisper`. */
type WhisperTranscribeInput = {
/** Mono floating-point PCM samples; values outside [-1, 1] are clamped when encoded to WAV. */
samples: Float32Array;
/** Sample rate of `samples` in samples per second; written verbatim into the WAV header. */
sampleRate: number;
/**
 * Language code forwarded to the service's `language` query parameter.
 * When omitted, the `CF_WHISPER_LANGUAGE` environment variable is used, falling back to "ru".
 */
language?: string;
};
/**
 * Resolves the base URL of the shared Whisper ASR service.
 *
 * Reads the `WHISPER_URL` environment variable, ignoring surrounding
 * whitespace and any trailing slashes so callers can append `/asr` directly.
 * An unset, blank, or slash-only value falls back to `http://whisper:9000`
 * (a blank value would otherwise produce a relative, unusable request URL).
 *
 * @returns The base URL without a trailing slash.
 */
function getWhisperUrl(): string {
  const fallback = "http://whisper:9000";
  const configured = (process.env.WHISPER_URL ?? "").trim();
  const withoutTrailingSlash = configured.replace(/\/+$/, "");
  return withoutTrailingSlash || fallback;
}
/**
 * Returns the default transcription language.
 *
 * Taken from the `CF_WHISPER_LANGUAGE` environment variable with surrounding
 * whitespace removed; an unset or blank value yields "ru".
 */
function getWhisperLanguage() {
  const configured = process.env.CF_WHISPER_LANGUAGE;
  if (configured === undefined || configured === null) {
    return "ru";
  }
  const trimmed = configured.trim();
  if (trimmed.length === 0) {
    return "ru";
  }
  return trimmed;
}
/**
 * Encodes mono floating-point PCM as a 16-bit little-endian WAV file.
 *
 * The output is a canonical 44-byte RIFF/WAVE header (format tag 1 = PCM,
 * one channel) followed by one signed 16-bit value per input sample.
 *
 * @param samples - Floating-point samples; each is clamped to [-1, 1] before scaling.
 * @param sampleRate - Sample rate stored in the header and used to derive the byte rate.
 * @returns A buffer holding the complete WAV file.
 */
function pcmToWav(samples: Float32Array, sampleRate: number): Buffer {
  const HEADER_BYTES = 44;
  const CHANNELS = 1;
  const BIT_DEPTH = 16;
  const bytesPerSample = BIT_DEPTH / 8;
  const frameBytes = CHANNELS * bytesPerSample;
  const payloadBytes = samples.length * bytesPerSample;

  const wav = Buffer.alloc(HEADER_BYTES + payloadBytes);

  // RIFF descriptor: the size field counts everything after the first 8 bytes.
  wav.write("RIFF", 0);
  wav.writeUInt32LE(36 + payloadBytes, 4);
  wav.write("WAVE", 8);

  // "fmt " sub-chunk: 16-byte body describing uncompressed PCM.
  wav.write("fmt ", 12);
  wav.writeUInt32LE(16, 16);
  wav.writeUInt16LE(1, 20); // audio format 1 = PCM
  wav.writeUInt16LE(CHANNELS, 22);
  wav.writeUInt32LE(sampleRate, 24);
  wav.writeUInt32LE(sampleRate * frameBytes, 28); // byte rate
  wav.writeUInt16LE(frameBytes, 32); // block align
  wav.writeUInt16LE(BIT_DEPTH, 34);

  // "data" sub-chunk header, followed by the sample payload.
  wav.write("data", 36);
  wav.writeUInt32LE(payloadBytes, 40);

  let offset = HEADER_BYTES;
  for (const raw of samples) {
    const clamped = Math.min(1, Math.max(-1, raw));
    // Asymmetric scaling so -1 maps to -32768 and +1 maps to 32767.
    const fullScale = clamped < 0 ? 0x8000 : 0x7fff;
    wav.writeInt16LE(Math.round(clamped * fullScale), offset);
    offset += bytesPerSample;
  }

  return wav;
}
/**
 * Transcribes audio by delegating to the shared Whisper ASR HTTP service.
 *
 * The samples are wrapped in a 16-bit WAV container and uploaded as the
 * `audio_file` multipart field to
 * `POST {base}/asr?task=transcribe&language=…&output=json`.
 *
 * @param input - Samples, their sample rate, and an optional language
 *   override. When `language` is omitted the configured default language is
 *   used; an empty string resolves to "ru".
 * @returns The trimmed `text` field of the service's JSON response, or an
 *   empty string when the field is absent.
 * @throws Error when the service answers with a non-2xx status; the message
 *   carries the status code and response body. Failures raised by `fetch`
 *   itself or by JSON parsing propagate unchanged.
 */
export async function transcribeWithWhisper(input: WhisperTranscribeInput): Promise<string> {
  const wav = pcmToWav(input.samples, input.sampleRate);
  const language = (input.language ?? getWhisperLanguage()) || "ru";

  // The language may come from the caller, so percent-encode it: characters
  // such as "&", "#", or spaces must not alter or inject query parameters.
  const url = `${getWhisperUrl()}/asr?task=transcribe&language=${encodeURIComponent(language)}&output=json`;

  const formData = new FormData();
  formData.append("audio_file", new Blob([wav], { type: "audio/wav" }), "audio.wav");

  const response = await fetch(url, { method: "POST", body: formData });
  if (!response.ok) {
    // Best effort: the body is only diagnostic detail, so a read failure
    // must not mask the HTTP status being reported.
    const detail = await response.text().catch(() => "");
    throw new Error(`Whisper service error ${response.status}: ${detail}`);
  }

  const result = (await response.json()) as { text?: string };
  return String(result?.text ?? "").trim();
}