/** Input for a single Whisper transcription request. */
type WhisperTranscribeInput = {
  /** Mono PCM samples; values outside [-1, 1] are clamped when encoded to WAV. */
  samples: Float32Array;
  /** Sample rate of `samples`, written verbatim into the WAV header. */
  sampleRate: number;
  /** Language to request; when absent or blank the configured default is used. */
  language?: string;
};

const DEFAULT_WHISPER_URL = "http://whisper:9000";
const DEFAULT_WHISPER_LANGUAGE = "ru";

/**
 * Returns the base URL of the Whisper service from `WHISPER_URL`
 * (default `http://whisper:9000`) with any trailing slashes removed.
 */
function getWhisperUrl(): string {
  return (process.env.WHISPER_URL ?? DEFAULT_WHISPER_URL).replace(/\/+$/, "");
}

/**
 * Returns the default transcription language from `CF_WHISPER_LANGUAGE`,
 * trimmed; falls back to `"ru"` when the variable is unset or blank.
 */
function getWhisperLanguage(): string {
  const value = (process.env.CF_WHISPER_LANGUAGE ?? DEFAULT_WHISPER_LANGUAGE).trim();
  return value || DEFAULT_WHISPER_LANGUAGE;
}

/**
 * Picks the language to send to the service: the caller's value when it is
 * non-blank after trimming, otherwise the configured default.
 */
function resolveWhisperLanguage(requested?: string): string {
  const trimmed = requested?.trim();
  return trimmed ? trimmed : getWhisperLanguage();
}

/**
 * Encodes mono float PCM samples as a 16-bit PCM WAV file (44-byte header
 * followed by little-endian sample data).
 *
 * @param samples Float samples; each is clamped to [-1, 1] before scaling.
 * @param sampleRate Sample rate written to the header; must be finite and > 0.
 * @returns A buffer holding the complete WAV file.
 * @throws RangeError if `sampleRate` is not a finite positive number.
 */
function pcmToWav(samples: Float32Array, sampleRate: number): Buffer {
  // A zero or NaN rate would otherwise be written silently as 0 and yield an unplayable file.
  if (!Number.isFinite(sampleRate) || sampleRate <= 0) {
    throw new RangeError(`Invalid WAV sample rate: ${sampleRate}`);
  }

  const numChannels = 1;
  const bitsPerSample = 16;
  const bytesPerSample = bitsPerSample / 8;
  const byteRate = sampleRate * numChannels * bytesPerSample;
  const blockAlign = numChannels * bytesPerSample;
  const dataSize = samples.length * bytesPerSample;
  const headerSize = 44;
  const buffer = Buffer.alloc(headerSize + dataSize);

  // RIFF header
  buffer.write("RIFF", 0);
  buffer.writeUInt32LE(36 + dataSize, 4); // file size minus the 8-byte "RIFF"+size prefix
  buffer.write("WAVE", 8);

  // fmt chunk
  buffer.write("fmt ", 12);
  buffer.writeUInt32LE(16, 16); // fmt chunk body size
  buffer.writeUInt16LE(1, 20); // audio format 1 = PCM
  buffer.writeUInt16LE(numChannels, 22);
  buffer.writeUInt32LE(sampleRate, 24);
  buffer.writeUInt32LE(byteRate, 28);
  buffer.writeUInt16LE(blockAlign, 32);
  buffer.writeUInt16LE(bitsPerSample, 34);

  // data chunk
  buffer.write("data", 36);
  buffer.writeUInt32LE(dataSize, 40);

  for (let i = 0; i < samples.length; i += 1) {
    const raw = samples[i] ?? 0;
    // NaN would survive min/max clamping, so map it to silence explicitly.
    const clamped = Number.isNaN(raw) ? 0 : Math.max(-1, Math.min(1, raw));
    // Asymmetric scaling so -1 maps to -32768 and +1 maps to 32767.
    const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
    buffer.writeInt16LE(Math.round(scaled), headerSize + i * bytesPerSample);
  }

  return buffer;
}

/**
 * Sends PCM audio to the Whisper service's `/asr` endpoint as a WAV upload
 * and returns the transcribed text.
 *
 * @param input Samples, their sample rate, and an optional language override.
 * @returns The trimmed `text` field of the JSON response, or `""` if absent.
 * @throws RangeError if `input.sampleRate` is not a finite positive number.
 * @throws Error if the service responds with a non-2xx status.
 */
export async function transcribeWithWhisper(input: WhisperTranscribeInput): Promise<string> {
  const wav = pcmToWav(input.samples, input.sampleRate);

  // URLSearchParams percent-encodes the language so it cannot inject or
  // override other query parameters.
  const query = new URLSearchParams({
    task: "transcribe",
    language: resolveWhisperLanguage(input.language),
    output: "json",
  });
  const url = `${getWhisperUrl()}/asr?${query.toString()}`;

  const formData = new FormData();
  formData.append("audio_file", new Blob([wav], { type: "audio/wav" }), "audio.wav");

  const response = await fetch(url, { method: "POST", body: formData });
  if (!response.ok) {
    // Reading the error body is best-effort; the status code alone is still reported.
    let detail = "";
    try {
      detail = await response.text();
    } catch {
      detail = "";
    }
    throw new Error(`Whisper service error ${response.status}: ${detail}`);
  }

  const result = (await response.json()) as { text?: string } | null;
  return String(result?.text ?? "").trim();
}