refactor(voice): extract chat dictation into reusable component

This commit is contained in:
Ruslan Bakiev
2026-02-23 19:43:00 +07:00
parent c1e8f912d1
commit c5d3a90413
3 changed files with 282 additions and 210 deletions

View File

@@ -0,0 +1,92 @@
/**
 * Resolves the AudioContext constructor, falling back to the prefixed
 * WebKit variant for older Safari builds.
 *
 * @throws Error when the Web Audio API is unavailable in this environment.
 */
function getAudioContextCtor(): typeof AudioContext {
  const candidate = (window.AudioContext ||
    (window as any).webkitAudioContext) as typeof AudioContext | undefined;
  if (!candidate) {
    throw new Error("Web Audio API is not supported in this browser");
  }
  return candidate;
}
/**
 * Collapses an AudioBuffer into a single Float32Array by averaging the
 * samples of every channel. A mono buffer is returned as a fresh copy of
 * channel 0 so callers never alias the decoder's internal storage.
 */
function toMonoFloat32(buffer: AudioBuffer) {
  const channels = buffer.numberOfChannels;
  if (channels <= 1) {
    return buffer.getChannelData(0).slice();
  }
  const mixed = new Float32Array(buffer.length);
  // Accumulate channel by channel; addition order per sample matches the
  // straightforward "sum channels, then divide" mixdown.
  for (let ch = 0; ch < channels; ch += 1) {
    const data = buffer.getChannelData(ch);
    for (let i = 0; i < mixed.length; i += 1) {
      mixed[i] += data[i] ?? 0;
    }
  }
  for (let i = 0; i < mixed.length; i += 1) {
    mixed[i] /= channels;
  }
  return mixed;
}
/**
 * Resamples `input` from `fromRate` Hz to `toRate` Hz using linear
 * interpolation between neighbouring source samples. When the rates
 * already match, the input array itself is returned unchanged.
 */
function resampleFloat32Linear(input: Float32Array, fromRate: number, toRate: number) {
  if (fromRate === toRate) {
    return input;
  }
  const step = fromRate / toRate;
  const sampleCount = Math.max(1, Math.round(input.length / step));
  const resampled = new Float32Array(sampleCount);
  for (let i = 0; i < sampleCount; i += 1) {
    const position = i * step;
    const lo = Math.floor(position);
    // Clamp the right neighbour so the final sample never reads past the end.
    const hi = Math.min(input.length - 1, lo + 1);
    const t = position - lo;
    resampled[i] = (input[lo] ?? 0) * (1 - t) + (input[hi] ?? 0) * t;
  }
  return resampled;
}
/**
 * Converts normalized float samples (expected in [-1, 1]) into 16-bit
 * signed little-endian PCM, returned as raw bytes. Out-of-range samples
 * are clamped before scaling.
 */
function floatToPcm16Bytes(input: Float32Array) {
  const bytes = new Uint8Array(input.length * 2);
  const writer = new DataView(bytes.buffer);
  input.forEach((raw, i) => {
    const clamped = Math.min(1, Math.max(-1, raw));
    // The negative range reaches -0x8000 while positive tops out at 0x7fff.
    const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
    writer.setInt16(i * 2, Math.round(scaled), true);
  });
  return bytes;
}
/**
 * Base64-encodes a byte array via btoa, processing 32 KiB slices at a
 * time so the String.fromCharCode spread stays under engine argument
 * limits for large recordings.
 */
function bytesToBase64(bytes: Uint8Array) {
  const SLICE = 0x8000;
  const parts: string[] = [];
  for (let offset = 0; offset < bytes.length; offset += SLICE) {
    const slice = bytes.subarray(offset, offset + SLICE);
    parts.push(String.fromCharCode(...slice));
  }
  return btoa(parts.join(""));
}
async function decodeAudioBlobToPcm16(blob: Blob) {
const AudioContextCtor = getAudioContextCtor();
const context = new AudioContextCtor();
try {
const arrayBuffer = await blob.arrayBuffer();
const decoded = await context.decodeAudioData(arrayBuffer);
const mono = toMonoFloat32(decoded);
const targetSampleRate = 16000;
const resampled = resampleFloat32Linear(mono, decoded.sampleRate, targetSampleRate);
const pcm16 = floatToPcm16Bytes(resampled);
return {
audioBase64: bytesToBase64(pcm16),
sampleRate: targetSampleRate,
};
} finally {
await context.close();
}
}
/**
 * True when the current environment can record audio: browser globals
 * exist, MediaRecorder is defined, and mediaDevices.getUserMedia is
 * available. Safe to call during SSR (returns false).
 */
export function isVoiceCaptureSupported() {
  const hasBrowserGlobals =
    typeof window !== "undefined" && typeof navigator !== "undefined";
  if (!hasBrowserGlobals) {
    return false;
  }
  const hasRecorder = typeof MediaRecorder !== "undefined";
  return hasRecorder && Boolean(navigator.mediaDevices?.getUserMedia);
}
/**
 * Transcribes a recorded audio Blob: converts it to 16 kHz mono PCM16
 * and POSTs the base64 payload to the pilot transcription endpoint.
 * Returns the trimmed transcript, or "" when the server sent none.
 */
export async function transcribeAudioBlob(blob: Blob) {
  const body = await decodeAudioBlobToPcm16(blob);
  const response = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
    method: "POST",
    body,
  });
  const text = response?.text ?? "";
  return String(text).trim();
}