refactor(voice): extract chat dictation into reusable component

2026-02-23 19:43:00 +07:00
parent c1e8f912d1
commit c5d3a90413
3 changed files with 282 additions and 210 deletions
--- a/frontend/app/composables/useVoiceTranscription.ts
+++ b/frontend/app/composables/useVoiceTranscription.ts
@@ -0,0 +1,92 @@
+/**
+ * Resolves the `AudioContext` constructor exposed by the current environment.
+ *
+ * Prefers the standard `window.AudioContext` and falls back to the prefixed
+ * `window.webkitAudioContext` when the standard one is missing.
+ *
+ * @returns The constructor to create an audio context with.
+ * @throws Error when there is no `window` global or it exposes neither
+ *   constructor.
+ */
+function getAudioContextCtor(): typeof AudioContext {
+  // Typed view of `window` that also knows the prefixed constructor, so the
+  // lookup needs no `any` cast.
+  type AudioWindow = {
+    AudioContext?: typeof AudioContext;
+    webkitAudioContext?: typeof AudioContext;
+  };
+  // Guard the global itself: referencing an undeclared `window` would raise a
+  // ReferenceError instead of the descriptive error below.
+  const host: AudioWindow | undefined = typeof window === "undefined" ? undefined : window;
+  const Ctor = host?.AudioContext || host?.webkitAudioContext;
+  if (!Ctor) {
+    throw new Error("Web Audio API is not supported in this browser");
+  }
+  return Ctor;
+}
+
+/**
+ * Downmixes an audio buffer to a single channel.
+ *
+ * A buffer with at most one channel yields a copy of channel 0. Otherwise each
+ * output sample is the arithmetic mean of that sample across all channels.
+ *
+ * @param buffer Audio buffer to downmix.
+ * @returns A new `Float32Array` holding `buffer.length` mono samples (or a
+ *   copy of channel 0 for single-channel input).
+ */
+function toMonoFloat32(buffer: AudioBuffer) {
+  const channelCount = buffer.numberOfChannels;
+  if (channelCount <= 1) return buffer.getChannelData(0).slice();
+
+  // Look each channel up once; the per-sample loop below would otherwise call
+  // getChannelData `length * channelCount` times for the same arrays.
+  const channels: Float32Array[] = [];
+  for (let ch = 0; ch < channelCount; ch += 1) {
+    channels.push(buffer.getChannelData(ch));
+  }
+
+  const length = buffer.length;
+  const output = new Float32Array(length);
+  for (let i = 0; i < length; i += 1) {
+    // Accumulate in a plain number and round to float32 only on store.
+    let sum = 0;
+    for (const channel of channels) {
+      sum += channel[i] ?? 0;
+    }
+    output[i] = sum / channelCount;
+  }
+  return output;
+}
+
+/**
+ * Converts samples from one sample rate to another by linear interpolation.
+ *
+ * When both rates are equal the input array itself is returned (no copy).
+ * Otherwise a new array of `round(input.length * toRate / fromRate)` samples
+ * is produced, never shorter than one sample.
+ *
+ * @param input Source samples.
+ * @param fromRate Sample rate of `input`.
+ * @param toRate Desired sample rate of the result.
+ * @returns The resampled samples.
+ */
+function resampleFloat32Linear(input: Float32Array, fromRate: number, toRate: number) {
+  if (fromRate === toRate) {
+    return input;
+  }
+
+  // Distance, in source samples, between two consecutive output samples.
+  const step = fromRate / toRate;
+  const lastIndex = input.length - 1;
+  const resampled = new Float32Array(Math.max(1, Math.round(input.length / step)));
+  resampled.forEach((_, outIndex) => {
+    const position = outIndex * step;
+    const lower = Math.floor(position);
+    // Clamp the upper neighbour so the final sample does not read past the end.
+    const upper = Math.min(lastIndex, lower + 1);
+    const weight = position - lower;
+    resampled[outIndex] = (input[lower] ?? 0) * (1 - weight) + (input[upper] ?? 0) * weight;
+  });
+  return resampled;
+}
+
+/**
+ * Encodes floating-point samples as signed 16-bit little-endian PCM bytes.
+ *
+ * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000 and
+ * non-negative values by 0x7fff, then rounded to the nearest integer.
+ *
+ * @param input Samples to encode.
+ * @returns A byte array of length `input.length * 2`.
+ */
+function floatToPcm16Bytes(input: Float32Array) {
+  const bytesPerSample = 2;
+  const pcm = new Uint8Array(input.length * bytesPerSample);
+  const writer = new DataView(pcm.buffer);
+  input.forEach((raw, index) => {
+    const clamped = Math.max(-1, Math.min(1, raw));
+    // The int16 range is asymmetric (-32768..32767), hence the two scales.
+    const scale = clamped < 0 ? 0x8000 : 0x7fff;
+    writer.setInt16(index * bytesPerSample, Math.round(clamped * scale), true);
+  });
+  return pcm;
+}
+
+/**
+ * Encodes raw bytes as a base64 string.
+ *
+ * The bytes are first turned into a binary string, one character per byte,
+ * and then passed to `btoa`.
+ *
+ * @param bytes Bytes to encode.
+ * @returns The base64 text; an empty string for empty input.
+ */
+function bytesToBase64(bytes: Uint8Array) {
+  // Convert in slices of 0x8000 bytes so each fromCharCode call receives a
+  // bounded number of arguments (presumably to stay under engine limits).
+  const sliceSize = 0x8000;
+  const pieces: string[] = [];
+  let offset = 0;
+  while (offset < bytes.length) {
+    const end = offset + sliceSize;
+    pieces.push(String.fromCharCode(...bytes.subarray(offset, end)));
+    offset = end;
+  }
+  return btoa(pieces.join(""));
+}
+
+/**
+ * Decodes an audio blob and re-encodes it as base64 16-bit PCM at 16 kHz mono.
+ *
+ * Steps: decode the blob with a temporary audio context, downmix to mono,
+ * resample to 16000 Hz, convert to 16-bit PCM bytes and base64-encode them.
+ * The audio context is closed whether or not decoding succeeds.
+ *
+ * @param blob Encoded audio to convert.
+ * @returns `audioBase64` with the encoded PCM data and `sampleRate` (16000).
+ * @throws Error when no `AudioContext` constructor is available; rejections
+ *   from reading or decoding the blob propagate to the caller.
+ */
+async function decodeAudioBlobToPcm16(blob: Blob) {
+  const outputRate = 16000;
+  const audioContext = new (getAudioContextCtor())();
+  try {
+    const encoded = await blob.arrayBuffer();
+    const audio = await audioContext.decodeAudioData(encoded);
+    const samples = resampleFloat32Linear(toMonoFloat32(audio), audio.sampleRate, outputRate);
+    return {
+      audioBase64: bytesToBase64(floatToPcm16Bytes(samples)),
+      sampleRate: outputRate,
+    };
+  } finally {
+    // Always release the context, including when decoding throws.
+    await audioContext.close();
+  }
+}
+
+/**
+ * Reports whether the current environment can record audio from a microphone.
+ *
+ * Requires `window`, `navigator`, `MediaRecorder` and
+ * `navigator.mediaDevices.getUserMedia` to all be present. Safe to call when
+ * `window` or `navigator` is not defined; it then returns `false`.
+ *
+ * @returns `true` when every required API is available.
+ */
+export function isVoiceCaptureSupported() {
+  const hasBrowserGlobals = typeof window !== "undefined" && typeof navigator !== "undefined";
+  if (!hasBrowserGlobals) {
+    return false;
+  }
+
+  const hasRecorder = typeof MediaRecorder !== "undefined";
+  return hasRecorder && Boolean(navigator.mediaDevices?.getUserMedia);
+}
+
+/**
+ * Transcribes recorded audio to text.
+ *
+ * Converts the blob to base64 16-bit PCM and POSTs it to
+ * `/api/pilot-transcribe` through the `$fetch` helper (not defined in this
+ * file).
+ *
+ * @param blob Recorded audio to transcribe.
+ * @returns The `text` field of the response, trimmed; an empty string when the
+ *   response has no text.
+ */
+export async function transcribeAudioBlob(blob: Blob) {
+  const body = await decodeAudioBlobToPcm16(blob);
+  const response = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
+    method: "POST",
+    body,
+  });
+  const transcript = response?.text ?? "";
+  return String(transcript).trim();
+}