/**
 * Resolves the `AudioContext` constructor, falling back to the vendor-prefixed
 * `webkitAudioContext` when the standard one is not present on `window`.
 *
 * @returns The constructor to instantiate an audio context with.
 * @throws Error when neither constructor is available.
 */
function getAudioContextCtor(): typeof AudioContext {
  const prefixedWindow = window as Window & { webkitAudioContext?: typeof AudioContext };
  const Ctor: typeof AudioContext | undefined =
    window.AudioContext ?? prefixedWindow.webkitAudioContext;
  if (!Ctor) {
    throw new Error("Web Audio API is not supported in this browser");
  }
  return Ctor;
}

/**
 * Down-mixes an audio buffer to a single channel by averaging all channels
 * sample by sample.
 *
 * @param buffer Decoded audio to down-mix.
 * @returns A new `Float32Array` of `buffer.length` samples (a copy of channel 0
 *   when the buffer has at most one channel).
 */
function toMonoFloat32(buffer: AudioBuffer): Float32Array {
  const channelCount = buffer.numberOfChannels;
  if (channelCount <= 1) return buffer.getChannelData(0).slice();

  // Fetch each channel once up front instead of once per sample.
  const channels: Float32Array[] = [];
  for (let ch = 0; ch < channelCount; ch += 1) {
    channels.push(buffer.getChannelData(ch));
  }

  const length = buffer.length;
  const output = new Float32Array(length);
  for (let i = 0; i < length; i += 1) {
    let sum = 0;
    for (const channel of channels) {
      sum += channel[i] ?? 0;
    }
    output[i] = sum / channelCount;
  }
  return output;
}

/**
 * Resamples audio with linear interpolation between neighbouring samples.
 *
 * @param input Samples at `fromRate`.
 * @param fromRate Source sample rate.
 * @param toRate Desired sample rate.
 * @returns `input` itself (not a copy) when the rates are equal; otherwise a new
 *   array of roughly `input.length * toRate / fromRate` samples. A non-empty
 *   input always yields at least one sample; an empty input yields an empty array.
 * @throws RangeError when the rates differ and either is non-finite or not positive.
 */
function resampleFloat32Linear(
  input: Float32Array,
  fromRate: number,
  toRate: number,
): Float32Array {
  if (fromRate === toRate) return input;
  if (
    !Number.isFinite(fromRate) ||
    !Number.isFinite(toRate) ||
    fromRate <= 0 ||
    toRate <= 0
  ) {
    throw new RangeError(`Invalid sample rates: ${fromRate} -> ${toRate}`);
  }
  // Without this guard the "at least one sample" rule below would fabricate a
  // single zero sample out of empty input.
  if (input.length === 0) return new Float32Array(0);

  const ratio = fromRate / toRate;
  const outLength = Math.max(1, Math.round(input.length / ratio));
  const lastIndex = input.length - 1;
  const out = new Float32Array(outLength);
  for (let i = 0; i < outLength; i += 1) {
    const src = i * ratio;
    const left = Math.floor(src);
    // Clamp so the final output samples interpolate against the last input sample.
    const right = Math.min(lastIndex, left + 1);
    const frac = src - left;
    out[i] = (input[left] ?? 0) * (1 - frac) + (input[right] ?? 0) * frac;
  }
  return out;
}

/**
 * Converts float samples to signed 16-bit little-endian PCM bytes.
 *
 * Samples are clamped to [-1, 1]; negatives are scaled by 0x8000 and
 * non-negatives by 0x7fff so both ends map onto the full int16 range.
 *
 * @param input Float samples.
 * @returns A byte array of length `input.length * 2`.
 */
function floatToPcm16Bytes(input: Float32Array): Uint8Array {
  const out = new Uint8Array(input.length * 2);
  const view = new DataView(out.buffer);
  for (let i = 0; i < input.length; i += 1) {
    const sample = Math.max(-1, Math.min(1, input[i] ?? 0));
    const value = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
    view.setInt16(i * 2, Math.round(value), true);
  }
  return out;
}

/**
 * Base64-encodes raw bytes using `btoa`.
 *
 * @param bytes Bytes to encode.
 * @returns The base64 string.
 */
function bytesToBase64(bytes: Uint8Array): string {
  let binary = "";
  // Spread in bounded chunks so a large buffer is never passed as a single
  // huge argument list to String.fromCharCode.
  const chunk = 0x8000;
  for (let i = 0; i < bytes.length; i += chunk) {
    binary += String.fromCharCode(...bytes.subarray(i, i + chunk));
  }
  return btoa(binary);
}

/**
 * Decodes encoded audio with both the callback and the promise form of
 * `decodeAudioData`.
 *
 * Older vendor-prefixed contexts accept only callbacks and return `undefined`,
 * so awaiting the return value alone would yield no buffer there. Modern
 * contexts return a promise, which is chained as well so a rejection is never
 * left unhandled. Settling the wrapper promise twice is harmless.
 */
function decodeAudioDataCompat(context: AudioContext, data: ArrayBuffer): Promise<AudioBuffer> {
  return new Promise<AudioBuffer>((resolve, reject) => {
    const fail = (error: unknown): void => {
      // Some legacy implementations invoke the error callback without an error.
      reject(error ?? new Error("Unable to decode audio data"));
    };
    const pending = context.decodeAudioData(data, resolve, fail) as
      | Promise<AudioBuffer>
      | undefined;
    pending?.then(resolve, fail);
  });
}

/**
 * Decodes an audio blob and converts it to mono 16 kHz 16-bit PCM, base64-encoded.
 *
 * The audio context created for decoding is always closed, even on failure.
 *
 * @param blob Encoded audio in a format the browser can decode.
 * @returns The base64 PCM payload together with its sample rate.
 * @throws Error when the Web Audio API is unavailable; decoding failures propagate.
 */
async function decodeAudioBlobToPcm16(
  blob: Blob,
): Promise<{ audioBase64: string; sampleRate: number }> {
  const AudioContextCtor = getAudioContextCtor();
  const context = new AudioContextCtor();
  try {
    const arrayBuffer = await blob.arrayBuffer();
    const decoded = await decodeAudioDataCompat(context, arrayBuffer);
    const mono = toMonoFloat32(decoded);
    const targetSampleRate = 16000;
    const resampled = resampleFloat32Linear(mono, decoded.sampleRate, targetSampleRate);
    const pcm16 = floatToPcm16Bytes(resampled);
    return {
      audioBase64: bytesToBase64(pcm16),
      sampleRate: targetSampleRate,
    };
  } finally {
    await context.close();
  }
}

/**
 * Reports whether the environment exposes what is needed to record audio:
 * `window`, `navigator`, `MediaRecorder` and `navigator.mediaDevices.getUserMedia`.
 *
 * @returns `true` only when all of them are present.
 */
export function isVoiceCaptureSupported(): boolean {
  if (typeof window === "undefined") return false;
  if (typeof navigator === "undefined") return false;
  return typeof MediaRecorder !== "undefined" && Boolean(navigator.mediaDevices?.getUserMedia);
}

/**
 * Converts a recorded audio blob to 16 kHz PCM and POSTs it to
 * `/api/pilot-transcribe` using the ambient `$fetch` helper.
 *
 * @param blob Recorded audio.
 * @returns The trimmed `text` field of the response, or `""` when it is absent.
 */
export async function transcribeAudioBlob(blob: Blob): Promise<string> {
  const payload = await decodeAudioBlobToPcm16(blob);
  const result = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
    method: "POST",
    body: payload,
  });
  return String(result?.text ?? "").trim();
}