import { readBody } from "h3";
import { getAuthContext } from "../utils/auth";
import { transcribeWithWhisper } from "../utils/whisper";

/** Shape of the JSON request body this route reads. */
type TranscribeBody = {
  /** Base64 text that is decoded as 16-bit little-endian PCM samples. */
  audioBase64?: string;
  /** Sample rate forwarded to the transcriber; validated against the bounds below. */
  sampleRate?: number;
  /** Optional language value forwarded to the transcriber as-is (after trimming). */
  language?: string;
};

/** Lowest accepted `sampleRate` (inclusive). */
const MIN_SAMPLE_RATE = 8000;
/** Highest accepted `sampleRate` (inclusive). */
const MAX_SAMPLE_RATE = 48000;
/** Longest accepted clip, in seconds of audio at the declared sample rate. */
const MAX_AUDIO_SECONDS = 120;

/**
 * Decodes base64 text into normalized float samples.
 *
 * The decoded bytes are read as consecutive signed 16-bit little-endian
 * integers and each one is divided by 32768, so results lie in [-1, 1).
 * A trailing odd byte is ignored. Fewer than two decoded bytes yield an
 * empty array.
 *
 * @param audioBase64 Base64-encoded PCM16 data.
 * @returns One float per complete 16-bit sample.
 */
function decodeBase64Pcm16(audioBase64: string) {
  const pcmBuffer = Buffer.from(audioBase64, "base64");
  const sampleCount = Math.floor(pcmBuffer.length / 2);
  const out = new Float32Array(sampleCount);
  for (let i = 0; i < sampleCount; i += 1) {
    // readInt16LE performs the byte assembly and two's-complement sign
    // handling that was previously done by hand.
    out[i] = pcmBuffer.readInt16LE(i * 2) / 32768;
  }
  return out;
}

/**
 * Request handler: validates the body, decodes the audio and returns the
 * transcription as `{ text }`.
 *
 * `getAuthContext(event)` is awaited before anything else; presumably it
 * rejects unauthenticated callers — confirm in ../utils/auth.
 *
 * @throws 400 when `audioBase64` is missing, not a string, or blank.
 * @throws 400 when `sampleRate` is not a finite number within
 *   [MIN_SAMPLE_RATE, MAX_SAMPLE_RATE].
 * @throws 400 when the decoded audio contains no complete sample.
 * @throws 413 when the decoded audio exceeds MAX_AUDIO_SECONDS at the
 *   declared sample rate.
 */
export default defineEventHandler(async (event) => {
  await getAuthContext(event);

  const body = await readBody<TranscribeBody>(event);

  // The body is untrusted JSON: only accept real strings. Coercing with
  // String() would turn an object into "[object Object]", which would then
  // be decoded as audio / forwarded as a language instead of being rejected.
  const rawAudio: unknown = body?.audioBase64;
  const audioBase64 = typeof rawAudio === "string" ? rawAudio.trim() : "";

  const rawLanguage: unknown = body?.language;
  const language =
    typeof rawLanguage === "string" ? (rawLanguage.trim() || undefined) : undefined;

  // Number() keeps accepting numeric strings such as "16000"; anything
  // non-numeric becomes NaN and fails the finite check below.
  const sampleRateRaw = Number(body?.sampleRate ?? 0);

  if (!audioBase64) {
    throw createError({ statusCode: 400, statusMessage: "audioBase64 is required" });
  }

  if (
    !Number.isFinite(sampleRateRaw) ||
    sampleRateRaw < MIN_SAMPLE_RATE ||
    sampleRateRaw > MAX_SAMPLE_RATE
  ) {
    throw createError({
      statusCode: 400,
      statusMessage: `sampleRate must be between ${MIN_SAMPLE_RATE} and ${MAX_SAMPLE_RATE}`,
    });
  }

  const samples = decodeBase64Pcm16(audioBase64);
  if (!samples.length) {
    throw createError({ statusCode: 400, statusMessage: "Audio is empty" });
  }

  const maxSamples = Math.floor(sampleRateRaw * MAX_AUDIO_SECONDS);
  if (samples.length > maxSamples) {
    throw createError({
      statusCode: 413,
      statusMessage: `Audio is too long (max ${MAX_AUDIO_SECONDS}s)`,
    });
  }

  const text = await transcribeWithWhisper({
    samples,
    sampleRate: sampleRateRaw,
    language,
  });

  return { text };
});