Files
clientsflow/frontend/server/api/pilot-transcribe.post.ts
2026-02-20 12:10:25 +07:00

63 lines
1.8 KiB
TypeScript

import { readBody } from "h3";
import { getAuthContext } from "../utils/auth";
import { transcribeWithWhisper } from "../utils/whisper";
/**
 * JSON payload accepted by the transcription endpoint.
 *
 * Every field is optional at the type level because the body comes from the
 * client and is validated by the handler at runtime.
 */
type TranscribeBody = {
  /** Base64 text that the handler decodes into 16-bit PCM samples. */
  audioBase64?: string;
  /** Sample rate of the audio; the handler accepts 8000 to 48000. */
  sampleRate?: number;
  /** Optional language value forwarded to the transcriber; blank means unset. */
  language?: string;
};
/**
 * Decodes base64 text into normalised audio samples.
 *
 * The decoded bytes are read as consecutive signed 16-bit little-endian
 * integers and each one is divided by 32768, giving values in [-1, 1).
 * A trailing unpaired byte is ignored; fewer than two bytes yields an empty
 * array.
 *
 * @param audioBase64 Base64 text holding the raw PCM bytes.
 * @returns One Float32 sample per complete 16-bit pair in the input.
 */
function decodeBase64Pcm16(audioBase64: string) {
  const bytes = Buffer.from(audioBase64, "base64");
  const frameCount = Math.floor(bytes.length / 2);
  const samples = new Float32Array(frameCount);

  // readInt16LE performs the low/high byte assembly and sign extension.
  for (let frame = 0, offset = 0; frame < frameCount; frame += 1, offset += 2) {
    samples[frame] = bytes.readInt16LE(offset) / 32768;
  }

  return samples;
}
/**
 * POST handler: transcribes a base64-encoded 16-bit PCM clip.
 *
 * Reads `audioBase64`, `sampleRate` and optional `language` from the request
 * body, validates them, decodes the audio and returns `{ text }` from
 * `transcribeWithWhisper`.
 *
 * @throws 400 when `audioBase64` is missing, blank or not a string.
 * @throws 400 when `sampleRate` is not a finite number in 8000 to 48000.
 * @throws 400 when the decoded audio contains no samples.
 * @throws 413 when the decoded audio exceeds 120 seconds at the given rate.
 */
export default defineEventHandler(async (event) => {
  // Awaited before any work is done; the returned context is not used here.
  // NOTE(review): presumably this rejects unauthenticated requests — confirm.
  await getAuthContext(event);

  const body = await readBody<TranscribeBody>(event);

  // The body is untrusted: the declared field types are not enforced at
  // runtime. Only accept real strings, so an object or array is never
  // stringified (e.g. "[object Object]") and then decoded as if it were audio.
  const audioField: unknown = body?.audioBase64;
  const audioBase64 = typeof audioField === "string" ? audioField.trim() : "";

  // Numeric coercion is kept so that clients sending "16000" still work.
  const sampleRateRaw = Number(body?.sampleRate ?? 0);

  // A non-string or blank language is treated as "not provided".
  const languageField: unknown = body?.language;
  const language = (typeof languageField === "string" ? languageField.trim() : "") || undefined;

  if (!audioBase64) {
    throw createError({ statusCode: 400, statusMessage: "audioBase64 is required" });
  }

  if (!Number.isFinite(sampleRateRaw) || sampleRateRaw < 8000 || sampleRateRaw > 48000) {
    throw createError({ statusCode: 400, statusMessage: "sampleRate must be between 8000 and 48000" });
  }

  const samples = decodeBase64Pcm16(audioBase64);
  if (!samples.length) {
    throw createError({ statusCode: 400, statusMessage: "Audio is empty" });
  }

  // Cap the clip at 120 seconds of audio at the declared sample rate.
  const maxSamples = Math.floor(sampleRateRaw * 120);
  if (samples.length > maxSamples) {
    throw createError({ statusCode: 413, statusMessage: "Audio is too long (max 120s)" });
  }

  const text = await transcribeWithWhisper({
    samples,
    sampleRate: sampleRateRaw,
    language,
  });

  return { text };
});