/**
 * Lazy-loading wrapper around the @xenova/transformers Whisper ASR pipeline.
 *
 * The dynamic import and the model download are both memoized so they happen
 * at most once per process; a failed model load is evicted from the memo so a
 * later call can retry after a transient failure.
 */

type WhisperTranscribeInput = {
  samples: Float32Array;
  sampleRate: number;
  language?: string;
};

// Minimal structural types for the slice of @xenova/transformers this module
// uses, keeping the file strictly typed without depending on the package's
// own declarations. NOTE(review): shapes inferred from usage here — confirm
// against the installed @xenova/transformers version.
type WhisperPipeline = (
  samples: Float32Array,
  options: Record<string, unknown>,
) => Promise<{ text?: unknown }>;

type TransformersModule = {
  env: {
    allowRemoteModels: boolean;
    allowLocalModels: boolean;
    cacheDir: string;
  };
  pipeline: (task: string, modelId: string) => Promise<WhisperPipeline>;
};

// Process-wide memos: the library import and the (expensive) pipeline build.
let whisperPipelinePromise: Promise<WhisperPipeline> | null = null;
let transformersPromise: Promise<TransformersModule> | null = null;

/** Model id from CF_WHISPER_MODEL; blank/whitespace values fall back to the default. */
function getWhisperModelId(): string {
  return (
    (process.env.CF_WHISPER_MODEL ?? "Xenova/whisper-small").trim() ||
    "Xenova/whisper-small"
  );
}

/** Transcription language from CF_WHISPER_LANGUAGE; blank values fall back to "ru". */
function getWhisperLanguage(): string {
  const value = (process.env.CF_WHISPER_LANGUAGE ?? "ru").trim();
  return value || "ru";
}

/**
 * Returns the shared Whisper pipeline, importing the library and building the
 * pipeline on first use. Environment knobs (remote/local models, cache dir)
 * are configured once, just before the pipeline is created.
 */
async function getWhisperPipeline(): Promise<WhisperPipeline> {
  if (!transformersPromise) {
    transformersPromise = import(
      "@xenova/transformers"
    ) as Promise<TransformersModule>;
  }
  const { env, pipeline } = await transformersPromise;
  if (!whisperPipelinePromise) {
    env.allowRemoteModels = true;
    env.allowLocalModels = true;
    env.cacheDir = "/app/.data/transformers";
    const modelId = getWhisperModelId();
    whisperPipelinePromise = pipeline(
      "automatic-speech-recognition",
      modelId,
    ).catch((err: unknown) => {
      // Don't memoize a failed load — allow the next caller to retry.
      whisperPipelinePromise = null;
      throw err;
    });
  }
  return whisperPipelinePromise;
}

/**
 * Transcribes raw PCM audio with Whisper.
 *
 * @param input - Mono float samples, their sample rate, and an optional
 *   language override (falls back to CF_WHISPER_LANGUAGE, then "ru").
 * @returns The trimmed transcription text ("" when the model returns none).
 */
export async function transcribeWithWhisper(
  input: WhisperTranscribeInput,
): Promise<string> {
  const transcriber = await getWhisperPipeline();
  const result = await transcriber(input.samples, {
    sampling_rate: input.sampleRate,
    // `??` keeps an explicitly passed language; `||` maps an empty string to "ru".
    language: (input.language ?? getWhisperLanguage()) || "ru",
    task: "transcribe",
    chunk_length_s: 20,
    stride_length_s: 5,
    return_timestamps: false,
  });
  return String(result?.text ?? "").trim();
}