// clientsflow/Frontend/server/utils/whisper.ts
// Lazy-loaded Whisper (automatic speech recognition) helper built on @xenova/transformers.
// Input contract for transcribeWithWhisper.
type WhisperTranscribeInput = {
// Mono PCM audio samples; assumed normalized floats — TODO confirm against caller.
samples: Float32Array;
// Sample rate of `samples` in Hz (forwarded as `sampling_rate` to the pipeline).
sampleRate: number;
// Optional ISO language code; falls back to CF_WHISPER_LANGUAGE, then "ru".
language?: string;
};
// Module-level memo for the ASR pipeline so the model is loaded at most once per process.
let whisperPipelinePromise: Promise<any> | null = null;
// Module-level memo for the dynamic import of @xenova/transformers.
let transformersPromise: Promise<any> | null = null;
/**
 * Resolve the Whisper model id from the CF_WHISPER_MODEL environment
 * variable. Unset, empty, or whitespace-only values fall back to the
 * default "Xenova/whisper-small".
 */
function getWhisperModelId() {
const configured = process.env.CF_WHISPER_MODEL ?? "Xenova/whisper-small";
const trimmed = configured.trim();
return trimmed.length > 0 ? trimmed : "Xenova/whisper-small";
}
/**
 * Resolve the default transcription language from CF_WHISPER_LANGUAGE.
 * Unset, empty, or whitespace-only values fall back to "ru".
 */
function getWhisperLanguage() {
const trimmed = (process.env.CF_WHISPER_LANGUAGE ?? "ru").trim();
if (trimmed.length === 0) {
return "ru";
}
return trimmed;
}
/**
 * Lazily import @xenova/transformers and build the ASR pipeline exactly once.
 *
 * Fix: the original memoized both promises unconditionally, so a single
 * transient failure (e.g. a network error while downloading the model)
 * cached a rejected promise forever and every subsequent call failed.
 * Both memos now reset themselves on rejection so a later call can retry.
 *
 * @returns the shared "automatic-speech-recognition" pipeline instance.
 */
async function getWhisperPipeline() {
if (!transformersPromise) {
transformersPromise = import("@xenova/transformers").catch((error: unknown) => {
// Clear the memo so the import is retried on the next call.
transformersPromise = null;
throw error;
});
}
const { env, pipeline } = await transformersPromise;
if (!whisperPipelinePromise) {
// Allow both remote download and local cache of model weights.
env.allowRemoteModels = true;
env.allowLocalModels = true;
// NOTE(review): hard-coded container path — assumes deployment under /app.
env.cacheDir = "/app/.data/transformers";
const modelId = getWhisperModelId();
whisperPipelinePromise = pipeline("automatic-speech-recognition", modelId).catch(
(error: unknown) => {
// Clear the memo so pipeline construction is retried on the next call.
whisperPipelinePromise = null;
throw error;
},
);
}
return whisperPipelinePromise;
}
/**
 * Transcribe raw audio samples with the shared Whisper pipeline.
 *
 * @param input - audio samples, their sample rate, and an optional language
 *                override (defaults to CF_WHISPER_LANGUAGE, then "ru").
 * @returns the trimmed transcription text ("" when the model yields nothing).
 */
export async function transcribeWithWhisper(input: WhisperTranscribeInput) {
const recognizer = (await getWhisperPipeline()) as any;
const language = (input.language ?? getWhisperLanguage()) || "ru";
const options = {
sampling_rate: input.sampleRate,
language,
task: "transcribe",
// Process audio in 20 s windows with 5 s overlap between chunks.
chunk_length_s: 20,
stride_length_s: 5,
return_timestamps: false,
};
const output = await recognizer(input.samples, options);
return String((output as any)?.text ?? "").trim();
}