refactor(voice): extract chat dictation into reusable component
This commit is contained in:
92
frontend/app/composables/useVoiceTranscription.ts
Normal file
92
frontend/app/composables/useVoiceTranscription.ts
Normal file
@@ -0,0 +1,92 @@
|
||||
/**
 * Resolves the `AudioContext` constructor exposed by the current environment.
 *
 * The standard `window.AudioContext` is preferred; the prefixed
 * `window.webkitAudioContext` is used only when the standard one is missing.
 *
 * @returns The constructor to create an audio context with.
 * @throws {Error} When there is no `window` global (for example during
 *   server-side rendering) or when neither constructor is available.
 */
function getAudioContextCtor(): typeof AudioContext {
  type AudioWindow = typeof window & { webkitAudioContext?: typeof AudioContext };

  // Reading `window` where it is not defined raises a ReferenceError, so check
  // with `typeof` first and report the same "not supported" error instead.
  const host = typeof window === "undefined" ? undefined : (window as AudioWindow);
  const Ctor = host?.AudioContext || host?.webkitAudioContext;
  if (!Ctor) {
    throw new Error("Web Audio API is not supported in this browser");
  }
  return Ctor;
}
|
||||
|
||||
/**
 * Down-mixes an `AudioBuffer` to a single channel by averaging all channels.
 *
 * A buffer with at most one channel is returned as a copy of channel 0, so the
 * result never aliases the buffer's own sample storage.
 *
 * @param buffer - Decoded audio to down-mix.
 * @returns A new `Float32Array` holding `buffer.length` mono samples.
 */
function toMonoFloat32(buffer: AudioBuffer): Float32Array {
  const channelCount = buffer.numberOfChannels;
  if (channelCount <= 1) return buffer.getChannelData(0).slice();

  // Look each channel up once instead of once per sample.
  const channels = Array.from({ length: channelCount }, (_, ch) => buffer.getChannelData(ch));

  const length = buffer.length;
  const output = new Float32Array(length);
  for (let i = 0; i < length; i += 1) {
    // Sum per sample, in channel order, before dividing so the arithmetic
    // (and therefore the rounding) matches a plain per-sample average.
    let sum = 0;
    for (const channel of channels) {
      sum += channel[i] ?? 0;
    }
    output[i] = sum / channelCount;
  }
  return output;
}
|
||||
|
||||
/**
 * Resamples mono samples from one sample rate to another using linear
 * interpolation between neighbouring input samples.
 *
 * When both rates are equal the input array itself is returned (no copy).
 * Otherwise the output always contains at least one sample; positions that
 * fall outside the input contribute `0`, so an empty input yields `[0]`.
 *
 * @param input - Source samples.
 * @param fromRate - Sample rate of `input`, in Hz.
 * @param toRate - Desired sample rate, in Hz.
 * @returns The resampled samples.
 * @throws {RangeError} When the rates differ and either one is not a positive
 *   finite number.
 */
function resampleFloat32Linear(input: Float32Array, fromRate: number, toRate: number): Float32Array {
  if (fromRate === toRate) return input;

  // Zero, negative, NaN or infinite rates would otherwise produce NaN samples
  // or an invalid output length; fail with a clear message instead.
  const ratesAreValid =
    Number.isFinite(fromRate) && fromRate > 0 && Number.isFinite(toRate) && toRate > 0;
  if (!ratesAreValid) {
    throw new RangeError(
      `Sample rates must be positive finite numbers (got ${fromRate} -> ${toRate})`,
    );
  }

  // Number of input samples advanced per output sample.
  const ratio = fromRate / toRate;
  const lastIndex = input.length - 1;
  const outLength = Math.max(1, Math.round(input.length / ratio));
  const out = new Float32Array(outLength);

  for (let i = 0; i < outLength; i += 1) {
    const src = i * ratio;
    const left = Math.floor(src);
    // Clamp the right neighbour so the final samples do not read past the end.
    const right = Math.min(lastIndex, left + 1);
    const frac = src - left;
    out[i] = (input[left] ?? 0) * (1 - frac) + (input[right] ?? 0) * frac;
  }
  return out;
}
|
||||
|
||||
/**
 * Encodes floating-point samples as signed 16-bit little-endian PCM bytes.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000 and
 * non-negative values by 0x7fff, then rounded to the nearest integer.
 *
 * @param input - Samples to encode.
 * @returns A byte array with two bytes per input sample.
 */
function floatToPcm16Bytes(input: Float32Array) {
  const bytes = new Uint8Array(input.length * 2);
  const writer = new DataView(bytes.buffer);

  input.forEach((raw, index) => {
    const clamped = Math.min(1, Math.max(-1, raw));
    let scaled: number;
    if (clamped < 0) {
      scaled = clamped * 0x8000;
    } else {
      scaled = clamped * 0x7fff;
    }
    // `true` selects little-endian byte order.
    writer.setInt16(index * 2, Math.round(scaled), true);
  });

  return bytes;
}
|
||||
|
||||
/**
 * Base64-encodes a byte array.
 *
 * The bytes are first turned into a binary string in slices of 0x8000 bytes,
 * so that no single `String.fromCharCode` call receives the whole array as
 * arguments, and the joined string is then passed to `btoa`.
 *
 * @param bytes - Bytes to encode.
 * @returns The Base64 text for `bytes`.
 */
function bytesToBase64(bytes: Uint8Array) {
  const sliceSize = 0x8000;
  const pieces: string[] = [];

  let offset = 0;
  while (offset < bytes.length) {
    const slice = bytes.subarray(offset, offset + sliceSize);
    pieces.push(String.fromCharCode(...slice));
    offset += sliceSize;
  }

  return btoa(pieces.join(""));
}
|
||||
|
||||
/**
 * Decodes an audio blob and converts it to Base64-encoded PCM16 at 16000 Hz.
 *
 * The blob is decoded with a temporary audio context, down-mixed to mono,
 * resampled to the target rate, encoded as 16-bit PCM bytes and finally
 * Base64-encoded. The audio context is closed whether or not decoding succeeds.
 *
 * @param blob - Recorded audio to decode.
 * @returns The Base64 audio payload together with its sample rate.
 * @throws Whatever the audio context lookup, decoding or closing throws.
 */
async function decodeAudioBlobToPcm16(blob: Blob) {
  const outputRate = 16000;
  const audioContext = new (getAudioContextCtor())();

  try {
    const encoded = await blob.arrayBuffer();
    const audioBuffer = await audioContext.decodeAudioData(encoded);

    const monoSamples = toMonoFloat32(audioBuffer);
    const samples = resampleFloat32Linear(monoSamples, audioBuffer.sampleRate, outputRate);
    const pcmBytes = floatToPcm16Bytes(samples);

    return {
      audioBase64: bytesToBase64(pcmBytes),
      sampleRate: outputRate,
    };
  } finally {
    // Always release the context, including when decoding fails.
    await audioContext.close();
  }
}
|
||||
|
||||
/**
 * Reports whether microphone capture can be attempted in this environment.
 *
 * @returns `true` only when `window`, `navigator` and `MediaRecorder` all
 *   exist and `navigator.mediaDevices.getUserMedia` is present.
 */
export function isVoiceCaptureSupported() {
  const hasBrowserGlobals = typeof window !== "undefined" && typeof navigator !== "undefined";
  if (!hasBrowserGlobals) return false;

  if (typeof MediaRecorder === "undefined") return false;

  return Boolean(navigator.mediaDevices?.getUserMedia);
}
|
||||
|
||||
/**
 * Sends recorded audio to the transcription endpoint and returns the text.
 *
 * The blob is converted with `decodeAudioBlobToPcm16` and the resulting
 * payload is POSTed to `/api/pilot-transcribe`.
 *
 * @param blob - Recorded audio to transcribe.
 * @returns The response's `text` with surrounding whitespace removed, or an
 *   empty string when the response carries no text.
 */
export async function transcribeAudioBlob(blob: Blob) {
  const audioPayload = await decodeAudioBlobToPcm16(blob);

  const response = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
    method: "POST",
    body: audioPayload,
  });

  const text = response?.text ?? "";
  return String(text).trim();
}
|
||||
Reference in New Issue
Block a user