refactor(voice): extract chat dictation into reusable component
This commit is contained in:
@@ -4,6 +4,7 @@ import CrmAuthLoading from "~~/app/components/workspace/auth/CrmAuthLoading.vue"
|
|||||||
import CrmCalendarPanel from "~~/app/components/workspace/calendar/CrmCalendarPanel.vue";
|
import CrmCalendarPanel from "~~/app/components/workspace/calendar/CrmCalendarPanel.vue";
|
||||||
import CrmCommunicationsContextSidebar from "~~/app/components/workspace/communications/CrmCommunicationsContextSidebar.vue";
|
import CrmCommunicationsContextSidebar from "~~/app/components/workspace/communications/CrmCommunicationsContextSidebar.vue";
|
||||||
import CrmCommunicationsListSidebar from "~~/app/components/workspace/communications/CrmCommunicationsListSidebar.vue";
|
import CrmCommunicationsListSidebar from "~~/app/components/workspace/communications/CrmCommunicationsListSidebar.vue";
|
||||||
|
import CrmVoiceDictationButton from "~~/app/components/workspace/communications/CrmVoiceDictationButton.client.vue";
|
||||||
import CrmDocumentsPanel from "~~/app/components/workspace/documents/CrmDocumentsPanel.vue";
|
import CrmDocumentsPanel from "~~/app/components/workspace/documents/CrmDocumentsPanel.vue";
|
||||||
import CrmWorkspaceTopbar from "~~/app/components/workspace/header/CrmWorkspaceTopbar.vue";
|
import CrmWorkspaceTopbar from "~~/app/components/workspace/header/CrmWorkspaceTopbar.vue";
|
||||||
import CrmPilotSidebar from "~~/app/components/workspace/pilot/CrmPilotSidebar.vue";
|
import CrmPilotSidebar from "~~/app/components/workspace/pilot/CrmPilotSidebar.vue";
|
||||||
@@ -42,6 +43,7 @@ import {
|
|||||||
formatDocumentScope,
|
formatDocumentScope,
|
||||||
isDocumentLinkedToContact,
|
isDocumentLinkedToContact,
|
||||||
} from "~~/app/composables/useWorkspaceDocuments";
|
} from "~~/app/composables/useWorkspaceDocuments";
|
||||||
|
import { isVoiceCaptureSupported, transcribeAudioBlob } from "~~/app/composables/useVoiceTranscription";
|
||||||
import { Chat as AiChat } from "@ai-sdk/vue";
|
import { Chat as AiChat } from "@ai-sdk/vue";
|
||||||
import { DefaultChatTransport, isTextUIPart, type UIMessage } from "ai";
|
import { DefaultChatTransport, isTextUIPart, type UIMessage } from "ai";
|
||||||
type TabId = "communications" | "documents";
|
type TabId = "communications" | "documents";
|
||||||
@@ -1463,98 +1465,6 @@ function appendPilotTranscript(text: string) {
|
|||||||
return merged;
|
return merged;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getAudioContextCtor(): typeof AudioContext {
|
|
||||||
const ctor = (globalThis as any).AudioContext ?? (globalThis as any).webkitAudioContext;
|
|
||||||
if (!ctor) {
|
|
||||||
throw new Error("AudioContext is not supported in this browser");
|
|
||||||
}
|
|
||||||
return ctor as typeof AudioContext;
|
|
||||||
}
|
|
||||||
|
|
||||||
function toMonoFloat32(buffer: AudioBuffer) {
|
|
||||||
if (buffer.numberOfChannels === 1) {
|
|
||||||
return buffer.getChannelData(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
const out = new Float32Array(buffer.length);
|
|
||||||
for (let channel = 0; channel < buffer.numberOfChannels; channel += 1) {
|
|
||||||
const input = buffer.getChannelData(channel);
|
|
||||||
for (let i = 0; i < buffer.length; i += 1) {
|
|
||||||
const prev = out[i] ?? 0;
|
|
||||||
out[i] = prev + (input[i] ?? 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (let i = 0; i < out.length; i += 1) {
|
|
||||||
out[i] = (out[i] ?? 0) / buffer.numberOfChannels;
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
function resampleFloat32Linear(input: Float32Array, fromRate: number, toRate: number) {
|
|
||||||
if (fromRate === toRate) return input;
|
|
||||||
const ratio = fromRate / toRate;
|
|
||||||
const outLength = Math.max(1, Math.round(input.length / ratio));
|
|
||||||
const out = new Float32Array(outLength);
|
|
||||||
|
|
||||||
for (let i = 0; i < outLength; i += 1) {
|
|
||||||
const position = i * ratio;
|
|
||||||
const left = Math.floor(position);
|
|
||||||
const right = Math.min(input.length - 1, left + 1);
|
|
||||||
const frac = position - left;
|
|
||||||
out[i] = (input[left] ?? 0) * (1 - frac) + (input[right] ?? 0) * frac;
|
|
||||||
}
|
|
||||||
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
function floatToPcm16Bytes(input: Float32Array) {
|
|
||||||
const out = new Uint8Array(input.length * 2);
|
|
||||||
const view = new DataView(out.buffer);
|
|
||||||
for (let i = 0; i < input.length; i += 1) {
|
|
||||||
const sample = Math.max(-1, Math.min(1, input[i] ?? 0));
|
|
||||||
const value = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
|
|
||||||
view.setInt16(i * 2, Math.round(value), true);
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
function bytesToBase64(bytes: Uint8Array) {
|
|
||||||
let binary = "";
|
|
||||||
const chunk = 0x8000;
|
|
||||||
for (let i = 0; i < bytes.length; i += chunk) {
|
|
||||||
binary += String.fromCharCode(...bytes.subarray(i, i + chunk));
|
|
||||||
}
|
|
||||||
return btoa(binary);
|
|
||||||
}
|
|
||||||
|
|
||||||
async function decodeAudioBlobToPcm16(blob: Blob) {
|
|
||||||
const AudioContextCtor = getAudioContextCtor();
|
|
||||||
const context = new AudioContextCtor();
|
|
||||||
try {
|
|
||||||
const arrayBuffer = await blob.arrayBuffer();
|
|
||||||
const decoded = await context.decodeAudioData(arrayBuffer);
|
|
||||||
const mono = toMonoFloat32(decoded);
|
|
||||||
const targetSampleRate = 16000;
|
|
||||||
const resampled = resampleFloat32Linear(mono, decoded.sampleRate, targetSampleRate);
|
|
||||||
const pcm16 = floatToPcm16Bytes(resampled);
|
|
||||||
return {
|
|
||||||
audioBase64: bytesToBase64(pcm16),
|
|
||||||
sampleRate: targetSampleRate,
|
|
||||||
};
|
|
||||||
} finally {
|
|
||||||
await context.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function transcribeAudioBlob(blob: Blob) {
|
|
||||||
const payload = await decodeAudioBlobToPcm16(blob);
|
|
||||||
const result = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
|
|
||||||
method: "POST",
|
|
||||||
body: payload,
|
|
||||||
});
|
|
||||||
return String(result?.text ?? "").trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
async function transcribeRecordedPilotAudio(blob: Blob) {
|
async function transcribeRecordedPilotAudio(blob: Blob) {
|
||||||
pilotMicError.value = null;
|
pilotMicError.value = null;
|
||||||
pilotTranscribing.value = true;
|
pilotTranscribing.value = true;
|
||||||
@@ -2242,10 +2152,7 @@ if (process.server) {
|
|||||||
|
|
||||||
onMounted(() => {
|
onMounted(() => {
|
||||||
pilotHeaderText.value = pilotHeaderPhrases[Math.floor(Math.random() * pilotHeaderPhrases.length)] ?? "Every step moves you forward";
|
pilotHeaderText.value = pilotHeaderPhrases[Math.floor(Math.random() * pilotHeaderPhrases.length)] ?? "Every step moves you forward";
|
||||||
pilotMicSupported.value =
|
pilotMicSupported.value = isVoiceCaptureSupported();
|
||||||
typeof navigator !== "undefined" &&
|
|
||||||
typeof MediaRecorder !== "undefined" &&
|
|
||||||
Boolean(navigator.mediaDevices?.getUserMedia);
|
|
||||||
lifecycleClock = setInterval(() => {
|
lifecycleClock = setInterval(() => {
|
||||||
lifecycleNowMs.value = Date.now();
|
lifecycleNowMs.value = Date.now();
|
||||||
}, 15000);
|
}, 15000);
|
||||||
@@ -2295,7 +2202,6 @@ onMounted(() => {
|
|||||||
|
|
||||||
onBeforeUnmount(() => {
|
onBeforeUnmount(() => {
|
||||||
stopCrmRealtime();
|
stopCrmRealtime();
|
||||||
stopCommRecording(true);
|
|
||||||
if (pilotRecording.value) {
|
if (pilotRecording.value) {
|
||||||
stopPilotRecording("fill");
|
stopPilotRecording("fill");
|
||||||
}
|
}
|
||||||
@@ -3496,11 +3402,6 @@ const eventCloseError = ref<Record<string, string>>({});
|
|||||||
const eventArchiveRecordingById = ref<Record<string, boolean>>({});
|
const eventArchiveRecordingById = ref<Record<string, boolean>>({});
|
||||||
const eventArchiveTranscribingById = ref<Record<string, boolean>>({});
|
const eventArchiveTranscribingById = ref<Record<string, boolean>>({});
|
||||||
const eventArchiveMicErrorById = ref<Record<string, string>>({});
|
const eventArchiveMicErrorById = ref<Record<string, string>>({});
|
||||||
let commMediaRecorder: MediaRecorder | null = null;
|
|
||||||
let commRecorderStream: MediaStream | null = null;
|
|
||||||
let commRecorderMimeType = "audio/webm";
|
|
||||||
let commRecordingChunks: Blob[] = [];
|
|
||||||
let commDiscardOnStop = false;
|
|
||||||
let eventArchiveMediaRecorder: MediaRecorder | null = null;
|
let eventArchiveMediaRecorder: MediaRecorder | null = null;
|
||||||
let eventArchiveRecorderStream: MediaStream | null = null;
|
let eventArchiveRecorderStream: MediaStream | null = null;
|
||||||
let eventArchiveRecorderMimeType = "audio/webm";
|
let eventArchiveRecorderMimeType = "audio/webm";
|
||||||
@@ -3508,7 +3409,6 @@ let eventArchiveChunks: Blob[] = [];
|
|||||||
let eventArchiveTargetEventId = "";
|
let eventArchiveTargetEventId = "";
|
||||||
|
|
||||||
watch(selectedCommThreadId, () => {
|
watch(selectedCommThreadId, () => {
|
||||||
stopCommRecording(true);
|
|
||||||
stopEventArchiveRecording();
|
stopEventArchiveRecording();
|
||||||
destroyAllCommCallWaves();
|
destroyAllCommCallWaves();
|
||||||
callTranscriptOpen.value = {};
|
callTranscriptOpen.value = {};
|
||||||
@@ -3517,6 +3417,8 @@ watch(selectedCommThreadId, () => {
|
|||||||
callTranscriptError.value = {};
|
callTranscriptError.value = {};
|
||||||
commPinnedOnly.value = false;
|
commPinnedOnly.value = false;
|
||||||
commDraft.value = "";
|
commDraft.value = "";
|
||||||
|
commRecording.value = false;
|
||||||
|
commTranscribing.value = false;
|
||||||
commMicError.value = "";
|
commMicError.value = "";
|
||||||
commComposerMode.value = "message";
|
commComposerMode.value = "message";
|
||||||
commQuickMenuOpen.value = false;
|
commQuickMenuOpen.value = false;
|
||||||
@@ -4245,12 +4147,7 @@ async function transcribeCallItem(item: CommItem) {
|
|||||||
if (!res.ok) throw new Error(`Audio fetch failed: ${res.status}`);
|
if (!res.ok) throw new Error(`Audio fetch failed: ${res.status}`);
|
||||||
return res.blob();
|
return res.blob();
|
||||||
});
|
});
|
||||||
const payload = await decodeAudioBlobToPcm16(audioBlob);
|
const text = await transcribeAudioBlob(audioBlob);
|
||||||
const result = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
|
|
||||||
method: "POST",
|
|
||||||
body: payload,
|
|
||||||
});
|
|
||||||
const text = String(result?.text ?? "").trim();
|
|
||||||
callTranscriptText.value[itemId] = text || "(empty transcript)";
|
callTranscriptText.value[itemId] = text || "(empty transcript)";
|
||||||
await gqlFetch<{ updateCommunicationTranscript: { ok: boolean; id: string } }>(updateCommunicationTranscriptMutation, {
|
await gqlFetch<{ updateCommunicationTranscript: { ok: boolean; id: string } }>(updateCommunicationTranscriptMutation, {
|
||||||
id: itemId,
|
id: itemId,
|
||||||
@@ -4603,99 +4500,12 @@ async function sendCommMessage() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function startCommRecording() {
|
function onCommDictationTranscript(text: string) {
|
||||||
if (commRecording.value || commTranscribing.value) return;
|
const next = String(text ?? "").trim();
|
||||||
|
if (!next) return;
|
||||||
|
const previous = String(commDraft.value ?? "").trim();
|
||||||
|
commDraft.value = previous ? `${previous} ${next}` : next;
|
||||||
commMicError.value = "";
|
commMicError.value = "";
|
||||||
if (!pilotMicSupported.value) {
|
|
||||||
commMicError.value = "Recording is not supported in this browser";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
|
||||||
const preferredMime = "audio/webm;codecs=opus";
|
|
||||||
const recorder = MediaRecorder.isTypeSupported(preferredMime)
|
|
||||||
? new MediaRecorder(stream, { mimeType: preferredMime })
|
|
||||||
: new MediaRecorder(stream);
|
|
||||||
|
|
||||||
commRecorderStream = stream;
|
|
||||||
commRecorderMimeType = recorder.mimeType || "audio/webm";
|
|
||||||
commMediaRecorder = recorder;
|
|
||||||
commRecordingChunks = [];
|
|
||||||
commDiscardOnStop = false;
|
|
||||||
commRecording.value = true;
|
|
||||||
|
|
||||||
recorder.ondataavailable = (event: BlobEvent) => {
|
|
||||||
if (event.data?.size) commRecordingChunks.push(event.data);
|
|
||||||
};
|
|
||||||
|
|
||||||
recorder.onstop = async () => {
|
|
||||||
const discard = commDiscardOnStop;
|
|
||||||
commDiscardOnStop = false;
|
|
||||||
commRecording.value = false;
|
|
||||||
commMediaRecorder = null;
|
|
||||||
|
|
||||||
if (commRecorderStream) {
|
|
||||||
commRecorderStream.getTracks().forEach((track) => track.stop());
|
|
||||||
commRecorderStream = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
const audioBlob = new Blob(commRecordingChunks, { type: commRecorderMimeType });
|
|
||||||
commRecordingChunks = [];
|
|
||||||
if (discard || audioBlob.size === 0) return;
|
|
||||||
|
|
||||||
commTranscribing.value = true;
|
|
||||||
try {
|
|
||||||
const text = await transcribeAudioBlob(audioBlob);
|
|
||||||
if (!text) {
|
|
||||||
commMicError.value = "Could not recognize speech";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const previous = String(commDraft.value ?? "").trim();
|
|
||||||
commDraft.value = previous ? `${previous} ${text}` : text;
|
|
||||||
commMicError.value = "";
|
|
||||||
} catch (error: any) {
|
|
||||||
commMicError.value = String(error?.data?.message ?? error?.message ?? "Voice transcription failed");
|
|
||||||
} finally {
|
|
||||||
commTranscribing.value = false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
recorder.start();
|
|
||||||
} catch {
|
|
||||||
commRecording.value = false;
|
|
||||||
commMicError.value = "No microphone access";
|
|
||||||
if (commRecorderStream) {
|
|
||||||
commRecorderStream.getTracks().forEach((track) => track.stop());
|
|
||||||
commRecorderStream = null;
|
|
||||||
}
|
|
||||||
commMediaRecorder = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function stopCommRecording(discard = false) {
|
|
||||||
if (!commMediaRecorder || commMediaRecorder.state === "inactive") {
|
|
||||||
commRecording.value = false;
|
|
||||||
commDiscardOnStop = false;
|
|
||||||
commRecordingChunks = [];
|
|
||||||
if (commRecorderStream) {
|
|
||||||
commRecorderStream.getTracks().forEach((track) => track.stop());
|
|
||||||
commRecorderStream = null;
|
|
||||||
}
|
|
||||||
commMediaRecorder = null;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
commDiscardOnStop = discard;
|
|
||||||
commMediaRecorder.stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
function toggleCommRecording() {
|
|
||||||
if (commTranscribing.value) return;
|
|
||||||
if (commRecording.value) {
|
|
||||||
stopCommRecording();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
void startCommRecording();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function handleCommComposerEnter(event: KeyboardEvent) {
|
function handleCommComposerEnter(event: KeyboardEvent) {
|
||||||
@@ -5574,17 +5384,19 @@ async function decideFeedCard(card: FeedCard, decision: "accepted" | "rejected")
|
|||||||
<path d="M20 11H7.83l4.58-4.59L11 5l-7 7 7 7 1.41-1.41L7.83 13H20z" />
|
<path d="M20 11H7.83l4.58-4.59L11 5l-7 7 7 7 1.41-1.41L7.83 13H20z" />
|
||||||
</svg>
|
</svg>
|
||||||
</button>
|
</button>
|
||||||
<button
|
<CrmVoiceDictationButton
|
||||||
class="btn btn-xs btn-circle border border-base-300 bg-base-100 text-base-content/80 hover:bg-base-200"
|
class="btn btn-xs btn-circle border border-base-300 bg-base-100 text-base-content/80 hover:bg-base-200"
|
||||||
:class="commRecording || commTranscribing ? 'comm-mic-active' : ''"
|
:class="commRecording || commTranscribing ? 'comm-mic-active' : ''"
|
||||||
:disabled="commSending || commEventSaving || commTranscribing"
|
:disabled="commSending || commEventSaving"
|
||||||
:title="commRecording ? 'Stop and insert transcript' : commTranscribing ? 'Transcribing...' : 'Voice input'"
|
:session-key="selectedCommThreadId"
|
||||||
@click="toggleCommRecording"
|
idle-title="Voice input"
|
||||||
>
|
recording-title="Stop and insert transcript"
|
||||||
<svg viewBox="0 0 24 24" class="h-3.5 w-3.5 fill-current">
|
transcribing-title="Transcribing..."
|
||||||
<path d="M12 15a3 3 0 0 0 3-3V7a3 3 0 1 0-6 0v5a3 3 0 0 0 3 3m5-3a1 1 0 1 1 2 0 7 7 0 0 1-6 6.92V21h3a1 1 0 1 1 0 2H8a1 1 0 1 1 0-2h3v-2.08A7 7 0 0 1 5 12a1 1 0 1 1 2 0 5 5 0 0 0 10 0" />
|
@update:recording="commRecording = $event"
|
||||||
</svg>
|
@update:transcribing="commTranscribing = $event"
|
||||||
</button>
|
@transcript="onCommDictationTranscript"
|
||||||
|
@error="commMicError = $event"
|
||||||
|
/>
|
||||||
|
|
||||||
<button
|
<button
|
||||||
class="btn btn-sm btn-circle border-0 bg-[#5865f2] text-white hover:bg-[#4752c4]"
|
class="btn btn-sm btn-circle border-0 bg-[#5865f2] text-white hover:bg-[#4752c4]"
|
||||||
|
|||||||
@@ -0,0 +1,168 @@
|
|||||||
|
<script setup lang="ts">
import { computed, onBeforeUnmount, ref, watch } from "vue";
import { isVoiceCaptureSupported, transcribeAudioBlob } from "~~/app/composables/useVoiceTranscription";

/**
 * Microphone toggle button: the first click starts recording, the next click
 * stops it and sends the captured audio to `transcribeAudioBlob`.
 *
 * Results are reported through events only; the component holds no draft text.
 */
const props = defineProps<{
  /** Blocks starting/stopping from the button; becoming disabled discards an active recording. */
  disabled?: boolean;
  /** Identity of the current dictation session; any change discards captured and in-flight audio. */
  sessionKey?: string;
  /** Tooltip while idle (falls back to "Voice input"). */
  idleTitle?: string;
  /** Tooltip while recording (falls back to "Stop and insert transcript"). */
  recordingTitle?: string;
  /** Tooltip while transcribing (falls back to "Transcribing..."). */
  transcribingTitle?: string;
}>();

const emit = defineEmits<{
  /** Mirrors the internal recording flag. */
  (e: "update:recording", value: boolean): void;
  /** Mirrors the internal transcribing flag. */
  (e: "update:transcribing", value: boolean): void;
  /** Non-empty recognized text. */
  (e: "transcript", value: string): void;
  /** Human-readable error, or "" to clear a previous one. */
  (e: "error", value: string): void;
}>();

const recording = ref(false);
const transcribing = ref(false);

let mediaRecorder: MediaRecorder | null = null;
let recorderStream: MediaStream | null = null;
let recorderMimeType = "audio/webm";
let recordingChunks: Blob[] = [];
let discardOnStop = false;

// True while getUserMedia is pending, so a second click cannot open a second stream.
let starting = false;
// Bumped on session change and unmount; work started under an older value is stale.
let sessionEpoch = 0;

/** Tooltip / accessible name for the current state. */
const buttonTitle = computed(() => {
  if (recording.value) return props.recordingTitle || "Stop and insert transcript";
  if (transcribing.value) return props.transcribingTitle || "Transcribing...";
  return props.idleTitle || "Voice input";
});

/** Updates the recording flag and notifies the parent. */
function setRecording(value: boolean) {
  recording.value = value;
  emit("update:recording", value);
}

/** Updates the transcribing flag and notifies the parent. */
function setTranscribing(value: boolean) {
  transcribing.value = value;
  emit("update:transcribing", value);
}

/** Stops every track of a stream so the browser releases the microphone. */
function stopStreamTracks(stream: MediaStream) {
  stream.getTracks().forEach((track) => track.stop());
}

/** Releases the microphone and resets all per-recording state. */
function clearRecorderResources() {
  if (recorderStream) {
    stopStreamTracks(recorderStream);
    recorderStream = null;
  }
  mediaRecorder = null;
  recordingChunks = [];
  discardOnStop = false;
}

/** Extracts a display message from a failed transcription request. */
function describeTranscriptionError(error: unknown): string {
  const candidate = error as { data?: { message?: unknown }; message?: unknown } | null | undefined;
  return String(candidate?.data?.message ?? candidate?.message ?? "Voice transcription failed");
}

/**
 * Runs when the recorder stops: releases the microphone, then transcribes the
 * captured audio unless it was discarded or is empty.
 *
 * @param epoch Value of `sessionEpoch` when the recording was started.
 */
async function finishRecording(epoch: number) {
  // Snapshot before clearRecorderResources() resets the shared state.
  const discard = discardOnStop;
  const audioBlob = new Blob(recordingChunks, { type: recorderMimeType });

  setRecording(false);
  clearRecorderResources();
  if (discard || audioBlob.size === 0) return;

  setTranscribing(true);
  try {
    const text = await transcribeAudioBlob(audioBlob);
    // The session changed (or the component unmounted) while the request was in
    // flight: the result belongs to the previous session, so drop it.
    if (epoch !== sessionEpoch) return;
    if (!text) {
      emit("error", "Could not recognize speech");
      return;
    }
    emit("error", "");
    emit("transcript", text);
  } catch (error: unknown) {
    if (epoch === sessionEpoch) emit("error", describeTranscriptionError(error));
  } finally {
    setTranscribing(false);
  }
}

/** Requests the microphone and starts a new recording. */
async function startRecording() {
  if (starting || recording.value || transcribing.value) return;
  emit("error", "");

  if (!isVoiceCaptureSupported()) {
    emit("error", "Recording is not supported in this browser");
    return;
  }

  const epoch = sessionEpoch;
  starting = true;
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    // The permission prompt can stay open for a long time; do not start
    // recording into a session that is gone or a button that is now disabled.
    if (epoch !== sessionEpoch || props.disabled) {
      stopStreamTracks(stream);
      return;
    }

    // Track the stream before constructing the recorder so the catch below
    // releases the microphone if MediaRecorder construction or start() throws.
    recorderStream = stream;

    const preferredMime = "audio/webm;codecs=opus";
    const recorder = MediaRecorder.isTypeSupported(preferredMime)
      ? new MediaRecorder(stream, { mimeType: preferredMime })
      : new MediaRecorder(stream);

    recorderMimeType = recorder.mimeType || "audio/webm";
    mediaRecorder = recorder;
    recordingChunks = [];
    discardOnStop = false;
    setRecording(true);

    recorder.ondataavailable = (event: BlobEvent) => {
      if (event.data?.size) recordingChunks.push(event.data);
    };
    recorder.onstop = () => {
      void finishRecording(epoch);
    };

    recorder.start();
  } catch {
    setRecording(false);
    clearRecorderResources();
    emit("error", "No microphone access");
  } finally {
    starting = false;
  }
}

/**
 * Stops the active recording.
 *
 * @param discard When true the captured audio is thrown away instead of transcribed.
 */
function stopRecording(discard = false) {
  if (!mediaRecorder || mediaRecorder.state === "inactive") {
    setRecording(false);
    clearRecorderResources();
    return;
  }
  discardOnStop = discard;
  mediaRecorder.stop();
}

/** Click handler: starts or stops recording; ignored while disabled or transcribing. */
function toggleRecording() {
  if (props.disabled || transcribing.value) return;
  if (recording.value) {
    stopRecording();
    return;
  }
  void startRecording();
}

watch(
  () => props.sessionKey,
  () => {
    // Invalidate a pending start and any in-flight transcription as well.
    sessionEpoch += 1;
    if (recording.value) stopRecording(true);
  },
);

watch(
  () => props.disabled,
  (disabled) => {
    if (disabled && recording.value) stopRecording(true);
  },
);

onBeforeUnmount(() => {
  sessionEpoch += 1;
  if (recording.value) {
    stopRecording(true);
    return;
  }
  clearRecorderResources();
});
</script>

<template>
  <!-- Icon-only toggle: aria-label supplies the accessible name, aria-pressed the toggle state. -->
  <button
    type="button"
    :disabled="Boolean(props.disabled) || transcribing"
    :title="buttonTitle"
    :aria-label="buttonTitle"
    :aria-pressed="recording"
    @click="toggleRecording"
  >
    <slot :recording="recording" :transcribing="transcribing">
      <svg viewBox="0 0 24 24" class="h-3.5 w-3.5 fill-current" aria-hidden="true">
        <path d="M12 15a3 3 0 0 0 3-3V7a3 3 0 1 0-6 0v5a3 3 0 0 0 3 3m5-3a1 1 0 1 1 2 0 7 7 0 0 1-6 6.92V21h3a1 1 0 1 1 0 2H8a1 1 0 1 1 0-2h3v-2.08A7 7 0 0 1 5 12a1 1 0 1 1 2 0 5 5 0 0 0 10 0" />
      </svg>
    </slot>
  </button>
</template>
|
||||||
92
frontend/app/composables/useVoiceTranscription.ts
Normal file
92
frontend/app/composables/useVoiceTranscription.ts
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
/**
 * Resolves the `AudioContext` constructor, falling back to the prefixed
 * `webkitAudioContext`.
 *
 * @returns The constructor exposed by the current runtime.
 * @throws Error when neither constructor exists, including runtimes that have
 *   no `window` global at all.
 */
function getAudioContextCtor(): typeof AudioContext {
  // Look the names up on globalThis: a bare `window` reference throws a
  // ReferenceError outside the browser instead of the descriptive error below.
  const scope = globalThis as typeof globalThis & {
    AudioContext?: typeof AudioContext;
    webkitAudioContext?: typeof AudioContext;
  };
  const Ctor = scope.AudioContext || scope.webkitAudioContext;
  if (!Ctor) {
    throw new Error("Web Audio API is not supported in this browser");
  }
  return Ctor;
}
|
||||||
|
|
||||||
|
/**
 * Downmixes a decoded buffer to a single channel by averaging all channels
 * sample by sample.
 *
 * @param buffer Decoded audio.
 * @returns A new `Float32Array` of `buffer.length` samples; for buffers with at
 *   most one channel this is a copy of channel 0.
 */
function toMonoFloat32(buffer: AudioBuffer) {
  if (buffer.numberOfChannels <= 1) return buffer.getChannelData(0).slice();

  const frameCount = buffer.length;
  const channelCount = buffer.numberOfChannels;
  // Fetch each channel once up front instead of once per sample per channel.
  const channels = Array.from({ length: channelCount }, (_unused, ch) => buffer.getChannelData(ch));

  const output = new Float32Array(frameCount);
  for (let i = 0; i < frameCount; i += 1) {
    let sum = 0;
    for (const samples of channels) {
      sum += samples[i] ?? 0;
    }
    output[i] = sum / channelCount;
  }
  return output;
}
|
||||||
|
|
||||||
|
/**
 * Converts samples from one sample rate to another using linear interpolation
 * between the two nearest input samples.
 *
 * @param input Source samples.
 * @param fromRate Sample rate of `input`.
 * @param toRate Desired sample rate.
 * @returns `input` itself when the rates are equal; otherwise a new array with
 *   at least one sample.
 */
function resampleFloat32Linear(input: Float32Array, fromRate: number, toRate: number) {
  if (fromRate !== toRate) {
    const step = fromRate / toRate;
    const lastIndex = input.length - 1;
    const targetLength = Math.max(1, Math.round(input.length / step));

    return Float32Array.from({ length: targetLength }, (_unused, outIndex) => {
      const position = outIndex * step;
      const lower = Math.floor(position);
      // Clamp so the final output sample never reads past the input.
      const upper = Math.min(lastIndex, lower + 1);
      const weight = position - lower;
      return (input[lower] ?? 0) * (1 - weight) + (input[upper] ?? 0) * weight;
    });
  }

  return input;
}
|
||||||
|
|
||||||
|
/**
 * Encodes float samples as signed 16-bit little-endian PCM.
 *
 * Samples are clamped to [-1, 1]; negative values scale by 0x8000 and
 * non-negative values by 0x7fff.
 *
 * @param input Float samples.
 * @returns Two bytes per input sample.
 */
function floatToPcm16Bytes(input: Float32Array) {
  const bytesPerSample = 2;
  const bytes = new Uint8Array(input.length * bytesPerSample);
  const writer = new DataView(bytes.buffer);

  input.forEach((raw, index) => {
    const clamped = Math.min(1, Math.max(-1, raw));
    const scale = clamped < 0 ? 0x8000 : 0x7fff;
    writer.setInt16(index * bytesPerSample, Math.round(clamped * scale), true);
  });

  return bytes;
}
|
||||||
|
|
||||||
|
/**
 * Base64-encodes raw bytes via `btoa`.
 *
 * The bytes are turned into a binary string in slices of 0x8000 so that no
 * single `String.fromCharCode` call receives the whole array as arguments.
 *
 * @param bytes Bytes to encode.
 * @returns The base64 text ("" for empty input).
 */
function bytesToBase64(bytes: Uint8Array) {
  const sliceSize = 0x8000;
  const pieces: string[] = [];

  let offset = 0;
  while (offset < bytes.length) {
    const slice = bytes.subarray(offset, offset + sliceSize);
    pieces.push(String.fromCharCode(...slice));
    offset += sliceSize;
  }

  return btoa(pieces.join(""));
}
|
||||||
|
|
||||||
|
/**
 * Decodes an audio blob and converts it to base64-encoded PCM16 at 16000 Hz.
 *
 * Pipeline: decode with a temporary audio context, downmix with
 * `toMonoFloat32`, resample with `resampleFloat32Linear`, encode with
 * `floatToPcm16Bytes` and `bytesToBase64`.
 *
 * @param blob Encoded audio.
 * @returns `{ audioBase64, sampleRate }` where `sampleRate` is always 16000.
 */
async function decodeAudioBlobToPcm16(blob: Blob) {
  const outputRate = 16000;
  const context = new (getAudioContextCtor())();

  try {
    const decoded = await context.decodeAudioData(await blob.arrayBuffer());
    const samples = resampleFloat32Linear(toMonoFloat32(decoded), decoded.sampleRate, outputRate);

    return {
      audioBase64: bytesToBase64(floatToPcm16Bytes(samples)),
      sampleRate: outputRate,
    };
  } finally {
    // The context exists only for decoding; close it whether or not decoding succeeded.
    await context.close();
  }
}
|
||||||
|
|
||||||
|
/**
 * Reports whether the current runtime can record microphone audio.
 *
 * @returns `true` only when `window`, `navigator`, `MediaRecorder` and
 *   `navigator.mediaDevices.getUserMedia` are all available.
 */
export function isVoiceCaptureSupported() {
  const inBrowser = typeof window !== "undefined" && typeof navigator !== "undefined";
  if (!inBrowser) return false;

  const hasRecorder = typeof MediaRecorder !== "undefined";
  return hasRecorder && Boolean(navigator.mediaDevices?.getUserMedia);
}
|
||||||
|
|
||||||
|
/**
 * Converts an audio blob with `decodeAudioBlobToPcm16` and POSTs the result to
 * `/api/pilot-transcribe`.
 *
 * @param blob Encoded audio.
 * @returns The trimmed `text` field of the response, or "" when it is absent.
 */
export async function transcribeAudioBlob(blob: Blob) {
  const body = await decodeAudioBlobToPcm16(blob);
  const response = await $fetch<{ text?: string }>("/api/pilot-transcribe", { method: "POST", body });
  const transcript = response?.text ?? "";
  return String(transcript).trim();
}
|
||||||
Reference in New Issue
Block a user