refactor(voice): extract chat dictation into reusable component

This commit is contained in:
Ruslan Bakiev
2026-02-23 19:43:00 +07:00
parent c1e8f912d1
commit c5d3a90413
3 changed files with 282 additions and 210 deletions

View File

@@ -4,6 +4,7 @@ import CrmAuthLoading from "~~/app/components/workspace/auth/CrmAuthLoading.vue"
import CrmCalendarPanel from "~~/app/components/workspace/calendar/CrmCalendarPanel.vue";
import CrmCommunicationsContextSidebar from "~~/app/components/workspace/communications/CrmCommunicationsContextSidebar.vue";
import CrmCommunicationsListSidebar from "~~/app/components/workspace/communications/CrmCommunicationsListSidebar.vue";
import CrmVoiceDictationButton from "~~/app/components/workspace/communications/CrmVoiceDictationButton.client.vue";
import CrmDocumentsPanel from "~~/app/components/workspace/documents/CrmDocumentsPanel.vue";
import CrmWorkspaceTopbar from "~~/app/components/workspace/header/CrmWorkspaceTopbar.vue";
import CrmPilotSidebar from "~~/app/components/workspace/pilot/CrmPilotSidebar.vue";
@@ -42,6 +43,7 @@ import {
formatDocumentScope,
isDocumentLinkedToContact,
} from "~~/app/composables/useWorkspaceDocuments";
import { isVoiceCaptureSupported, transcribeAudioBlob } from "~~/app/composables/useVoiceTranscription";
import { Chat as AiChat } from "@ai-sdk/vue";
import { DefaultChatTransport, isTextUIPart, type UIMessage } from "ai";
type TabId = "communications" | "documents";
@@ -1463,98 +1465,6 @@ function appendPilotTranscript(text: string) {
return merged;
}
// Resolve the browser's AudioContext constructor, falling back to the
// prefixed WebKit variant used by older Safari releases.
// Throws when the Web Audio API is unavailable entirely.
function getAudioContextCtor(): typeof AudioContext {
  const globals = globalThis as any;
  const candidate = globals.AudioContext ?? globals.webkitAudioContext;
  if (candidate) {
    return candidate as typeof AudioContext;
  }
  throw new Error("AudioContext is not supported in this browser");
}
// Down-mix an AudioBuffer to a single channel by averaging all channels
// sample-by-sample. A mono buffer is returned as-is (no copy).
function toMonoFloat32(buffer: AudioBuffer) {
  const channels = buffer.numberOfChannels;
  if (channels === 1) {
    return buffer.getChannelData(0);
  }
  const mixed = new Float32Array(buffer.length);
  for (let channel = 0; channel < channels; channel += 1) {
    const samples = buffer.getChannelData(channel);
    for (let i = 0; i < mixed.length; i += 1) {
      mixed[i] = (mixed[i] ?? 0) + (samples[i] ?? 0);
    }
  }
  // Normalize the accumulated sum into an average.
  for (let i = 0; i < mixed.length; i += 1) {
    mixed[i] = (mixed[i] ?? 0) / channels;
  }
  return mixed;
}
// Linearly interpolate PCM samples from `fromRate` Hz to `toRate` Hz.
// Returns the input unchanged when the rates already match.
// The last sample is clamped (no extrapolation past the end of the signal).
function resampleFloat32Linear(input: Float32Array, fromRate: number, toRate: number) {
  if (fromRate === toRate) return input;
  // Robustness fix: an empty signal must resample to an empty signal.
  // Previously Math.max(1, …) manufactured a single spurious zero sample.
  if (input.length === 0) return new Float32Array(0);
  const ratio = fromRate / toRate;
  const outLength = Math.max(1, Math.round(input.length / ratio));
  const out = new Float32Array(outLength);
  for (let i = 0; i < outLength; i += 1) {
    const position = i * ratio;
    const left = Math.floor(position);
    const right = Math.min(input.length - 1, left + 1);
    const frac = position - left;
    out[i] = (input[left] ?? 0) * (1 - frac) + (input[right] ?? 0) * frac;
  }
  return out;
}
// Convert [-1, 1] float samples into little-endian signed 16-bit PCM bytes.
// Out-of-range samples are clamped before scaling.
function floatToPcm16Bytes(input: Float32Array) {
  const bytes = new Uint8Array(input.length * 2);
  const view = new DataView(bytes.buffer);
  input.forEach((raw, i) => {
    const clamped = Math.min(1, Math.max(-1, raw));
    // Negatives scale toward -32768, positives toward 32767 (asymmetric int16 range).
    const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
    view.setInt16(i * 2, Math.round(scaled), true);
  });
  return bytes;
}
// Base64-encode a byte array. The intermediate binary string is built in
// 32 KiB slices so the String.fromCharCode argument spread stays within
// engine argument-count limits on large buffers.
function bytesToBase64(bytes: Uint8Array) {
  const sliceSize = 0x8000;
  const pieces: string[] = [];
  for (let offset = 0; offset < bytes.length; offset += sliceSize) {
    pieces.push(String.fromCharCode(...bytes.subarray(offset, offset + sliceSize)));
  }
  return btoa(pieces.join(""));
}
// Decode an encoded audio blob (e.g. webm/opus) into mono 16 kHz PCM16,
// returned base64-encoded alongside the sample rate for the transcription
// API payload.
async function decodeAudioBlobToPcm16(blob: Blob) {
  const TARGET_SAMPLE_RATE = 16000;
  const AudioContextCtor = getAudioContextCtor();
  const context = new AudioContextCtor();
  try {
    const decoded = await context.decodeAudioData(await blob.arrayBuffer());
    const mono = toMonoFloat32(decoded);
    const resampled = resampleFloat32Linear(mono, decoded.sampleRate, TARGET_SAMPLE_RATE);
    return {
      audioBase64: bytesToBase64(floatToPcm16Bytes(resampled)),
      sampleRate: TARGET_SAMPLE_RATE,
    };
  } finally {
    // Always release the AudioContext so the browser frees audio resources.
    await context.close();
  }
}
// Send recorded audio to the transcription endpoint and return the
// recognized text, trimmed ("" when nothing was recognized).
async function transcribeAudioBlob(blob: Blob) {
  const body = await decodeAudioBlobToPcm16(blob);
  const response = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
    method: "POST",
    body,
  });
  return String(response?.text ?? "").trim();
}
async function transcribeRecordedPilotAudio(blob: Blob) {
pilotMicError.value = null;
pilotTranscribing.value = true;
@@ -2242,10 +2152,7 @@ if (process.server) {
onMounted(() => {
pilotHeaderText.value = pilotHeaderPhrases[Math.floor(Math.random() * pilotHeaderPhrases.length)] ?? "Every step moves you forward";
pilotMicSupported.value =
typeof navigator !== "undefined" &&
typeof MediaRecorder !== "undefined" &&
Boolean(navigator.mediaDevices?.getUserMedia);
pilotMicSupported.value = isVoiceCaptureSupported();
lifecycleClock = setInterval(() => {
lifecycleNowMs.value = Date.now();
}, 15000);
@@ -2295,7 +2202,6 @@ onMounted(() => {
onBeforeUnmount(() => {
stopCrmRealtime();
stopCommRecording(true);
if (pilotRecording.value) {
stopPilotRecording("fill");
}
@@ -3496,11 +3402,6 @@ const eventCloseError = ref<Record<string, string>>({});
const eventArchiveRecordingById = ref<Record<string, boolean>>({});
const eventArchiveTranscribingById = ref<Record<string, boolean>>({});
const eventArchiveMicErrorById = ref<Record<string, string>>({});
let commMediaRecorder: MediaRecorder | null = null;
let commRecorderStream: MediaStream | null = null;
let commRecorderMimeType = "audio/webm";
let commRecordingChunks: Blob[] = [];
let commDiscardOnStop = false;
let eventArchiveMediaRecorder: MediaRecorder | null = null;
let eventArchiveRecorderStream: MediaStream | null = null;
let eventArchiveRecorderMimeType = "audio/webm";
@@ -3508,7 +3409,6 @@ let eventArchiveChunks: Blob[] = [];
let eventArchiveTargetEventId = "";
watch(selectedCommThreadId, () => {
stopCommRecording(true);
stopEventArchiveRecording();
destroyAllCommCallWaves();
callTranscriptOpen.value = {};
@@ -3517,6 +3417,8 @@ watch(selectedCommThreadId, () => {
callTranscriptError.value = {};
commPinnedOnly.value = false;
commDraft.value = "";
commRecording.value = false;
commTranscribing.value = false;
commMicError.value = "";
commComposerMode.value = "message";
commQuickMenuOpen.value = false;
@@ -4245,12 +4147,7 @@ async function transcribeCallItem(item: CommItem) {
if (!res.ok) throw new Error(`Audio fetch failed: ${res.status}`);
return res.blob();
});
const payload = await decodeAudioBlobToPcm16(audioBlob);
const result = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
method: "POST",
body: payload,
});
const text = String(result?.text ?? "").trim();
const text = await transcribeAudioBlob(audioBlob);
callTranscriptText.value[itemId] = text || "(empty transcript)";
await gqlFetch<{ updateCommunicationTranscript: { ok: boolean; id: string } }>(updateCommunicationTranscriptMutation, {
id: itemId,
@@ -4603,99 +4500,12 @@ async function sendCommMessage() {
}
}
async function startCommRecording() {
if (commRecording.value || commTranscribing.value) return;
function onCommDictationTranscript(text: string) {
const next = String(text ?? "").trim();
if (!next) return;
const previous = String(commDraft.value ?? "").trim();
commDraft.value = previous ? `${previous} ${next}` : next;
commMicError.value = "";
if (!pilotMicSupported.value) {
commMicError.value = "Recording is not supported in this browser";
return;
}
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
const preferredMime = "audio/webm;codecs=opus";
const recorder = MediaRecorder.isTypeSupported(preferredMime)
? new MediaRecorder(stream, { mimeType: preferredMime })
: new MediaRecorder(stream);
commRecorderStream = stream;
commRecorderMimeType = recorder.mimeType || "audio/webm";
commMediaRecorder = recorder;
commRecordingChunks = [];
commDiscardOnStop = false;
commRecording.value = true;
recorder.ondataavailable = (event: BlobEvent) => {
if (event.data?.size) commRecordingChunks.push(event.data);
};
recorder.onstop = async () => {
const discard = commDiscardOnStop;
commDiscardOnStop = false;
commRecording.value = false;
commMediaRecorder = null;
if (commRecorderStream) {
commRecorderStream.getTracks().forEach((track) => track.stop());
commRecorderStream = null;
}
const audioBlob = new Blob(commRecordingChunks, { type: commRecorderMimeType });
commRecordingChunks = [];
if (discard || audioBlob.size === 0) return;
commTranscribing.value = true;
try {
const text = await transcribeAudioBlob(audioBlob);
if (!text) {
commMicError.value = "Could not recognize speech";
return;
}
const previous = String(commDraft.value ?? "").trim();
commDraft.value = previous ? `${previous} ${text}` : text;
commMicError.value = "";
} catch (error: any) {
commMicError.value = String(error?.data?.message ?? error?.message ?? "Voice transcription failed");
} finally {
commTranscribing.value = false;
}
};
recorder.start();
} catch {
commRecording.value = false;
commMicError.value = "No microphone access";
if (commRecorderStream) {
commRecorderStream.getTracks().forEach((track) => track.stop());
commRecorderStream = null;
}
commMediaRecorder = null;
}
}
// Stop the chat-composer recorder. With `discard = true` the captured audio
// is thrown away instead of transcribed (the recorder's onstop handler reads
// commDiscardOnStop). If no recorder is active, reset all recording state
// and release any lingering microphone tracks.
function stopCommRecording(discard = false) {
if (!commMediaRecorder || commMediaRecorder.state === "inactive") {
// Nothing is recording: clear stale flags/chunks and free the stream.
commRecording.value = false;
commDiscardOnStop = false;
commRecordingChunks = [];
if (commRecorderStream) {
commRecorderStream.getTracks().forEach((track) => track.stop());
commRecorderStream = null;
}
commMediaRecorder = null;
return;
}
commDiscardOnStop = discard;
// stop() fires the async onstop handler, which transcribes or discards
// the captured chunks.
commMediaRecorder.stop();
}
// Mic button handler: ignore clicks while a transcription is in flight,
// otherwise toggle between starting and stopping the composer recording.
function toggleCommRecording() {
if (commTranscribing.value) return;
if (commRecording.value) {
stopCommRecording();
return;
}
// Fire-and-forget; errors are surfaced via commMicError inside startCommRecording.
void startCommRecording();
}
function handleCommComposerEnter(event: KeyboardEvent) {
@@ -5574,17 +5384,19 @@ async function decideFeedCard(card: FeedCard, decision: "accepted" | "rejected")
<path d="M20 11H7.83l4.58-4.59L11 5l-7 7 7 7 1.41-1.41L7.83 13H20z" />
</svg>
</button>
<button
<CrmVoiceDictationButton
class="btn btn-xs btn-circle border border-base-300 bg-base-100 text-base-content/80 hover:bg-base-200"
:class="commRecording || commTranscribing ? 'comm-mic-active' : ''"
:disabled="commSending || commEventSaving || commTranscribing"
:title="commRecording ? 'Stop and insert transcript' : commTranscribing ? 'Transcribing...' : 'Voice input'"
@click="toggleCommRecording"
>
<svg viewBox="0 0 24 24" class="h-3.5 w-3.5 fill-current">
<path d="M12 15a3 3 0 0 0 3-3V7a3 3 0 1 0-6 0v5a3 3 0 0 0 3 3m5-3a1 1 0 1 1 2 0 7 7 0 0 1-6 6.92V21h3a1 1 0 1 1 0 2H8a1 1 0 1 1 0-2h3v-2.08A7 7 0 0 1 5 12a1 1 0 1 1 2 0 5 5 0 0 0 10 0" />
</svg>
</button>
:disabled="commSending || commEventSaving"
:session-key="selectedCommThreadId"
idle-title="Voice input"
recording-title="Stop and insert transcript"
transcribing-title="Transcribing..."
@update:recording="commRecording = $event"
@update:transcribing="commTranscribing = $event"
@transcript="onCommDictationTranscript"
@error="commMicError = $event"
/>
<button
class="btn btn-sm btn-circle border-0 bg-[#5865f2] text-white hover:bg-[#4752c4]"

View File

@@ -0,0 +1,168 @@
<script setup lang="ts">
import { onBeforeUnmount, ref, watch } from "vue";
import { isVoiceCaptureSupported, transcribeAudioBlob } from "~~/app/composables/useVoiceTranscription";
// Public contract of the dictation button. The parent controls availability
// via `disabled`; changing `sessionKey` (e.g. the active thread) discards an
// in-flight recording; the *Title props customize the tooltip per state.
const props = defineProps<{
disabled?: boolean;
sessionKey?: string;
idleTitle?: string;
recordingTitle?: string;
transcribingTitle?: string;
}>();
// Events mirror internal state upward and deliver results: `transcript`
// carries recognized text; `error` carries a user-facing message, with ""
// emitted to clear a previous error.
const emit = defineEmits<{
(e: "update:recording", value: boolean): void;
(e: "update:transcribing", value: boolean): void;
(e: "transcript", value: string): void;
(e: "error", value: string): void;
}>();
// Reactive flags, mirrored to the parent through update:* events.
const recording = ref(false);
const transcribing = ref(false);
// Non-reactive MediaRecorder bookkeeping for the current capture session.
let mediaRecorder: MediaRecorder | null = null;
let recorderStream: MediaStream | null = null;
let recorderMimeType = "audio/webm";
let recordingChunks: Blob[] = [];
// When true, the next onstop discards captured audio instead of transcribing it.
let discardOnStop = false;
// Update the local recording flag and notify the parent.
function setRecording(value: boolean) {
recording.value = value;
emit("update:recording", value);
}
// Update the local transcribing flag and notify the parent.
function setTranscribing(value: boolean) {
transcribing.value = value;
emit("update:transcribing", value);
}
// Release the microphone stream and reset all recorder bookkeeping so the
// component is ready for a fresh capture session.
function clearRecorderResources() {
  const stream = recorderStream;
  recorderStream = null;
  if (stream) {
    for (const track of stream.getTracks()) {
      track.stop();
    }
  }
  mediaRecorder = null;
  recordingChunks = [];
  discardOnStop = false;
}
// Request microphone access and start capturing audio. On stop, the captured
// chunks are assembled into a blob and sent for transcription unless the
// recording was discarded (discardOnStop) or empty. All outcomes are
// reported to the parent via emitted events; this function never throws.
async function startRecording() {
if (recording.value || transcribing.value) return;
// Clear any previous error before attempting a new capture.
emit("error", "");
if (!isVoiceCaptureSupported()) {
emit("error", "Recording is not supported in this browser");
return;
}
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
// Prefer webm/opus when the browser supports it; otherwise let the
// browser pick its default container/codec.
const preferredMime = "audio/webm;codecs=opus";
const recorder = MediaRecorder.isTypeSupported(preferredMime)
? new MediaRecorder(stream, { mimeType: preferredMime })
: new MediaRecorder(stream);
recorderStream = stream;
// Remember the actual mime type so the final Blob is tagged correctly.
recorderMimeType = recorder.mimeType || "audio/webm";
mediaRecorder = recorder;
recordingChunks = [];
discardOnStop = false;
setRecording(true);
recorder.ondataavailable = (event: BlobEvent) => {
if (event.data?.size) recordingChunks.push(event.data);
};
recorder.onstop = async () => {
// Snapshot the discard flag before clearRecorderResources resets it.
const discard = discardOnStop;
const audioBlob = new Blob(recordingChunks, { type: recorderMimeType });
setRecording(false);
clearRecorderResources();
if (discard || audioBlob.size === 0) return;
setTranscribing(true);
try {
const text = await transcribeAudioBlob(audioBlob);
if (!text) {
emit("error", "Could not recognize speech");
return;
}
emit("error", "");
emit("transcript", text);
} catch (error: any) {
emit("error", String(error?.data?.message ?? error?.message ?? "Voice transcription failed"));
} finally {
setTranscribing(false);
}
};
recorder.start();
} catch {
// getUserMedia rejection (denied permission, no device) lands here.
setRecording(false);
clearRecorderResources();
emit("error", "No microphone access");
}
}
// Stop an active recording. `discard = true` drops the captured audio
// instead of transcribing it. When no recorder is active, just reset the
// flags and release any leftover resources.
function stopRecording(discard = false) {
  const recorder = mediaRecorder;
  if (recorder && recorder.state !== "inactive") {
    discardOnStop = discard;
    // The recorder's onstop handler completes the flow (transcribe or drop).
    recorder.stop();
    return;
  }
  setRecording(false);
  clearRecorderResources();
}
// Click handler: no-op while disabled or transcribing; otherwise toggle
// between starting and stopping a capture.
function toggleRecording() {
  if (props.disabled || transcribing.value) return;
  if (recording.value) {
    stopRecording();
  } else {
    // Fire-and-forget; startRecording reports failures via the error event.
    void startRecording();
  }
}
// Discard any in-flight recording when the session (e.g. active thread) changes.
watch(
() => props.sessionKey,
() => {
if (recording.value) stopRecording(true);
},
);
// Likewise when the parent disables the button mid-recording.
watch(
() => props.disabled,
(disabled) => {
if (disabled && recording.value) stopRecording(true);
},
);
// On teardown, stop an active recorder (discarding its audio); otherwise
// just release any leftover stream/recorder references.
onBeforeUnmount(() => {
if (recording.value) {
stopRecording(true);
return;
}
clearRecorderResources();
});
</script>
<template>
<!--
Mic toggle button. Disabled while transcribing; the tooltip reflects the
current state (idle / recording / transcribing). The default slot exposes
{ recording, transcribing } so a parent can render a custom icon; it falls
back to the microphone glyph below.
-->
<button
type="button"
:disabled="Boolean(props.disabled) || transcribing"
:title="
recording
? (props.recordingTitle || 'Stop and insert transcript')
: transcribing
? (props.transcribingTitle || 'Transcribing...')
: (props.idleTitle || 'Voice input')
"
@click="toggleRecording"
>
<slot :recording="recording" :transcribing="transcribing">
<svg viewBox="0 0 24 24" class="h-3.5 w-3.5 fill-current">
<path d="M12 15a3 3 0 0 0 3-3V7a3 3 0 1 0-6 0v5a3 3 0 0 0 3 3m5-3a1 1 0 1 1 2 0 7 7 0 0 1-6 6.92V21h3a1 1 0 1 1 0 2H8a1 1 0 1 1 0-2h3v-2.08A7 7 0 0 1 5 12a1 1 0 1 1 2 0 5 5 0 0 0 10 0" />
</svg>
</slot>
</button>
</template>