refactor(voice): extract chat dictation into reusable component

This commit is contained in:
Ruslan Bakiev
2026-02-23 19:43:00 +07:00
parent c1e8f912d1
commit c5d3a90413
3 changed files with 282 additions and 210 deletions

View File

@@ -4,6 +4,7 @@ import CrmAuthLoading from "~~/app/components/workspace/auth/CrmAuthLoading.vue"
import CrmCalendarPanel from "~~/app/components/workspace/calendar/CrmCalendarPanel.vue";
import CrmCommunicationsContextSidebar from "~~/app/components/workspace/communications/CrmCommunicationsContextSidebar.vue";
import CrmCommunicationsListSidebar from "~~/app/components/workspace/communications/CrmCommunicationsListSidebar.vue";
import CrmVoiceDictationButton from "~~/app/components/workspace/communications/CrmVoiceDictationButton.client.vue";
import CrmDocumentsPanel from "~~/app/components/workspace/documents/CrmDocumentsPanel.vue";
import CrmWorkspaceTopbar from "~~/app/components/workspace/header/CrmWorkspaceTopbar.vue";
import CrmPilotSidebar from "~~/app/components/workspace/pilot/CrmPilotSidebar.vue";
@@ -42,6 +43,7 @@ import {
formatDocumentScope,
isDocumentLinkedToContact,
} from "~~/app/composables/useWorkspaceDocuments";
import { isVoiceCaptureSupported, transcribeAudioBlob } from "~~/app/composables/useVoiceTranscription";
import { Chat as AiChat } from "@ai-sdk/vue";
import { DefaultChatTransport, isTextUIPart, type UIMessage } from "ai";
type TabId = "communications" | "documents";
@@ -1463,98 +1465,6 @@ function appendPilotTranscript(text: string) {
return merged;
}
// Resolve the browser's AudioContext constructor, falling back to the
// prefixed WebKit variant used by older Safari releases.
// Throws when the Web Audio API is unavailable entirely.
function getAudioContextCtor(): typeof AudioContext {
  const globals = globalThis as any;
  const candidate = globals.AudioContext ?? globals.webkitAudioContext;
  if (candidate) {
    return candidate as typeof AudioContext;
  }
  throw new Error("AudioContext is not supported in this browser");
}
// Down-mix an AudioBuffer to a single channel by averaging all channels
// sample-by-sample. A mono buffer is returned as-is (no copy).
function toMonoFloat32(buffer: AudioBuffer) {
  const channels = buffer.numberOfChannels;
  if (channels === 1) {
    return buffer.getChannelData(0);
  }
  const mixed = new Float32Array(buffer.length);
  for (let channel = 0; channel < channels; channel += 1) {
    const samples = buffer.getChannelData(channel);
    for (let i = 0; i < mixed.length; i += 1) {
      mixed[i] = (mixed[i] ?? 0) + (samples[i] ?? 0);
    }
  }
  // Normalize the accumulated sum into an average.
  for (let i = 0; i < mixed.length; i += 1) {
    mixed[i] = (mixed[i] ?? 0) / channels;
  }
  return mixed;
}
// Linearly interpolate PCM samples from `fromRate` Hz to `toRate` Hz.
// Returns the input unchanged when the rates already match.
// The last sample is clamped (no extrapolation past the end of the signal).
function resampleFloat32Linear(input: Float32Array, fromRate: number, toRate: number) {
  if (fromRate === toRate) return input;
  // Robustness fix: an empty signal must resample to an empty signal.
  // Previously Math.max(1, …) manufactured a single spurious zero sample.
  if (input.length === 0) return new Float32Array(0);
  const ratio = fromRate / toRate;
  const outLength = Math.max(1, Math.round(input.length / ratio));
  const out = new Float32Array(outLength);
  for (let i = 0; i < outLength; i += 1) {
    const position = i * ratio;
    const left = Math.floor(position);
    const right = Math.min(input.length - 1, left + 1);
    const frac = position - left;
    out[i] = (input[left] ?? 0) * (1 - frac) + (input[right] ?? 0) * frac;
  }
  return out;
}
// Convert [-1, 1] float samples into little-endian signed 16-bit PCM bytes.
// Out-of-range samples are clamped before scaling.
function floatToPcm16Bytes(input: Float32Array) {
  const bytes = new Uint8Array(input.length * 2);
  const view = new DataView(bytes.buffer);
  input.forEach((raw, i) => {
    const clamped = Math.min(1, Math.max(-1, raw));
    // Negatives scale toward -32768, positives toward 32767 (asymmetric int16 range).
    const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
    view.setInt16(i * 2, Math.round(scaled), true);
  });
  return bytes;
}
// Base64-encode a byte array. The intermediate binary string is built in
// 32 KiB slices so the String.fromCharCode argument spread stays within
// engine argument-count limits on large buffers.
function bytesToBase64(bytes: Uint8Array) {
  const sliceSize = 0x8000;
  const pieces: string[] = [];
  for (let offset = 0; offset < bytes.length; offset += sliceSize) {
    pieces.push(String.fromCharCode(...bytes.subarray(offset, offset + sliceSize)));
  }
  return btoa(pieces.join(""));
}
// Decode an encoded audio blob (e.g. webm/opus) into mono 16 kHz PCM16,
// returned base64-encoded alongside the sample rate for the transcription
// API payload.
async function decodeAudioBlobToPcm16(blob: Blob) {
  const TARGET_SAMPLE_RATE = 16000;
  const AudioContextCtor = getAudioContextCtor();
  const context = new AudioContextCtor();
  try {
    const decoded = await context.decodeAudioData(await blob.arrayBuffer());
    const mono = toMonoFloat32(decoded);
    const resampled = resampleFloat32Linear(mono, decoded.sampleRate, TARGET_SAMPLE_RATE);
    return {
      audioBase64: bytesToBase64(floatToPcm16Bytes(resampled)),
      sampleRate: TARGET_SAMPLE_RATE,
    };
  } finally {
    // Always release the AudioContext so the browser frees audio resources.
    await context.close();
  }
}
// Send recorded audio to the transcription endpoint and return the
// recognized text, trimmed ("" when nothing was recognized).
async function transcribeAudioBlob(blob: Blob) {
  const body = await decodeAudioBlobToPcm16(blob);
  const response = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
    method: "POST",
    body,
  });
  return String(response?.text ?? "").trim();
}
async function transcribeRecordedPilotAudio(blob: Blob) {
pilotMicError.value = null;
pilotTranscribing.value = true;
@@ -2242,10 +2152,7 @@ if (process.server) {
onMounted(() => {
pilotHeaderText.value = pilotHeaderPhrases[Math.floor(Math.random() * pilotHeaderPhrases.length)] ?? "Every step moves you forward";
pilotMicSupported.value =
typeof navigator !== "undefined" &&
typeof MediaRecorder !== "undefined" &&
Boolean(navigator.mediaDevices?.getUserMedia);
pilotMicSupported.value = isVoiceCaptureSupported();
lifecycleClock = setInterval(() => {
lifecycleNowMs.value = Date.now();
}, 15000);
@@ -2295,7 +2202,6 @@ onMounted(() => {
onBeforeUnmount(() => {
stopCrmRealtime();
stopCommRecording(true);
if (pilotRecording.value) {
stopPilotRecording("fill");
}
@@ -3496,11 +3402,6 @@ const eventCloseError = ref<Record<string, string>>({});
const eventArchiveRecordingById = ref<Record<string, boolean>>({});
const eventArchiveTranscribingById = ref<Record<string, boolean>>({});
const eventArchiveMicErrorById = ref<Record<string, string>>({});
let commMediaRecorder: MediaRecorder | null = null;
let commRecorderStream: MediaStream | null = null;
let commRecorderMimeType = "audio/webm";
let commRecordingChunks: Blob[] = [];
let commDiscardOnStop = false;
let eventArchiveMediaRecorder: MediaRecorder | null = null;
let eventArchiveRecorderStream: MediaStream | null = null;
let eventArchiveRecorderMimeType = "audio/webm";
@@ -3508,7 +3409,6 @@ let eventArchiveChunks: Blob[] = [];
let eventArchiveTargetEventId = "";
watch(selectedCommThreadId, () => {
stopCommRecording(true);
stopEventArchiveRecording();
destroyAllCommCallWaves();
callTranscriptOpen.value = {};
@@ -3517,6 +3417,8 @@ watch(selectedCommThreadId, () => {
callTranscriptError.value = {};
commPinnedOnly.value = false;
commDraft.value = "";
commRecording.value = false;
commTranscribing.value = false;
commMicError.value = "";
commComposerMode.value = "message";
commQuickMenuOpen.value = false;
@@ -4245,12 +4147,7 @@ async function transcribeCallItem(item: CommItem) {
if (!res.ok) throw new Error(`Audio fetch failed: ${res.status}`);
return res.blob();
});
const payload = await decodeAudioBlobToPcm16(audioBlob);
const result = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
method: "POST",
body: payload,
});
const text = String(result?.text ?? "").trim();
const text = await transcribeAudioBlob(audioBlob);
callTranscriptText.value[itemId] = text || "(empty transcript)";
await gqlFetch<{ updateCommunicationTranscript: { ok: boolean; id: string } }>(updateCommunicationTranscriptMutation, {
id: itemId,
@@ -4603,99 +4500,12 @@ async function sendCommMessage() {
}
}
async function startCommRecording() {
if (commRecording.value || commTranscribing.value) return;
function onCommDictationTranscript(text: string) {
const next = String(text ?? "").trim();
if (!next) return;
const previous = String(commDraft.value ?? "").trim();
commDraft.value = previous ? `${previous} ${next}` : next;
commMicError.value = "";
if (!pilotMicSupported.value) {
commMicError.value = "Recording is not supported in this browser";
return;
}
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
const preferredMime = "audio/webm;codecs=opus";
const recorder = MediaRecorder.isTypeSupported(preferredMime)
? new MediaRecorder(stream, { mimeType: preferredMime })
: new MediaRecorder(stream);
commRecorderStream = stream;
commRecorderMimeType = recorder.mimeType || "audio/webm";
commMediaRecorder = recorder;
commRecordingChunks = [];
commDiscardOnStop = false;
commRecording.value = true;
recorder.ondataavailable = (event: BlobEvent) => {
if (event.data?.size) commRecordingChunks.push(event.data);
};
recorder.onstop = async () => {
const discard = commDiscardOnStop;
commDiscardOnStop = false;
commRecording.value = false;
commMediaRecorder = null;
if (commRecorderStream) {
commRecorderStream.getTracks().forEach((track) => track.stop());
commRecorderStream = null;
}
const audioBlob = new Blob(commRecordingChunks, { type: commRecorderMimeType });
commRecordingChunks = [];
if (discard || audioBlob.size === 0) return;
commTranscribing.value = true;
try {
const text = await transcribeAudioBlob(audioBlob);
if (!text) {
commMicError.value = "Could not recognize speech";
return;
}
const previous = String(commDraft.value ?? "").trim();
commDraft.value = previous ? `${previous} ${text}` : text;
commMicError.value = "";
} catch (error: any) {
commMicError.value = String(error?.data?.message ?? error?.message ?? "Voice transcription failed");
} finally {
commTranscribing.value = false;
}
};
recorder.start();
} catch {
commRecording.value = false;
commMicError.value = "No microphone access";
if (commRecorderStream) {
commRecorderStream.getTracks().forEach((track) => track.stop());
commRecorderStream = null;
}
commMediaRecorder = null;
}
}
// Stop the chat-composer recorder. With `discard = true` the captured audio
// is thrown away instead of transcribed (the recorder's onstop handler reads
// commDiscardOnStop). If no recorder is active, reset all recording state
// and release any lingering microphone tracks.
function stopCommRecording(discard = false) {
if (!commMediaRecorder || commMediaRecorder.state === "inactive") {
// Nothing is recording: clear stale flags/chunks and free the stream.
commRecording.value = false;
commDiscardOnStop = false;
commRecordingChunks = [];
if (commRecorderStream) {
commRecorderStream.getTracks().forEach((track) => track.stop());
commRecorderStream = null;
}
commMediaRecorder = null;
return;
}
commDiscardOnStop = discard;
// stop() fires the async onstop handler, which transcribes or discards
// the captured chunks.
commMediaRecorder.stop();
}
// Mic button handler: ignore clicks while a transcription is in flight,
// otherwise toggle between starting and stopping the composer recording.
function toggleCommRecording() {
if (commTranscribing.value) return;
if (commRecording.value) {
stopCommRecording();
return;
}
// Fire-and-forget; errors are surfaced via commMicError inside startCommRecording.
void startCommRecording();
}
function handleCommComposerEnter(event: KeyboardEvent) {
@@ -5574,17 +5384,19 @@ async function decideFeedCard(card: FeedCard, decision: "accepted" | "rejected")
<path d="M20 11H7.83l4.58-4.59L11 5l-7 7 7 7 1.41-1.41L7.83 13H20z" />
</svg>
</button>
<button
<CrmVoiceDictationButton
class="btn btn-xs btn-circle border border-base-300 bg-base-100 text-base-content/80 hover:bg-base-200"
:class="commRecording || commTranscribing ? 'comm-mic-active' : ''"
:disabled="commSending || commEventSaving || commTranscribing"
:title="commRecording ? 'Stop and insert transcript' : commTranscribing ? 'Transcribing...' : 'Voice input'"
@click="toggleCommRecording"
>
<svg viewBox="0 0 24 24" class="h-3.5 w-3.5 fill-current">
<path d="M12 15a3 3 0 0 0 3-3V7a3 3 0 1 0-6 0v5a3 3 0 0 0 3 3m5-3a1 1 0 1 1 2 0 7 7 0 0 1-6 6.92V21h3a1 1 0 1 1 0 2H8a1 1 0 1 1 0-2h3v-2.08A7 7 0 0 1 5 12a1 1 0 1 1 2 0 5 5 0 0 0 10 0" />
</svg>
</button>
:disabled="commSending || commEventSaving"
:session-key="selectedCommThreadId"
idle-title="Voice input"
recording-title="Stop and insert transcript"
transcribing-title="Transcribing..."
@update:recording="commRecording = $event"
@update:transcribing="commTranscribing = $event"
@transcript="onCommDictationTranscript"
@error="commMicError = $event"
/>
<button
class="btn btn-sm btn-circle border-0 bg-[#5865f2] text-white hover:bg-[#4752c4]"

View File

@@ -0,0 +1,168 @@
<script setup lang="ts">
import { onBeforeUnmount, ref, watch } from "vue";
import { isVoiceCaptureSupported, transcribeAudioBlob } from "~~/app/composables/useVoiceTranscription";
// Public contract of the dictation button. The parent controls availability
// via `disabled`; changing `sessionKey` (e.g. the active thread) discards an
// in-flight recording; the *Title props customize the tooltip per state.
const props = defineProps<{
disabled?: boolean;
sessionKey?: string;
idleTitle?: string;
recordingTitle?: string;
transcribingTitle?: string;
}>();
// Events mirror internal state upward and deliver results: `transcript`
// carries recognized text; `error` carries a user-facing message, with ""
// emitted to clear a previous error.
const emit = defineEmits<{
(e: "update:recording", value: boolean): void;
(e: "update:transcribing", value: boolean): void;
(e: "transcript", value: string): void;
(e: "error", value: string): void;
}>();
// Reactive flags, mirrored to the parent through update:* events.
const recording = ref(false);
const transcribing = ref(false);
// Non-reactive MediaRecorder bookkeeping for the current capture session.
let mediaRecorder: MediaRecorder | null = null;
let recorderStream: MediaStream | null = null;
let recorderMimeType = "audio/webm";
let recordingChunks: Blob[] = [];
// When true, the next onstop discards captured audio instead of transcribing it.
let discardOnStop = false;
// Update the local recording flag and notify the parent.
function setRecording(value: boolean) {
recording.value = value;
emit("update:recording", value);
}
// Update the local transcribing flag and notify the parent.
function setTranscribing(value: boolean) {
transcribing.value = value;
emit("update:transcribing", value);
}
// Release the microphone stream and reset all recorder bookkeeping so the
// component is ready for a fresh capture session.
function clearRecorderResources() {
  const stream = recorderStream;
  recorderStream = null;
  if (stream) {
    for (const track of stream.getTracks()) {
      track.stop();
    }
  }
  mediaRecorder = null;
  recordingChunks = [];
  discardOnStop = false;
}
// Request microphone access and start capturing audio. On stop, the captured
// chunks are assembled into a blob and sent for transcription unless the
// recording was discarded (discardOnStop) or empty. All outcomes are
// reported to the parent via emitted events; this function never throws.
async function startRecording() {
if (recording.value || transcribing.value) return;
// Clear any previous error before attempting a new capture.
emit("error", "");
if (!isVoiceCaptureSupported()) {
emit("error", "Recording is not supported in this browser");
return;
}
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
// Prefer webm/opus when the browser supports it; otherwise let the
// browser pick its default container/codec.
const preferredMime = "audio/webm;codecs=opus";
const recorder = MediaRecorder.isTypeSupported(preferredMime)
? new MediaRecorder(stream, { mimeType: preferredMime })
: new MediaRecorder(stream);
recorderStream = stream;
// Remember the actual mime type so the final Blob is tagged correctly.
recorderMimeType = recorder.mimeType || "audio/webm";
mediaRecorder = recorder;
recordingChunks = [];
discardOnStop = false;
setRecording(true);
recorder.ondataavailable = (event: BlobEvent) => {
if (event.data?.size) recordingChunks.push(event.data);
};
recorder.onstop = async () => {
// Snapshot the discard flag before clearRecorderResources resets it.
const discard = discardOnStop;
const audioBlob = new Blob(recordingChunks, { type: recorderMimeType });
setRecording(false);
clearRecorderResources();
if (discard || audioBlob.size === 0) return;
setTranscribing(true);
try {
const text = await transcribeAudioBlob(audioBlob);
if (!text) {
emit("error", "Could not recognize speech");
return;
}
emit("error", "");
emit("transcript", text);
} catch (error: any) {
emit("error", String(error?.data?.message ?? error?.message ?? "Voice transcription failed"));
} finally {
setTranscribing(false);
}
};
recorder.start();
} catch {
// getUserMedia rejection (denied permission, no device) lands here.
setRecording(false);
clearRecorderResources();
emit("error", "No microphone access");
}
}
// Stop an active recording. `discard = true` drops the captured audio
// instead of transcribing it. When no recorder is active, just reset the
// flags and release any leftover resources.
function stopRecording(discard = false) {
  const recorder = mediaRecorder;
  if (recorder && recorder.state !== "inactive") {
    discardOnStop = discard;
    // The recorder's onstop handler completes the flow (transcribe or drop).
    recorder.stop();
    return;
  }
  setRecording(false);
  clearRecorderResources();
}
// Click handler: no-op while disabled or transcribing; otherwise toggle
// between starting and stopping a capture.
function toggleRecording() {
  if (props.disabled || transcribing.value) return;
  if (recording.value) {
    stopRecording();
  } else {
    // Fire-and-forget; startRecording reports failures via the error event.
    void startRecording();
  }
}
// Discard any in-flight recording when the session (e.g. active thread) changes.
watch(
() => props.sessionKey,
() => {
if (recording.value) stopRecording(true);
},
);
// Likewise when the parent disables the button mid-recording.
watch(
() => props.disabled,
(disabled) => {
if (disabled && recording.value) stopRecording(true);
},
);
// On teardown, stop an active recorder (discarding its audio); otherwise
// just release any leftover stream/recorder references.
onBeforeUnmount(() => {
if (recording.value) {
stopRecording(true);
return;
}
clearRecorderResources();
});
</script>
<template>
<!--
Mic toggle button. Disabled while transcribing; the tooltip reflects the
current state (idle / recording / transcribing). The default slot exposes
{ recording, transcribing } so a parent can render a custom icon; it falls
back to the microphone glyph below.
-->
<button
type="button"
:disabled="Boolean(props.disabled) || transcribing"
:title="
recording
? (props.recordingTitle || 'Stop and insert transcript')
: transcribing
? (props.transcribingTitle || 'Transcribing...')
: (props.idleTitle || 'Voice input')
"
@click="toggleRecording"
>
<slot :recording="recording" :transcribing="transcribing">
<svg viewBox="0 0 24 24" class="h-3.5 w-3.5 fill-current">
<path d="M12 15a3 3 0 0 0 3-3V7a3 3 0 1 0-6 0v5a3 3 0 0 0 3 3m5-3a1 1 0 1 1 2 0 7 7 0 0 1-6 6.92V21h3a1 1 0 1 1 0 2H8a1 1 0 1 1 0-2h3v-2.08A7 7 0 0 1 5 12a1 1 0 1 1 2 0 5 5 0 0 0 10 0" />
</svg>
</slot>
</button>
</template>