refactor(voice): extract chat dictation into reusable component

2026-02-23 19:43:00 +07:00
parent c1e8f912d1
commit c5d3a90413
3 changed files with 282 additions and 210 deletions
--- a/frontend/app/components/workspace/CrmWorkspaceApp.vue
+++ b/frontend/app/components/workspace/CrmWorkspaceApp.vue
@@ -4,6 +4,7 @@ import CrmAuthLoading from "~~/app/components/workspace/auth/CrmAuthLoading.vue"
 import CrmCalendarPanel from "~~/app/components/workspace/calendar/CrmCalendarPanel.vue";
 import CrmCommunicationsContextSidebar from "~~/app/components/workspace/communications/CrmCommunicationsContextSidebar.vue";
 import CrmCommunicationsListSidebar from "~~/app/components/workspace/communications/CrmCommunicationsListSidebar.vue";
 import CrmVoiceDictationButton from "~~/app/components/workspace/communications/CrmVoiceDictationButton.client.vue";
 import CrmDocumentsPanel from "~~/app/components/workspace/documents/CrmDocumentsPanel.vue";
 import CrmWorkspaceTopbar from "~~/app/components/workspace/header/CrmWorkspaceTopbar.vue";
 import CrmPilotSidebar from "~~/app/components/workspace/pilot/CrmPilotSidebar.vue";
@@ -42,6 +43,7 @@ import {
  formatDocumentScope,
  isDocumentLinkedToContact,
 } from "~~/app/composables/useWorkspaceDocuments";
 import { isVoiceCaptureSupported, transcribeAudioBlob } from "~~/app/composables/useVoiceTranscription";
 import { Chat as AiChat } from "@ai-sdk/vue";
 import { DefaultChatTransport, isTextUIPart, type UIMessage } from "ai";
 type TabId = "communications" | "documents";
@@ -1463,98 +1465,6 @@ function appendPilotTranscript(text: string) {
  return merged;
 }
 /**
  * Looks up the AudioContext constructor on the global object, accepting the
  * `webkit`-prefixed variant as a fallback.
  *
  * @returns The constructor to instantiate an audio context with.
  * @throws Error when the global scope exposes neither constructor.
  */
 function getAudioContextCtor(): typeof AudioContext {
  const scope = globalThis as any;
  const resolved = scope.AudioContext ?? scope.webkitAudioContext;
  if (resolved) {
    return resolved as typeof AudioContext;
  }
  throw new Error("AudioContext is not supported in this browser");
 }
 /**
  * Collapses an AudioBuffer to one channel by averaging its channels.
  *
  * A single-channel buffer is returned as-is (the buffer's own channel data,
  * not a copy). Otherwise every channel is accumulated into a fresh
  * Float32Array which is then divided by the channel count.
  *
  * @param buffer Audio to down-mix.
  * @returns Mono samples, `buffer.length` long.
  */
 function toMonoFloat32(buffer: AudioBuffer) {
  const channelCount = buffer.numberOfChannels;
  if (channelCount === 1) {
    return buffer.getChannelData(0);
  }
  const frameCount = buffer.length;
  const mixed = new Float32Array(frameCount);
  // Pass 1: add each channel into the accumulator.
  for (let channelIndex = 0; channelIndex < channelCount; channelIndex += 1) {
    const samples = buffer.getChannelData(channelIndex);
    for (let frame = 0; frame < frameCount; frame += 1) {
      mixed[frame] = (mixed[frame] ?? 0) + (samples[frame] ?? 0);
    }
  }
  // Pass 2: turn the sums into averages.
  for (let frame = 0; frame < mixed.length; frame += 1) {
    mixed[frame] = (mixed[frame] ?? 0) / channelCount;
  }
  return mixed;
 }
 /**
  * Converts samples from one sample rate to another using linear interpolation
  * between the two nearest source samples.
  *
  * @param input Source samples.
  * @param fromRate Sample rate of `input`.
  * @param toRate Desired sample rate.
  * @returns `input` itself when the rates match, otherwise a new array with at
  *          least one sample.
  */
 function resampleFloat32Linear(input: Float32Array, fromRate: number, toRate: number) {
  if (fromRate === toRate) {
    return input;
  }
  const step = fromRate / toRate;
  const lastSourceIndex = input.length - 1;
  const resampled = new Float32Array(Math.max(1, Math.round(input.length / step)));
  let outIndex = 0;
  while (outIndex < resampled.length) {
    const sourcePos = outIndex * step;
    const lower = Math.floor(sourcePos);
    // Clamp the right-hand neighbour so the last sample interpolates with itself.
    const upper = Math.min(lastSourceIndex, lower + 1);
    const weight = sourcePos - lower;
    const lowerValue = input[lower] ?? 0;
    const upperValue = input[upper] ?? 0;
    resampled[outIndex] = lowerValue * (1 - weight) + upperValue * weight;
    outIndex += 1;
  }
  return resampled;
 }
 /**
  * Encodes float samples as signed 16-bit little-endian PCM.
  *
  * Samples are clamped to [-1, 1]; negative values scale by 0x8000 and
  * non-negative values by 0x7fff so both ends of the int16 range are reachable.
  *
  * @param input Float samples.
  * @returns Byte array of length `input.length * 2`.
  */
 function floatToPcm16Bytes(input: Float32Array) {
  const bytesPerSample = 2;
  const bytes = new Uint8Array(input.length * bytesPerSample);
  const writer = new DataView(bytes.buffer);
  input.forEach((raw, index) => {
    const clamped = Math.max(-1, Math.min(1, raw ?? 0));
    const scale = clamped < 0 ? 0x8000 : 0x7fff;
    writer.setInt16(index * bytesPerSample, Math.round(clamped * scale), true);
  });
  return bytes;
 }
 /**
  * Base64-encodes a byte array.
  *
  * The bytes are turned into a binary string in 0x8000-byte slices so that the
  * spread into String.fromCharCode never receives an oversized argument list.
  *
  * @param bytes Raw bytes.
  * @returns Base64 text.
  */
 function bytesToBase64(bytes: Uint8Array) {
  const sliceSize = 0x8000;
  const pieces: string[] = [];
  let offset = 0;
  while (offset < bytes.length) {
    pieces.push(String.fromCharCode(...bytes.subarray(offset, offset + sliceSize)));
    offset += sliceSize;
  }
  return btoa(pieces.join(""));
 }
 /**
  * Decodes an audio blob and converts it to base64-encoded 16 kHz mono PCM16.
  *
  * A temporary audio context is created for decoding and closed afterwards,
  * whether or not decoding succeeded.
  *
  * @param blob Encoded audio.
  * @returns The base64 PCM payload together with its sample rate.
  */
 async function decodeAudioBlobToPcm16(blob: Blob) {
  const targetSampleRate = 16000;
  const audioContext = new (getAudioContextCtor())();
  try {
    const encoded = await blob.arrayBuffer();
    const decoded = await audioContext.decodeAudioData(encoded);
    const monoAtTargetRate = resampleFloat32Linear(
      toMonoFloat32(decoded),
      decoded.sampleRate,
      targetSampleRate,
    );
    const audioBase64 = bytesToBase64(floatToPcm16Bytes(monoAtTargetRate));
    return { audioBase64, sampleRate: targetSampleRate };
  } finally {
    await audioContext.close();
  }
 }
 /**
  * Sends an audio blob to `/api/pilot-transcribe` and returns the transcript.
  *
  * @param blob Encoded audio to transcribe.
  * @returns The trimmed transcript, or an empty string when the response has no text.
  */
 async function transcribeAudioBlob(blob: Blob) {
  const pcmPayload = await decodeAudioBlobToPcm16(blob);
  const response = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
    method: "POST",
    body: pcmPayload,
  });
  const transcript = response?.text ?? "";
  return String(transcript).trim();
 }
 async function transcribeRecordedPilotAudio(blob: Blob) {
  pilotMicError.value = null;
  pilotTranscribing.value = true;
@@ -2242,10 +2152,7 @@ if (process.server) {
 onMounted(() => {
  pilotHeaderText.value = pilotHeaderPhrases[Math.floor(Math.random() * pilotHeaderPhrases.length)] ?? "Every step moves you forward";
-  pilotMicSupported.value =
+  pilotMicSupported.value = isVoiceCaptureSupported();
    typeof navigator !== "undefined" &&
    typeof MediaRecorder !== "undefined" &&
    Boolean(navigator.mediaDevices?.getUserMedia);
  lifecycleClock = setInterval(() => {
    lifecycleNowMs.value = Date.now();
  }, 15000);
@@ -2295,7 +2202,6 @@ onMounted(() => {
 onBeforeUnmount(() => {
  stopCrmRealtime();
  stopCommRecording(true);
  if (pilotRecording.value) {
    stopPilotRecording("fill");
  }
@@ -3496,11 +3402,6 @@ const eventCloseError = ref<Record<string, string>>({});
 const eventArchiveRecordingById = ref<Record<string, boolean>>({});
 const eventArchiveTranscribingById = ref<Record<string, boolean>>({});
 const eventArchiveMicErrorById = ref<Record<string, string>>({});
 let commMediaRecorder: MediaRecorder | null = null;
 let commRecorderStream: MediaStream | null = null;
 let commRecorderMimeType = "audio/webm";
 let commRecordingChunks: Blob[] = [];
 let commDiscardOnStop = false;
 let eventArchiveMediaRecorder: MediaRecorder | null = null;
 let eventArchiveRecorderStream: MediaStream | null = null;
 let eventArchiveRecorderMimeType = "audio/webm";
@@ -3508,7 +3409,6 @@ let eventArchiveChunks: Blob[] = [];
 let eventArchiveTargetEventId = "";
 watch(selectedCommThreadId, () => {
  stopCommRecording(true);
  stopEventArchiveRecording();
  destroyAllCommCallWaves();
  callTranscriptOpen.value = {};
@@ -3517,6 +3417,8 @@ watch(selectedCommThreadId, () => {
  callTranscriptError.value = {};
  commPinnedOnly.value = false;
  commDraft.value = "";
  commRecording.value = false;
  commTranscribing.value = false;
  commMicError.value = "";
  commComposerMode.value = "message";
  commQuickMenuOpen.value = false;
@@ -4245,12 +4147,7 @@ async function transcribeCallItem(item: CommItem) {
      if (!res.ok) throw new Error(`Audio fetch failed: ${res.status}`);
      return res.blob();
    });
-    const payload = await decodeAudioBlobToPcm16(audioBlob);
+    const text = await transcribeAudioBlob(audioBlob);
    const result = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
      method: "POST",
      body: payload,
    });
    const text = String(result?.text ?? "").trim();
    callTranscriptText.value[itemId] = text || "(empty transcript)";
    await gqlFetch<{ updateCommunicationTranscript: { ok: boolean; id: string } }>(updateCommunicationTranscriptMutation, {
      id: itemId,
@@ -4603,99 +4500,12 @@ async function sendCommMessage() {
  }
 }
-async function startCommRecording() {
+function onCommDictationTranscript(text: string) {
-  if (commRecording.value || commTranscribing.value) return;
+  const next = String(text ?? "").trim();
  if (!next) return;
  const previous = String(commDraft.value ?? "").trim();
  commDraft.value = previous ? `${previous} ${next}` : next;
  commMicError.value = "";
  if (!pilotMicSupported.value) {
    commMicError.value = "Recording is not supported in this browser";
    return;
  }
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const preferredMime = "audio/webm;codecs=opus";
    const recorder = MediaRecorder.isTypeSupported(preferredMime)
      ? new MediaRecorder(stream, { mimeType: preferredMime })
      : new MediaRecorder(stream);
    commRecorderStream = stream;
    commRecorderMimeType = recorder.mimeType || "audio/webm";
    commMediaRecorder = recorder;
    commRecordingChunks = [];
    commDiscardOnStop = false;
    commRecording.value = true;
    recorder.ondataavailable = (event: BlobEvent) => {
      if (event.data?.size) commRecordingChunks.push(event.data);
    };
    recorder.onstop = async () => {
      const discard = commDiscardOnStop;
      commDiscardOnStop = false;
      commRecording.value = false;
      commMediaRecorder = null;
      if (commRecorderStream) {
        commRecorderStream.getTracks().forEach((track) => track.stop());
        commRecorderStream = null;
      }
      const audioBlob = new Blob(commRecordingChunks, { type: commRecorderMimeType });
      commRecordingChunks = [];
      if (discard || audioBlob.size === 0) return;
      commTranscribing.value = true;
      try {
        const text = await transcribeAudioBlob(audioBlob);
        if (!text) {
          commMicError.value = "Could not recognize speech";
          return;
        }
        const previous = String(commDraft.value ?? "").trim();
        commDraft.value = previous ? `${previous} ${text}` : text;
        commMicError.value = "";
      } catch (error: any) {
        commMicError.value = String(error?.data?.message ?? error?.message ?? "Voice transcription failed");
      } finally {
        commTranscribing.value = false;
      }
    };
    recorder.start();
  } catch {
    commRecording.value = false;
    commMicError.value = "No microphone access";
    if (commRecorderStream) {
      commRecorderStream.getTracks().forEach((track) => track.stop());
      commRecorderStream = null;
    }
    commMediaRecorder = null;
  }
 }
 /**
  * Stops the communications recorder.
  *
  * With an active recorder, the discard flag is stored and `stop()` is called.
  * Without one, all recorder state is reset and any open stream is released.
  *
  * @param discard When true, marks the recording to be dropped on stop.
  */
 function stopCommRecording(discard = false) {
  const recorder = commMediaRecorder;
  if (recorder && recorder.state !== "inactive") {
    commDiscardOnStop = discard;
    recorder.stop();
    return;
  }
  // Nothing is recording: reset every piece of recorder state.
  commRecording.value = false;
  commDiscardOnStop = false;
  commRecordingChunks = [];
  commRecorderStream?.getTracks().forEach((track) => track.stop());
  commRecorderStream = null;
  commMediaRecorder = null;
 }
 /**
  * Starts recording when idle and stops it when recording; does nothing while
  * a transcription is in progress.
  */
 function toggleCommRecording() {
  if (commTranscribing.value) {
    return;
  }
  if (!commRecording.value) {
    void startCommRecording();
    return;
  }
  stopCommRecording();
 }
 function handleCommComposerEnter(event: KeyboardEvent) {
@@ -5574,17 +5384,19 @@ async function decideFeedCard(card: FeedCard, decision: "accepted" | "rejected")
                              <path d="M20 11H7.83l4.58-4.59L11 5l-7 7 7 7 1.41-1.41L7.83 13H20z" />
                            </svg>
                          </button>
-                          <button
+                          <CrmVoiceDictationButton
                            class="btn btn-xs btn-circle border border-base-300 bg-base-100 text-base-content/80 hover:bg-base-200"
                            :class="commRecording || commTranscribing ? 'comm-mic-active' : ''"
-                            :disabled="commSending || commEventSaving || commTranscribing"
+                            :disabled="commSending || commEventSaving"
-                            :title="commRecording ? 'Stop and insert transcript' : commTranscribing ? 'Transcribing...' : 'Voice input'"
+                            :session-key="selectedCommThreadId"
-                            @click="toggleCommRecording"
+                            idle-title="Voice input"
-                          >
+                            recording-title="Stop and insert transcript"
-                            <svg viewBox="0 0 24 24" class="h-3.5 w-3.5 fill-current">
+                            transcribing-title="Transcribing..."
-                              <path d="M12 15a3 3 0 0 0 3-3V7a3 3 0 1 0-6 0v5a3 3 0 0 0 3 3m5-3a1 1 0 1 1 2 0 7 7 0 0 1-6 6.92V21h3a1 1 0 1 1 0 2H8a1 1 0 1 1 0-2h3v-2.08A7 7 0 0 1 5 12a1 1 0 1 1 2 0 5 5 0 0 0 10 0" />
+                            @update:recording="commRecording = $event"
-                            </svg>
+                            @update:transcribing="commTranscribing = $event"
-                          </button>
+                            @transcript="onCommDictationTranscript"
                            @error="commMicError = $event"
                          />
                          <button
                            class="btn btn-sm btn-circle border-0 bg-[#5865f2] text-white hover:bg-[#4752c4]"
--- a/frontend/app/components/workspace/communications/CrmVoiceDictationButton.client.vue
+++ b/frontend/app/components/workspace/communications/CrmVoiceDictationButton.client.vue
@@ -0,0 +1,168 @@
 <script setup lang="ts">
 import { onBeforeUnmount, ref, watch } from "vue";
 import { isVoiceCaptureSupported, transcribeAudioBlob } from "~~/app/composables/useVoiceTranscription";
 // Component inputs.
 const props = defineProps<{
  // When true, clicks are ignored and a recording in progress is discarded.
  disabled?: boolean;
  // Opaque key identifying the current session; a change while recording discards the recording.
  sessionKey?: string;
  // Tooltip shown while idle (the template falls back to "Voice input").
  idleTitle?: string;
  // Tooltip shown while recording (the template falls back to "Stop and insert transcript").
  recordingTitle?: string;
  // Tooltip shown while transcribing (the template falls back to "Transcribing...").
  transcribingTitle?: string;
 }>();
 // Events sent to the parent.
 const emit = defineEmits<{
  // Mirrors the internal `recording` flag.
  (e: "update:recording", value: boolean): void;
  // Mirrors the internal `transcribing` flag.
  (e: "update:transcribing", value: boolean): void;
  // Carries the recognized text of a finished recording.
  (e: "transcript", value: string): void;
  // Carries a user-facing error message; an empty string clears a previous error.
  (e: "error", value: string): void;
 }>();
 // Reactive state that drives the button's title and disabled attribute.
 const recording = ref(false);
 const transcribing = ref(false);
 // Recorder session state, deliberately non-reactive.
 let mediaRecorder: MediaRecorder | null = null;
 let recorderStream: MediaStream | null = null;
 // MIME type used when the recorded chunks are assembled into a Blob.
 let recorderMimeType = "audio/webm";
 let recordingChunks: Blob[] = [];
 // Set by stopRecording(); read by the recorder's onstop handler to decide whether to transcribe.
 let discardOnStop = false;
 /**
  * Updates the local `recording` flag and notifies the parent via `update:recording`.
  * @param value New recording state.
  */
 function setRecording(value: boolean) {
  recording.value = value;
  emit("update:recording", value);
 }
 /**
  * Updates the local `transcribing` flag and notifies the parent via `update:transcribing`.
  * @param value New transcribing state.
  */
 function setTranscribing(value: boolean) {
  transcribing.value = value;
  emit("update:transcribing", value);
 }
 /**
  * Releases the microphone stream (if one is held) and resets all recorder
  * session state to its initial values.
  */
 function clearRecorderResources() {
  if (recorderStream) {
    for (const track of recorderStream.getTracks()) {
      track.stop();
    }
  }
  recorderStream = null;
  mediaRecorder = null;
  recordingChunks = [];
  discardOnStop = false;
 }
 // True while a getUserMedia() request is pending. `recording` is still false during that
 // window, so this flag is what prevents a second click from opening a second stream.
 let startPending = false;
 // Set once the component begins unmounting, so a microphone grant that arrives afterwards
 // is released instead of starting a recording nobody can stop.
 let disposed = false;
 onBeforeUnmount(() => {
  disposed = true;
 });

 /**
  * Requests microphone access and starts a recording session.
  *
  * When the recorder stops, the captured audio is transcribed (unless the stop
  * asked for a discard or nothing was captured) and the result is emitted as
  * `transcript`; failures are emitted as `error`.
  *
  * Safeguards:
  * - ignored while recording, transcribing, or while a previous start is pending;
  * - the stream is owned as soon as it is granted, so every failure path
  *   (including a throwing MediaRecorder constructor) releases the microphone;
  * - if the component was unmounted, disabled, or moved to another session
  *   while waiting for permission, the stream is released and nothing starts;
  * - a transcript or error that arrives after `sessionKey` changed is dropped,
  *   so it cannot leak into another session.
  */
 async function startRecording() {
  if (startPending || recording.value || transcribing.value) return;
  emit("error", "");
  if (!isVoiceCaptureSupported()) {
    emit("error", "Recording is not supported in this browser");
    return;
  }
  const sessionAtStart = props.sessionKey;
  startPending = true;
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    // Take ownership immediately so clearRecorderResources() can release it on any exit path.
    recorderStream = stream;
    if (disposed || props.disabled || props.sessionKey !== sessionAtStart) {
      clearRecorderResources();
      return;
    }
    const preferredMime = "audio/webm;codecs=opus";
    const recorder = MediaRecorder.isTypeSupported(preferredMime)
      ? new MediaRecorder(stream, { mimeType: preferredMime })
      : new MediaRecorder(stream);
    recorderMimeType = recorder.mimeType || "audio/webm";
    mediaRecorder = recorder;
    recordingChunks = [];
    discardOnStop = false;
    setRecording(true);
    recorder.ondataavailable = (event: BlobEvent) => {
      if (event.data?.size) recordingChunks.push(event.data);
    };
    recorder.onstop = async () => {
      // Snapshot the session state before clearRecorderResources() resets it.
      const discard = discardOnStop;
      const audioBlob = new Blob(recordingChunks, { type: recorderMimeType });
      setRecording(false);
      clearRecorderResources();
      if (discard || audioBlob.size === 0) return;
      setTranscribing(true);
      try {
        const text = await transcribeAudioBlob(audioBlob);
        // The session changed while transcribing: this result belongs to the old one.
        if (props.sessionKey !== sessionAtStart) return;
        if (!text) {
          emit("error", "Could not recognize speech");
          return;
        }
        emit("error", "");
        emit("transcript", text);
      } catch (error: unknown) {
        if (props.sessionKey !== sessionAtStart) return;
        const failure = error as { data?: { message?: unknown }; message?: unknown } | null | undefined;
        emit("error", String(failure?.data?.message ?? failure?.message ?? "Voice transcription failed"));
      } finally {
        setTranscribing(false);
      }
    };
    recorder.start();
  } catch {
    setRecording(false);
    clearRecorderResources();
    emit("error", "No microphone access");
  } finally {
    startPending = false;
  }
 }
 /**
  * Stops the current recording.
  *
  * With an active recorder, the discard flag is stored for the recorder's stop
  * handler and `stop()` is called. Without one, the recording flag and all
  * recorder resources are reset directly.
  *
  * @param discard When true, the captured audio is dropped instead of transcribed.
  */
 function stopRecording(discard = false) {
  const recorder = mediaRecorder;
  if (recorder && recorder.state !== "inactive") {
    discardOnStop = discard;
    recorder.stop();
    return;
  }
  setRecording(false);
  clearRecorderResources();
 }
 /**
  * Click handler: starts a recording when idle, stops it when recording.
  * Does nothing while disabled or while a transcription is in progress.
  */
 function toggleRecording() {
  const blocked = props.disabled || transcribing.value;
  if (blocked) {
    return;
  }
  if (!recording.value) {
    void startRecording();
    return;
  }
  stopRecording();
 }
 // A session switch discards whatever is being recorded.
 watch(
  () => props.sessionKey,
  () => {
    if (!recording.value) return;
    stopRecording(true);
  },
 );
 // Becoming disabled mid-recording discards the recording as well.
 watch(
  () => props.disabled,
  (isDisabled) => {
    if (!isDisabled || !recording.value) return;
    stopRecording(true);
  },
 );
 // On teardown: discard an active recording, otherwise just release resources.
 onBeforeUnmount(() => {
  if (!recording.value) {
    clearRecorderResources();
    return;
  }
  stopRecording(true);
 });
 </script>
 <template>
  <button
    type="button"
    :disabled="Boolean(props.disabled) || transcribing"
    :title="
      recording
        ? (props.recordingTitle || 'Stop and insert transcript')
        : transcribing
          ? (props.transcribingTitle || 'Transcribing...')
          : (props.idleTitle || 'Voice input')
    "
    @click="toggleRecording"
  >
    <slot :recording="recording" :transcribing="transcribing">
      <svg viewBox="0 0 24 24" class="h-3.5 w-3.5 fill-current">
        <path d="M12 15a3 3 0 0 0 3-3V7a3 3 0 1 0-6 0v5a3 3 0 0 0 3 3m5-3a1 1 0 1 1 2 0 7 7 0 0 1-6 6.92V21h3a1 1 0 1 1 0 2H8a1 1 0 1 1 0-2h3v-2.08A7 7 0 0 1 5 12a1 1 0 1 1 2 0 5 5 0 0 0 10 0" />
      </svg>
    </slot>
  </button>
 </template>
--- a/frontend/app/composables/useVoiceTranscription.ts
+++ b/frontend/app/composables/useVoiceTranscription.ts
@@ -0,0 +1,92 @@
 /**
  * Resolves the AudioContext constructor, falling back to the `webkit`-prefixed one.
  *
  * The lookup goes through `globalThis` rather than `window`, so calling this
  * where `window` is not defined ends in the descriptive error below instead
  * of a ReferenceError.
  *
  * @returns The constructor to instantiate an audio context with.
  * @throws Error when no AudioContext implementation is available.
  */
 function getAudioContextCtor(): typeof AudioContext {
  const scope = globalThis as typeof globalThis & { webkitAudioContext?: typeof AudioContext };
  const Ctor: typeof AudioContext | undefined = scope.AudioContext ?? scope.webkitAudioContext;
  if (!Ctor) {
    throw new Error("Web Audio API is not supported in this browser");
  }
  return Ctor;
 }
 /**
  * Down-mixes an AudioBuffer to a single channel by averaging all channels
  * sample by sample.
  *
  * @param buffer Decoded audio to down-mix.
  * @returns A new Float32Array of `buffer.length` samples; for single-channel
  *          input this is a copy of channel 0.
  */
 function toMonoFloat32(buffer: AudioBuffer) {
  const channelCount = buffer.numberOfChannels;
  if (channelCount <= 1) return buffer.getChannelData(0).slice();
  // Look each channel up once; doing it per sample cost length × channels calls in the hot loop.
  const channels: Float32Array[] = [];
  for (let ch = 0; ch < channelCount; ch += 1) {
    channels.push(buffer.getChannelData(ch));
  }
  const length = buffer.length;
  const output = new Float32Array(length);
  for (let i = 0; i < length; i += 1) {
    let sum = 0;
    for (const channel of channels) {
      sum += channel[i] ?? 0;
    }
    output[i] = sum / channelCount;
  }
  return output;
 }
 /**
  * Resamples audio with linear interpolation.
  *
  * @param input Source samples.
  * @param fromRate Sample rate of `input`.
  * @param toRate Target sample rate.
  * @returns `input` unchanged when both rates are equal; otherwise a new array
  *          holding at least one sample.
  */
 function resampleFloat32Linear(input: Float32Array, fromRate: number, toRate: number) {
  if (fromRate === toRate) {
    return input;
  }
  const sourceStep = fromRate / toRate;
  const lastIndex = input.length - 1;
  const targetLength = Math.max(1, Math.round(input.length / sourceStep));
  const result = new Float32Array(targetLength);
  for (let target = 0; target < targetLength; target += 1) {
    const exact = target * sourceStep;
    const before = Math.floor(exact);
    // The neighbour is clamped so the final sample blends with itself.
    const after = Math.min(lastIndex, before + 1);
    const blend = exact - before;
    const beforeValue = input[before] ?? 0;
    const afterValue = input[after] ?? 0;
    result[target] = beforeValue * (1 - blend) + afterValue * blend;
  }
  return result;
 }
 /**
  * Converts float samples to signed 16-bit little-endian PCM bytes.
  *
  * Each sample is clamped to [-1, 1], scaled (0x8000 for negatives, 0x7fff
  * otherwise), rounded and written little-endian.
  *
  * @param input Float samples.
  * @returns Two bytes per input sample.
  */
 function floatToPcm16Bytes(input: Float32Array) {
  const pcmBytes = new Uint8Array(input.length * 2);
  const writer = new DataView(pcmBytes.buffer);
  let byteOffset = 0;
  for (const rawSample of input) {
    const clamped = Math.max(-1, Math.min(1, rawSample ?? 0));
    const scale = clamped < 0 ? 0x8000 : 0x7fff;
    writer.setInt16(byteOffset, Math.round(clamped * scale), true);
    byteOffset += 2;
  }
  return pcmBytes;
 }
 /**
  * Encodes bytes as base64 text.
  *
  * The binary string is assembled in 0x8000-byte slices to keep each
  * String.fromCharCode call's argument list bounded.
  *
  * @param bytes Raw bytes.
  * @returns Base64 representation of `bytes`.
  */
 function bytesToBase64(bytes: Uint8Array) {
  const sliceLength = 0x8000;
  const segments: string[] = [];
  for (let start = 0; start < bytes.length; start += sliceLength) {
    const slice = bytes.subarray(start, start + sliceLength);
    segments.push(String.fromCharCode(...slice));
  }
  return btoa(segments.join(""));
 }
 /**
  * Decodes an encoded audio blob into base64 PCM16 at 16 kHz, mono.
  *
  * A short-lived audio context performs the decoding; it is closed in all
  * cases before the function settles.
  *
  * @param blob Encoded audio.
  * @returns `{ audioBase64, sampleRate }` where `sampleRate` is always 16000.
  */
 async function decodeAudioBlobToPcm16(blob: Blob) {
  const targetSampleRate = 16000;
  const ContextCtor = getAudioContextCtor();
  const audioContext = new ContextCtor();
  try {
    const encodedBytes = await blob.arrayBuffer();
    const audio = await audioContext.decodeAudioData(encodedBytes);
    const samples = resampleFloat32Linear(toMonoFloat32(audio), audio.sampleRate, targetSampleRate);
    const audioBase64 = bytesToBase64(floatToPcm16Bytes(samples));
    return { audioBase64, sampleRate: targetSampleRate };
  } finally {
    await audioContext.close();
  }
 }
 /**
  * Reports whether the current environment can capture microphone audio:
  * `window`, `navigator`, `MediaRecorder` and `mediaDevices.getUserMedia` must
  * all be present.
  *
  * @returns `true` when every required API exists, otherwise `false`.
  */
 export function isVoiceCaptureSupported() {
  const hasBrowserGlobals = typeof window !== "undefined" && typeof navigator !== "undefined";
  if (!hasBrowserGlobals) {
    return false;
  }
  if (typeof MediaRecorder === "undefined") {
    return false;
  }
  return Boolean(navigator.mediaDevices?.getUserMedia);
 }
 /**
  * Transcribes recorded audio by converting it to PCM16 and posting it to
  * `/api/pilot-transcribe`.
  *
  * @param blob Encoded audio to transcribe.
  * @returns The trimmed transcript; an empty string when the response carries no text.
  */
 export async function transcribeAudioBlob(blob: Blob) {
  const requestBody = await decodeAudioBlobToPcm16(blob);
  const response = await $fetch<{ text?: string }>("/api/pilot-transcribe", {
    method: "POST",
    body: requestBody,
  });
  const transcript = response?.text ?? "";
  return String(transcript).trim();
 }