precompute call waveforms and stop list-time audio loading

2026-02-23 16:02:57 +07:00
parent 2eb2f3109c
commit 94c01516ba
9 changed files with 128 additions and 7 deletions
--- a/omni_chat/prisma/schema.prisma
+++ b/omni_chat/prisma/schema.prisma
@@ -158,6 +158,7 @@ model ContactMessage {
  content        String
  audioUrl       String?
  durationSec    Int?
+  waveformJson   Json?
  transcriptJson Json?
  occurredAt     DateTime           @default(now())
  createdAt      DateTime           @default(now())
--- a/omni_chat/src/worker.ts
+++ b/omni_chat/src/worker.ts
@@ -29,6 +29,7 @@ type OmniInboundEnvelopeV1 = {
 export const RECEIVER_FLOW_QUEUE_NAME = (process.env.RECEIVER_FLOW_QUEUE_NAME || "receiver.flow").trim();
 const TELEGRAM_PLACEHOLDER_PREFIX = "Telegram ";
 const TELEGRAM_AUDIO_FILE_MARKER = "tg-file:";
+const TELEGRAM_WAVE_BINS = 96; // default number of bins (peaks) produced by the waveform builders below

 function redisConnectionFromEnv(): ConnectionOptions {
  const raw = (process.env.REDIS_URL || "redis://localhost:6379").trim();
@@ -87,6 +88,101 @@ function fallbackTextFromMedia(media: TelegramInboundMedia) {
  return "[audio]";
 }

+/**
+ * Synthesizes a deterministic pseudo-waveform from a seed string.
+ *
+ * The same `seedText` and `bins` always yield the same output: the text is
+ * hashed into a 32-bit state that drives a small linear congruential
+ * generator, and the generated noise is low-pass filtered and mixed with a
+ * rectified sine envelope.
+ *
+ * @param seedText Text that seeds the generator; its length also selects the
+ *   number of sine lobes in the envelope.
+ * @param bins Number of peaks to produce.
+ * @returns `bins` values, each clamped to [0.06, 1] and rounded to 4 decimals.
+ */
+function buildFallbackWaveform(seedText: string, bins = TELEGRAM_WAVE_BINS) {
+  // Fold the UTF-16 code units into an unsigned 32-bit state (multiply-by-33 hash).
+  let state = 0;
+  for (let idx = 0; idx < seedText.length; idx += 1) {
+    state = (state * 33 + seedText.charCodeAt(idx)) >>> 0;
+  }
+
+  // Advances the generator and returns the new state scaled into [0, 1].
+  const nextUnit = () => {
+    state = (state * 1664525 + 1013904223) >>> 0;
+    return state / 0xffffffff;
+  };
+
+  // Loop invariants: the position divisor (never below 1) and the lobe count.
+  const lastIndex = Math.max(1, bins - 1);
+  const lobes = 2 + (seedText.length % 5);
+
+  const peaks: number[] = [];
+  let drift = 0;
+  let position = 0;
+  while (position < bins) {
+    const progress = position / lastIndex;
+    const envelope = Math.max(0, Math.sin(progress * Math.PI * lobes));
+    const jitter = (nextUnit() * 2 - 1) * 0.6;
+    // Exponential smoothing keeps neighbouring peaks from jumping around.
+    drift = drift * 0.72 + jitter * 0.28;
+    const level = Math.max(0.06, Math.min(1, 0.12 + Math.abs(drift) * 0.42 + envelope * 0.4));
+    peaks.push(Number(level.toFixed(4)));
+    position += 1;
+  }
+  return peaks;
+}
+
+/**
+ * Reduces a byte buffer to a fixed number of normalized waveform peaks.
+ *
+ * Every byte is read as an unsigned 8-bit sample centred on 128; the mean
+ * absolute deviation per bucket is smoothed exponentially and then scaled so
+ * the largest peak maps to 0.96 and the smallest possible value to 0.06.
+ * NOTE(review): the visible caller passes the downloaded Telegram file as-is,
+ * which is presumably still encoded audio rather than PCM - confirm that an
+ * approximate envelope is acceptable.
+ *
+ * Bucket boundaries are proportional to the input length, so all `bins`
+ * buckets cover some input. (Sizing buckets with `Math.ceil(length / bins)`
+ * left trailing bins empty whenever the length was not a multiple of `bins`,
+ * e.g. 97 bytes filled only 49 of 96 bins.)
+ *
+ * @param bytes Raw bytes to summarize.
+ * @param bins Number of peaks to produce; must be a positive integer.
+ * @returns `bins` values in [0.06, 1] rounded to 4 decimals, or `[]` when the
+ *   input is empty, `bins` is not a positive integer, or every bucket has zero
+ *   energy (all bytes equal to 128).
+ */
+function buildWaveformFromBytes(bytes: Uint8Array, bins = TELEGRAM_WAVE_BINS) {
+  const total = bytes.length;
+  if (!total || !Number.isInteger(bins) || bins < 1) return [];
+
+  const raw: number[] = [];
+  for (let i = 0; i < bins; i += 1) {
+    // Proportional boundaries; each bucket holds at least one byte, so inputs
+    // shorter than `bins` are stretched instead of leaving the tail empty.
+    const start = Math.floor((i * total) / bins);
+    const end = Math.max(start + 1, Math.floor(((i + 1) * total) / bins));
+
+    let energy = 0;
+    for (let j = start; j < end; j += 1) {
+      energy += Math.abs(bytes[j] - 128) / 128;
+    }
+    raw.push(energy / (end - start));
+  }
+
+  // Exponential smoothing; track the maximum in the same pass.
+  const smooth: number[] = [];
+  let prev = 0;
+  let maxValue = 0;
+  for (const value of raw) {
+    prev = prev * 0.78 + value * 0.22;
+    smooth.push(prev);
+    if (prev > maxValue) maxValue = prev;
+  }
+  if (maxValue <= 0) return [];
+
+  return smooth.map((value) => {
+    const normalized = value / maxValue;
+    const mapped = Math.max(0.06, Math.min(1, normalized * 0.9 + 0.06));
+    return Number(mapped.toFixed(4));
+  });
+}
+
+/** Upper bound, in milliseconds, for each Telegram HTTP request made by fetchTelegramFileBytes. */
+const TELEGRAM_FETCH_TIMEOUT_MS = 15000;
+
+/**
+ * Downloads the raw bytes of a Telegram file through the Bot API.
+ *
+ * Reads `TELEGRAM_BOT_TOKEN` and, optionally, `TELEGRAM_API_BASE` (default
+ * `https://api.telegram.org`, trailing slashes stripped). It first calls
+ * `getFile` to resolve the file path, then fetches that path.
+ *
+ * Each request is bounded by TELEGRAM_FETCH_TIMEOUT_MS so that a stalled
+ * Telegram connection cannot hold up the caller indefinitely.
+ *
+ * @param fileId Telegram `file_id` to resolve and download.
+ * @returns The file contents, or `null` when the token is not configured,
+ *   `getFile` does not succeed or returns no path, or the download response
+ *   is not OK.
+ * @throws Whatever `fetch` rejects with, including network failures and the
+ *   timeout abort; these are not caught here.
+ */
+async function fetchTelegramFileBytes(fileId: string) {
+  const token = String(process.env.TELEGRAM_BOT_TOKEN ?? "").trim();
+  if (!token) return null;
+
+  const base = String(process.env.TELEGRAM_API_BASE ?? "https://api.telegram.org").replace(/\/+$/, "");
+
+  const metaRes = await fetch(`${base}/bot${token}/getFile`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ file_id: fileId }),
+    signal: AbortSignal.timeout(TELEGRAM_FETCH_TIMEOUT_MS),
+  });
+  // A non-JSON body is treated the same as a failed lookup.
+  const metaJson = (await metaRes.json().catch(() => null)) as
+    | { ok?: boolean; result?: { file_path?: string } }
+    | null;
+  const filePath = String(metaJson?.result?.file_path ?? "").trim();
+  if (!metaRes.ok || !metaJson?.ok || !filePath) return null;
+
+  // A fresh signal gives the download its own full time budget.
+  const fileRes = await fetch(`${base}/file/bot${token}/${filePath}`, {
+    signal: AbortSignal.timeout(TELEGRAM_FETCH_TIMEOUT_MS),
+  });
+  if (!fileRes.ok) return null;
+  return new Uint8Array(await fileRes.arrayBuffer());
+}
+
+/**
+ * Resolves the waveform peaks to store for an inbound media item.
+ *
+ * Prefers peaks computed from the downloaded Telegram file. It falls back to
+ * a deterministic synthetic waveform (seeded from the file id, duration and
+ * message text) when there is no file id, the download yields nothing, the
+ * computed waveform is empty, or anything in that path throws.
+ *
+ * @param media Inbound media descriptor; `fileId` and `durationSec` are read.
+ * @param text Message text mixed into the fallback seed.
+ * @returns An array of waveform peaks; this function does not reject.
+ */
+async function resolveInboundWaveform(media: TelegramInboundMedia, text: string) {
+  // The seed is captured up front; the synthetic waveform is built only when needed.
+  const seed = `${media.fileId ?? "none"}:${media.durationSec ?? "0"}:${text}`;
+  const synthetic = () => buildFallbackWaveform(seed);
+
+  const fileId = media.fileId;
+  if (!fileId) return synthetic();
+
+  try {
+    const downloaded = await fetchTelegramFileBytes(fileId);
+    if (!downloaded?.length) return synthetic();
+
+    const peaks = buildWaveformFromBytes(downloaded);
+    if (peaks.length) return peaks;
+    return synthetic();
+  } catch {
+    // Best effort: any download or processing failure degrades to the synthetic waveform.
+    return synthetic();
+  }
+}
+
 function parseOccurredAt(input: string | null | undefined) {
  const d = new Date(String(input ?? ""));
  if (Number.isNaN(d.getTime())) return new Date();
@@ -380,6 +476,7 @@ async function ingestInbound(env: OmniInboundEnvelopeV1) {
  const isAudioLike = Boolean(media.fileId) && (media.kind === "voice" || media.kind === "audio" || media.kind === "video_note");
  const contactMessageKind: "MESSAGE" | "CALL" = isAudioLike ? "CALL" : "MESSAGE";
  const contactMessageAudioUrl = isAudioLike ? `${TELEGRAM_AUDIO_FILE_MARKER}${media.fileId}` : null;
+  const waveformPeaks = isAudioLike ? await resolveInboundWaveform(media, text) : null;
  const occurredAt = parseOccurredAt(env.occurredAt);
  const direction = safeDirection(env.direction);
  const contactProfile = buildContactProfile(n, externalContactId);
@@ -482,6 +579,7 @@ async function ingestInbound(env: OmniInboundEnvelopeV1) {
      content: text,
      audioUrl: contactMessageAudioUrl,
      durationSec: media.durationSec,
+      ...(waveformPeaks ? { waveformJson: waveformPeaks as Prisma.InputJsonValue } : {}),
      occurredAt,
    },
  });