precompute call waveforms and stop list-time audio loading

This commit is contained in:
Ruslan Bakiev
2026-02-23 16:02:57 +07:00
parent 2eb2f3109c
commit 94c01516ba
9 changed files with 128 additions and 7 deletions

View File

@@ -158,6 +158,7 @@ model ContactMessage {
content String
audioUrl String?
durationSec Int?
waveformJson Json?
transcriptJson Json?
occurredAt DateTime @default(now())
createdAt DateTime @default(now())

View File

@@ -29,6 +29,7 @@ type OmniInboundEnvelopeV1 = {
// Queue name for the receiver flow. Read from the RECEIVER_FLOW_QUEUE_NAME env var and
// trimmed; falls back to "receiver.flow" when the variable is unset or empty.
// NOTE: a whitespace-only value is not replaced by the default and trims to "".
export const RECEIVER_FLOW_QUEUE_NAME = (process.env.RECEIVER_FLOW_QUEUE_NAME || "receiver.flow").trim();
// Prefix of Telegram placeholder text — usage is outside this view; TODO confirm exact role.
const TELEGRAM_PLACEHOLDER_PREFIX = "Telegram ";
// Marker prepended to a Telegram file id when it is stored as a message audioUrl.
const TELEGRAM_AUDIO_FILE_MARKER = "tg-file:";
// Default number of bins (peaks) produced by the waveform builders.
const TELEGRAM_WAVE_BINS = 96;
function redisConnectionFromEnv(): ConnectionOptions {
const raw = (process.env.REDIS_URL || "redis://localhost:6379").trim();
@@ -87,6 +88,101 @@ function fallbackTextFromMedia(media: TelegramInboundMedia) {
return "[audio]";
}
/**
 * Synthesizes a deterministic placeholder waveform from an arbitrary seed string.
 *
 * The same `seedText` and `bins` always yield the same peaks. Each peak is a
 * number rounded to 4 decimals and clamped to the range [0.06, 1].
 *
 * @param seedText Text hashed into the initial pseudo-random state; its length
 *   also selects how many sine lobes shape the envelope.
 * @param bins Number of peaks to generate.
 * @returns Array of `bins` peak values (empty when `bins` is 0 or less).
 */
function buildFallbackWaveform(seedText: string, bins = TELEGRAM_WAVE_BINS) {
  // Fold the seed text into an unsigned 32-bit state (multiply-by-33 hash over UTF-16 code units).
  let state = 0;
  for (let pos = 0; pos < seedText.length; pos += 1) {
    state = (state * 33 + seedText.charCodeAt(pos)) >>> 0;
  }

  // Linear congruential step; returns the new state scaled into [0, 1].
  const nextUnit = () => {
    state = (state * 1664525 + 1013904223) >>> 0;
    return state / 0xffffffff;
  };

  // Loop invariants: the divisor that maps an index to [0, 1] and the lobe count.
  const lastIndex = Math.max(1, bins - 1);
  const lobes = 2 + (seedText.length % 5);

  const peaks: number[] = [];
  let drift = 0;
  let index = 0;
  while (index < bins) {
    const phase = index / lastIndex;
    // Only the positive half-waves of the sine contribute.
    const burst = Math.max(0, Math.sin(phase * Math.PI * lobes));
    const jitter = (nextUnit() * 2 - 1) * 0.6;
    // Low-pass the jitter so neighbouring bins change gradually.
    drift = drift * 0.72 + jitter * 0.28;
    const level = 0.12 + Math.abs(drift) * 0.42 + burst * 0.4;
    const clamped = Math.max(0.06, Math.min(1, level));
    peaks.push(Number(clamped.toFixed(4)));
    index += 1;
  }
  return peaks;
}
/**
 * Derives a normalized waveform (one peak per bin) from raw file bytes.
 *
 * Every byte is treated as an unsigned 8-bit sample centred on 128; a bin's
 * energy is the mean of `|byte - 128| / 128` over the bytes assigned to it.
 * The energies are low-pass filtered, scaled by the largest filtered value,
 * mapped into [0.06, 0.96] and rounded to 4 decimals.
 *
 * Bin boundaries are proportional to the input length, so all bins cover the
 * data evenly even when `bytes.length` is not a multiple of `bins`. When there
 * are fewer bytes than bins, neighbouring bins share the same byte.
 *
 * NOTE(review): this measures byte deviation, not decoded audio amplitude. For
 * compressed containers it is presumably only a rough visual proxy — confirm
 * that is acceptable for the UI.
 *
 * @param bytes Raw bytes to analyse.
 * @param bins Number of peaks to produce; must be a positive integer.
 * @returns `bins` peaks, or an empty array when the input is empty, `bins` is
 *   not a positive integer, or every bin has zero energy.
 */
function buildWaveformFromBytes(bytes: Uint8Array, bins = TELEGRAM_WAVE_BINS): number[] {
  const total = bytes.length;
  if (!total || !Number.isInteger(bins) || bins <= 0) return [];

  const raw = new Array<number>(bins).fill(0);
  for (let i = 0; i < bins; i += 1) {
    // Proportional boundaries: start is always < total, and end never exceeds total.
    const start = Math.floor((i * total) / bins);
    // Guarantee at least one byte per bin so short inputs do not leave gaps.
    const end = Math.max(start + 1, Math.floor(((i + 1) * total) / bins));
    let energy = 0;
    for (let j = start; j < end; j += 1) {
      energy += Math.abs(bytes[j] - 128) / 128;
    }
    raw[i] = energy / (end - start);
  }

  // Exponential smoothing; track the maximum while filtering instead of spreading
  // the array into Math.max, which is bounded by the engine's argument limit.
  const smooth: number[] = [];
  let prev = 0;
  let maxValue = 0;
  for (const value of raw) {
    prev = prev * 0.78 + value * 0.22;
    smooth.push(prev);
    if (prev > maxValue) maxValue = prev;
  }
  // All-zero energy (every byte equals 128) carries no shape; let the caller decide.
  if (maxValue <= 0) return [];

  return smooth.map((value) => {
    const normalized = value / maxValue;
    const mapped = Math.max(0.06, Math.min(1, normalized * 0.9 + 0.06));
    return Number(mapped.toFixed(4));
  });
}
/**
 * Downloads the raw bytes of a Telegram file via the Bot API.
 *
 * Calls `getFile` to resolve the file path, then fetches the file itself.
 * Reads `TELEGRAM_BOT_TOKEN` (required) and `TELEGRAM_API_BASE` (optional,
 * defaults to `https://api.telegram.org`; trailing slashes are stripped).
 *
 * Both requests share a single deadline so a stalled connection cannot hold
 * up the caller; when the deadline passes the pending request is aborted and
 * this function rejects.
 *
 * @param fileId Telegram `file_id` to download.
 * @returns The file bytes, or `null` when the token is missing, `getFile`
 *   fails or returns no path, or the download responds with a non-OK status.
 * @throws Whatever `fetch` rejects with (network failure or abort on timeout).
 */
async function fetchTelegramFileBytes(fileId: string): Promise<Uint8Array | null> {
  const token = String(process.env.TELEGRAM_BOT_TOKEN ?? "").trim();
  if (!token) return null;
  const base = String(process.env.TELEGRAM_API_BASE ?? "https://api.telegram.org").replace(/\/+$/, "");

  // Overall budget for the metadata lookup plus the download.
  const timeoutMs = 20000;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const metaRes = await fetch(`${base}/bot${token}/getFile`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({ file_id: fileId }),
      signal: controller.signal,
    });
    // Parse before checking the status so the response body is always consumed.
    const metaJson = (await metaRes.json().catch(() => null)) as
      | { ok?: boolean; result?: { file_path?: string } }
      | null;
    const filePath = String(metaJson?.result?.file_path ?? "").trim();
    if (!metaRes.ok || !metaJson?.ok || !filePath) return null;

    const fileRes = await fetch(`${base}/file/bot${token}/${filePath}`, { signal: controller.signal });
    if (!fileRes.ok) return null;
    return new Uint8Array(await fileRes.arrayBuffer());
  } finally {
    // Always release the timer so it cannot keep the process alive or fire late.
    clearTimeout(timer);
  }
}
/**
 * Picks the waveform to store for an inbound audio-like message.
 *
 * A synthetic waveform seeded from the file id, duration and message text is
 * always prepared. When the media has a file id, the file is downloaded and
 * analysed; the synthetic waveform is returned instead whenever the download
 * yields nothing, the analysis yields no peaks, or either step throws.
 *
 * @param media Inbound media descriptor; `fileId` and `durationSec` are read.
 * @param text Message text, used only as part of the synthetic seed.
 * @returns Waveform peaks derived from the file, or the synthetic ones.
 */
async function resolveInboundWaveform(media: TelegramInboundMedia, text: string) {
  const seedText = `${media.fileId ?? "none"}:${media.durationSec ?? "0"}:${text}`;
  const synthetic = buildFallbackWaveform(seedText);

  const targetFileId = media.fileId;
  if (!targetFileId) {
    return synthetic;
  }

  let measured: number[] = [];
  try {
    const payload = await fetchTelegramFileBytes(targetFileId);
    if (payload && payload.length > 0) {
      measured = buildWaveformFromBytes(payload);
    }
  } catch {
    // Best effort: any download or analysis failure falls through to the synthetic waveform.
  }
  return measured.length > 0 ? measured : synthetic;
}
function parseOccurredAt(input: string | null | undefined) {
const d = new Date(String(input ?? ""));
if (Number.isNaN(d.getTime())) return new Date();
@@ -380,6 +476,7 @@ async function ingestInbound(env: OmniInboundEnvelopeV1) {
const isAudioLike = Boolean(media.fileId) && (media.kind === "voice" || media.kind === "audio" || media.kind === "video_note");
const contactMessageKind: "MESSAGE" | "CALL" = isAudioLike ? "CALL" : "MESSAGE";
const contactMessageAudioUrl = isAudioLike ? `${TELEGRAM_AUDIO_FILE_MARKER}${media.fileId}` : null;
const waveformPeaks = isAudioLike ? await resolveInboundWaveform(media, text) : null;
const occurredAt = parseOccurredAt(env.occurredAt);
const direction = safeDirection(env.direction);
const contactProfile = buildContactProfile(n, externalContactId);
@@ -482,6 +579,7 @@ async function ingestInbound(env: OmniInboundEnvelopeV1) {
content: text,
audioUrl: contactMessageAudioUrl,
durationSec: media.durationSec,
...(waveformPeaks ? { waveformJson: waveformPeaks as Prisma.InputJsonValue } : {}),
occurredAt,
},
});