Add Deepgram and OpenRouter voice processing
Some checks failed
Build and deploy Worker / build (push) Failing after 6m46s

This commit is contained in:
Ruslan Bakiev
2026-05-14 08:44:20 +07:00
parent f6caaac28f
commit 53a7cb7cc8
11 changed files with 663 additions and 171 deletions

View File

@@ -7,7 +7,7 @@ on:
jobs: jobs:
build: build:
runs-on: build-host runs-on: builder
env: env:
SERVICE_NAME: worker SERVICE_NAME: worker
IMAGE_SHA: gitea.dsrptlab.com/mapflow/worker:${{ github.sha }} IMAGE_SHA: gitea.dsrptlab.com/mapflow/worker:${{ github.sha }}
@@ -25,12 +25,6 @@ jobs:
- name: Build and push image - name: Build and push image
run: | run: |
set -euo pipefail set -euo pipefail
builder="builder"
if ! docker buildx inspect "$builder" >/dev/null 2>&1; then
docker buildx create --name "$builder" --driver docker-container --buildkitd-config /etc/buildkit/buildkitd.toml
fi
docker buildx use "$builder"
docker buildx inspect --bootstrap
docker buildx build --push --tag "$IMAGE_SHA" --tag "$IMAGE_LATEST" . docker buildx build --push --tag "$IMAGE_SHA" --tag "$IMAGE_LATEST" .
- name: Skip stale deployment - name: Skip stale deployment
@@ -58,8 +52,3 @@ jobs:
-d "$payload")" -d "$payload")"
cat "$response_file" cat "$response_file"
[ "$status_code" = "200" ] [ "$status_code" = "200" ]
- name: Prune shared BuildKit cache
run: |
set -euo pipefail
docker buildx prune --builder builder --all --max-used-space 40gb -f

View File

@@ -17,14 +17,16 @@ enum VoiceExperienceStatus {
} }
model Place { model Place {
id String @id @default(cuid()) id String @id @default(cuid())
googlePlaceId String @unique googlePlaceId String @unique
name String name String
latitude Float latitude Float
longitude Float longitude Float
experiences VoiceExperience[] googlePrimaryType String?
createdAt DateTime @default(now()) googleTypes String[] @default([])
updatedAt DateTime @updatedAt experiences VoiceExperience[]
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
} }
model User { model User {
@@ -35,22 +37,26 @@ model User {
lastName String? lastName String?
photoUrl String? photoUrl String?
languageCode String? languageCode String?
isAdmin Boolean @default(false)
voiceExperiences VoiceExperience[] voiceExperiences VoiceExperience[]
createdAt DateTime @default(now()) createdAt DateTime @default(now())
updatedAt DateTime @updatedAt updatedAt DateTime @updatedAt
} }
model VoiceExperience { model VoiceExperience {
id String @id @default(cuid()) id String @id @default(cuid())
placeId String placeId String
place Place @relation(fields: [placeId], references: [id]) place Place @relation(fields: [placeId], references: [id])
userId String? userId String?
user User? @relation(fields: [userId], references: [id]) user User? @relation(fields: [userId], references: [id])
durationSeconds Int durationSeconds Int
audioObjectKey String audioObjectKey String
status VoiceExperienceStatus @default(UPLOADED) audioContentBase64 String?
transcript String? audioMimeType String?
analysis Json? audioAccessToken String? @unique
createdAt DateTime @default(now()) status VoiceExperienceStatus @default(UPLOADED)
updatedAt DateTime @updatedAt transcript String?
analysis Json?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
} }

View File

@@ -8,4 +8,11 @@ export const config = {
databaseUrl: process.env.DATABASE_URL ?? '', databaseUrl: process.env.DATABASE_URL ?? '',
workerName: process.env.HATCHET_WORKER_NAME ?? 'mapflow-hatchet-worker', workerName: process.env.HATCHET_WORKER_NAME ?? 'mapflow-hatchet-worker',
workerSlots: Number.parseInt(process.env.HATCHET_WORKER_SLOTS ?? '4', 10), workerSlots: Number.parseInt(process.env.HATCHET_WORKER_SLOTS ?? '4', 10),
publicApiUrl: process.env.PUBLIC_API_URL ?? 'https://api.map.craftee.vn',
deepgramApiKey: process.env.DEEPGRAM_API_KEY ?? '',
deepgramModel: process.env.DEEPGRAM_MODEL ?? 'nova-3',
deepgramLanguage: process.env.DEEPGRAM_LANGUAGE ?? 'ru',
openRouterApiKey: process.env.OPENROUTER_API_KEY ?? '',
openRouterModel:
process.env.OPENROUTER_MODEL ?? 'minimax/minimax-m2.5:free',
}; };

File diff suppressed because one or more lines are too long

View File

@@ -126,6 +126,8 @@ exports.Prisma.PlaceScalarFieldEnum = {
name: 'name', name: 'name',
latitude: 'latitude', latitude: 'latitude',
longitude: 'longitude', longitude: 'longitude',
googlePrimaryType: 'googlePrimaryType',
googleTypes: 'googleTypes',
createdAt: 'createdAt', createdAt: 'createdAt',
updatedAt: 'updatedAt' updatedAt: 'updatedAt'
}; };
@@ -138,6 +140,7 @@ exports.Prisma.UserScalarFieldEnum = {
lastName: 'lastName', lastName: 'lastName',
photoUrl: 'photoUrl', photoUrl: 'photoUrl',
languageCode: 'languageCode', languageCode: 'languageCode',
isAdmin: 'isAdmin',
createdAt: 'createdAt', createdAt: 'createdAt',
updatedAt: 'updatedAt' updatedAt: 'updatedAt'
}; };
@@ -148,6 +151,9 @@ exports.Prisma.VoiceExperienceScalarFieldEnum = {
userId: 'userId', userId: 'userId',
durationSeconds: 'durationSeconds', durationSeconds: 'durationSeconds',
audioObjectKey: 'audioObjectKey', audioObjectKey: 'audioObjectKey',
audioContentBase64: 'audioContentBase64',
audioMimeType: 'audioMimeType',
audioAccessToken: 'audioAccessToken',
status: 'status', status: 'status',
transcript: 'transcript', transcript: 'transcript',
analysis: 'analysis', analysis: 'analysis',

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,5 @@
{ {
"name": "prisma-client-359590a92aedb32557346cd24ba7dd77e7dd4a806bf36f6641715b1b22de87e3", "name": "prisma-client-404088dc715856cb4ed2e7f2d4b8eb35c87427ce32c0e67bc04d6ff67961fc2b",
"main": "index.js", "main": "index.js",
"types": "index.d.ts", "types": "index.d.ts",
"browser": "default.js", "browser": "default.js",

View File

@@ -17,14 +17,16 @@ enum VoiceExperienceStatus {
} }
model Place { model Place {
id String @id @default(cuid()) id String @id @default(cuid())
googlePlaceId String @unique googlePlaceId String @unique
name String name String
latitude Float latitude Float
longitude Float longitude Float
experiences VoiceExperience[] googlePrimaryType String?
createdAt DateTime @default(now()) googleTypes String[] @default([])
updatedAt DateTime @updatedAt experiences VoiceExperience[]
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
} }
model User { model User {
@@ -35,22 +37,26 @@ model User {
lastName String? lastName String?
photoUrl String? photoUrl String?
languageCode String? languageCode String?
isAdmin Boolean @default(false)
voiceExperiences VoiceExperience[] voiceExperiences VoiceExperience[]
createdAt DateTime @default(now()) createdAt DateTime @default(now())
updatedAt DateTime @updatedAt updatedAt DateTime @updatedAt
} }
model VoiceExperience { model VoiceExperience {
id String @id @default(cuid()) id String @id @default(cuid())
placeId String placeId String
place Place @relation(fields: [placeId], references: [id]) place Place @relation(fields: [placeId], references: [id])
userId String? userId String?
user User? @relation(fields: [userId], references: [id]) user User? @relation(fields: [userId], references: [id])
durationSeconds Int durationSeconds Int
audioObjectKey String audioObjectKey String
status VoiceExperienceStatus @default(UPLOADED) audioContentBase64 String?
transcript String? audioMimeType String?
analysis Json? audioAccessToken String? @unique
createdAt DateTime @default(now()) status VoiceExperienceStatus @default(UPLOADED)
updatedAt DateTime @updatedAt transcript String?
analysis Json?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
} }

View File

@@ -1,6 +1,27 @@
import { buildPlaceAnalysis } from '../ontology/place-ontology.js'; import { config } from '../config.js';
import { placeOntology } from '../ontology/place-ontology.js';
import { prisma } from '../prisma.js'; import { prisma } from '../prisma.js';
/** Assistant message carried by one completion choice. */
type OpenRouterMessage = { content?: string };

/** One entry of the `choices` array in a chat-completions reply. */
type OpenRouterChoice = { message?: OpenRouterMessage };

/**
 * The part of an OpenRouter chat-completions reply that this module reads.
 * Every level is optional, so consumers must use optional chaining to reach
 * the message text.
 */
type OpenRouterResponse = { choices?: OpenRouterChoice[] };
/** One ontology hit together with the transcript evidence backing it. */
type PlaceSignal = {
  axis: string;
  leaf: string;
  evidence: string;
  confidence: number;
};

/**
 * Structured classification of a single voice review: the place it is about,
 * the selected ontology tags, the per-tag supporting signals and a summary.
 */
type PlaceAnalysis = {
  placeName: string;
  tags: string[];
  signals: PlaceSignal[];
  summary: string;
};
export async function analyzeVoiceExperience(experienceId: string) { export async function analyzeVoiceExperience(experienceId: string) {
const experience = await prisma.voiceExperience.findUnique({ const experience = await prisma.voiceExperience.findUnique({
where: { id: experienceId }, where: { id: experienceId },
@@ -20,7 +41,7 @@ export async function analyzeVoiceExperience(experienceId: string) {
data: { status: 'ANALYZING' }, data: { status: 'ANALYZING' },
}); });
const analysis = buildPlaceAnalysis({ const analysis = await buildPlaceAnalysis({
placeName: experience.place.name, placeName: experience.place.name,
transcript: experience.transcript, transcript: experience.transcript,
}); });
@@ -35,3 +56,98 @@ export async function analyzeVoiceExperience(experienceId: string) {
return { tags: analysis.tags }; return { tags: analysis.tags };
} }
/**
 * Asks the configured OpenRouter model to classify a transcript against the
 * place ontology and returns the validated result.
 *
 * The ontology is sent as a list of axes, each with its `axis:leaf` tags and
 * keywords, alongside the place name, the transcript and the expected output
 * shape.
 *
 * @param input.placeName Name of the place the review is about.
 * @param input.transcript Transcribed text of the voice review.
 * @returns The parsed analysis after it has passed `assertValidAnalysis`.
 * @throws Error when the API key is missing, the HTTP call is not OK, the
 *   reply has no message content, the content holds no parseable JSON object,
 *   or validation rejects the parsed value.
 */
async function buildPlaceAnalysis(input: {
  placeName: string;
  transcript: string;
}): Promise<PlaceAnalysis> {
  if (!config.openRouterApiKey) {
    throw new Error('OPENROUTER_API_KEY is required for voice analysis.');
  }

  const ontology = placeOntology.map((axis) => ({
    axis: axis.id,
    options: axis.leaves.map((leaf) => ({
      tag: `${axis.id}:${leaf.id}`,
      keywords: leaf.keywords,
    })),
  }));

  const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
    method: 'POST',
    headers: {
      authorization: `Bearer ${config.openRouterApiKey}`,
      'content-type': 'application/json',
      'http-referer': 'https://map.craftee.vn',
      'x-title': 'MapFlow',
    },
    body: JSON.stringify({
      model: config.openRouterModel,
      temperature: 0.1,
      max_tokens: 900,
      messages: [
        {
          role: 'system',
          content:
            'You classify voice reviews about places. Return only valid JSON. Use only allowed ontology tags.',
        },
        {
          role: 'user',
          content: JSON.stringify({
            task:
              'Analyze the transcript and select place-experience tags that are explicitly supported by the text. Do not invent facts.',
            outputShape: {
              placeName: 'string',
              tags: ['axis:leaf'],
              signals: [
                {
                  axis: 'string',
                  leaf: 'string',
                  evidence: 'short quote or paraphrase from transcript',
                  confidence: 'number from 0 to 1',
                },
              ],
              summary: 'one short sentence',
            },
            placeName: input.placeName,
            ontology,
            transcript: input.transcript,
          }),
        },
      ],
    }),
  });

  if (!response.ok) {
    throw new Error(`OpenRouter analysis failed with ${response.status}.`);
  }

  const payload = (await response.json()) as OpenRouterResponse;
  const content = payload.choices?.[0]?.message?.content;
  if (typeof content !== 'string' || !content) {
    throw new Error('OpenRouter returned an empty analysis.');
  }

  // The cast is only a starting point; assertValidAnalysis does the checking.
  const analysis = parseAnalysisContent(content) as PlaceAnalysis;
  assertValidAnalysis(analysis);
  return analysis;
}

/**
 * Extracts and parses the JSON object embedded in a model reply.
 *
 * Models frequently wrap their JSON in a Markdown code fence or add a
 * sentence around it even when told not to, so only the text between the
 * first `{` and the last `}` is parsed.
 *
 * @throws Error when no object-shaped span exists or the span is not JSON.
 */
function parseAnalysisContent(content: string): unknown {
  const start = content.indexOf('{');
  const end = content.lastIndexOf('}');
  if (start === -1 || end <= start) {
    throw new Error('OpenRouter analysis did not contain a JSON object.');
  }

  try {
    return JSON.parse(content.slice(start, end + 1));
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`OpenRouter analysis is not valid JSON: ${reason}`);
  }
}
/**
 * Runtime check of a model-produced analysis.
 *
 * Verifies that the value is an object, that `tags` is an array containing
 * only `axis:leaf` tags present in `placeOntology`, and that `signals` is an
 * array. Returns nothing on success.
 *
 * @param analysis Candidate analysis; statically typed, but checked as
 *   untrusted data.
 * @throws Error describing the first rule the value violates.
 */
function assertValidAnalysis(analysis: PlaceAnalysis) {
  // Inspect through `unknown`: a parsed JSON document can be null or a scalar,
  // and reading `.tags` on it would crash with a TypeError instead of a
  // validation error.
  const candidate: unknown = analysis;
  if (typeof candidate !== 'object' || candidate === null) {
    throw new Error('OpenRouter analysis must be a JSON object.');
  }

  if (!Array.isArray(analysis.tags)) {
    throw new Error('OpenRouter analysis tags must be an array.');
  }

  const allowedTags = new Set(
    placeOntology.flatMap((axis) =>
      axis.leaves.map((leaf) => `${axis.id}:${leaf.id}`),
    ),
  );

  for (const tag of analysis.tags) {
    if (!allowedTags.has(tag)) {
      throw new Error(`OpenRouter returned unsupported tag: ${tag}.`);
    }
  }

  if (!Array.isArray(analysis.signals)) {
    throw new Error('OpenRouter analysis signals must be an array.');
  }
}

View File

@@ -1,5 +1,19 @@
import { config } from '../config.js';
import { prisma } from '../prisma.js'; import { prisma } from '../prisma.js';
/** Paragraph-formatted rendering of a transcript alternative. */
type DeepgramParagraphs = { transcript?: string };

/** One transcription hypothesis for a channel. */
type DeepgramAlternative = {
  transcript?: string;
  paragraphs?: DeepgramParagraphs;
};

/** One audio channel together with its transcription alternatives. */
type DeepgramChannel = { alternatives?: DeepgramAlternative[] };

/**
 * The part of a Deepgram transcription reply that this module reads.
 * Every level is optional, so consumers must use optional chaining.
 */
type DeepgramResponse = {
  results?: { channels?: DeepgramChannel[] };
};
export async function transcribeVoiceExperience(experienceId: string) { export async function transcribeVoiceExperience(experienceId: string) {
const experience = await prisma.voiceExperience.findUnique({ const experience = await prisma.voiceExperience.findUnique({
where: { id: experienceId }, where: { id: experienceId },
@@ -14,11 +28,13 @@ export async function transcribeVoiceExperience(experienceId: string) {
data: { status: 'TRANSCRIBING' }, data: { status: 'TRANSCRIBING' },
}); });
const transcript = await transcribeAudioObject(experience.audioObjectKey); const transcript = await transcribeAudioObject(experience.id);
await prisma.voiceExperience.update({ await prisma.voiceExperience.update({
where: { id: experienceId }, where: { id: experienceId },
data: { data: {
audioContentBase64: null,
audioAccessToken: null,
transcript, transcript,
status: 'TRANSCRIBED', status: 'TRANSCRIBED',
}, },
@@ -27,7 +43,68 @@ export async function transcribeVoiceExperience(experienceId: string) {
return { transcript }; return { transcript };
} }
/**
 * Transcribes the stored audio of a voice experience with Deepgram.
 *
 * Deepgram is given a URL on the public API (`/audio/voice-experiences/:id`)
 * carrying the experience's access token, rather than the audio bytes
 * themselves.
 *
 * @param experienceId Id of the voice experience whose audio is transcribed.
 * @returns The trimmed transcript, preferring the paragraph-formatted text.
 * @throws Error when the Deepgram API key is missing, the experience does not
 *   exist, it lacks an access token or stored audio content, the Deepgram
 *   call is not OK, or no non-empty transcript comes back.
 */
async function transcribeAudioObject(experienceId: string): Promise<string> {
  if (!config.deepgramApiKey) {
    throw new Error('DEEPGRAM_API_KEY is required for transcription.');
  }

  const experience = await prisma.voiceExperience.findUnique({
    where: { id: experienceId },
    select: {
      id: true,
      audioObjectKey: true,
      audioAccessToken: true,
      audioContentBase64: true,
      audioMimeType: true,
    },
  });

  if (!experience) {
    throw new Error(`Voice experience ${experienceId} was not found.`);
  }

  if (!experience.audioAccessToken) {
    throw new Error(
      `Voice audio ${experience.audioObjectKey} has no access token.`,
    );
  }

  // The content is not sent from here; presumably the audio endpoint serves
  // it, so a missing payload would only fail later on Deepgram's side.
  // NOTE(review): confirm against the API route.
  if (!experience.audioContentBase64 || !experience.audioMimeType) {
    throw new Error(
      `Voice audio ${experience.audioObjectKey} has no stored content.`,
    );
  }

  const params = new URLSearchParams({
    model: config.deepgramModel,
    language: config.deepgramLanguage,
    smart_format: 'true',
    punctuate: 'true',
  });

  // Strip one trailing slash so the joined URL never contains `//audio`.
  const audioUrl = `${config.publicApiUrl.replace(/\/$/, '')}/audio/voice-experiences/${experience.id}?token=${encodeURIComponent(experience.audioAccessToken)}`;

  const response = await fetch(
    `https://api.deepgram.com/v1/listen?${params.toString()}`,
    {
      method: 'POST',
      headers: {
        authorization: `Token ${config.deepgramApiKey}`,
        'content-type': 'application/json',
      },
      body: JSON.stringify({ url: audioUrl }),
    },
  );

  if (!response.ok) {
    throw new Error(`Deepgram transcription failed with ${response.status}.`);
  }

  const payload = (await response.json()) as DeepgramResponse;
  const alternative = payload.results?.channels?.[0]?.alternatives?.[0];

  // `||` instead of `??`: a blank paragraph transcript trims to '' (not
  // nullish) and must still fall through to the plain transcript.
  const transcript =
    alternative?.paragraphs?.transcript?.trim() ||
    alternative?.transcript?.trim() ||
    '';

  if (!transcript) {
    throw new Error('Deepgram returned an empty transcript.');
  }

  return transcript;
}