Add Deepgram and OpenRouter voice processing
Some checks failed
Build and deploy Worker / build (push) Failing after 6m46s
Some checks failed
Build and deploy Worker / build (push) Failing after 6m46s
This commit is contained in:
@@ -7,7 +7,7 @@ on:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: build-host
|
||||
runs-on: builder
|
||||
env:
|
||||
SERVICE_NAME: worker
|
||||
IMAGE_SHA: gitea.dsrptlab.com/mapflow/worker:${{ github.sha }}
|
||||
@@ -25,12 +25,6 @@ jobs:
|
||||
- name: Build and push image
|
||||
run: |
|
||||
set -euo pipefail
|
||||
builder="builder"
|
||||
if ! docker buildx inspect "$builder" >/dev/null 2>&1; then
|
||||
docker buildx create --name "$builder" --driver docker-container --buildkitd-config /etc/buildkit/buildkitd.toml
|
||||
fi
|
||||
docker buildx use "$builder"
|
||||
docker buildx inspect --bootstrap
|
||||
docker buildx build --push --tag "$IMAGE_SHA" --tag "$IMAGE_LATEST" .
|
||||
|
||||
- name: Skip stale deployment
|
||||
@@ -58,8 +52,3 @@ jobs:
|
||||
-d "$payload")"
|
||||
cat "$response_file"
|
||||
[ "$status_code" = "200" ]
|
||||
|
||||
- name: Prune shared BuildKit cache
|
||||
run: |
|
||||
set -euo pipefail
|
||||
docker buildx prune --builder builder --all --max-used-space 40gb -f
|
||||
|
||||
@@ -17,14 +17,16 @@ enum VoiceExperienceStatus {
|
||||
}
|
||||
|
||||
model Place {
|
||||
id String @id @default(cuid())
|
||||
googlePlaceId String @unique
|
||||
name String
|
||||
latitude Float
|
||||
longitude Float
|
||||
experiences VoiceExperience[]
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
id String @id @default(cuid())
|
||||
googlePlaceId String @unique
|
||||
name String
|
||||
latitude Float
|
||||
longitude Float
|
||||
googlePrimaryType String?
|
||||
googleTypes String[] @default([])
|
||||
experiences VoiceExperience[]
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
}
|
||||
|
||||
model User {
|
||||
@@ -35,22 +37,26 @@ model User {
|
||||
lastName String?
|
||||
photoUrl String?
|
||||
languageCode String?
|
||||
isAdmin Boolean @default(false)
|
||||
voiceExperiences VoiceExperience[]
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
}
|
||||
|
||||
model VoiceExperience {
|
||||
id String @id @default(cuid())
|
||||
placeId String
|
||||
place Place @relation(fields: [placeId], references: [id])
|
||||
userId String?
|
||||
user User? @relation(fields: [userId], references: [id])
|
||||
durationSeconds Int
|
||||
audioObjectKey String
|
||||
status VoiceExperienceStatus @default(UPLOADED)
|
||||
transcript String?
|
||||
analysis Json?
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
id String @id @default(cuid())
|
||||
placeId String
|
||||
place Place @relation(fields: [placeId], references: [id])
|
||||
userId String?
|
||||
user User? @relation(fields: [userId], references: [id])
|
||||
durationSeconds Int
|
||||
audioObjectKey String
|
||||
audioContentBase64 String?
|
||||
audioMimeType String?
|
||||
audioAccessToken String? @unique
|
||||
status VoiceExperienceStatus @default(UPLOADED)
|
||||
transcript String?
|
||||
analysis Json?
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
}
|
||||
|
||||
@@ -8,4 +8,11 @@ export const config = {
|
||||
databaseUrl: process.env.DATABASE_URL ?? '',
|
||||
workerName: process.env.HATCHET_WORKER_NAME ?? 'mapflow-hatchet-worker',
|
||||
workerSlots: Number.parseInt(process.env.HATCHET_WORKER_SLOTS ?? '4', 10),
|
||||
publicApiUrl: process.env.PUBLIC_API_URL ?? 'https://api.map.craftee.vn',
|
||||
deepgramApiKey: process.env.DEEPGRAM_API_KEY ?? '',
|
||||
deepgramModel: process.env.DEEPGRAM_MODEL ?? 'nova-3',
|
||||
deepgramLanguage: process.env.DEEPGRAM_LANGUAGE ?? 'ru',
|
||||
openRouterApiKey: process.env.OPENROUTER_API_KEY ?? '',
|
||||
openRouterModel:
|
||||
process.env.OPENROUTER_MODEL ?? 'minimax/minimax-m2.5:free',
|
||||
};
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -126,6 +126,8 @@ exports.Prisma.PlaceScalarFieldEnum = {
|
||||
name: 'name',
|
||||
latitude: 'latitude',
|
||||
longitude: 'longitude',
|
||||
googlePrimaryType: 'googlePrimaryType',
|
||||
googleTypes: 'googleTypes',
|
||||
createdAt: 'createdAt',
|
||||
updatedAt: 'updatedAt'
|
||||
};
|
||||
@@ -138,6 +140,7 @@ exports.Prisma.UserScalarFieldEnum = {
|
||||
lastName: 'lastName',
|
||||
photoUrl: 'photoUrl',
|
||||
languageCode: 'languageCode',
|
||||
isAdmin: 'isAdmin',
|
||||
createdAt: 'createdAt',
|
||||
updatedAt: 'updatedAt'
|
||||
};
|
||||
@@ -148,6 +151,9 @@ exports.Prisma.VoiceExperienceScalarFieldEnum = {
|
||||
userId: 'userId',
|
||||
durationSeconds: 'durationSeconds',
|
||||
audioObjectKey: 'audioObjectKey',
|
||||
audioContentBase64: 'audioContentBase64',
|
||||
audioMimeType: 'audioMimeType',
|
||||
audioAccessToken: 'audioAccessToken',
|
||||
status: 'status',
|
||||
transcript: 'transcript',
|
||||
analysis: 'analysis',
|
||||
|
||||
481
src/generated/prisma/index.d.ts
vendored
481
src/generated/prisma/index.d.ts
vendored
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"name": "prisma-client-359590a92aedb32557346cd24ba7dd77e7dd4a806bf36f6641715b1b22de87e3",
|
||||
"name": "prisma-client-404088dc715856cb4ed2e7f2d4b8eb35c87427ce32c0e67bc04d6ff67961fc2b",
|
||||
"main": "index.js",
|
||||
"types": "index.d.ts",
|
||||
"browser": "default.js",
|
||||
|
||||
@@ -17,14 +17,16 @@ enum VoiceExperienceStatus {
|
||||
}
|
||||
|
||||
model Place {
|
||||
id String @id @default(cuid())
|
||||
googlePlaceId String @unique
|
||||
name String
|
||||
latitude Float
|
||||
longitude Float
|
||||
experiences VoiceExperience[]
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
id String @id @default(cuid())
|
||||
googlePlaceId String @unique
|
||||
name String
|
||||
latitude Float
|
||||
longitude Float
|
||||
googlePrimaryType String?
|
||||
googleTypes String[] @default([])
|
||||
experiences VoiceExperience[]
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
}
|
||||
|
||||
model User {
|
||||
@@ -35,22 +37,26 @@ model User {
|
||||
lastName String?
|
||||
photoUrl String?
|
||||
languageCode String?
|
||||
isAdmin Boolean @default(false)
|
||||
voiceExperiences VoiceExperience[]
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
}
|
||||
|
||||
model VoiceExperience {
|
||||
id String @id @default(cuid())
|
||||
placeId String
|
||||
place Place @relation(fields: [placeId], references: [id])
|
||||
userId String?
|
||||
user User? @relation(fields: [userId], references: [id])
|
||||
durationSeconds Int
|
||||
audioObjectKey String
|
||||
status VoiceExperienceStatus @default(UPLOADED)
|
||||
transcript String?
|
||||
analysis Json?
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
id String @id @default(cuid())
|
||||
placeId String
|
||||
place Place @relation(fields: [placeId], references: [id])
|
||||
userId String?
|
||||
user User? @relation(fields: [userId], references: [id])
|
||||
durationSeconds Int
|
||||
audioObjectKey String
|
||||
audioContentBase64 String?
|
||||
audioMimeType String?
|
||||
audioAccessToken String? @unique
|
||||
status VoiceExperienceStatus @default(UPLOADED)
|
||||
transcript String?
|
||||
analysis Json?
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
}
|
||||
|
||||
@@ -1,6 +1,27 @@
|
||||
import { buildPlaceAnalysis } from '../ontology/place-ontology.js';
|
||||
import { config } from '../config.js';
|
||||
import { placeOntology } from '../ontology/place-ontology.js';
|
||||
import { prisma } from '../prisma.js';
|
||||
|
||||
/**
 * Subset of the OpenRouter chat-completions response body that this module
 * reads. Every level is optional because the payload comes from an external
 * service and is only type-cast, not validated, when parsed.
 */
type OpenRouterResponse = {
  choices?: {
    message?: {
      // Raw text produced by the model; expected to contain the JSON analysis.
      content?: string;
    };
  }[];
};
|
||||
|
||||
/**
 * Structured result of classifying a voice-review transcript.
 *
 * Kept as a `type` alias (not an interface) so the object stays assignable to
 * index-signature based JSON types.
 */
type PlaceAnalysis = {
  placeName: string;
  // Ontology tags in `axis:leaf` form.
  tags: string[];
  signals: {
    axis: string;
    leaf: string;
    // Short quote or paraphrase from the transcript backing the signal.
    evidence: string;
    // The model is asked for a number from 0 to 1.
    confidence: number;
  }[];
  // The model is asked for one short sentence.
  summary: string;
};
|
||||
|
||||
export async function analyzeVoiceExperience(experienceId: string) {
|
||||
const experience = await prisma.voiceExperience.findUnique({
|
||||
where: { id: experienceId },
|
||||
@@ -20,7 +41,7 @@ export async function analyzeVoiceExperience(experienceId: string) {
|
||||
data: { status: 'ANALYZING' },
|
||||
});
|
||||
|
||||
const analysis = buildPlaceAnalysis({
|
||||
const analysis = await buildPlaceAnalysis({
|
||||
placeName: experience.place.name,
|
||||
transcript: experience.transcript,
|
||||
});
|
||||
@@ -35,3 +56,98 @@ export async function analyzeVoiceExperience(experienceId: string) {
|
||||
|
||||
return { tags: analysis.tags };
|
||||
}
|
||||
|
||||
/**
 * Classifies a transcript against the place ontology via OpenRouter.
 *
 * Sends the ontology (axis ids, `axis:leaf` tags and their keywords), the place
 * name and the transcript to the configured chat-completions model and expects
 * a JSON document shaped like `PlaceAnalysis` in return.
 *
 * @param input.placeName  Name of the place the review is about.
 * @param input.transcript Transcribed review text to classify.
 * @returns The parsed and validated analysis.
 * @throws Error when the API key is missing, the HTTP call fails, the model
 *         returns no content, the content is not a JSON object, or the
 *         analysis fails `assertValidAnalysis`.
 */
async function buildPlaceAnalysis(input: {
  placeName: string;
  transcript: string;
}): Promise<PlaceAnalysis> {
  if (!config.openRouterApiKey) {
    throw new Error('OPENROUTER_API_KEY is required for voice analysis.');
  }

  // Flatten the ontology into the exact tag strings the model may answer with.
  const ontology = placeOntology.map((axis) => ({
    axis: axis.id,
    options: axis.leaves.map((leaf) => ({
      tag: `${axis.id}:${leaf.id}`,
      keywords: leaf.keywords,
    })),
  }));

  const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
    method: 'POST',
    headers: {
      authorization: `Bearer ${config.openRouterApiKey}`,
      'content-type': 'application/json',
      'http-referer': 'https://map.craftee.vn',
      'x-title': 'MapFlow',
    },
    body: JSON.stringify({
      model: config.openRouterModel,
      temperature: 0.1,
      max_tokens: 900,
      messages: [
        {
          role: 'system',
          content:
            'You classify voice reviews about places. Return only valid JSON. Use only allowed ontology tags.',
        },
        {
          role: 'user',
          content: JSON.stringify({
            task:
              'Analyze the transcript and select place-experience tags that are explicitly supported by the text. Do not invent facts.',
            outputShape: {
              placeName: 'string',
              tags: ['axis:leaf'],
              signals: [
                {
                  axis: 'string',
                  leaf: 'string',
                  evidence: 'short quote or paraphrase from transcript',
                  confidence: 'number from 0 to 1',
                },
              ],
              summary: 'one short sentence',
            },
            placeName: input.placeName,
            ontology,
            transcript: input.transcript,
          }),
        },
      ],
    }),
  });

  if (!response.ok) {
    throw new Error(`OpenRouter analysis failed with ${response.status}.`);
  }

  const payload = (await response.json()) as OpenRouterResponse;
  const content = payload.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error('OpenRouter returned an empty analysis.');
  }

  // Models frequently wrap their JSON in a Markdown code fence even when told
  // to return JSON only; unwrap it so a correct answer is not rejected.
  const trimmed = content.trim();
  const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
  const jsonText = fenced?.[1] ?? trimmed;

  let analysis: PlaceAnalysis;
  try {
    analysis = JSON.parse(jsonText) as PlaceAnalysis;
  } catch (error) {
    // Surface a domain-level message instead of a bare SyntaxError.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`OpenRouter returned invalid JSON: ${reason}`);
  }

  // JSON.parse happily returns null, numbers or strings; reject them here so
  // the validator below never dereferences a non-object.
  if (analysis === null || typeof analysis !== 'object') {
    throw new Error('OpenRouter analysis must be a JSON object.');
  }

  assertValidAnalysis(analysis);
  return analysis;
}
|
||||
|
||||
/**
 * Validates the structure of a model-produced analysis.
 *
 * Checks that the value is an object, that `tags` is an array containing only
 * `axis:leaf` tags present in the place ontology, and that `signals` is an
 * array of objects. Other fields are not inspected.
 *
 * @param analysis Parsed model output, type-cast but otherwise untrusted.
 * @throws Error describing the first violation found.
 */
function assertValidAnalysis(analysis: PlaceAnalysis) {
  // The value originates from JSON.parse on external text, so it may be null
  // or a primitive despite its static type.
  if (analysis === null || typeof analysis !== 'object') {
    throw new Error('OpenRouter analysis must be a JSON object.');
  }

  const allowedTags = new Set(
    placeOntology.flatMap((axis) =>
      axis.leaves.map((leaf) => `${axis.id}:${leaf.id}`),
    ),
  );

  if (!Array.isArray(analysis.tags)) {
    throw new Error('OpenRouter analysis tags must be an array.');
  }
  for (const tag of analysis.tags) {
    if (!allowedTags.has(tag)) {
      throw new Error(`OpenRouter returned unsupported tag: ${tag}.`);
    }
  }

  if (!Array.isArray(analysis.signals)) {
    throw new Error('OpenRouter analysis signals must be an array.');
  }
  for (const signal of analysis.signals) {
    // Guard against entries such as null or bare strings.
    if (signal === null || typeof signal !== 'object') {
      throw new Error('OpenRouter analysis signals must contain objects.');
    }
  }
}
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
import { config } from '../config.js';
|
||||
import { prisma } from '../prisma.js';
|
||||
|
||||
/**
 * Subset of the Deepgram `/v1/listen` response body that this module reads.
 * Every level is optional because the payload is type-cast, not validated.
 */
type DeepgramResponse = {
  results?: {
    channels?: {
      alternatives?: {
        // Plain transcript text for the alternative.
        transcript?: string;
        paragraphs?: {
          // Transcript as reported under `paragraphs`; preferred when present.
          transcript?: string;
        };
      }[];
    }[];
  };
};
|
||||
|
||||
export async function transcribeVoiceExperience(experienceId: string) {
|
||||
const experience = await prisma.voiceExperience.findUnique({
|
||||
where: { id: experienceId },
|
||||
@@ -14,11 +28,13 @@ export async function transcribeVoiceExperience(experienceId: string) {
|
||||
data: { status: 'TRANSCRIBING' },
|
||||
});
|
||||
|
||||
const transcript = await transcribeAudioObject(experience.audioObjectKey);
|
||||
const transcript = await transcribeAudioObject(experience.id);
|
||||
|
||||
await prisma.voiceExperience.update({
|
||||
where: { id: experienceId },
|
||||
data: {
|
||||
audioContentBase64: null,
|
||||
audioAccessToken: null,
|
||||
transcript,
|
||||
status: 'TRANSCRIBED',
|
||||
},
|
||||
@@ -27,7 +43,68 @@ export async function transcribeVoiceExperience(experienceId: string) {
|
||||
return { transcript };
|
||||
}
|
||||
|
||||
async function transcribeAudioObject(audioObjectKey: string): Promise<string> {
|
||||
// TODO: replace this adapter with the production speech-to-text provider.
|
||||
return `Transcription placeholder for ${audioObjectKey}`;
|
||||
/**
 * Transcribes the audio of a voice experience with Deepgram.
 *
 * The audio bytes are not uploaded from here: Deepgram is given a URL on the
 * public API (`/audio/voice-experiences/:id?token=...`) and fetches the audio
 * itself, so the record must carry an access token as well as stored content.
 *
 * @param experienceId Id of the `VoiceExperience` row to transcribe.
 * @returns The non-empty, trimmed transcript text.
 * @throws Error when the API key is missing, the record or its audio data is
 *         missing, the HTTP call fails, or Deepgram yields no transcript.
 */
async function transcribeAudioObject(experienceId: string): Promise<string> {
  if (!config.deepgramApiKey) {
    throw new Error('DEEPGRAM_API_KEY is required for transcription.');
  }

  const experience = await prisma.voiceExperience.findUnique({
    where: { id: experienceId },
    select: {
      id: true,
      audioObjectKey: true,
      audioAccessToken: true,
      audioContentBase64: true,
      audioMimeType: true,
    },
  });

  if (!experience) {
    throw new Error(`Voice experience ${experienceId} was not found.`);
  }
  if (!experience.audioAccessToken) {
    throw new Error(
      `Voice audio ${experience.audioObjectKey} has no access token.`,
    );
  }
  if (!experience.audioContentBase64 || !experience.audioMimeType) {
    throw new Error(
      `Voice audio ${experience.audioObjectKey} has no stored content.`,
    );
  }

  const params = new URLSearchParams({
    model: config.deepgramModel,
    language: config.deepgramLanguage,
    smart_format: 'true',
    punctuate: 'true',
  });
  // Strip one trailing slash so the joined path never contains `//`.
  const audioUrl = `${config.publicApiUrl.replace(/\/$/, '')}/audio/voice-experiences/${experience.id}?token=${encodeURIComponent(experience.audioAccessToken)}`;
  const response = await fetch(
    `https://api.deepgram.com/v1/listen?${params.toString()}`,
    {
      method: 'POST',
      headers: {
        authorization: `Token ${config.deepgramApiKey}`,
        'content-type': 'application/json',
      },
      body: JSON.stringify({ url: audioUrl }),
    },
  );

  if (!response.ok) {
    throw new Error(`Deepgram transcription failed with ${response.status}.`);
  }

  const payload = (await response.json()) as DeepgramResponse;
  const alternative = payload.results?.channels?.[0]?.alternatives?.[0];
  const paragraphText = alternative?.paragraphs?.transcript?.trim();
  const plainText = alternative?.transcript?.trim();
  // `||` rather than `??`: an empty (or whitespace-only) paragraphs transcript
  // is a string, not nullish, and must still fall back to the plain transcript.
  const transcript = paragraphText || plainText || '';
  if (!transcript) {
    throw new Error('Deepgram returned an empty transcript.');
  }

  return transcript;
}
|
||||
|
||||
Reference in New Issue
Block a user