Add Deepgram and OpenRouter voice processing
Some checks failed
Build and deploy Worker / build (push) Failing after 6m46s
Some checks failed
Build and deploy Worker / build (push) Failing after 6m46s
This commit is contained in:
@@ -7,7 +7,7 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
runs-on: build-host
|
runs-on: builder
|
||||||
env:
|
env:
|
||||||
SERVICE_NAME: worker
|
SERVICE_NAME: worker
|
||||||
IMAGE_SHA: gitea.dsrptlab.com/mapflow/worker:${{ github.sha }}
|
IMAGE_SHA: gitea.dsrptlab.com/mapflow/worker:${{ github.sha }}
|
||||||
@@ -25,12 +25,6 @@ jobs:
|
|||||||
- name: Build and push image
|
- name: Build and push image
|
||||||
run: |
|
run: |
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
builder="builder"
|
|
||||||
if ! docker buildx inspect "$builder" >/dev/null 2>&1; then
|
|
||||||
docker buildx create --name "$builder" --driver docker-container --buildkitd-config /etc/buildkit/buildkitd.toml
|
|
||||||
fi
|
|
||||||
docker buildx use "$builder"
|
|
||||||
docker buildx inspect --bootstrap
|
|
||||||
docker buildx build --push --tag "$IMAGE_SHA" --tag "$IMAGE_LATEST" .
|
docker buildx build --push --tag "$IMAGE_SHA" --tag "$IMAGE_LATEST" .
|
||||||
|
|
||||||
- name: Skip stale deployment
|
- name: Skip stale deployment
|
||||||
@@ -58,8 +52,3 @@ jobs:
|
|||||||
-d "$payload")"
|
-d "$payload")"
|
||||||
cat "$response_file"
|
cat "$response_file"
|
||||||
[ "$status_code" = "200" ]
|
[ "$status_code" = "200" ]
|
||||||
|
|
||||||
- name: Prune shared BuildKit cache
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
docker buildx prune --builder builder --all --max-used-space 40gb -f
|
|
||||||
|
|||||||
@@ -17,14 +17,16 @@ enum VoiceExperienceStatus {
|
|||||||
}
|
}
|
||||||
|
|
||||||
model Place {
|
model Place {
|
||||||
id String @id @default(cuid())
|
id String @id @default(cuid())
|
||||||
googlePlaceId String @unique
|
googlePlaceId String @unique
|
||||||
name String
|
name String
|
||||||
latitude Float
|
latitude Float
|
||||||
longitude Float
|
longitude Float
|
||||||
experiences VoiceExperience[]
|
googlePrimaryType String?
|
||||||
createdAt DateTime @default(now())
|
googleTypes String[] @default([])
|
||||||
updatedAt DateTime @updatedAt
|
experiences VoiceExperience[]
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
updatedAt DateTime @updatedAt
|
||||||
}
|
}
|
||||||
|
|
||||||
model User {
|
model User {
|
||||||
@@ -35,22 +37,26 @@ model User {
|
|||||||
lastName String?
|
lastName String?
|
||||||
photoUrl String?
|
photoUrl String?
|
||||||
languageCode String?
|
languageCode String?
|
||||||
|
isAdmin Boolean @default(false)
|
||||||
voiceExperiences VoiceExperience[]
|
voiceExperiences VoiceExperience[]
|
||||||
createdAt DateTime @default(now())
|
createdAt DateTime @default(now())
|
||||||
updatedAt DateTime @updatedAt
|
updatedAt DateTime @updatedAt
|
||||||
}
|
}
|
||||||
|
|
||||||
model VoiceExperience {
|
model VoiceExperience {
|
||||||
id String @id @default(cuid())
|
id String @id @default(cuid())
|
||||||
placeId String
|
placeId String
|
||||||
place Place @relation(fields: [placeId], references: [id])
|
place Place @relation(fields: [placeId], references: [id])
|
||||||
userId String?
|
userId String?
|
||||||
user User? @relation(fields: [userId], references: [id])
|
user User? @relation(fields: [userId], references: [id])
|
||||||
durationSeconds Int
|
durationSeconds Int
|
||||||
audioObjectKey String
|
audioObjectKey String
|
||||||
status VoiceExperienceStatus @default(UPLOADED)
|
audioContentBase64 String?
|
||||||
transcript String?
|
audioMimeType String?
|
||||||
analysis Json?
|
audioAccessToken String? @unique
|
||||||
createdAt DateTime @default(now())
|
status VoiceExperienceStatus @default(UPLOADED)
|
||||||
updatedAt DateTime @updatedAt
|
transcript String?
|
||||||
|
analysis Json?
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
updatedAt DateTime @updatedAt
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,4 +8,11 @@ export const config = {
|
|||||||
databaseUrl: process.env.DATABASE_URL ?? '',
|
databaseUrl: process.env.DATABASE_URL ?? '',
|
||||||
workerName: process.env.HATCHET_WORKER_NAME ?? 'mapflow-hatchet-worker',
|
workerName: process.env.HATCHET_WORKER_NAME ?? 'mapflow-hatchet-worker',
|
||||||
workerSlots: Number.parseInt(process.env.HATCHET_WORKER_SLOTS ?? '4', 10),
|
workerSlots: Number.parseInt(process.env.HATCHET_WORKER_SLOTS ?? '4', 10),
|
||||||
|
publicApiUrl: process.env.PUBLIC_API_URL ?? 'https://api.map.craftee.vn',
|
||||||
|
deepgramApiKey: process.env.DEEPGRAM_API_KEY ?? '',
|
||||||
|
deepgramModel: process.env.DEEPGRAM_MODEL ?? 'nova-3',
|
||||||
|
deepgramLanguage: process.env.DEEPGRAM_LANGUAGE ?? 'ru',
|
||||||
|
openRouterApiKey: process.env.OPENROUTER_API_KEY ?? '',
|
||||||
|
openRouterModel:
|
||||||
|
process.env.OPENROUTER_MODEL ?? 'minimax/minimax-m2.5:free',
|
||||||
};
|
};
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -126,6 +126,8 @@ exports.Prisma.PlaceScalarFieldEnum = {
|
|||||||
name: 'name',
|
name: 'name',
|
||||||
latitude: 'latitude',
|
latitude: 'latitude',
|
||||||
longitude: 'longitude',
|
longitude: 'longitude',
|
||||||
|
googlePrimaryType: 'googlePrimaryType',
|
||||||
|
googleTypes: 'googleTypes',
|
||||||
createdAt: 'createdAt',
|
createdAt: 'createdAt',
|
||||||
updatedAt: 'updatedAt'
|
updatedAt: 'updatedAt'
|
||||||
};
|
};
|
||||||
@@ -138,6 +140,7 @@ exports.Prisma.UserScalarFieldEnum = {
|
|||||||
lastName: 'lastName',
|
lastName: 'lastName',
|
||||||
photoUrl: 'photoUrl',
|
photoUrl: 'photoUrl',
|
||||||
languageCode: 'languageCode',
|
languageCode: 'languageCode',
|
||||||
|
isAdmin: 'isAdmin',
|
||||||
createdAt: 'createdAt',
|
createdAt: 'createdAt',
|
||||||
updatedAt: 'updatedAt'
|
updatedAt: 'updatedAt'
|
||||||
};
|
};
|
||||||
@@ -148,6 +151,9 @@ exports.Prisma.VoiceExperienceScalarFieldEnum = {
|
|||||||
userId: 'userId',
|
userId: 'userId',
|
||||||
durationSeconds: 'durationSeconds',
|
durationSeconds: 'durationSeconds',
|
||||||
audioObjectKey: 'audioObjectKey',
|
audioObjectKey: 'audioObjectKey',
|
||||||
|
audioContentBase64: 'audioContentBase64',
|
||||||
|
audioMimeType: 'audioMimeType',
|
||||||
|
audioAccessToken: 'audioAccessToken',
|
||||||
status: 'status',
|
status: 'status',
|
||||||
transcript: 'transcript',
|
transcript: 'transcript',
|
||||||
analysis: 'analysis',
|
analysis: 'analysis',
|
||||||
|
|||||||
481
src/generated/prisma/index.d.ts
vendored
481
src/generated/prisma/index.d.ts
vendored
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"name": "prisma-client-359590a92aedb32557346cd24ba7dd77e7dd4a806bf36f6641715b1b22de87e3",
|
"name": "prisma-client-404088dc715856cb4ed2e7f2d4b8eb35c87427ce32c0e67bc04d6ff67961fc2b",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"types": "index.d.ts",
|
"types": "index.d.ts",
|
||||||
"browser": "default.js",
|
"browser": "default.js",
|
||||||
|
|||||||
@@ -17,14 +17,16 @@ enum VoiceExperienceStatus {
|
|||||||
}
|
}
|
||||||
|
|
||||||
model Place {
|
model Place {
|
||||||
id String @id @default(cuid())
|
id String @id @default(cuid())
|
||||||
googlePlaceId String @unique
|
googlePlaceId String @unique
|
||||||
name String
|
name String
|
||||||
latitude Float
|
latitude Float
|
||||||
longitude Float
|
longitude Float
|
||||||
experiences VoiceExperience[]
|
googlePrimaryType String?
|
||||||
createdAt DateTime @default(now())
|
googleTypes String[] @default([])
|
||||||
updatedAt DateTime @updatedAt
|
experiences VoiceExperience[]
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
updatedAt DateTime @updatedAt
|
||||||
}
|
}
|
||||||
|
|
||||||
model User {
|
model User {
|
||||||
@@ -35,22 +37,26 @@ model User {
|
|||||||
lastName String?
|
lastName String?
|
||||||
photoUrl String?
|
photoUrl String?
|
||||||
languageCode String?
|
languageCode String?
|
||||||
|
isAdmin Boolean @default(false)
|
||||||
voiceExperiences VoiceExperience[]
|
voiceExperiences VoiceExperience[]
|
||||||
createdAt DateTime @default(now())
|
createdAt DateTime @default(now())
|
||||||
updatedAt DateTime @updatedAt
|
updatedAt DateTime @updatedAt
|
||||||
}
|
}
|
||||||
|
|
||||||
model VoiceExperience {
|
model VoiceExperience {
|
||||||
id String @id @default(cuid())
|
id String @id @default(cuid())
|
||||||
placeId String
|
placeId String
|
||||||
place Place @relation(fields: [placeId], references: [id])
|
place Place @relation(fields: [placeId], references: [id])
|
||||||
userId String?
|
userId String?
|
||||||
user User? @relation(fields: [userId], references: [id])
|
user User? @relation(fields: [userId], references: [id])
|
||||||
durationSeconds Int
|
durationSeconds Int
|
||||||
audioObjectKey String
|
audioObjectKey String
|
||||||
status VoiceExperienceStatus @default(UPLOADED)
|
audioContentBase64 String?
|
||||||
transcript String?
|
audioMimeType String?
|
||||||
analysis Json?
|
audioAccessToken String? @unique
|
||||||
createdAt DateTime @default(now())
|
status VoiceExperienceStatus @default(UPLOADED)
|
||||||
updatedAt DateTime @updatedAt
|
transcript String?
|
||||||
|
analysis Json?
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
updatedAt DateTime @updatedAt
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,27 @@
|
|||||||
import { buildPlaceAnalysis } from '../ontology/place-ontology.js';
|
import { config } from '../config.js';
|
||||||
|
import { placeOntology } from '../ontology/place-ontology.js';
|
||||||
import { prisma } from '../prisma.js';
|
import { prisma } from '../prisma.js';
|
||||||
|
|
||||||
|
type OpenRouterResponse = {
|
||||||
|
choices?: Array<{
|
||||||
|
message?: {
|
||||||
|
content?: string;
|
||||||
|
};
|
||||||
|
}>;
|
||||||
|
};
|
||||||
|
|
||||||
|
type PlaceAnalysis = {
|
||||||
|
placeName: string;
|
||||||
|
tags: string[];
|
||||||
|
signals: Array<{
|
||||||
|
axis: string;
|
||||||
|
leaf: string;
|
||||||
|
evidence: string;
|
||||||
|
confidence: number;
|
||||||
|
}>;
|
||||||
|
summary: string;
|
||||||
|
};
|
||||||
|
|
||||||
export async function analyzeVoiceExperience(experienceId: string) {
|
export async function analyzeVoiceExperience(experienceId: string) {
|
||||||
const experience = await prisma.voiceExperience.findUnique({
|
const experience = await prisma.voiceExperience.findUnique({
|
||||||
where: { id: experienceId },
|
where: { id: experienceId },
|
||||||
@@ -20,7 +41,7 @@ export async function analyzeVoiceExperience(experienceId: string) {
|
|||||||
data: { status: 'ANALYZING' },
|
data: { status: 'ANALYZING' },
|
||||||
});
|
});
|
||||||
|
|
||||||
const analysis = buildPlaceAnalysis({
|
const analysis = await buildPlaceAnalysis({
|
||||||
placeName: experience.place.name,
|
placeName: experience.place.name,
|
||||||
transcript: experience.transcript,
|
transcript: experience.transcript,
|
||||||
});
|
});
|
||||||
@@ -35,3 +56,98 @@ export async function analyzeVoiceExperience(experienceId: string) {
|
|||||||
|
|
||||||
return { tags: analysis.tags };
|
return { tags: analysis.tags };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function buildPlaceAnalysis(input: {
|
||||||
|
placeName: string;
|
||||||
|
transcript: string;
|
||||||
|
}): Promise<PlaceAnalysis> {
|
||||||
|
if (!config.openRouterApiKey) {
|
||||||
|
throw new Error('OPENROUTER_API_KEY is required for voice analysis.');
|
||||||
|
}
|
||||||
|
|
||||||
|
const ontology = placeOntology.map((axis) => ({
|
||||||
|
axis: axis.id,
|
||||||
|
options: axis.leaves.map((leaf) => ({
|
||||||
|
tag: `${axis.id}:${leaf.id}`,
|
||||||
|
keywords: leaf.keywords,
|
||||||
|
})),
|
||||||
|
}));
|
||||||
|
const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
authorization: `Bearer ${config.openRouterApiKey}`,
|
||||||
|
'content-type': 'application/json',
|
||||||
|
'http-referer': 'https://map.craftee.vn',
|
||||||
|
'x-title': 'MapFlow',
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: config.openRouterModel,
|
||||||
|
temperature: 0.1,
|
||||||
|
max_tokens: 900,
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
role: 'system',
|
||||||
|
content:
|
||||||
|
'You classify voice reviews about places. Return only valid JSON. Use only allowed ontology tags.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: JSON.stringify({
|
||||||
|
task:
|
||||||
|
'Analyze the transcript and select place-experience tags that are explicitly supported by the text. Do not invent facts.',
|
||||||
|
outputShape: {
|
||||||
|
placeName: 'string',
|
||||||
|
tags: ['axis:leaf'],
|
||||||
|
signals: [
|
||||||
|
{
|
||||||
|
axis: 'string',
|
||||||
|
leaf: 'string',
|
||||||
|
evidence: 'short quote or paraphrase from transcript',
|
||||||
|
confidence: 'number from 0 to 1',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
summary: 'one short sentence',
|
||||||
|
},
|
||||||
|
placeName: input.placeName,
|
||||||
|
ontology,
|
||||||
|
transcript: input.transcript,
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new Error(`OpenRouter analysis failed with ${response.status}.`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const payload = (await response.json()) as OpenRouterResponse;
|
||||||
|
const content = payload.choices?.[0]?.message?.content;
|
||||||
|
if (!content) {
|
||||||
|
throw new Error('OpenRouter returned an empty analysis.');
|
||||||
|
}
|
||||||
|
|
||||||
|
const analysis = JSON.parse(content) as PlaceAnalysis;
|
||||||
|
assertValidAnalysis(analysis);
|
||||||
|
return analysis;
|
||||||
|
}
|
||||||
|
|
||||||
|
function assertValidAnalysis(analysis: PlaceAnalysis) {
|
||||||
|
const allowedTags = new Set(
|
||||||
|
placeOntology.flatMap((axis) =>
|
||||||
|
axis.leaves.map((leaf) => `${axis.id}:${leaf.id}`),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!Array.isArray(analysis.tags)) {
|
||||||
|
throw new Error('OpenRouter analysis tags must be an array.');
|
||||||
|
}
|
||||||
|
for (const tag of analysis.tags) {
|
||||||
|
if (!allowedTags.has(tag)) {
|
||||||
|
throw new Error(`OpenRouter returned unsupported tag: ${tag}.`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!Array.isArray(analysis.signals)) {
|
||||||
|
throw new Error('OpenRouter analysis signals must be an array.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,5 +1,19 @@
|
|||||||
|
import { config } from '../config.js';
|
||||||
import { prisma } from '../prisma.js';
|
import { prisma } from '../prisma.js';
|
||||||
|
|
||||||
|
type DeepgramResponse = {
|
||||||
|
results?: {
|
||||||
|
channels?: Array<{
|
||||||
|
alternatives?: Array<{
|
||||||
|
transcript?: string;
|
||||||
|
paragraphs?: {
|
||||||
|
transcript?: string;
|
||||||
|
};
|
||||||
|
}>;
|
||||||
|
}>;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
export async function transcribeVoiceExperience(experienceId: string) {
|
export async function transcribeVoiceExperience(experienceId: string) {
|
||||||
const experience = await prisma.voiceExperience.findUnique({
|
const experience = await prisma.voiceExperience.findUnique({
|
||||||
where: { id: experienceId },
|
where: { id: experienceId },
|
||||||
@@ -14,11 +28,13 @@ export async function transcribeVoiceExperience(experienceId: string) {
|
|||||||
data: { status: 'TRANSCRIBING' },
|
data: { status: 'TRANSCRIBING' },
|
||||||
});
|
});
|
||||||
|
|
||||||
const transcript = await transcribeAudioObject(experience.audioObjectKey);
|
const transcript = await transcribeAudioObject(experience.id);
|
||||||
|
|
||||||
await prisma.voiceExperience.update({
|
await prisma.voiceExperience.update({
|
||||||
where: { id: experienceId },
|
where: { id: experienceId },
|
||||||
data: {
|
data: {
|
||||||
|
audioContentBase64: null,
|
||||||
|
audioAccessToken: null,
|
||||||
transcript,
|
transcript,
|
||||||
status: 'TRANSCRIBED',
|
status: 'TRANSCRIBED',
|
||||||
},
|
},
|
||||||
@@ -27,7 +43,68 @@ export async function transcribeVoiceExperience(experienceId: string) {
|
|||||||
return { transcript };
|
return { transcript };
|
||||||
}
|
}
|
||||||
|
|
||||||
async function transcribeAudioObject(audioObjectKey: string): Promise<string> {
|
async function transcribeAudioObject(experienceId: string): Promise<string> {
|
||||||
// TODO: replace this adapter with the production speech-to-text provider.
|
if (!config.deepgramApiKey) {
|
||||||
return `Transcription placeholder for ${audioObjectKey}`;
|
throw new Error('DEEPGRAM_API_KEY is required for transcription.');
|
||||||
|
}
|
||||||
|
|
||||||
|
const experience = await prisma.voiceExperience.findUnique({
|
||||||
|
where: { id: experienceId },
|
||||||
|
select: {
|
||||||
|
id: true,
|
||||||
|
audioObjectKey: true,
|
||||||
|
audioAccessToken: true,
|
||||||
|
audioContentBase64: true,
|
||||||
|
audioMimeType: true,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!experience) {
|
||||||
|
throw new Error(`Voice experience ${experienceId} was not found.`);
|
||||||
|
}
|
||||||
|
if (!experience.audioAccessToken) {
|
||||||
|
throw new Error(
|
||||||
|
`Voice audio ${experience.audioObjectKey} has no access token.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (!experience.audioContentBase64 || !experience.audioMimeType) {
|
||||||
|
throw new Error(
|
||||||
|
`Voice audio ${experience.audioObjectKey} has no stored content.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const params = new URLSearchParams({
|
||||||
|
model: config.deepgramModel,
|
||||||
|
language: config.deepgramLanguage,
|
||||||
|
smart_format: 'true',
|
||||||
|
punctuate: 'true',
|
||||||
|
});
|
||||||
|
const audioUrl = `${config.publicApiUrl.replace(/\/$/, '')}/audio/voice-experiences/${experience.id}?token=${encodeURIComponent(experience.audioAccessToken)}`;
|
||||||
|
const response = await fetch(
|
||||||
|
`https://api.deepgram.com/v1/listen?${params.toString()}`,
|
||||||
|
{
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
authorization: `Token ${config.deepgramApiKey}`,
|
||||||
|
'content-type': 'application/json',
|
||||||
|
},
|
||||||
|
body: JSON.stringify({ url: audioUrl }),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new Error(`Deepgram transcription failed with ${response.status}.`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const payload = (await response.json()) as DeepgramResponse;
|
||||||
|
const alternative = payload.results?.channels?.[0]?.alternatives?.[0];
|
||||||
|
const transcript =
|
||||||
|
alternative?.paragraphs?.transcript?.trim() ??
|
||||||
|
alternative?.transcript?.trim() ??
|
||||||
|
'';
|
||||||
|
if (!transcript) {
|
||||||
|
throw new Error('Deepgram returned an empty transcript.');
|
||||||
|
}
|
||||||
|
|
||||||
|
return transcript;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user