feat: implement voice memo transcription with local parakeet container and fallback timeouts
This commit is contained in:
@@ -6,6 +6,10 @@ OpenRouterAPIKey=
|
|||||||
# Valid examples: anthropic/claude-3.5-haiku, anthropic/claude-3-haiku, anthropic/claude-haiku-4.5
|
# Valid examples: anthropic/claude-3.5-haiku, anthropic/claude-3-haiku, anthropic/claude-haiku-4.5
|
||||||
# OpenRouterModel=anthropic/claude-3.5-haiku
|
# OpenRouterModel=anthropic/claude-3.5-haiku
|
||||||
|
|
||||||
|
# Speech-to-Text Transcription Service (local Parakeet container endpoint)
|
||||||
|
# Defaults to: http://localhost:5092/v1/audio/transcriptions (or http://parakeet:5092/v1/audio/transcriptions in Docker)
|
||||||
|
# PARAKEET_URL=http://localhost:5092/v1/audio/transcriptions
|
||||||
|
|
||||||
# DeepL API (for scripts/translate-locales.mjs and scripts/translate-flyer.mjs)
|
# DeepL API (for scripts/translate-locales.mjs and scripts/translate-flyer.mjs)
|
||||||
# Free plan keys use api-free.deepl.com automatically (suffix :fx)
|
# Free plan keys use api-free.deepl.com automatically (suffix :fx)
|
||||||
DeepLAPIKey=
|
DeepLAPIKey=
|
||||||
|
|||||||
@@ -1,24 +1,80 @@
|
|||||||
|
import { useState, useEffect } from 'react'
|
||||||
import { useTranslation } from 'react-i18next'
|
import { useTranslation } from 'react-i18next'
|
||||||
|
import { Mic, Loader2 } from 'lucide-react'
|
||||||
import type { LogEventPayload } from '../utils/logEntryPayload.js'
|
import type { LogEventPayload } from '../utils/logEntryPayload.js'
|
||||||
import { parseLiveVoiceRemark } from '../utils/liveEventCodes.js'
|
import { parseLiveVoiceRemark } from '../utils/liveEventCodes.js'
|
||||||
import { formatEventSummary } from '../utils/formatEventSummary.js'
|
import { formatEventSummary } from '../utils/formatEventSummary.js'
|
||||||
import VoiceMemoPlayer, { type PreloadedVoiceMemo } from './VoiceMemoPlayer.tsx'
|
import VoiceMemoPlayer, { type PreloadedVoiceMemo } from './VoiceMemoPlayer.tsx'
|
||||||
|
import { useDialog } from './ModalDialog.tsx'
|
||||||
|
import { updateVoiceMemoTranscript } from '../services/voiceAttachments.js'
|
||||||
|
|
||||||
interface EventRemarksCellProps {
|
interface EventRemarksCellProps {
|
||||||
event: LogEventPayload
|
event: LogEventPayload
|
||||||
logbookId: string
|
logbookId: string
|
||||||
voiceMemoLookup?: Map<string, PreloadedVoiceMemo>
|
voiceMemoLookup?: Map<string, PreloadedVoiceMemo>
|
||||||
|
readOnly?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
export default function EventRemarksCell({
|
export default function EventRemarksCell({
|
||||||
event,
|
event,
|
||||||
logbookId,
|
logbookId,
|
||||||
voiceMemoLookup
|
voiceMemoLookup,
|
||||||
|
readOnly = false
|
||||||
}: EventRemarksCellProps) {
|
}: EventRemarksCellProps) {
|
||||||
const { t } = useTranslation()
|
const { t } = useTranslation()
|
||||||
|
const { showAlert } = useDialog()
|
||||||
const voiceId = parseLiveVoiceRemark(event.remarks.trim())
|
const voiceId = parseLiveVoiceRemark(event.remarks.trim())
|
||||||
const preloaded = voiceId ? voiceMemoLookup?.get(voiceId) : undefined
|
const preloaded = voiceId ? voiceMemoLookup?.get(voiceId) : undefined
|
||||||
|
|
||||||
|
const [transcribing, setTranscribing] = useState(false)
|
||||||
|
const [isOnline, setIsOnline] = useState(navigator.onLine)
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const handleOnline = () => setIsOnline(true)
|
||||||
|
const handleOffline = () => setIsOnline(false)
|
||||||
|
window.addEventListener('online', handleOnline)
|
||||||
|
window.addEventListener('offline', handleOffline)
|
||||||
|
return () => {
|
||||||
|
window.removeEventListener('online', handleOnline)
|
||||||
|
window.removeEventListener('offline', handleOffline)
|
||||||
|
}
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
/**
 * Click handler for the manual "Transcribe" button of a voice memo.
 *
 * Posts the preloaded audio data URL to `/api/ai/transcribe`, stores the
 * returned text via `updateVoiceMemoTranscript`, and shows an alert when any
 * step fails. Does nothing while a transcription is already running or when
 * the memo has no preloaded audio / voice id.
 */
const handleTranscribe = async (e: React.MouseEvent) => {
  // Keep the click from triggering surrounding row/link handlers.
  e.preventDefault()
  e.stopPropagation()
  if (transcribing || !preloaded?.audio || !voiceId) return

  setTranscribing(true)
  const controller = new AbortController()
  // Give up after 15 s; the abort also covers reading the response body.
  const timeoutId = setTimeout(() => controller.abort(), 15000)
  try {
    const res = await fetch('/api/ai/transcribe', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ audioDataUrl: preloaded.audio }),
      signal: controller.signal
    })
    if (!res.ok) {
      throw new Error(`Server returned status ${res.status}`)
    }

    // Treat anything but a non-blank string as "no transcript" instead of
    // failing with a TypeError on an unexpected response shape.
    const data: unknown = await res.json()
    const rawText =
      typeof data === 'object' && data !== null ? (data as { text?: unknown }).text : undefined
    const text = typeof rawText === 'string' ? rawText.trim() : ''
    if (!text) {
      throw new Error('Transcription returned empty text')
    }

    await updateVoiceMemoTranscript(logbookId, voiceId, text)
  } catch (err) {
    console.error('[EventRemarksCell] Transcription failed:', err)
    void showAlert(t('logs.live_voice_transcribe_failed'), t('logs.live_voice_btn'))
  } finally {
    // Single cleanup path: the timer is released on success, failure and abort.
    clearTimeout(timeoutId)
    setTranscribing(false)
  }
}
|
||||||
|
|
||||||
let summary = formatEventSummary(event, t)
|
let summary = formatEventSummary(event, t)
|
||||||
if (voiceId && preloaded?.caption) {
|
if (voiceId && preloaded?.caption) {
|
||||||
summary = t('logs.live_voice_entry', { caption: preloaded.caption })
|
summary = t('logs.live_voice_entry', { caption: preloaded.caption })
|
||||||
@@ -28,12 +84,39 @@ export default function EventRemarksCell({
|
|||||||
<div className={`event-remarks-cell${voiceId ? ' event-remarks-cell--voice' : ''}`}>
|
<div className={`event-remarks-cell${voiceId ? ' event-remarks-cell--voice' : ''}`}>
|
||||||
<span>{summary}</span>
|
<span>{summary}</span>
|
||||||
{voiceId && (
|
{voiceId && (
|
||||||
|
<div style={{ display: 'inline-flex', alignItems: 'center', flexWrap: 'wrap', gap: '8px', marginTop: '4px' }}>
|
||||||
<VoiceMemoPlayer
|
<VoiceMemoPlayer
|
||||||
audioId={voiceId}
|
audioId={voiceId}
|
||||||
logbookId={logbookId}
|
logbookId={logbookId}
|
||||||
preloaded={preloaded}
|
preloaded={preloaded}
|
||||||
compact
|
compact
|
||||||
/>
|
/>
|
||||||
|
{!readOnly && preloaded && preloaded.transcribed === false && isOnline && (
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
className="btn-icon-text link-sec"
|
||||||
|
style={{
|
||||||
|
fontSize: '0.8rem',
|
||||||
|
padding: '2px 6px',
|
||||||
|
height: 'auto',
|
||||||
|
display: 'inline-flex',
|
||||||
|
alignItems: 'center',
|
||||||
|
gap: '4px',
|
||||||
|
margin: 0
|
||||||
|
}}
|
||||||
|
onClick={handleTranscribe}
|
||||||
|
disabled={transcribing}
|
||||||
|
title={t('logs.live_voice_transcribe_action')}
|
||||||
|
>
|
||||||
|
{transcribing ? (
|
||||||
|
<Loader2 size={12} className="spin" />
|
||||||
|
) : (
|
||||||
|
<Mic size={12} />
|
||||||
|
)}
|
||||||
|
{transcribing ? t('logs.live_voice_transcribing') : t('logs.live_voice_transcribe_action')}
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -31,7 +31,6 @@ import {
|
|||||||
removeLastEvent
|
removeLastEvent
|
||||||
} from '../services/quickEventLog.js'
|
} from '../services/quickEventLog.js'
|
||||||
import CreatorAvatar from './CreatorAvatar.tsx'
|
import CreatorAvatar from './CreatorAvatar.tsx'
|
||||||
import { formatEventSummary } from '../utils/formatEventSummary.js'
|
|
||||||
import {
|
import {
|
||||||
getLastAutoPositionMs,
|
getLastAutoPositionMs,
|
||||||
getLastLoggedPositionWithin,
|
getLastLoggedPositionWithin,
|
||||||
@@ -43,7 +42,6 @@ import {
|
|||||||
liveFuelRemark,
|
liveFuelRemark,
|
||||||
livePhotoRemark,
|
livePhotoRemark,
|
||||||
liveVoiceRemark,
|
liveVoiceRemark,
|
||||||
parseLiveVoiceRemark,
|
|
||||||
livePrecipRemark,
|
livePrecipRemark,
|
||||||
liveSailsRemark,
|
liveSailsRemark,
|
||||||
liveSogRemark,
|
liveSogRemark,
|
||||||
@@ -80,7 +78,7 @@ import CourseDialInput from './CourseDialInput.tsx'
|
|||||||
import GpsSignalHint from './GpsSignalHint.tsx'
|
import GpsSignalHint from './GpsSignalHint.tsx'
|
||||||
import LiveCameraCapture from './LiveCameraCapture.tsx'
|
import LiveCameraCapture from './LiveCameraCapture.tsx'
|
||||||
import LiveVoiceCapture from './LiveVoiceCapture.tsx'
|
import LiveVoiceCapture from './LiveVoiceCapture.tsx'
|
||||||
import VoiceMemoPlayer from './VoiceMemoPlayer.tsx'
|
import EventRemarksCell from './EventRemarksCell.tsx'
|
||||||
import { saveEntryPhoto, deleteEntryPhoto } from '../services/photoAttachments.js'
|
import { saveEntryPhoto, deleteEntryPhoto } from '../services/photoAttachments.js'
|
||||||
import { saveEntryVoiceMemo, deleteEntryVoiceMemo } from '../services/voiceAttachments.js'
|
import { saveEntryVoiceMemo, deleteEntryVoiceMemo } from '../services/voiceAttachments.js'
|
||||||
import { blobToCompressedJpegDataUrl } from '../utils/imageCompress.js'
|
import { blobToCompressedJpegDataUrl } from '../utils/imageCompress.js'
|
||||||
@@ -836,13 +834,46 @@ export default function LiveLogView({
|
|||||||
void (async () => {
|
void (async () => {
|
||||||
try {
|
try {
|
||||||
const audioDataUrl = await blobToAudioDataUrl(blob)
|
const audioDataUrl = await blobToAudioDataUrl(blob)
|
||||||
|
|
||||||
|
let transcriptionText = ''
|
||||||
|
let transcribed = true
|
||||||
|
let transcriptionError = false
|
||||||
|
|
||||||
|
try {
|
||||||
|
const controller = new AbortController()
|
||||||
|
const timeoutId = setTimeout(() => controller.abort(), 4000)
|
||||||
|
|
||||||
|
const res = await fetch('/api/ai/transcribe', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ audioDataUrl }),
|
||||||
|
signal: controller.signal
|
||||||
|
})
|
||||||
|
clearTimeout(timeoutId)
|
||||||
|
if (!res.ok) throw new Error(`Status ${res.status}`)
|
||||||
|
const data = await res.json()
|
||||||
|
transcriptionText = (data.text || '').trim()
|
||||||
|
} catch (err) {
|
||||||
|
console.warn('[LiveLogView] Automatic transcription failed or timed out:', err)
|
||||||
|
transcriptionError = true
|
||||||
|
transcribed = false
|
||||||
|
}
|
||||||
|
|
||||||
|
let finalCaption = caption
|
||||||
|
if (transcriptionText) {
|
||||||
|
finalCaption = caption
|
||||||
|
? `${caption}\n(Transkript: ${transcriptionText})`
|
||||||
|
: transcriptionText
|
||||||
|
}
|
||||||
|
|
||||||
const voiceId = await saveEntryVoiceMemo({
|
const voiceId = await saveEntryVoiceMemo({
|
||||||
logbookId,
|
logbookId,
|
||||||
entryId,
|
entryId,
|
||||||
audioDataUrl,
|
audioDataUrl,
|
||||||
mimeType,
|
mimeType,
|
||||||
durationSec,
|
durationSec,
|
||||||
caption,
|
caption: finalCaption,
|
||||||
|
transcribed,
|
||||||
analyticsContext: 'live_log'
|
analyticsContext: 'live_log'
|
||||||
})
|
})
|
||||||
await appendQuickEvent(logbookId, entryId, {
|
await appendQuickEvent(logbookId, entryId, {
|
||||||
@@ -854,6 +885,10 @@ export default function LiveLogView({
|
|||||||
setVoiceCaption('')
|
setVoiceCaption('')
|
||||||
showUndo('voice')
|
showUndo('voice')
|
||||||
trackPlausibleEvent(PlausibleEvents.LIVE_LOG_EVENT_LOGGED, { action: 'voice' })
|
trackPlausibleEvent(PlausibleEvents.LIVE_LOG_EVENT_LOGGED, { action: 'voice' })
|
||||||
|
|
||||||
|
if (transcriptionError) {
|
||||||
|
void showAlert(t('logs.live_voice_transcribe_failed'), t('logs.live_voice_btn'))
|
||||||
|
}
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
console.error('Live log voice save failed:', err)
|
console.error('Live log voice save failed:', err)
|
||||||
const msg = err instanceof Error && err.message === 'VOICE_MEMO_TOO_LARGE'
|
const msg = err instanceof Error && err.message === 'VOICE_MEMO_TOO_LARGE'
|
||||||
@@ -1225,12 +1260,6 @@ export default function LiveLogView({
|
|||||||
) : (
|
) : (
|
||||||
<ol className="live-log-stream">
|
<ol className="live-log-stream">
|
||||||
{events.map((event, index) => {
|
{events.map((event, index) => {
|
||||||
const voiceId = parseLiveVoiceRemark(event.remarks.trim())
|
|
||||||
const voicePreloaded = voiceId ? voiceMemoLookup.get(voiceId) : undefined
|
|
||||||
let summary = formatEventSummary(event, t)
|
|
||||||
if (voiceId && voicePreloaded?.caption) {
|
|
||||||
summary = t('logs.live_voice_entry', { caption: voicePreloaded.caption })
|
|
||||||
}
|
|
||||||
return (
|
return (
|
||||||
<li key={`${event.time}-${index}`} className="live-log-entry">
|
<li key={`${event.time}-${index}`} className="live-log-entry">
|
||||||
<time className="live-log-time">{event.time}</time>
|
<time className="live-log-time">{event.time}</time>
|
||||||
@@ -1240,15 +1269,12 @@ export default function LiveLogView({
|
|||||||
size={24}
|
size={24}
|
||||||
/>
|
/>
|
||||||
<div className="live-log-summary-block">
|
<div className="live-log-summary-block">
|
||||||
<span className="live-log-summary">{summary}</span>
|
<EventRemarksCell
|
||||||
{voiceId && (
|
event={event}
|
||||||
<VoiceMemoPlayer
|
|
||||||
audioId={voiceId}
|
|
||||||
logbookId={logbookId}
|
logbookId={logbookId}
|
||||||
preloaded={voicePreloaded}
|
voiceMemoLookup={voiceMemoLookup}
|
||||||
compact
|
readOnly={false}
|
||||||
/>
|
/>
|
||||||
)}
|
|
||||||
</div>
|
</div>
|
||||||
</li>
|
</li>
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1909,6 +1909,7 @@ export default function LogEntryEditor({
|
|||||||
event={ev}
|
event={ev}
|
||||||
logbookId={logbookId}
|
logbookId={logbookId}
|
||||||
voiceMemoLookup={voiceMemoLookup}
|
voiceMemoLookup={voiceMemoLookup}
|
||||||
|
readOnly={readOnly}
|
||||||
/>
|
/>
|
||||||
</td>
|
</td>
|
||||||
{!readOnly && (
|
{!readOnly && (
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ export interface PreloadedVoiceMemo {
|
|||||||
mimeType?: string
|
mimeType?: string
|
||||||
durationSec?: number
|
durationSec?: number
|
||||||
caption?: string
|
caption?: string
|
||||||
|
transcribed?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
interface VoiceMemoPlayerProps {
|
interface VoiceMemoPlayerProps {
|
||||||
|
|||||||
@@ -48,7 +48,8 @@ export function useEntryVoiceMemos(
|
|||||||
audio: String(decrypted.audio),
|
audio: String(decrypted.audio),
|
||||||
mimeType: decrypted.mimeType ? String(decrypted.mimeType) : undefined,
|
mimeType: decrypted.mimeType ? String(decrypted.mimeType) : undefined,
|
||||||
durationSec: typeof decrypted.durationSec === 'number' ? decrypted.durationSec : undefined,
|
durationSec: typeof decrypted.durationSec === 'number' ? decrypted.durationSec : undefined,
|
||||||
caption: decrypted.caption ? String(decrypted.caption) : ''
|
caption: decrypted.caption ? String(decrypted.caption) : '',
|
||||||
|
transcribed: decrypted.transcribed !== false
|
||||||
})
|
})
|
||||||
} catch {
|
} catch {
|
||||||
// skip corrupt memo
|
// skip corrupt memo
|
||||||
|
|||||||
@@ -297,6 +297,9 @@
|
|||||||
"live_voice_entry_plain": "Stemmenotat",
|
"live_voice_entry_plain": "Stemmenotat",
|
||||||
"live_voice_caption_label": "Billedtekst (valgfrit)",
|
"live_voice_caption_label": "Billedtekst (valgfrit)",
|
||||||
"live_voice_caption_placeholder": "f.eks. radiokontakt med havnemester",
|
"live_voice_caption_placeholder": "f.eks. radiokontakt med havnemester",
|
||||||
|
"live_voice_transcribe_action": "Transkribere",
|
||||||
|
"live_voice_transcribing": "Transkriberer…",
|
||||||
|
"live_voice_transcribe_failed": "Stemmenotat gemt, men transkribering mislykkedes.",
|
||||||
"live_undo_voice_hint": "Stemmenotat gemt",
|
"live_undo_voice_hint": "Stemmenotat gemt",
|
||||||
"live_comment_btn": "Kommentar",
|
"live_comment_btn": "Kommentar",
|
||||||
"live_comment_placeholder": "Indtast tekst…",
|
"live_comment_placeholder": "Indtast tekst…",
|
||||||
|
|||||||
@@ -297,6 +297,9 @@
|
|||||||
"live_voice_entry_plain": "Sprachnotiz",
|
"live_voice_entry_plain": "Sprachnotiz",
|
||||||
"live_voice_caption_label": "Beschriftung (optional)",
|
"live_voice_caption_label": "Beschriftung (optional)",
|
||||||
"live_voice_caption_placeholder": "z. B. Funkverkehr mit Hafenmeister",
|
"live_voice_caption_placeholder": "z. B. Funkverkehr mit Hafenmeister",
|
||||||
|
"live_voice_transcribe_action": "Transkribieren",
|
||||||
|
"live_voice_transcribing": "Transkribiere…",
|
||||||
|
"live_voice_transcribe_failed": "Sprachnotiz gespeichert, aber Transkription fehlgeschlagen.",
|
||||||
"live_undo_voice_hint": "Sprachnotiz gespeichert",
|
"live_undo_voice_hint": "Sprachnotiz gespeichert",
|
||||||
"live_comment_btn": "Kommentar",
|
"live_comment_btn": "Kommentar",
|
||||||
"live_comment_placeholder": "Freitext eingeben…",
|
"live_comment_placeholder": "Freitext eingeben…",
|
||||||
|
|||||||
@@ -297,6 +297,9 @@
|
|||||||
"live_voice_entry_plain": "Voice memo",
|
"live_voice_entry_plain": "Voice memo",
|
||||||
"live_voice_caption_label": "Caption (optional)",
|
"live_voice_caption_label": "Caption (optional)",
|
||||||
"live_voice_caption_placeholder": "e.g. radio call with harbour master",
|
"live_voice_caption_placeholder": "e.g. radio call with harbour master",
|
||||||
|
"live_voice_transcribe_action": "Transcribe",
|
||||||
|
"live_voice_transcribing": "Transcribing…",
|
||||||
|
"live_voice_transcribe_failed": "Voice memo saved, but transcription failed.",
|
||||||
"live_undo_voice_hint": "Voice memo saved",
|
"live_undo_voice_hint": "Voice memo saved",
|
||||||
"live_comment_btn": "Comment",
|
"live_comment_btn": "Comment",
|
||||||
"live_comment_placeholder": "Enter text…",
|
"live_comment_placeholder": "Enter text…",
|
||||||
|
|||||||
@@ -297,6 +297,9 @@
|
|||||||
"live_voice_entry_plain": "Talemelding",
|
"live_voice_entry_plain": "Talemelding",
|
||||||
"live_voice_caption_label": "Bildetekst (valgfritt)",
|
"live_voice_caption_label": "Bildetekst (valgfritt)",
|
||||||
"live_voice_caption_placeholder": "f.eks. radiokontakt med havnesjef",
|
"live_voice_caption_placeholder": "f.eks. radiokontakt med havnesjef",
|
||||||
|
"live_voice_transcribe_action": "Transkribere",
|
||||||
|
"live_voice_transcribing": "Transkriberer…",
|
||||||
|
"live_voice_transcribe_failed": "Talemelding lagret, men transkribering mislyktes.",
|
||||||
"live_undo_voice_hint": "Talemelding lagret",
|
"live_undo_voice_hint": "Talemelding lagret",
|
||||||
"live_comment_btn": "Kommentar",
|
"live_comment_btn": "Kommentar",
|
||||||
"live_comment_placeholder": "Skriv inn tekst…",
|
"live_comment_placeholder": "Skriv inn tekst…",
|
||||||
|
|||||||
@@ -297,6 +297,9 @@
|
|||||||
"live_voice_entry_plain": "Röstanteckning",
|
"live_voice_entry_plain": "Röstanteckning",
|
||||||
"live_voice_caption_label": "Bildtext (valfritt)",
|
"live_voice_caption_label": "Bildtext (valfritt)",
|
||||||
"live_voice_caption_placeholder": "t.ex. radiokontakt med hamnmästare",
|
"live_voice_caption_placeholder": "t.ex. radiokontakt med hamnmästare",
|
||||||
|
"live_voice_transcribe_action": "Transkribera",
|
||||||
|
"live_voice_transcribing": "Transkriberar…",
|
||||||
|
"live_voice_transcribe_failed": "Röstanteckning sparad, men transkribering misslyckades.",
|
||||||
"live_undo_voice_hint": "Röstanteckning sparad",
|
"live_undo_voice_hint": "Röstanteckning sparad",
|
||||||
"live_comment_btn": "Kommentar",
|
"live_comment_btn": "Kommentar",
|
||||||
"live_comment_placeholder": "Ange text…",
|
"live_comment_placeholder": "Ange text…",
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import { db } from './db.js'
|
import { db } from './db.js'
|
||||||
import { getActiveMasterKey } from './auth.js'
|
import { getActiveMasterKey } from './auth.js'
|
||||||
import { getLogbookKey } from './logbookKeys.js'
|
import { getLogbookKey } from './logbookKeys.js'
|
||||||
import { encryptJson } from './crypto.js'
|
import { encryptJson, decryptJson } from './crypto.js'
|
||||||
import { syncLogbook } from './sync.js'
|
import { syncLogbook } from './sync.js'
|
||||||
import { PlausibleEvents, trackPlausibleEvent } from './analytics.js'
|
import { PlausibleEvents, trackPlausibleEvent } from './analytics.js'
|
||||||
|
|
||||||
@@ -18,6 +18,7 @@ export async function saveEntryVoiceMemo(options: {
|
|||||||
mimeType: string
|
mimeType: string
|
||||||
durationSec: number
|
durationSec: number
|
||||||
caption?: string
|
caption?: string
|
||||||
|
transcribed?: boolean
|
||||||
analyticsContext?: string
|
analyticsContext?: string
|
||||||
}): Promise<string> {
|
}): Promise<string> {
|
||||||
const {
|
const {
|
||||||
@@ -27,6 +28,7 @@ export async function saveEntryVoiceMemo(options: {
|
|||||||
mimeType,
|
mimeType,
|
||||||
durationSec,
|
durationSec,
|
||||||
caption = '',
|
caption = '',
|
||||||
|
transcribed = true,
|
||||||
analyticsContext = 'logbook'
|
analyticsContext = 'logbook'
|
||||||
} = options
|
} = options
|
||||||
const masterKey = await getEncryptionKey(logbookId)
|
const masterKey = await getEncryptionKey(logbookId)
|
||||||
@@ -35,7 +37,8 @@ export async function saveEntryVoiceMemo(options: {
|
|||||||
audio: audioDataUrl,
|
audio: audioDataUrl,
|
||||||
mimeType,
|
mimeType,
|
||||||
durationSec,
|
durationSec,
|
||||||
caption: caption.trim()
|
caption: caption.trim(),
|
||||||
|
transcribed: !!transcribed
|
||||||
}
|
}
|
||||||
|
|
||||||
const encrypted = await encryptJson(voicePayload, masterKey)
|
const encrypted = await encryptJson(voicePayload, masterKey)
|
||||||
@@ -98,3 +101,55 @@ export async function removeLastVoiceMemoForEntry(
|
|||||||
await deleteEntryVoiceMemo(logbookId, lastId)
|
await deleteEntryVoiceMemo(logbookId, lastId)
|
||||||
return lastId
|
return lastId
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Adds a transcript to an existing voice memo and marks it `transcribed: true`.
 *
 * The stored payload is decrypted, its caption is rewritten, and the result is
 * re-encrypted with the key from `getEncryptionKey(logbookId)`. The record is
 * written back to `db.voiceMemos`, an `update` item is queued in
 * `db.syncQueue`, and a background `syncLogbook` is started (its failure is
 * only logged).
 *
 * Caption rule: if the memo already has a caption, the transcript is appended
 * on a new line as `(Transkript: …)`; otherwise the transcript becomes the
 * caption.
 *
 * @param logbookId - Logbook the memo belongs to.
 * @param voiceId - Primary key of the memo in `db.voiceMemos`.
 * @param transcript - Transcribed text; surrounding whitespace is ignored.
 * @throws Error when the transcript is blank, the memo does not exist, or the
 *   payload cannot be decrypted.
 */
export async function updateVoiceMemoTranscript(
  logbookId: string,
  voiceId: string,
  transcript: string
): Promise<void> {
  // Reject blank input before touching storage: otherwise the memo would be
  // flagged as transcribed with an empty caption or a bare "(Transkript: )".
  const cleanTranscript = transcript.trim()
  if (!cleanTranscript) throw new Error('Transcript is empty')

  const masterKey = await getEncryptionKey(logbookId)
  const record = await db.voiceMemos.get(voiceId)
  if (!record) throw new Error('Voice memo not found')

  const decrypted = await decryptJson(record.encryptedData, record.iv, record.tag, masterKey)
  if (!decrypted) throw new Error('Failed to decrypt voice memo')

  const manualCaption = decrypted.caption ? String(decrypted.caption).trim() : ''
  // NOTE(review): the "Transkript" label is hard-coded German and ends up in
  // user-visible captions for every locale — consider localising it.
  const finalCaption = manualCaption
    ? `${manualCaption}\n(Transkript: ${cleanTranscript})`
    : cleanTranscript

  const updatedPayload = {
    ...decrypted,
    caption: finalCaption,
    transcribed: true
  }

  const encrypted = await encryptJson(updatedPayload, masterKey)
  const now = new Date().toISOString()

  await db.voiceMemos.put({
    ...record,
    encryptedData: encrypted.ciphertext,
    iv: encrypted.iv,
    tag: encrypted.tag,
    updatedAt: now
  })

  await db.syncQueue.put({
    action: 'update',
    type: 'voiceMemo',
    payloadId: voiceId,
    logbookId,
    data: JSON.stringify({
      encryptedData: encrypted.ciphertext,
      iv: encrypted.iv,
      tag: encrypted.tag,
      entryId: record.entryId
    }),
    updatedAt: now
  })

  // Fire-and-forget: the local write already succeeded, sync may retry later.
  syncLogbook(logbookId).catch((err) => console.warn('Background sync failed:', err))
}
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ services:
|
|||||||
OpenWeatherMapAPIKey: ${OpenWeatherMapAPIKey:-}
|
OpenWeatherMapAPIKey: ${OpenWeatherMapAPIKey:-}
|
||||||
OpenRouterAPIKey: ${OpenRouterAPIKey:-}
|
OpenRouterAPIKey: ${OpenRouterAPIKey:-}
|
||||||
OpenRouterModel: ${OpenRouterModel:-anthropic/claude-3.5-haiku}
|
OpenRouterModel: ${OpenRouterModel:-anthropic/claude-3.5-haiku}
|
||||||
|
PARAKEET_URL: ${PARAKEET_URL:-http://parakeet:5092/v1/audio/transcriptions}
|
||||||
SESSION_SECRET: ${SESSION_SECRET:-}
|
SESSION_SECRET: ${SESSION_SECRET:-}
|
||||||
ADMIN_USER_IDS: ${ADMIN_USER_IDS:-}
|
ADMIN_USER_IDS: ${ADMIN_USER_IDS:-}
|
||||||
NTFY_SERVER: ${NTFY_SERVER:-https://ntfy.sh}
|
NTFY_SERVER: ${NTFY_SERVER:-https://ntfy.sh}
|
||||||
@@ -66,6 +67,13 @@ services:
|
|||||||
backend:
|
backend:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
||||||
|
parakeet:
  # Speech-to-text container used by the backend via PARAKEET_URL
  # (http://parakeet:5092/... on the Compose network).
  image: ghcr.io/achetronic/parakeet:latest
  container_name: daagbox-staging-parakeet
  restart: always
  ports:
    # Publish on loopback only: the endpoint has no authentication here, and
    # the backend reaches it by service name, not through the host port.
    - "127.0.0.1:5092:5092"
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
pgdata:
|
pgdata:
|
||||||
name: daagbox-staging-pgdata
|
name: daagbox-staging-pgdata
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ services:
|
|||||||
OpenWeatherMapAPIKey: ${OpenWeatherMapAPIKey:-}
|
OpenWeatherMapAPIKey: ${OpenWeatherMapAPIKey:-}
|
||||||
OpenRouterAPIKey: ${OpenRouterAPIKey:-}
|
OpenRouterAPIKey: ${OpenRouterAPIKey:-}
|
||||||
OpenRouterModel: ${OpenRouterModel:-anthropic/claude-3.5-haiku}
|
OpenRouterModel: ${OpenRouterModel:-anthropic/claude-3.5-haiku}
|
||||||
|
PARAKEET_URL: ${PARAKEET_URL:-http://parakeet:5092/v1/audio/transcriptions}
|
||||||
SESSION_SECRET: ${SESSION_SECRET:-}
|
SESSION_SECRET: ${SESSION_SECRET:-}
|
||||||
ADMIN_USER_IDS: ${ADMIN_USER_IDS:-}
|
ADMIN_USER_IDS: ${ADMIN_USER_IDS:-}
|
||||||
NTFY_SERVER: ${NTFY_SERVER:-https://ntfy.sh}
|
NTFY_SERVER: ${NTFY_SERVER:-https://ntfy.sh}
|
||||||
@@ -67,6 +68,13 @@ services:
|
|||||||
backend:
|
backend:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
||||||
|
parakeet:
  # Speech-to-text container used by the backend via PARAKEET_URL
  # (http://parakeet:5092/... on the Compose network).
  image: ghcr.io/achetronic/parakeet:latest
  container_name: daagbox-prod-parakeet
  restart: always
  ports:
    # Publish on loopback only: the endpoint has no authentication here, and
    # the backend reaches it by service name, not through the host port.
    - "127.0.0.1:5092:5092"
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
pgdata:
|
pgdata:
|
||||||
name: daagbox-prod-pgdata
|
name: daagbox-prod-pgdata
|
||||||
|
|||||||
@@ -59,4 +59,12 @@ describe('API smoke', () => {
|
|||||||
expect(res.status).toBe(401)
|
expect(res.status).toBe(401)
|
||||||
expect(res.body.error).toMatch(/Unauthorized/i)
|
expect(res.body.error).toMatch(/Unauthorized/i)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('POST /api/ai/transcribe requires session', async () => {
|
||||||
|
const res = await request(app)
|
||||||
|
.post('/api/ai/transcribe')
|
||||||
|
.send({ audioDataUrl: 'data:audio/webm;base64,abcdef' })
|
||||||
|
expect(res.status).toBe(401)
|
||||||
|
expect(res.body.error).toMatch(/Unauthorized/i)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import { requireUser } from '../middleware/auth.js'
|
|||||||
|
|
||||||
const router = Router()
|
const router = Router()
|
||||||
|
|
||||||
|
const PARAKEET_URL = process.env.PARAKEET_URL || 'http://localhost:5092/v1/audio/transcriptions'
|
||||||
const MAX_ATTEMPTS_PER_ENTRY = 3
|
const MAX_ATTEMPTS_PER_ENTRY = 3
|
||||||
const DEFAULT_MODEL = 'anthropic/claude-3.5-haiku'
|
const DEFAULT_MODEL = 'anthropic/claude-3.5-haiku'
|
||||||
const OPENROUTER_URL = 'https://openrouter.ai/api/v1/chat/completions'
|
const OPENROUTER_URL = 'https://openrouter.ai/api/v1/chat/completions'
|
||||||
@@ -230,4 +231,68 @@ router.post('/summary', async (req: any, res) => {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
/**
 * POST /transcribe — forwards a base64 audio data URL to the Parakeet ASR
 * service at `PARAKEET_URL` and returns `{ text }`.
 *
 * Request body: `{ audioDataUrl: string }`, a `data:<mime>[;param=value…];base64,<data>` URL.
 *
 * Responses:
 * - 200 `{ text }` — trimmed transcript (may be an empty string).
 * - 400 — `audioDataUrl` is missing, not a string, or not a base64 data URL.
 * - 504 — Parakeet did not answer within 15 s.
 * - 503 — any other failure, including a non-2xx answer from Parakeet.
 */
router.post('/transcribe', async (req: any, res) => {
  try {
    const { audioDataUrl } = req.body ?? {}
    if (!audioDataUrl || typeof audioDataUrl !== 'string') {
      return res.status(400).json({ error: 'audioDataUrl is required' })
    }

    // Split at the first comma rather than running a regex over the whole
    // (potentially multi-megabyte) payload.
    const commaIndex = audioDataUrl.indexOf(',')
    const header = commaIndex === -1 ? '' : audioDataUrl.slice(0, commaIndex)
    const base64Data = commaIndex === -1 ? '' : audioDataUrl.slice(commaIndex + 1)

    // Allow media-type parameters before ";base64": recorders commonly emit
    // e.g. "data:audio/webm;codecs=opus;base64,…".
    const headerMatch = /^data:([^;]+)(?:;[^;]+)*;base64$/i.exec(header)
    if (!headerMatch || !base64Data) {
      return res.status(400).json({ error: 'Invalid audio data URL format' })
    }

    const mimeType = headerMatch[1] ?? ''
    const buffer = Buffer.from(base64Data, 'base64')
    // Buffer.from silently drops non-base64 characters, so garbage input
    // decodes to nothing; do not forward an empty file.
    if (buffer.length === 0) {
      return res.status(400).json({ error: 'Invalid audio data URL format' })
    }

    // Pick a file extension from the media type; webm is the fallback.
    let ext = 'webm'
    if (mimeType.includes('mp4')) ext = 'mp4'
    else if (mimeType.includes('ogg')) ext = 'ogg'
    else if (mimeType.includes('wav')) ext = 'wav'

    const filename = `audio.${ext}`
    const file = new File([buffer], filename, { type: mimeType })

    const formData = new FormData()
    formData.append('file', file)

    console.log(`[server] Forwarding ASR request to ${PARAKEET_URL} (${filename}, ${buffer.length} bytes)`)

    const controller = new AbortController()
    const timeoutId = setTimeout(() => controller.abort(), 15000)

    try {
      const parakeetRes = await fetch(PARAKEET_URL, {
        method: 'POST',
        body: formData,
        signal: controller.signal
      })

      if (!parakeetRes.ok) {
        const errorText = await parakeetRes.text().catch(() => '')
        console.error(`[server] Parakeet ASR error response (status=${parakeetRes.status}):`, errorText)
        throw new Error(`Parakeet returned status ${parakeetRes.status}`)
      }

      const data: any = await parakeetRes.json()
      const text = typeof data?.text === 'string' ? data.text.trim() : ''

      // Log only the size: the transcript is user content and must not end
      // up in server logs.
      console.log(`[server] ASR completed successfully (${text.length} chars)`)
      return res.json({ text })
    } catch (error: unknown) {
      if (error instanceof Error && error.name === 'AbortError') {
        console.error('[server] Parakeet ASR request timed out')
        return res.status(504).json({ error: 'Transcription request timed out' })
      }
      throw error
    } finally {
      clearTimeout(timeoutId)
    }
  } catch (error: unknown) {
    console.error('ASR transcription failed:', error)
    return res.status(503).json({ error: 'Transcription service unavailable' })
  }
})
|
||||||
|
|
||||||
export default router
|
export default router
|
||||||
|
|||||||
Reference in New Issue
Block a user