feat: implement voice memo transcription with local parakeet container and fallback timeouts
This commit is contained in:
@@ -1,24 +1,80 @@
|
||||
import { useState, useEffect } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { Mic, Loader2 } from 'lucide-react'
|
||||
import type { LogEventPayload } from '../utils/logEntryPayload.js'
|
||||
import { parseLiveVoiceRemark } from '../utils/liveEventCodes.js'
|
||||
import { formatEventSummary } from '../utils/formatEventSummary.js'
|
||||
import VoiceMemoPlayer, { type PreloadedVoiceMemo } from './VoiceMemoPlayer.tsx'
|
||||
import { useDialog } from './ModalDialog.tsx'
|
||||
import { updateVoiceMemoTranscript } from '../services/voiceAttachments.js'
|
||||
|
||||
/**
 * Props accepted by the EventRemarksCell component.
 */
interface EventRemarksCellProps {
|
||||
/** Log event to render; its `remarks` are parsed for a voice-memo id and it is passed to `formatEventSummary`. */
event: LogEventPayload
|
||||
/** Logbook id, forwarded to `VoiceMemoPlayer` and to `updateVoiceMemoTranscript` when a transcript is saved. */
logbookId: string
|
||||
/** Optional map of already-loaded voice memos, looked up by the voice id parsed from the event remarks. */
voiceMemoLookup?: Map<string, PreloadedVoiceMemo>
|
||||
/** When true, the manual "transcribe" button is not rendered. The component defaults this to `false`. */
readOnly?: boolean
|
||||
}
|
||||
|
||||
export default function EventRemarksCell({
|
||||
event,
|
||||
logbookId,
|
||||
voiceMemoLookup
|
||||
voiceMemoLookup,
|
||||
readOnly = false
|
||||
}: EventRemarksCellProps) {
|
||||
const { t } = useTranslation()
|
||||
const { showAlert } = useDialog()
|
||||
const voiceId = parseLiveVoiceRemark(event.remarks.trim())
|
||||
const preloaded = voiceId ? voiceMemoLookup?.get(voiceId) : undefined
|
||||
|
||||
const [transcribing, setTranscribing] = useState(false)
|
||||
const [isOnline, setIsOnline] = useState(navigator.onLine)
|
||||
|
||||
useEffect(() => {
|
||||
const handleOnline = () => setIsOnline(true)
|
||||
const handleOffline = () => setIsOnline(false)
|
||||
window.addEventListener('online', handleOnline)
|
||||
window.addEventListener('offline', handleOffline)
|
||||
return () => {
|
||||
window.removeEventListener('online', handleOnline)
|
||||
window.removeEventListener('offline', handleOffline)
|
||||
}
|
||||
}, [])
|
||||
|
||||
/**
 * Click handler for the manual "transcribe" button.
 *
 * Posts the preloaded audio data URL to `/api/ai/transcribe`, and on success
 * stores the returned text via `updateVoiceMemoTranscript`. Any failure
 * (HTTP error status, timeout/abort, malformed or empty response, or a
 * failing save) is logged and reported to the user through `showAlert`.
 *
 * The handler is a no-op while a transcription is already running or when
 * there is no voice id / preloaded audio for this event.
 *
 * @param e - Click event; default action and propagation are suppressed so
 *            the click does not reach surrounding handlers.
 */
const handleTranscribe = async (e: React.MouseEvent) => {
  e.preventDefault()
  e.stopPropagation()
  if (transcribing || !preloaded?.audio || !voiceId) return

  setTranscribing(true)
  const controller = new AbortController()
  // 15 s budget for the whole exchange. The timer is cleared in `finally`
  // (not right after `fetch` resolves) so that a stalled response *body*
  // is aborted too; aborting after the body has been fully read is a no-op.
  const timeoutId = setTimeout(() => controller.abort(), 15000)

  try {
    const res = await fetch('/api/ai/transcribe', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ audioDataUrl: preloaded.audio }),
      signal: controller.signal
    })
    if (!res.ok) {
      throw new Error(`Server returned status ${res.status}`)
    }

    // Treat the payload as untrusted: only a string `text` field is accepted,
    // anything else is handled like an empty transcription.
    const data: unknown = await res.json()
    const rawText =
      typeof data === 'object' && data !== null && 'text' in data ? data.text : undefined
    const text = typeof rawText === 'string' ? rawText.trim() : ''
    if (!text) {
      throw new Error('Transcription returned empty text')
    }

    await updateVoiceMemoTranscript(logbookId, voiceId, text)
  } catch (err: unknown) {
    console.error('[EventRemarksCell] Transcription failed:', err)
    void showAlert(t('logs.live_voice_transcribe_failed'), t('logs.live_voice_btn'))
  } finally {
    // Single cleanup point for both the success and the failure path.
    clearTimeout(timeoutId)
    setTranscribing(false)
  }
}
|
||||
|
||||
let summary = formatEventSummary(event, t)
|
||||
if (voiceId && preloaded?.caption) {
|
||||
summary = t('logs.live_voice_entry', { caption: preloaded.caption })
|
||||
@@ -28,12 +84,39 @@ export default function EventRemarksCell({
|
||||
<div className={`event-remarks-cell${voiceId ? ' event-remarks-cell--voice' : ''}`}>
|
||||
<span>{summary}</span>
|
||||
{voiceId && (
|
||||
<VoiceMemoPlayer
|
||||
audioId={voiceId}
|
||||
logbookId={logbookId}
|
||||
preloaded={preloaded}
|
||||
compact
|
||||
/>
|
||||
<div style={{ display: 'inline-flex', alignItems: 'center', flexWrap: 'wrap', gap: '8px', marginTop: '4px' }}>
|
||||
<VoiceMemoPlayer
|
||||
audioId={voiceId}
|
||||
logbookId={logbookId}
|
||||
preloaded={preloaded}
|
||||
compact
|
||||
/>
|
||||
{!readOnly && preloaded && preloaded.transcribed === false && isOnline && (
|
||||
<button
|
||||
type="button"
|
||||
className="btn-icon-text link-sec"
|
||||
style={{
|
||||
fontSize: '0.8rem',
|
||||
padding: '2px 6px',
|
||||
height: 'auto',
|
||||
display: 'inline-flex',
|
||||
alignItems: 'center',
|
||||
gap: '4px',
|
||||
margin: 0
|
||||
}}
|
||||
onClick={handleTranscribe}
|
||||
disabled={transcribing}
|
||||
title={t('logs.live_voice_transcribe_action')}
|
||||
>
|
||||
{transcribing ? (
|
||||
<Loader2 size={12} className="spin" />
|
||||
) : (
|
||||
<Mic size={12} />
|
||||
)}
|
||||
{transcribing ? t('logs.live_voice_transcribing') : t('logs.live_voice_transcribe_action')}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
|
||||
@@ -31,7 +31,6 @@ import {
|
||||
removeLastEvent
|
||||
} from '../services/quickEventLog.js'
|
||||
import CreatorAvatar from './CreatorAvatar.tsx'
|
||||
import { formatEventSummary } from '../utils/formatEventSummary.js'
|
||||
import {
|
||||
getLastAutoPositionMs,
|
||||
getLastLoggedPositionWithin,
|
||||
@@ -43,7 +42,6 @@ import {
|
||||
liveFuelRemark,
|
||||
livePhotoRemark,
|
||||
liveVoiceRemark,
|
||||
parseLiveVoiceRemark,
|
||||
livePrecipRemark,
|
||||
liveSailsRemark,
|
||||
liveSogRemark,
|
||||
@@ -80,7 +78,7 @@ import CourseDialInput from './CourseDialInput.tsx'
|
||||
import GpsSignalHint from './GpsSignalHint.tsx'
|
||||
import LiveCameraCapture from './LiveCameraCapture.tsx'
|
||||
import LiveVoiceCapture from './LiveVoiceCapture.tsx'
|
||||
import VoiceMemoPlayer from './VoiceMemoPlayer.tsx'
|
||||
import EventRemarksCell from './EventRemarksCell.tsx'
|
||||
import { saveEntryPhoto, deleteEntryPhoto } from '../services/photoAttachments.js'
|
||||
import { saveEntryVoiceMemo, deleteEntryVoiceMemo } from '../services/voiceAttachments.js'
|
||||
import { blobToCompressedJpegDataUrl } from '../utils/imageCompress.js'
|
||||
@@ -836,13 +834,46 @@ export default function LiveLogView({
|
||||
void (async () => {
|
||||
try {
|
||||
const audioDataUrl = await blobToAudioDataUrl(blob)
|
||||
|
||||
let transcriptionText = ''
|
||||
let transcribed = true
|
||||
let transcriptionError = false
|
||||
|
||||
try {
|
||||
const controller = new AbortController()
|
||||
const timeoutId = setTimeout(() => controller.abort(), 4000)
|
||||
|
||||
const res = await fetch('/api/ai/transcribe', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ audioDataUrl }),
|
||||
signal: controller.signal
|
||||
})
|
||||
clearTimeout(timeoutId)
|
||||
if (!res.ok) throw new Error(`Status ${res.status}`)
|
||||
const data = await res.json()
|
||||
transcriptionText = (data.text || '').trim()
|
||||
} catch (err) {
|
||||
console.warn('[LiveLogView] Automatic transcription failed or timed out:', err)
|
||||
transcriptionError = true
|
||||
transcribed = false
|
||||
}
|
||||
|
||||
let finalCaption = caption
|
||||
if (transcriptionText) {
|
||||
finalCaption = caption
|
||||
? `${caption}\n(Transkript: ${transcriptionText})`
|
||||
: transcriptionText
|
||||
}
|
||||
|
||||
const voiceId = await saveEntryVoiceMemo({
|
||||
logbookId,
|
||||
entryId,
|
||||
audioDataUrl,
|
||||
mimeType,
|
||||
durationSec,
|
||||
caption,
|
||||
caption: finalCaption,
|
||||
transcribed,
|
||||
analyticsContext: 'live_log'
|
||||
})
|
||||
await appendQuickEvent(logbookId, entryId, {
|
||||
@@ -854,6 +885,10 @@ export default function LiveLogView({
|
||||
setVoiceCaption('')
|
||||
showUndo('voice')
|
||||
trackPlausibleEvent(PlausibleEvents.LIVE_LOG_EVENT_LOGGED, { action: 'voice' })
|
||||
|
||||
if (transcriptionError) {
|
||||
void showAlert(t('logs.live_voice_transcribe_failed'), t('logs.live_voice_btn'))
|
||||
}
|
||||
} catch (err: unknown) {
|
||||
console.error('Live log voice save failed:', err)
|
||||
const msg = err instanceof Error && err.message === 'VOICE_MEMO_TOO_LARGE'
|
||||
@@ -1225,12 +1260,6 @@ export default function LiveLogView({
|
||||
) : (
|
||||
<ol className="live-log-stream">
|
||||
{events.map((event, index) => {
|
||||
const voiceId = parseLiveVoiceRemark(event.remarks.trim())
|
||||
const voicePreloaded = voiceId ? voiceMemoLookup.get(voiceId) : undefined
|
||||
let summary = formatEventSummary(event, t)
|
||||
if (voiceId && voicePreloaded?.caption) {
|
||||
summary = t('logs.live_voice_entry', { caption: voicePreloaded.caption })
|
||||
}
|
||||
return (
|
||||
<li key={`${event.time}-${index}`} className="live-log-entry">
|
||||
<time className="live-log-time">{event.time}</time>
|
||||
@@ -1240,15 +1269,12 @@ export default function LiveLogView({
|
||||
size={24}
|
||||
/>
|
||||
<div className="live-log-summary-block">
|
||||
<span className="live-log-summary">{summary}</span>
|
||||
{voiceId && (
|
||||
<VoiceMemoPlayer
|
||||
audioId={voiceId}
|
||||
logbookId={logbookId}
|
||||
preloaded={voicePreloaded}
|
||||
compact
|
||||
/>
|
||||
)}
|
||||
<EventRemarksCell
|
||||
event={event}
|
||||
logbookId={logbookId}
|
||||
voiceMemoLookup={voiceMemoLookup}
|
||||
readOnly={false}
|
||||
/>
|
||||
</div>
|
||||
</li>
|
||||
)
|
||||
|
||||
@@ -1909,6 +1909,7 @@ export default function LogEntryEditor({
|
||||
event={ev}
|
||||
logbookId={logbookId}
|
||||
voiceMemoLookup={voiceMemoLookup}
|
||||
readOnly={readOnly}
|
||||
/>
|
||||
</td>
|
||||
{!readOnly && (
|
||||
|
||||
@@ -11,6 +11,7 @@ export interface PreloadedVoiceMemo {
|
||||
mimeType?: string
|
||||
durationSec?: number
|
||||
caption?: string
|
||||
transcribed?: boolean
|
||||
}
|
||||
|
||||
interface VoiceMemoPlayerProps {
|
||||
|
||||
Reference in New Issue
Block a user