File transcription: save to text file, chunking, ffmpeg, logging
- Transcribe dropped files to a text file via a Save dialog (no paste)
- Default save folder: source folder of the audio file
- Chunk long audio (45 s) to avoid ORT errors with Parakeet
- Prefer ffmpeg for WAV/MP3 conversion (better compatibility)
- Add transcription-debug.log for debugging
- Support 24/32-bit WAV, ffmpeg fallback for exotic formats

Made-with: Cursor
This commit is contained in:
@@ -10,19 +10,16 @@ const TARGET_CHANNELS: u16 = 1;
|
||||
/// Bereitet eine Audio-Datei für die Transkription vor.
|
||||
/// Gibt den Pfad zu einer temporären WAV-Datei zurück (16 kHz, Mono, 16-bit).
|
||||
/// Der Aufrufer sollte die Temp-Datei nach der Transkription löschen.
|
||||
pub fn prepare_for_transcription(path: &Path) -> Result<std::path::PathBuf, String> {
|
||||
pub fn prepare_for_transcription(
|
||||
path: &Path,
|
||||
debug_logging: bool,
|
||||
) -> Result<std::path::PathBuf, String> {
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.map(|s| s.to_lowercase())
|
||||
.unwrap_or_default();
|
||||
|
||||
let samples = match ext.as_str() {
|
||||
"wav" => decode_wav(path)?,
|
||||
"mp3" => decode_mp3(path)?,
|
||||
_ => return Err(format!("Unsupported format: .{} (use .wav or .mp3)", ext)),
|
||||
};
|
||||
|
||||
let temp_path = std::env::temp_dir().join(format!(
|
||||
"hotkeet-transcribe-{}.wav",
|
||||
std::time::SystemTime::now()
|
||||
@@ -31,21 +28,176 @@ pub fn prepare_for_transcription(path: &Path) -> Result<std::path::PathBuf, Stri
|
||||
.unwrap_or(0)
|
||||
));
|
||||
|
||||
write_wav(&temp_path, &samples)?;
|
||||
match ext.as_str() {
|
||||
"wav" | "mp3" => {
|
||||
// ffmpeg erzeugt WAV, das transcribe-rs zuverlässig akzeptiert (vermeidet ORT-Fehler)
|
||||
if let Ok(()) = convert_with_ffmpeg(path, &temp_path, debug_logging) {
|
||||
if debug_logging {
|
||||
let size = std::fs::metadata(&temp_path).map(|m| m.len()).unwrap_or(0);
|
||||
crate::transcription_log::log(
|
||||
&format!("ffmpeg OK: {} -> {} ({} bytes)", path.display(), temp_path.display(), size),
|
||||
true,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
if debug_logging {
|
||||
crate::transcription_log::log("ffmpeg failed, fallback to native decode", true);
|
||||
}
|
||||
// Fallback: native Dekodierung
|
||||
let samples = match ext.as_str() {
|
||||
"wav" => decode_wav(path)?,
|
||||
"mp3" => decode_mp3(path)?,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
if samples.is_empty() {
|
||||
return Err("Audio file is empty or could not be decoded. Install ffmpeg for better compatibility.".to_string());
|
||||
}
|
||||
if debug_logging {
|
||||
crate::transcription_log::log(
|
||||
&format!("native decode OK: {} samples -> {}", samples.len(), temp_path.display()),
|
||||
true,
|
||||
);
|
||||
}
|
||||
write_wav(&temp_path, &samples)?;
|
||||
}
|
||||
}
|
||||
_ => return Err(format!("Unsupported format: .{} (use .wav or .mp3)", ext)),
|
||||
}
|
||||
|
||||
Ok(temp_path)
|
||||
}
|
||||
|
||||
/// Maximum chunk duration in seconds (Parakeet/ORT has problems with long files).
|
||||
pub const MAX_CHUNK_SEC: u32 = 45;
|
||||
|
||||
/// Teilt eine WAV-Datei in Chunks für die Transkription.
|
||||
/// Gibt Pfade zu temporären Chunk-WAVs zurück. Aufrufer muss diese löschen.
|
||||
pub fn split_wav_into_chunks(
|
||||
wav_path: &Path,
|
||||
max_sec: u32,
|
||||
debug_logging: bool,
|
||||
) -> Result<Vec<std::path::PathBuf>, String> {
|
||||
let reader = hound::WavReader::open(wav_path).map_err(|e| format!("WAV read: {}", e))?;
|
||||
let spec = reader.spec();
|
||||
if spec.sample_rate != TARGET_SAMPLE_RATE || spec.channels != TARGET_CHANNELS {
|
||||
return Err(format!(
|
||||
"WAV must be {} Hz mono, got {} Hz {} ch",
|
||||
TARGET_SAMPLE_RATE, spec.sample_rate, spec.channels
|
||||
));
|
||||
}
|
||||
|
||||
let samples: Vec<i16> = match spec.sample_format {
|
||||
hound::SampleFormat::Int => reader
|
||||
.into_samples::<i32>()
|
||||
.filter_map(|s| s.ok())
|
||||
.map(|s| s.clamp(-32768, 32767) as i16)
|
||||
.collect(),
|
||||
hound::SampleFormat::Float => reader
|
||||
.into_samples::<f32>()
|
||||
.filter_map(|s| s.ok())
|
||||
.map(|f| (f.clamp(-1.0, 1.0) * 32767.0) as i16)
|
||||
.collect(),
|
||||
};
|
||||
|
||||
let samples_per_chunk = (max_sec as usize) * (TARGET_SAMPLE_RATE as usize);
|
||||
if samples.len() <= samples_per_chunk {
|
||||
return Ok(vec![wav_path.to_path_buf()]);
|
||||
}
|
||||
|
||||
let mut paths = Vec::new();
|
||||
let base = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.map(|d| d.as_nanos())
|
||||
.unwrap_or(0);
|
||||
|
||||
for (i, chunk) in samples.chunks(samples_per_chunk).enumerate() {
|
||||
let path = std::env::temp_dir().join(format!("hotkeet-chunk-{}-{}.wav", base, i));
|
||||
write_wav(&path, chunk)?;
|
||||
paths.push(path);
|
||||
}
|
||||
|
||||
if debug_logging {
|
||||
crate::transcription_log::log(
|
||||
&format!("split into {} chunks ({} sec each)", paths.len(), max_sec),
|
||||
true,
|
||||
);
|
||||
}
|
||||
|
||||
Ok(paths)
|
||||
}
|
||||
|
||||
/// Konvertiert Audio mit ffmpeg zu 16 kHz Mono WAV.
|
||||
fn convert_with_ffmpeg(input: &Path, output: &Path, debug_logging: bool) -> Result<(), String> {
|
||||
let output = output.to_path_buf();
|
||||
let input = input.to_path_buf();
|
||||
|
||||
let mut cmd = std::process::Command::new("ffmpeg");
|
||||
cmd.arg("-y")
|
||||
.arg("-i")
|
||||
.arg(&input)
|
||||
.arg("-acodec")
|
||||
.arg("pcm_s16le")
|
||||
.arg("-ar")
|
||||
.arg(TARGET_SAMPLE_RATE.to_string())
|
||||
.arg("-ac")
|
||||
.arg(TARGET_CHANNELS.to_string())
|
||||
.arg("-f")
|
||||
.arg("wav")
|
||||
.arg(&output);
|
||||
|
||||
#[cfg(windows)]
|
||||
{
|
||||
use std::os::windows::process::CommandExt;
|
||||
const CREATE_NO_WINDOW: u32 = 0x0800_0000;
|
||||
cmd.creation_flags(CREATE_NO_WINDOW);
|
||||
}
|
||||
|
||||
if debug_logging {
|
||||
crate::transcription_log::log(
|
||||
&format!("ffmpeg -i {} -> {}", input.display(), output.display()),
|
||||
true,
|
||||
);
|
||||
}
|
||||
|
||||
let output_result = cmd.output().map_err(|e| format!("ffmpeg: {}", e))?;
|
||||
|
||||
if !output_result.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output_result.stderr);
|
||||
if debug_logging {
|
||||
crate::transcription_log::log(&format!("ffmpeg stderr: {}", stderr), true);
|
||||
}
|
||||
return Err(format!("ffmpeg failed: {}", stderr.lines().last().unwrap_or("")));
|
||||
}
|
||||
|
||||
if !output.exists() {
|
||||
return Err("ffmpeg did not create output file".to_string());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn decode_wav(path: &Path) -> Result<Vec<i16>, String> {
|
||||
let reader = hound::WavReader::open(path).map_err(|e| format!("WAV read: {}", e))?;
|
||||
let spec = reader.spec();
|
||||
let sample_rate = spec.sample_rate;
|
||||
let channels = spec.channels;
|
||||
|
||||
// i32 für 16/24/32-bit Int, f32 für Float
|
||||
let samples: Vec<i16> = match spec.sample_format {
|
||||
hound::SampleFormat::Int => reader
|
||||
.into_samples::<i16>()
|
||||
.filter_map(|s| s.ok())
|
||||
.collect(),
|
||||
hound::SampleFormat::Int => {
|
||||
let raw: Vec<i32> = reader
|
||||
.into_samples::<i32>()
|
||||
.filter_map(|s| s.ok())
|
||||
.collect();
|
||||
// Skalieren auf i16 (24/32-bit haben größeren Wertebereich)
|
||||
let max_val = 1 << (spec.bits_per_sample.saturating_sub(1));
|
||||
raw.into_iter()
|
||||
.map(|s| {
|
||||
let scaled = (s as f64 / max_val as f64).clamp(-1.0, 1.0) * 32767.0;
|
||||
scaled as i16
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
hound::SampleFormat::Float => reader
|
||||
.into_samples::<f32>()
|
||||
.filter_map(|s| s.ok())
|
||||
|
||||
@@ -60,6 +60,7 @@ fn show_main_window() {
|
||||
mod audio;
|
||||
mod config;
|
||||
mod hotkey;
|
||||
mod transcription_log;
|
||||
mod paste;
|
||||
mod recording;
|
||||
mod transcription;
|
||||
@@ -164,9 +165,8 @@ impl eframe::App for AppState {
|
||||
let cfg = self.config.read().unwrap().clone();
|
||||
let status = self.status.clone();
|
||||
let status_detail = self.status_detail.clone();
|
||||
let paste_tx = self.paste_tx.clone();
|
||||
std::thread::spawn(move || {
|
||||
run_file_transcription(path, cfg, status, status_detail, paste_tx);
|
||||
run_file_transcription(path, cfg, status, status_detail);
|
||||
});
|
||||
break; // Nur eine Datei pro Drop verarbeiten
|
||||
}
|
||||
@@ -443,7 +443,7 @@ fn main() -> eframe::Result<()> {
|
||||
};
|
||||
|
||||
let mut viewport = egui::ViewportBuilder::default()
|
||||
.with_inner_size([660.0, 520.0])
|
||||
.with_inner_size([680.0, 520.0])
|
||||
.with_min_inner_size([500.0, 420.0])
|
||||
.with_drag_and_drop(true);
|
||||
if start_minimized {
|
||||
@@ -484,47 +484,125 @@ fn run_file_transcription(
|
||||
config: DictateConfig,
|
||||
status: Arc<std::sync::RwLock<AppStatus>>,
|
||||
status_detail: Arc<std::sync::RwLock<String>>,
|
||||
paste_tx: Sender<PasteRequest>,
|
||||
) {
|
||||
let debug = config.debug_logging;
|
||||
if debug {
|
||||
transcription_log::log(&format!("file transcription start: {}", path.display()), true);
|
||||
}
|
||||
|
||||
set_status(&status, &status_detail, AppStatus::Transkribieren, "Converting…");
|
||||
|
||||
let wav_path = match audio::prepare_for_transcription(&path) {
|
||||
let wav_path = match audio::prepare_for_transcription(&path, debug) {
|
||||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
eprintln!("Audio prepare: {}", e);
|
||||
transcription_log::log(&format!("audio prepare error: {}", e), debug);
|
||||
set_status(&status, &status_detail, AppStatus::Fehler, &format!("Audio: {}", e));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let _defer = Defer(Some(|| {
|
||||
let _ = std::fs::remove_file(&wav_path);
|
||||
}));
|
||||
if debug {
|
||||
let size = std::fs::metadata(&wav_path).map(|m| m.len()).unwrap_or(0);
|
||||
transcription_log::log(
|
||||
&format!("wav ready: {} ({} bytes)", wav_path.display(), size),
|
||||
true,
|
||||
);
|
||||
}
|
||||
|
||||
set_status(&status, &status_detail, AppStatus::Transkribieren, "Transcribing…");
|
||||
|
||||
let text = match transcribe(
|
||||
&config.parakeet_cli_path,
|
||||
&config.model_path,
|
||||
&wav_path,
|
||||
) {
|
||||
Ok(t) => t,
|
||||
let chunks = match audio::split_wav_into_chunks(&wav_path, audio::MAX_CHUNK_SEC, debug) {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
eprintln!("Transcription: {}", e);
|
||||
set_status(&status, &status_detail, AppStatus::Fehler, &format!("Transcription: {}", e));
|
||||
transcription_log::log(&format!("split error: {}", e), debug);
|
||||
set_status(&status, &status_detail, AppStatus::Fehler, &format!("Audio: {}", e));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let req = PasteRequest {
|
||||
text,
|
||||
method: config.paste_method_enum(),
|
||||
target_hwnd: None,
|
||||
debug_logging: config.debug_logging,
|
||||
};
|
||||
if paste_tx.send(req).is_err() {
|
||||
eprintln!("Paste channel closed");
|
||||
set_status(&status, &status_detail, AppStatus::Fehler, "Paste channel failed");
|
||||
let mut paths_to_cleanup = vec![wav_path.clone()];
|
||||
for p in &chunks {
|
||||
if p != &wav_path {
|
||||
paths_to_cleanup.push(p.clone());
|
||||
}
|
||||
}
|
||||
let _defer = Defer(Some(move || {
|
||||
for p in paths_to_cleanup {
|
||||
let _ = std::fs::remove_file(&p);
|
||||
}
|
||||
}));
|
||||
|
||||
set_status(&status, &status_detail, AppStatus::Transkribieren, "Transcribing…");
|
||||
|
||||
let mut text_parts = Vec::new();
|
||||
for (i, chunk_path) in chunks.iter().enumerate() {
|
||||
if debug && chunks.len() > 1 {
|
||||
transcription_log::log(&format!("chunk {}/{}", i + 1, chunks.len()), true);
|
||||
}
|
||||
match transcribe(
|
||||
&config.parakeet_cli_path,
|
||||
&config.model_path,
|
||||
chunk_path,
|
||||
debug,
|
||||
) {
|
||||
Ok(t) if !t.is_empty() => text_parts.push(t),
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
transcription_log::log(&format!("transcription error: {}", e), debug);
|
||||
set_status(&status, &status_detail, AppStatus::Fehler, &format!("Transcription: {}", e));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
let text = text_parts.join(" ");
|
||||
|
||||
if debug {
|
||||
transcription_log::log(&format!("transcription OK: {} chars", text.len()), true);
|
||||
}
|
||||
|
||||
// Save-Dialog: Benutzer wählt Zieldatei
|
||||
let default_name = path
|
||||
.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("transcription")
|
||||
.to_string();
|
||||
let mut dialog = rfd::FileDialog::new()
|
||||
.set_title("Save transcription as")
|
||||
.add_filter("Text", &["txt"])
|
||||
.set_file_name(&format!("{}.txt", default_name));
|
||||
if let Some(dir) = path.parent() {
|
||||
dialog = dialog.set_directory(dir);
|
||||
}
|
||||
let save_path = dialog.save_file();
|
||||
|
||||
match save_path {
|
||||
Some(p) => {
|
||||
match std::fs::write(&p, &text) {
|
||||
Ok(()) => {
|
||||
let msg = format!("Saved to {}", p.display());
|
||||
set_status(&status, &status_detail, AppStatus::Fertig, &msg);
|
||||
if debug {
|
||||
transcription_log::log(&format!("saved to {}", p.display()), true);
|
||||
}
|
||||
let status_reset = status.clone();
|
||||
let detail_reset = status_detail.clone();
|
||||
std::thread::spawn(move || {
|
||||
std::thread::sleep(std::time::Duration::from_secs(5));
|
||||
let _ = status_reset.write().map(|mut w| *w = AppStatus::Bereit);
|
||||
let _ = detail_reset.write().map(|mut w| *w = String::new());
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
set_status(
|
||||
&status,
|
||||
&status_detail,
|
||||
AppStatus::Fehler,
|
||||
&format!("Write error: {}", e),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
set_status(&status, &status_detail, AppStatus::Bereit, "Save cancelled");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -624,10 +702,11 @@ fn run_recording(
|
||||
&config.parakeet_cli_path,
|
||||
&config.model_path,
|
||||
&wav_path,
|
||||
config.debug_logging,
|
||||
) {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
eprintln!("Transkription: {}", e);
|
||||
transcription_log::log(&format!("transcription error: {}", e), config.debug_logging);
|
||||
let _ = std::fs::remove_file(&wav_path);
|
||||
set_status(&status, &status_detail, AppStatus::Fehler, &format!("Transcription: {}", e));
|
||||
return;
|
||||
|
||||
@@ -23,6 +23,7 @@ pub fn transcribe(
|
||||
parakeet_cli_path: &str,
|
||||
model_dir: &str,
|
||||
wav_path: &Path,
|
||||
debug_logging: bool,
|
||||
) -> Result<String, String> {
|
||||
let cli = if parakeet_cli_path.is_empty() {
|
||||
"parakeet-cli"
|
||||
@@ -36,6 +37,13 @@ pub fn transcribe(
|
||||
model_dir
|
||||
};
|
||||
|
||||
if debug_logging {
|
||||
crate::transcription_log::log(
|
||||
&format!("parakeet-cli \"{}\" \"{}\"", model, wav_path.display()),
|
||||
true,
|
||||
);
|
||||
}
|
||||
|
||||
let mut cmd = Command::new(cli);
|
||||
cmd.arg(model).arg(wav_path);
|
||||
#[cfg(windows)]
|
||||
@@ -47,6 +55,9 @@ pub fn transcribe(
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
if debug_logging {
|
||||
crate::transcription_log::log(&format!("parakeet-cli stderr: {}", stderr), true);
|
||||
}
|
||||
return Err(format!("parakeet-cli error: {}", stderr));
|
||||
}
|
||||
|
||||
|
||||
31
HotKeet/src/transcription_log.rs
Normal file
31
HotKeet/src/transcription_log.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
//! Debug-Logging für Transkription (Konsole + Datei).
|
||||
|
||||
use std::io::Write;
|
||||
|
||||
/// Schreibt bei aktiviertem Logging in Konsole und transcription-debug.log.
|
||||
pub fn log(msg: &str, enabled: bool) {
|
||||
if !enabled {
|
||||
return;
|
||||
}
|
||||
eprintln!("[transcription] {}", msg);
|
||||
let log_dir = crate::config::DictateConfig::config_path()
|
||||
.parent()
|
||||
.map(|p| p.to_path_buf())
|
||||
.unwrap_or_else(|| std::path::PathBuf::from("."));
|
||||
let path = log_dir.join("transcription-debug.log");
|
||||
if let Some(parent) = path.parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
if let Ok(mut f) = std::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&path)
|
||||
{
|
||||
let _ = writeln!(
|
||||
f,
|
||||
"{} {}",
|
||||
chrono::Local::now().format("%H:%M:%S%.3f"),
|
||||
msg
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -302,7 +302,7 @@ impl SettingsApp {
|
||||
ui.checkbox(&mut self.config.start_minimized, "Minimize on start");
|
||||
ui.checkbox(&mut self.config.minimize_to_tray, "Minimize to tray");
|
||||
ui.checkbox(&mut self.config.sound_on_start_end, "Audio feedback on record start/end");
|
||||
ui.checkbox(&mut self.config.debug_logging, "Debug logging (paste-debug.log, console)");
|
||||
ui.checkbox(&mut self.config.debug_logging, "Debug logging (paste-debug.log, transcription-debug.log, console)");
|
||||
|
||||
ui.add_space(16.0);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user