diff --git a/HotKeet/src/audio.rs b/HotKeet/src/audio.rs index 2228555..62267d2 100644 --- a/HotKeet/src/audio.rs +++ b/HotKeet/src/audio.rs @@ -10,19 +10,16 @@ const TARGET_CHANNELS: u16 = 1; /// Bereitet eine Audio-Datei für die Transkription vor. /// Gibt den Pfad zu einer temporären WAV-Datei zurück (16 kHz, Mono, 16-bit). /// Der Aufrufer sollte die Temp-Datei nach der Transkription löschen. -pub fn prepare_for_transcription(path: &Path) -> Result { +pub fn prepare_for_transcription( + path: &Path, + debug_logging: bool, +) -> Result { let ext = path .extension() .and_then(|e| e.to_str()) .map(|s| s.to_lowercase()) .unwrap_or_default(); - let samples = match ext.as_str() { - "wav" => decode_wav(path)?, - "mp3" => decode_mp3(path)?, - _ => return Err(format!("Unsupported format: .{} (use .wav or .mp3)", ext)), - }; - let temp_path = std::env::temp_dir().join(format!( "hotkeet-transcribe-{}.wav", std::time::SystemTime::now() @@ -31,21 +28,176 @@ pub fn prepare_for_transcription(path: &Path) -> Result { + // ffmpeg erzeugt WAV, das transcribe-rs zuverlässig akzeptiert (vermeidet ORT-Fehler) + if let Ok(()) = convert_with_ffmpeg(path, &temp_path, debug_logging) { + if debug_logging { + let size = std::fs::metadata(&temp_path).map(|m| m.len()).unwrap_or(0); + crate::transcription_log::log( + &format!("ffmpeg OK: {} -> {} ({} bytes)", path.display(), temp_path.display(), size), + true, + ); + } + } else { + if debug_logging { + crate::transcription_log::log("ffmpeg failed, fallback to native decode", true); + } + // Fallback: native Dekodierung + let samples = match ext.as_str() { + "wav" => decode_wav(path)?, + "mp3" => decode_mp3(path)?, + _ => unreachable!(), + }; + if samples.is_empty() { + return Err("Audio file is empty or could not be decoded. Install ffmpeg for better compatibility.".to_string()); + } + if debug_logging { + crate::transcription_log::log( + &format!("native decode OK: {} samples -> {}", samples.len(), temp_path.display()), + true, + ); + } + write_wav(&temp_path, &samples)?; + } + } + _ => return Err(format!("Unsupported format: .{} (use .wav or .mp3)", ext)), + } + Ok(temp_path) } +/// Maximale Chunk-Dauer in Sekunden (Parakeet/ORT hat Probleme mit langen Dateien). +pub const MAX_CHUNK_SEC: u32 = 45; + +/// Teilt eine WAV-Datei in Chunks für die Transkription. +/// Gibt Pfade zu temporären Chunk-WAVs zurück. Aufrufer muss diese löschen. +pub fn split_wav_into_chunks( + wav_path: &Path, + max_sec: u32, + debug_logging: bool, +) -> Result, String> { + let reader = hound::WavReader::open(wav_path).map_err(|e| format!("WAV read: {}", e))?; + let spec = reader.spec(); + if spec.sample_rate != TARGET_SAMPLE_RATE || spec.channels != TARGET_CHANNELS { + return Err(format!( + "WAV must be {} Hz mono, got {} Hz {} ch", + TARGET_SAMPLE_RATE, spec.sample_rate, spec.channels + )); + } + + let samples: Vec = match spec.sample_format { + hound::SampleFormat::Int => reader + .into_samples::() + .filter_map(|s| s.ok()) + .map(|s| s.clamp(-32768, 32767) as i16) + .collect(), + hound::SampleFormat::Float => reader + .into_samples::() + .filter_map(|s| s.ok()) + .map(|f| (f.clamp(-1.0, 1.0) * 32767.0) as i16) + .collect(), + }; + + let samples_per_chunk = (max_sec as usize) * (TARGET_SAMPLE_RATE as usize); + if samples.len() <= samples_per_chunk { + return Ok(vec![wav_path.to_path_buf()]); + } + + let mut paths = Vec::new(); + let base = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + + for (i, chunk) in samples.chunks(samples_per_chunk).enumerate() { + let path = std::env::temp_dir().join(format!("hotkeet-chunk-{}-{}.wav", base, i)); + write_wav(&path, chunk)?; + paths.push(path); + } + + if debug_logging { + crate::transcription_log::log( + &format!("split into {} chunks ({} sec each)", paths.len(), max_sec), + true, + ); + } + + Ok(paths) +} + +/// Konvertiert Audio mit ffmpeg zu 16 kHz Mono WAV. +fn convert_with_ffmpeg(input: &Path, output: &Path, debug_logging: bool) -> Result<(), String> { + let output = output.to_path_buf(); + let input = input.to_path_buf(); + + let mut cmd = std::process::Command::new("ffmpeg"); + cmd.arg("-y") + .arg("-i") + .arg(&input) + .arg("-acodec") + .arg("pcm_s16le") + .arg("-ar") + .arg(TARGET_SAMPLE_RATE.to_string()) + .arg("-ac") + .arg(TARGET_CHANNELS.to_string()) + .arg("-f") + .arg("wav") + .arg(&output); + + #[cfg(windows)] + { + use std::os::windows::process::CommandExt; + const CREATE_NO_WINDOW: u32 = 0x0800_0000; + cmd.creation_flags(CREATE_NO_WINDOW); + } + + if debug_logging { + crate::transcription_log::log( + &format!("ffmpeg -i {} -> {}", input.display(), output.display()), + true, + ); + } + + let output_result = cmd.output().map_err(|e| format!("ffmpeg: {}", e))?; + + if !output_result.status.success() { + let stderr = String::from_utf8_lossy(&output_result.stderr); + if debug_logging { + crate::transcription_log::log(&format!("ffmpeg stderr: {}", stderr), true); + } + return Err(format!("ffmpeg failed: {}", stderr.lines().last().unwrap_or(""))); + } + + if !output.exists() { + return Err("ffmpeg did not create output file".to_string()); + } + + Ok(()) +} + fn decode_wav(path: &Path) -> Result, String> { let reader = hound::WavReader::open(path).map_err(|e| format!("WAV read: {}", e))?; let spec = reader.spec(); let sample_rate = spec.sample_rate; let channels = spec.channels; + // i32 für 16/24/32-bit Int, f32 für Float let samples: Vec = match spec.sample_format { - hound::SampleFormat::Int => reader - .into_samples::() - .filter_map(|s| s.ok()) - .collect(), + hound::SampleFormat::Int => { + let raw: Vec = reader + .into_samples::() + .filter_map(|s| s.ok()) + .collect(); + // Skalieren auf i16 (24/32-bit haben größeren Wertebereich) + let max_val = 1 << (spec.bits_per_sample.saturating_sub(1)); + raw.into_iter() + .map(|s| { + let scaled = (s as f64 / max_val as f64).clamp(-1.0, 1.0) * 32767.0; + scaled as i16 + }) + .collect() + } hound::SampleFormat::Float => reader .into_samples::() .filter_map(|s| s.ok()) diff --git a/HotKeet/src/main.rs b/HotKeet/src/main.rs index db3da17..9a902a8 100644 --- a/HotKeet/src/main.rs +++ b/HotKeet/src/main.rs @@ -60,6 +60,7 @@ fn show_main_window() { mod audio; mod config; mod hotkey; +mod transcription_log; mod paste; mod recording; mod transcription; @@ -164,9 +165,8 @@ impl eframe::App for AppState { let cfg = self.config.read().unwrap().clone(); let status = self.status.clone(); let status_detail = self.status_detail.clone(); - let paste_tx = self.paste_tx.clone(); std::thread::spawn(move || { - run_file_transcription(path, cfg, status, status_detail, paste_tx); + run_file_transcription(path, cfg, status, status_detail); }); break; // Nur eine Datei pro Drop verarbeiten } @@ -443,7 +443,7 @@ fn main() -> eframe::Result<()> { }; let mut viewport = egui::ViewportBuilder::default() - .with_inner_size([660.0, 520.0]) + .with_inner_size([680.0, 520.0]) .with_min_inner_size([500.0, 420.0]) .with_drag_and_drop(true); if start_minimized { @@ -484,47 +484,125 @@ fn run_file_transcription( config: DictateConfig, status: Arc>, status_detail: Arc>, - paste_tx: Sender, ) { + let debug = config.debug_logging; + if debug { + transcription_log::log(&format!("file transcription start: {}", path.display()), true); + } + set_status(&status, &status_detail, AppStatus::Transkribieren, "Converting…"); - let wav_path = match audio::prepare_for_transcription(&path) { + let wav_path = match audio::prepare_for_transcription(&path, debug) { Ok(p) => p, Err(e) => { - eprintln!("Audio prepare: {}", e); + transcription_log::log(&format!("audio prepare error: {}", e), debug); set_status(&status, &status_detail, AppStatus::Fehler, &format!("Audio: {}", e)); return; } }; - let _defer = Defer(Some(|| { - let _ = std::fs::remove_file(&wav_path); - })); + if debug { + let size = std::fs::metadata(&wav_path).map(|m| m.len()).unwrap_or(0); + transcription_log::log( + &format!("wav ready: {} ({} bytes)", wav_path.display(), size), + true, + ); + } - set_status(&status, &status_detail, AppStatus::Transkribieren, "Transcribing…"); - - let text = match transcribe( - &config.parakeet_cli_path, - &config.model_path, - &wav_path, - ) { - Ok(t) => t, + let chunks = match audio::split_wav_into_chunks(&wav_path, audio::MAX_CHUNK_SEC, debug) { + Ok(c) => c, Err(e) => { - eprintln!("Transcription: {}", e); - set_status(&status, &status_detail, AppStatus::Fehler, &format!("Transcription: {}", e)); + transcription_log::log(&format!("split error: {}", e), debug); + set_status(&status, &status_detail, AppStatus::Fehler, &format!("Audio: {}", e)); return; } }; - let req = PasteRequest { - text, - method: config.paste_method_enum(), - target_hwnd: None, - debug_logging: config.debug_logging, - }; - if paste_tx.send(req).is_err() { - eprintln!("Paste channel closed"); - set_status(&status, &status_detail, AppStatus::Fehler, "Paste channel failed"); + let mut paths_to_cleanup = vec![wav_path.clone()]; + for p in &chunks { + if p != &wav_path { + paths_to_cleanup.push(p.clone()); + } + } + let _defer = Defer(Some(move || { + for p in paths_to_cleanup { + let _ = std::fs::remove_file(&p); + } + })); + + set_status(&status, &status_detail, AppStatus::Transkribieren, "Transcribing…"); + + let mut text_parts = Vec::new(); + for (i, chunk_path) in chunks.iter().enumerate() { + if debug && chunks.len() > 1 { + transcription_log::log(&format!("chunk {}/{}", i + 1, chunks.len()), true); + } + match transcribe( + &config.parakeet_cli_path, + &config.model_path, + chunk_path, + debug, + ) { + Ok(t) if !t.is_empty() => text_parts.push(t), + Ok(_) => {} + Err(e) => { + transcription_log::log(&format!("transcription error: {}", e), debug); + set_status(&status, &status_detail, AppStatus::Fehler, &format!("Transcription: {}", e)); + return; + } + } + } + let text = text_parts.join(" "); + + if debug { + transcription_log::log(&format!("transcription OK: {} chars", text.len()), true); + } + + // Save-Dialog: Benutzer wählt Zieldatei + let default_name = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("transcription") + .to_string(); + let mut dialog = rfd::FileDialog::new() + .set_title("Save transcription as") + .add_filter("Text", &["txt"]) + .set_file_name(&format!("{}.txt", default_name)); + if let Some(dir) = path.parent() { + dialog = dialog.set_directory(dir); + } + let save_path = dialog.save_file(); + + match save_path { + Some(p) => { + match std::fs::write(&p, &text) { + Ok(()) => { + let msg = format!("Saved to {}", p.display()); + set_status(&status, &status_detail, AppStatus::Fertig, &msg); + if debug { + transcription_log::log(&format!("saved to {}", p.display()), true); + } + let status_reset = status.clone(); + let detail_reset = status_detail.clone(); + std::thread::spawn(move || { + std::thread::sleep(std::time::Duration::from_secs(5)); + let _ = status_reset.write().map(|mut w| *w = AppStatus::Bereit); + let _ = detail_reset.write().map(|mut w| *w = String::new()); + }); + } + Err(e) => { + set_status( + &status, + &status_detail, + AppStatus::Fehler, + &format!("Write error: {}", e), + ); + } + } + } + None => { + set_status(&status, &status_detail, AppStatus::Bereit, "Save cancelled"); + } } } @@ -624,10 +702,11 @@ fn run_recording( &config.parakeet_cli_path, &config.model_path, &wav_path, + config.debug_logging, ) { Ok(t) => t, Err(e) => { - eprintln!("Transkription: {}", e); + transcription_log::log(&format!("transcription error: {}", e), config.debug_logging); let _ = std::fs::remove_file(&wav_path); set_status(&status, &status_detail, AppStatus::Fehler, &format!("Transcription: {}", e)); return; diff --git a/HotKeet/src/transcription.rs b/HotKeet/src/transcription.rs index 1c22963..cb7159c 100644 --- a/HotKeet/src/transcription.rs +++ b/HotKeet/src/transcription.rs @@ -23,6 +23,7 @@ pub fn transcribe( parakeet_cli_path: &str, model_dir: &str, wav_path: &Path, + debug_logging: bool, ) -> Result { let cli = if parakeet_cli_path.is_empty() { "parakeet-cli" @@ -36,6 +37,13 @@ pub fn transcribe( model_dir }; + if debug_logging { + crate::transcription_log::log( + &format!("parakeet-cli \"{}\" \"{}\"", model, wav_path.display()), + true, + ); + } + let mut cmd = Command::new(cli); cmd.arg(model).arg(wav_path); #[cfg(windows)] @@ -47,6 +55,9 @@ pub fn transcribe( if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); + if debug_logging { + crate::transcription_log::log(&format!("parakeet-cli stderr: {}", stderr), true); + } return Err(format!("parakeet-cli error: {}", stderr)); } diff --git a/HotKeet/src/transcription_log.rs b/HotKeet/src/transcription_log.rs new file mode 100644 index 0000000..35818d6 --- /dev/null +++ b/HotKeet/src/transcription_log.rs @@ -0,0 +1,31 @@ +//! Debug-Logging für Transkription (Konsole + Datei). + +use std::io::Write; + +/// Schreibt bei aktiviertem Logging in Konsole und transcription-debug.log. +pub fn log(msg: &str, enabled: bool) { + if !enabled { + return; + } + eprintln!("[transcription] {}", msg); + let log_dir = crate::config::DictateConfig::config_path() + .parent() + .map(|p| p.to_path_buf()) + .unwrap_or_else(|| std::path::PathBuf::from(".")); + let path = log_dir.join("transcription-debug.log"); + if let Some(parent) = path.parent() { + let _ = std::fs::create_dir_all(parent); + } + if let Ok(mut f) = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(&path) + { + let _ = writeln!( + f, + "{} {}", + chrono::Local::now().format("%H:%M:%S%.3f"), + msg + ); + } +} diff --git a/HotKeet/src/ui.rs b/HotKeet/src/ui.rs index ce2eeac..7928d38 100644 --- a/HotKeet/src/ui.rs +++ b/HotKeet/src/ui.rs @@ -302,7 +302,7 @@ impl SettingsApp { ui.checkbox(&mut self.config.start_minimized, "Minimize on start"); ui.checkbox(&mut self.config.minimize_to_tray, "Minimize to tray"); ui.checkbox(&mut self.config.sound_on_start_end, "Audio feedback on record start/end"); - ui.checkbox(&mut self.config.debug_logging, "Debug logging (paste-debug.log, console)"); + ui.checkbox(&mut self.config.debug_logging, "Debug logging (paste-debug.log, transcription-debug.log, console)"); ui.add_space(16.0);