From c296e2afe905f2b330b18dee29bb3ac558f204f2 Mon Sep 17 00:00:00 2001 From: elpatron Date: Sun, 8 Mar 2026 10:28:02 +0100 Subject: [PATCH] Add vocabulary extension support in configuration and UI - Introduced a new configuration field for vocabulary extension path in DictateConfig. - Updated transcription functions to apply vocabulary extensions from the specified file. - Enhanced UI to allow users to browse and set the vocabulary extension file, with clear instructions on the format for replacement rules. --- HotKeet/src/config.rs | 5 +++ HotKeet/src/main.rs | 8 +++- HotKeet/src/ui.rs | 25 ++++++++++++ HotKeet/src/vocabulary_extension.rs | 59 +++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 HotKeet/src/vocabulary_extension.rs diff --git a/HotKeet/src/config.rs b/HotKeet/src/config.rs index 90f36d4..d772136 100644 --- a/HotKeet/src/config.rs +++ b/HotKeet/src/config.rs @@ -51,6 +51,10 @@ pub struct DictateConfig { #[serde(default)] pub model_path: String, + /// Pfad zu Ersetzungsregeln (eine Zeile: \"Quelltext→Ersetzung\" oder \"Quelltext=Ersetzung\") + #[serde(default)] + pub vocabulary_extension_path: String, + #[serde(default = "default_true")] pub start_minimized: bool, @@ -92,6 +96,7 @@ impl Default for DictateConfig { input_device_name: String::new(), parakeet_cli_path: String::new(), model_path: String::new(), + vocabulary_extension_path: String::new(), start_minimized: true, minimize_to_tray: true, paste_method: "Auto".to_string(), diff --git a/HotKeet/src/main.rs b/HotKeet/src/main.rs index 9a902a8..105c5cf 100644 --- a/HotKeet/src/main.rs +++ b/HotKeet/src/main.rs @@ -61,6 +61,7 @@ mod audio; mod config; mod hotkey; mod transcription_log; +mod vocabulary_extension; mod paste; mod recording; mod transcription; @@ -552,7 +553,8 @@ fn run_file_transcription( } } } - let text = text_parts.join(" "); + let mut text = text_parts.join(" "); + text = vocabulary_extension::apply_from_file(&text, 
&config.vocabulary_extension_path); if debug { transcription_log::log(&format!("transcription OK: {} chars", text.len()), true); @@ -698,7 +700,7 @@ fn run_recording( return; } - let text = match transcribe( + let mut text = match transcribe( &config.parakeet_cli_path, &config.model_path, &wav_path, @@ -715,6 +717,8 @@ fn run_recording( let _ = std::fs::remove_file(&wav_path); + text = vocabulary_extension::apply_from_file(&text, &config.vocabulary_extension_path); + if config.debug_logging { eprintln!( "[recording] Transkription: {} Zeichen, Text: {:?}", diff --git a/HotKeet/src/ui.rs b/HotKeet/src/ui.rs index 7928d38..b1cecf2 100644 --- a/HotKeet/src/ui.rs +++ b/HotKeet/src/ui.rs @@ -240,6 +240,31 @@ impl SettingsApp { } }); + ui.horizontal(|ui| { + ui.label("Vocabulary extension:"); + let display = if self.config.vocabulary_extension_path.is_empty() { + "(empty = no replacements)".to_string() + } else { + self.config.vocabulary_extension_path.clone() + }; + ui.label(egui::RichText::new(&display).color(egui::Color32::GRAY)); + if ui.button("Browse…").clicked() { + if let Some(p) = rfd::FileDialog::new() + .set_title("Select vocabulary extension file") + .add_filter("Text", &["txt"]) + .pick_file() + { + self.config.vocabulary_extension_path = p.display().to_string(); + self.status = "Remember to save changes.".to_string(); + } + } + if !self.config.vocabulary_extension_path.is_empty() && ui.small_button("✕").clicked() { + self.config.vocabulary_extension_path.clear(); + self.status = "Remember to save changes.".to_string(); + } + }); + ui.label(egui::RichText::new("Format: one rule per line, \"source→replacement\" or \"source=replacement\"").small().color(egui::Color32::GRAY)); + let model_path = Path::new(&self.config.model_path); let model_valid = !self.config.model_path.is_empty() && model::is_model_valid(model_path); let downloading = self.download_progress.as_ref().and_then(|p| p.read().ok()).and_then(|g| g.clone()); diff --git 
// HotKeet/src/vocabulary_extension.rs (added as a new file by this patch)
//! Vocabulary extension: replacement rules applied to transcribed text.
//! See https://deepwiki.com/altunenes/parakeet-rs/7.3-vocabulary-and-tokenization
//!
//! File format: one rule per line, written as `source→replacement` or
//! `source=replacement`. Blank lines and lines starting with `#` are ignored.

use std::path::Path;

/// Parses one line of the rules file into a `(from, to)` pair.
///
/// Returns `None` for blank lines, lines starting with `#`, lines without a
/// separator, and lines whose source side is empty after trimming.
fn parse_rule(line: &str) -> Option<(String, String)> {
    let line = line.trim();
    if line.is_empty() || line.starts_with('#') {
        return None;
    }
    // A line that contains "→" is always split at its first arrow, even when
    // "=" also occurs; "=" is only used as separator when there is no arrow.
    let (from, to) = line.split_once('→').or_else(|| line.split_once('='))?;
    let from = from.trim();
    if from.is_empty() {
        return None;
    }
    Some((from.to_string(), to.trim().to_string()))
}

/// Loads replacement rules from a file.
///
/// Returns the rules as `(from, to)` pairs sorted by the byte length of
/// `from`, longest first; rules of equal length keep their file order.
///
/// # Errors
///
/// Returns `"Read vocabulary: <io error>"` when the file cannot be read.
pub fn load_rules(path: &Path) -> Result<Vec<(String, String)>, String> {
    let content = std::fs::read_to_string(path).map_err(|e| format!("Read vocabulary: {}", e))?;
    // Some editors write a leading UTF-8 byte-order mark. `trim` does not
    // remove it, so without this it would become part of the first pattern.
    let content = content.strip_prefix('\u{feff}').unwrap_or(content.as_str());

    let mut rules: Vec<(String, String)> = content.lines().filter_map(parse_rule).collect();
    // Longest patterns first so that "MediTech" is replaced before "Medi".
    // `sort_by_key` is stable, which preserves file order for equal lengths.
    rules.sort_by_key(|(from, _)| std::cmp::Reverse(from.len()));
    Ok(rules)
}

/// Applies replacement rules to `text`.
///
/// Rules are applied in slice order, each replacing all non-overlapping
/// occurrences of its pattern. Text produced by a replacement is never scanned
/// again, so a later (shorter) rule cannot rewrite the output of an earlier
/// one. Rules with an empty pattern are ignored.
pub fn apply(text: &str, rules: &[(String, String)]) -> String {
    // Each piece is (slice, finished). Finished pieces are replacement output
    // and are carried through untouched; only unfinished pieces are searched.
    let mut pieces: Vec<(&str, bool)> = vec![(text, false)];

    for (from, to) in rules {
        if from.is_empty() {
            continue;
        }
        let mut next = Vec::with_capacity(pieces.len());
        for (piece, finished) in pieces {
            if finished {
                next.push((piece, true));
                continue;
            }
            let mut rest = piece;
            while let Some(pos) = rest.find(from.as_str()) {
                if pos > 0 {
                    next.push((&rest[..pos], false));
                }
                next.push((to.as_str(), true));
                rest = &rest[pos + from.len()..];
            }
            if !rest.is_empty() {
                next.push((rest, false));
            }
        }
        pieces = next;
    }

    pieces.into_iter().map(|(piece, _)| piece).collect()
}

/// Loads the rules from `path` and applies them to `text`.
///
/// This is best-effort: when `path` is empty, does not point to a regular
/// file, or the file cannot be read, the original text is returned unchanged.
pub fn apply_from_file(text: &str, path: &str) -> String {
    let file = Path::new(path);
    if path.is_empty() || !file.is_file() {
        return text.to_string();
    }
    match load_rules(file) {
        Ok(rules) => apply(text, &rules),
        Err(_) => text.to_string(),
    }
}