Add vocabulary extension support in configuration and UI

- Introduced a new configuration field for vocabulary extension path in DictateConfig.
- Updated transcription functions to apply vocabulary extensions from the specified file.
- Enhanced UI to allow users to browse and set the vocabulary extension file, with clear instructions on the format for replacement rules.
This commit is contained in:
2026-03-08 10:28:02 +01:00
parent 3bef69fb5a
commit c296e2afe9
4 changed files with 95 additions and 2 deletions

View File

@@ -51,6 +51,10 @@ pub struct DictateConfig {
#[serde(default)]
pub model_path: String,
/// Pfad zu Ersetzungsregeln (eine Zeile: \"Quelltext→Ersetzung\" oder \"Quelltext=Ersetzung\")
#[serde(default)]
pub vocabulary_extension_path: String,
#[serde(default = "default_true")]
pub start_minimized: bool,
@@ -92,6 +96,7 @@ impl Default for DictateConfig {
input_device_name: String::new(),
parakeet_cli_path: String::new(),
model_path: String::new(),
vocabulary_extension_path: String::new(),
start_minimized: true,
minimize_to_tray: true,
paste_method: "Auto".to_string(),

View File

@@ -61,6 +61,7 @@ mod audio;
mod config;
mod hotkey;
mod transcription_log;
mod vocabulary_extension;
mod paste;
mod recording;
mod transcription;
@@ -552,7 +553,8 @@ fn run_file_transcription(
}
}
}
let text = text_parts.join(" ");
let mut text = text_parts.join(" ");
text = vocabulary_extension::apply_from_file(&text, &config.vocabulary_extension_path);
if debug {
transcription_log::log(&format!("transcription OK: {} chars", text.len()), true);
@@ -698,7 +700,7 @@ fn run_recording(
return;
}
let text = match transcribe(
let mut text = match transcribe(
&config.parakeet_cli_path,
&config.model_path,
&wav_path,
@@ -715,6 +717,8 @@ fn run_recording(
let _ = std::fs::remove_file(&wav_path);
text = vocabulary_extension::apply_from_file(&text, &config.vocabulary_extension_path);
if config.debug_logging {
eprintln!(
"[recording] Transkription: {} Zeichen, Text: {:?}",

View File

@@ -240,6 +240,31 @@ impl SettingsApp {
}
});
ui.horizontal(|ui| {
ui.label("Vocabulary extension:");
let display = if self.config.vocabulary_extension_path.is_empty() {
"(empty = no replacements)".to_string()
} else {
self.config.vocabulary_extension_path.clone()
};
ui.label(egui::RichText::new(&display).color(egui::Color32::GRAY));
if ui.button("Browse…").clicked() {
if let Some(p) = rfd::FileDialog::new()
.set_title("Select vocabulary extension file")
.add_filter("Text", &["txt"])
.pick_file()
{
self.config.vocabulary_extension_path = p.display().to_string();
self.status = "Remember to save changes.".to_string();
}
}
if !self.config.vocabulary_extension_path.is_empty() && ui.small_button("").clicked() {
self.config.vocabulary_extension_path.clear();
self.status = "Remember to save changes.".to_string();
}
});
ui.label(egui::RichText::new("Format: one rule per line, \"source→replacement\" or \"source=replacement\"").small().color(egui::Color32::GRAY));
let model_path = Path::new(&self.config.model_path);
let model_valid = !self.config.model_path.is_empty() && model::is_model_valid(model_path);
let downloading = self.download_progress.as_ref().and_then(|p| p.read().ok()).and_then(|g| g.clone());

View File

@@ -0,0 +1,59 @@
//! Vocabulary extension: replacement rules applied to transcription output.
//! See https://deepwiki.com/altunenes/parakeet-rs/7.3-vocabulary-and-tokenization
//!
//! Format: one rule per line, "source→replacement" or "source=replacement".
//! Empty lines and lines starting with # are ignored.
use std::path::Path;
/// Loads replacement rules from a text file.
///
/// Each non-empty line that does not start with `#` is parsed as
/// `source→replacement`; if the line contains no `→`, it is parsed as
/// `source=replacement`. Both sides are trimmed. Lines without a separator and
/// lines whose source side is empty are skipped. A leading UTF-8 byte order
/// mark is ignored.
///
/// Returns the `(from, to)` pairs sorted by the byte length of `from`, longest
/// first. Rules of equal length keep their order from the file.
///
/// # Errors
///
/// Returns `Err` with a `"Read vocabulary: …"` message when the file cannot be
/// read as UTF-8 text.
pub fn load_rules(path: &Path) -> Result<Vec<(String, String)>, String> {
    let content = std::fs::read_to_string(path).map_err(|e| format!("Read vocabulary: {}", e))?;
    // `trim` does not treat U+FEFF as whitespace, so a BOM would otherwise stick
    // to the first line and break its comment check or its source pattern.
    let content = content.strip_prefix('\u{feff}').unwrap_or(&content);

    let mut rules: Vec<(String, String)> = content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        // The arrow takes precedence over '=' so replacements may contain '='.
        // `split_once` drops the separator itself, whatever its byte width.
        .filter_map(|line| line.split_once('→').or_else(|| line.split_once('=')))
        .map(|(from, to)| (from.trim(), to.trim()))
        .filter(|(from, _)| !from.is_empty())
        .map(|(from, to)| (from.to_string(), to.to_string()))
        .collect();

    // Longest patterns first, so that "MediTech" is handled before "Medi".
    // The sort is stable, which keeps file order among equally long patterns.
    rules.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
    Ok(rules)
}
/// Applies replacement rules to `text` and returns the rewritten string.
///
/// Rules are applied in slice order, so earlier rules have priority. Each rule
/// replaces every occurrence of its `from` pattern with `to`, but only within
/// text that no earlier rule has already replaced. Inserted replacement text
/// is never rescanned: with the rules `MediTech → MediTech GmbH` and
/// `Medi → Medizin`, the input `MediTech` becomes `MediTech GmbH`, not
/// `MedizinTech GmbH`.
///
/// Rules with an empty `from` pattern are ignored, because an empty pattern
/// would match between every pair of characters.
pub fn apply(text: &str, rules: &[(String, String)]) -> String {
    /// A slice of the output under construction.
    enum Piece<'a> {
        /// Original text that later rules may still match.
        Raw(&'a str),
        /// Replacement text produced by a rule; later rules must not touch it.
        Replaced(&'a str),
    }

    let mut pieces = vec![Piece::Raw(text)];
    for (from, to) in rules {
        if from.is_empty() {
            continue;
        }
        let mut next = Vec::with_capacity(pieces.len());
        for piece in pieces {
            let mut rest = match piece {
                Piece::Raw(raw) => raw,
                done @ Piece::Replaced(_) => {
                    next.push(done);
                    continue;
                }
            };
            // Split the untouched text around every occurrence of the pattern.
            while let Some(pos) = rest.find(from.as_str()) {
                if pos > 0 {
                    next.push(Piece::Raw(&rest[..pos]));
                }
                next.push(Piece::Replaced(to.as_str()));
                rest = &rest[pos + from.len()..];
            }
            if !rest.is_empty() {
                next.push(Piece::Raw(rest));
            }
        }
        pieces = next;
    }

    pieces
        .into_iter()
        .map(|piece| match piece {
            Piece::Raw(s) | Piece::Replaced(s) => s,
        })
        .collect()
}
/// Loads the rules stored at `path` and applies them to `text`.
///
/// This is best-effort: the text is returned unchanged when `path` is empty,
/// does not point to a regular file, cannot be loaded, or contains no rules.
pub fn apply_from_file(text: &str, path: &str) -> String {
    let usable = !path.is_empty() && Path::new(path).is_file();
    let rules = if usable {
        // A load failure is treated the same as an empty rule set.
        load_rules(Path::new(path)).unwrap_or_default()
    } else {
        Vec::new()
    };

    if rules.is_empty() {
        text.to_string()
    } else {
        apply(text, &rules)
    }
}