Add vocabulary extension support in configuration and UI
- Introduced a new configuration field for vocabulary extension path in DictateConfig. - Updated transcription functions to apply vocabulary extensions from the specified file. - Enhanced UI to allow users to browse and set the vocabulary extension file, with clear instructions on the format for replacement rules.
This commit is contained in:
@@ -51,6 +51,10 @@ pub struct DictateConfig {
|
||||
#[serde(default)]
|
||||
pub model_path: String,
|
||||
|
||||
/// Pfad zu Ersetzungsregeln (eine Zeile: \"Quelltext→Ersetzung\" oder \"Quelltext=Ersetzung\")
|
||||
#[serde(default)]
|
||||
pub vocabulary_extension_path: String,
|
||||
|
||||
#[serde(default = "default_true")]
|
||||
pub start_minimized: bool,
|
||||
|
||||
@@ -92,6 +96,7 @@ impl Default for DictateConfig {
|
||||
input_device_name: String::new(),
|
||||
parakeet_cli_path: String::new(),
|
||||
model_path: String::new(),
|
||||
vocabulary_extension_path: String::new(),
|
||||
start_minimized: true,
|
||||
minimize_to_tray: true,
|
||||
paste_method: "Auto".to_string(),
|
||||
|
||||
@@ -61,6 +61,7 @@ mod audio;
|
||||
mod config;
|
||||
mod hotkey;
|
||||
mod transcription_log;
|
||||
mod vocabulary_extension;
|
||||
mod paste;
|
||||
mod recording;
|
||||
mod transcription;
|
||||
@@ -552,7 +553,8 @@ fn run_file_transcription(
|
||||
}
|
||||
}
|
||||
}
|
||||
let text = text_parts.join(" ");
|
||||
let mut text = text_parts.join(" ");
|
||||
text = vocabulary_extension::apply_from_file(&text, &config.vocabulary_extension_path);
|
||||
|
||||
if debug {
|
||||
transcription_log::log(&format!("transcription OK: {} chars", text.len()), true);
|
||||
@@ -698,7 +700,7 @@ fn run_recording(
|
||||
return;
|
||||
}
|
||||
|
||||
let text = match transcribe(
|
||||
let mut text = match transcribe(
|
||||
&config.parakeet_cli_path,
|
||||
&config.model_path,
|
||||
&wav_path,
|
||||
@@ -715,6 +717,8 @@ fn run_recording(
|
||||
|
||||
let _ = std::fs::remove_file(&wav_path);
|
||||
|
||||
text = vocabulary_extension::apply_from_file(&text, &config.vocabulary_extension_path);
|
||||
|
||||
if config.debug_logging {
|
||||
eprintln!(
|
||||
"[recording] Transkription: {} Zeichen, Text: {:?}",
|
||||
|
||||
@@ -240,6 +240,31 @@ impl SettingsApp {
|
||||
}
|
||||
});
|
||||
|
||||
ui.horizontal(|ui| {
|
||||
ui.label("Vocabulary extension:");
|
||||
let display = if self.config.vocabulary_extension_path.is_empty() {
|
||||
"(empty = no replacements)".to_string()
|
||||
} else {
|
||||
self.config.vocabulary_extension_path.clone()
|
||||
};
|
||||
ui.label(egui::RichText::new(&display).color(egui::Color32::GRAY));
|
||||
if ui.button("Browse…").clicked() {
|
||||
if let Some(p) = rfd::FileDialog::new()
|
||||
.set_title("Select vocabulary extension file")
|
||||
.add_filter("Text", &["txt"])
|
||||
.pick_file()
|
||||
{
|
||||
self.config.vocabulary_extension_path = p.display().to_string();
|
||||
self.status = "Remember to save changes.".to_string();
|
||||
}
|
||||
}
|
||||
if !self.config.vocabulary_extension_path.is_empty() && ui.small_button("✕").clicked() {
|
||||
self.config.vocabulary_extension_path.clear();
|
||||
self.status = "Remember to save changes.".to_string();
|
||||
}
|
||||
});
|
||||
ui.label(egui::RichText::new("Format: one rule per line, \"source→replacement\" or \"source=replacement\"").small().color(egui::Color32::GRAY));
|
||||
|
||||
let model_path = Path::new(&self.config.model_path);
|
||||
let model_valid = !self.config.model_path.is_empty() && model::is_model_valid(model_path);
|
||||
let downloading = self.download_progress.as_ref().and_then(|p| p.read().ok()).and_then(|g| g.clone());
|
||||
|
||||
59
HotKeet/src/vocabulary_extension.rs
Normal file
59
HotKeet/src/vocabulary_extension.rs
Normal file
@@ -0,0 +1,59 @@
|
||||
//! Vocabulary extension: replacement rules applied to transcribed text.
//! See https://deepwiki.com/altunenes/parakeet-rs/7.3-vocabulary-and-tokenization
//!
//! Format: one rule per line, either "source→replacement" or "source=replacement".
//! Blank lines and lines that start with `#` are ignored.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
/// Loads replacement rules from a text file.
///
/// Each non-empty line that does not start with `#` is split at the first
/// `→`, or, if the line contains no `→`, at the first `=`. Both sides are
/// trimmed. Lines without a separator and lines whose source side is empty
/// are skipped. A leading UTF-8 byte-order mark is ignored.
///
/// Returns `(from, to)` pairs ordered by the byte length of `from`, longest
/// first; rules of equal length keep their order from the file.
///
/// # Errors
///
/// Returns `Err("Read vocabulary: …")` when the file cannot be read as UTF-8 text.
pub fn load_rules(path: &Path) -> Result<Vec<(String, String)>, String> {
    let content = std::fs::read_to_string(path).map_err(|e| format!("Read vocabulary: {}", e))?;
    // Some editors (notably on Windows) prepend a byte-order mark. `trim()` does
    // not treat U+FEFF as whitespace, so without this the first line would
    // neither be recognised as a comment nor yield a matching source pattern.
    let content = content.strip_prefix('\u{feff}').unwrap_or(content.as_str());

    let mut rules: Vec<(String, String)> = content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        // "→" takes precedence over "=" when a line contains both.
        .filter_map(|line| line.split_once('→').or_else(|| line.split_once('=')))
        .map(|(from, to)| (from.trim(), to.trim()))
        .filter(|(from, _)| !from.is_empty())
        .map(|(from, to)| (from.to_owned(), to.to_owned()))
        .collect();

    // Longest patterns first so that "MediTech" is handled before "Medi".
    // The sort is stable, so equally long patterns keep their file order.
    rules.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
    Ok(rules)
}
|
||||
|
||||
/// Applies replacement rules to `text` and returns the rewritten string.
///
/// Rules are applied in slice order, so earlier rules have priority. Text
/// inserted by one rule is never re-scanned by later rules; otherwise a short
/// pattern such as "Medi" would rewrite the output of a longer rule such as
/// "MediTech" → "MediTech AG". Rules with an empty `from` are skipped, because
/// an empty pattern matches at every character boundary.
pub fn apply(text: &str, rules: &[(String, String)]) -> String {
    // Each segment is a slice plus a flag that marks it as replacement output,
    // which must stay untouched by the remaining rules.
    let mut segments: Vec<(&str, bool)> = vec![(text, false)];

    for (from, to) in rules {
        if from.is_empty() {
            continue;
        }
        let mut next = Vec::with_capacity(segments.len());
        for (piece, replaced) in segments {
            if replaced {
                next.push((piece, true));
                continue;
            }
            let mut rest = piece;
            while let Some(pos) = rest.find(from.as_str()) {
                if pos > 0 {
                    next.push((&rest[..pos], false));
                }
                next.push((to.as_str(), true));
                rest = &rest[pos + from.len()..];
            }
            if !rest.is_empty() {
                next.push((rest, false));
            }
        }
        segments = next;
    }

    segments.into_iter().map(|(piece, _)| piece).collect()
}
|
||||
|
||||
/// Lädt Regeln und wendet sie an. Bei leerem Pfad oder Fehler: Original zurück.
|
||||
pub fn apply_from_file(text: &str, path: &str) -> String {
|
||||
if path.is_empty() {
|
||||
return text.to_string();
|
||||
}
|
||||
let p = Path::new(path);
|
||||
if !p.is_file() {
|
||||
return text.to_string();
|
||||
}
|
||||
match load_rules(p) {
|
||||
Ok(rules) if !rules.is_empty() => apply(text, &rules),
|
||||
_ => text.to_string(),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user