Add vocabulary extension support in configuration and UI
- Introduced a new configuration field for vocabulary extension path in DictateConfig. - Updated transcription functions to apply vocabulary extensions from the specified file. - Enhanced UI to allow users to browse and set the vocabulary extension file, with clear instructions on the format for replacement rules.
This commit is contained in:
@@ -51,6 +51,10 @@ pub struct DictateConfig {
|
|||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub model_path: String,
|
pub model_path: String,
|
||||||
|
|
||||||
|
/// Pfad zu Ersetzungsregeln (eine Zeile: \"Quelltext→Ersetzung\" oder \"Quelltext=Ersetzung\")
|
||||||
|
#[serde(default)]
|
||||||
|
pub vocabulary_extension_path: String,
|
||||||
|
|
||||||
#[serde(default = "default_true")]
|
#[serde(default = "default_true")]
|
||||||
pub start_minimized: bool,
|
pub start_minimized: bool,
|
||||||
|
|
||||||
@@ -92,6 +96,7 @@ impl Default for DictateConfig {
|
|||||||
input_device_name: String::new(),
|
input_device_name: String::new(),
|
||||||
parakeet_cli_path: String::new(),
|
parakeet_cli_path: String::new(),
|
||||||
model_path: String::new(),
|
model_path: String::new(),
|
||||||
|
vocabulary_extension_path: String::new(),
|
||||||
start_minimized: true,
|
start_minimized: true,
|
||||||
minimize_to_tray: true,
|
minimize_to_tray: true,
|
||||||
paste_method: "Auto".to_string(),
|
paste_method: "Auto".to_string(),
|
||||||
|
|||||||
@@ -61,6 +61,7 @@ mod audio;
|
|||||||
mod config;
|
mod config;
|
||||||
mod hotkey;
|
mod hotkey;
|
||||||
mod transcription_log;
|
mod transcription_log;
|
||||||
|
mod vocabulary_extension;
|
||||||
mod paste;
|
mod paste;
|
||||||
mod recording;
|
mod recording;
|
||||||
mod transcription;
|
mod transcription;
|
||||||
@@ -552,7 +553,8 @@ fn run_file_transcription(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let text = text_parts.join(" ");
|
let mut text = text_parts.join(" ");
|
||||||
|
text = vocabulary_extension::apply_from_file(&text, &config.vocabulary_extension_path);
|
||||||
|
|
||||||
if debug {
|
if debug {
|
||||||
transcription_log::log(&format!("transcription OK: {} chars", text.len()), true);
|
transcription_log::log(&format!("transcription OK: {} chars", text.len()), true);
|
||||||
@@ -698,7 +700,7 @@ fn run_recording(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
let text = match transcribe(
|
let mut text = match transcribe(
|
||||||
&config.parakeet_cli_path,
|
&config.parakeet_cli_path,
|
||||||
&config.model_path,
|
&config.model_path,
|
||||||
&wav_path,
|
&wav_path,
|
||||||
@@ -715,6 +717,8 @@ fn run_recording(
|
|||||||
|
|
||||||
let _ = std::fs::remove_file(&wav_path);
|
let _ = std::fs::remove_file(&wav_path);
|
||||||
|
|
||||||
|
text = vocabulary_extension::apply_from_file(&text, &config.vocabulary_extension_path);
|
||||||
|
|
||||||
if config.debug_logging {
|
if config.debug_logging {
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"[recording] Transkription: {} Zeichen, Text: {:?}",
|
"[recording] Transkription: {} Zeichen, Text: {:?}",
|
||||||
|
|||||||
@@ -240,6 +240,31 @@ impl SettingsApp {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
ui.horizontal(|ui| {
|
||||||
|
ui.label("Vocabulary extension:");
|
||||||
|
let display = if self.config.vocabulary_extension_path.is_empty() {
|
||||||
|
"(empty = no replacements)".to_string()
|
||||||
|
} else {
|
||||||
|
self.config.vocabulary_extension_path.clone()
|
||||||
|
};
|
||||||
|
ui.label(egui::RichText::new(&display).color(egui::Color32::GRAY));
|
||||||
|
if ui.button("Browse…").clicked() {
|
||||||
|
if let Some(p) = rfd::FileDialog::new()
|
||||||
|
.set_title("Select vocabulary extension file")
|
||||||
|
.add_filter("Text", &["txt"])
|
||||||
|
.pick_file()
|
||||||
|
{
|
||||||
|
self.config.vocabulary_extension_path = p.display().to_string();
|
||||||
|
self.status = "Remember to save changes.".to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !self.config.vocabulary_extension_path.is_empty() && ui.small_button("✕").clicked() {
|
||||||
|
self.config.vocabulary_extension_path.clear();
|
||||||
|
self.status = "Remember to save changes.".to_string();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
ui.label(egui::RichText::new("Format: one rule per line, \"source→replacement\" or \"source=replacement\"").small().color(egui::Color32::GRAY));
|
||||||
|
|
||||||
let model_path = Path::new(&self.config.model_path);
|
let model_path = Path::new(&self.config.model_path);
|
||||||
let model_valid = !self.config.model_path.is_empty() && model::is_model_valid(model_path);
|
let model_valid = !self.config.model_path.is_empty() && model::is_model_valid(model_path);
|
||||||
let downloading = self.download_progress.as_ref().and_then(|p| p.read().ok()).and_then(|g| g.clone());
|
let downloading = self.download_progress.as_ref().and_then(|p| p.read().ok()).and_then(|g| g.clone());
|
||||||
|
|||||||
59
HotKeet/src/vocabulary_extension.rs
Normal file
59
HotKeet/src/vocabulary_extension.rs
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
//! Vokabular-Erweiterung: Ersetzungsregeln für Transkription.
|
||||||
|
//! Siehe https://deepwiki.com/altunenes/parakeet-rs/7.3-vocabulary-and-tokenization
|
||||||
|
//!
|
||||||
|
//! Format: Eine Zeile pro Regel, "Quelltext→Ersetzung" oder "Quelltext=Ersetzung".
|
||||||
|
//! Leere Zeilen und Zeilen, die mit # beginnen, werden ignoriert.
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
/// Loads replacement rules from the text file at `path`.
///
/// Each non-empty line that does not start with `#` is parsed as one rule of the
/// form `source→replacement` or `source=replacement`. If a line contains `→`, the
/// first `→` is used as the separator; otherwise the first `=` is used. Both
/// sides are trimmed. Lines without a separator, or with an empty source, are
/// skipped.
///
/// A leading UTF-8 byte-order mark is removed before parsing so that the first
/// line of a file saved "with BOM" is treated like any other line.
///
/// Returns the rules as `(from, to)` pairs sorted by the byte length of `from`,
/// longest first, so that e.g. "MediTech" is applied before "Medi". Rules of
/// equal length keep their order from the file.
///
/// # Errors
///
/// Returns `Err` with a message prefixed by `"Read vocabulary: "` if the file
/// cannot be read as UTF-8 text.
pub fn load_rules(path: &Path) -> Result<Vec<(String, String)>, String> {
    let content = std::fs::read_to_string(path).map_err(|e| format!("Read vocabulary: {}", e))?;
    // `trim()` does not remove U+FEFF, so strip the BOM explicitly; otherwise it
    // would become part of the first rule's pattern (or hide a leading `#`).
    let content = content.strip_prefix('\u{FEFF}').unwrap_or(&content);

    let mut rules = Vec::new();
    for line in content.lines() {
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        // "→" takes precedence over "=" so replacements may contain '='.
        let parts = line.split_once('→').or_else(|| line.split_once('='));
        if let Some((from, to)) = parts {
            let from = from.trim();
            if !from.is_empty() {
                rules.push((from.to_string(), to.trim().to_string()));
            }
        }
    }

    // Longest pattern first; the sort is stable, so ties keep file order.
    rules.sort_by_key(|(from, _)| std::cmp::Reverse(from.len()));
    Ok(rules)
}
|
||||||
|
|
||||||
|
/// Applies replacement rules to `text` and returns the rewritten string.
///
/// Rules are applied one after another in slice order. Each rule replaces every
/// occurrence of its `from` pattern in the text produced by the preceding rules,
/// so the output of an earlier rule can still be matched by a later one.
///
/// Rules with an empty `from` pattern are skipped: `str::replace` treats an empty
/// pattern as matching at every character boundary, which would insert the
/// replacement between all characters of the text.
pub fn apply(text: &str, rules: &[(String, String)]) -> String {
    let mut result = text.to_string();
    for (from, to) in rules {
        if from.is_empty() {
            continue;
        }
        result = result.replace(from.as_str(), to);
    }
    result
}
|
||||||
|
|
||||||
|
/// Lädt Regeln und wendet sie an. Bei leerem Pfad oder Fehler: Original zurück.
|
||||||
|
pub fn apply_from_file(text: &str, path: &str) -> String {
|
||||||
|
if path.is_empty() {
|
||||||
|
return text.to_string();
|
||||||
|
}
|
||||||
|
let p = Path::new(path);
|
||||||
|
if !p.is_file() {
|
||||||
|
return text.to_string();
|
||||||
|
}
|
||||||
|
match load_rules(p) {
|
||||||
|
Ok(rules) if !rules.is_empty() => apply(text, &rules),
|
||||||
|
_ => text.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user