Add drag-and-drop audio transcription, fix model download, widen window
- Add audio module: WAV/MP3 conversion to 16kHz mono for Parakeet - Add drop zone for audio files (WAV, MP3) at bottom of settings UI - Enable drag-and-drop in viewport, process dropped files - Fix model download: use altunenes/parakeet-rs/tdt with correct filenames - Move drop zone above status line - Increase window width (660px default, 500px min) Made-with: Cursor
This commit is contained in:
162
HotKeet/Cargo.lock
generated
162
HotKeet/Cargo.lock
generated
@@ -1013,6 +1013,15 @@ dependencies = [
|
|||||||
"bytemuck",
|
"bytemuck",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encoding_rs"
|
||||||
|
version = "0.8.35"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "endi"
|
name = "endi"
|
||||||
version = "1.1.1"
|
version = "1.1.1"
|
||||||
@@ -1153,6 +1162,12 @@ dependencies = [
|
|||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "extended"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "af9673d8203fcb076b19dfd17e38b3d4ae9f44959416ea532ce72415a6020365"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fastrand"
|
name = "fastrand"
|
||||||
version = "2.3.0"
|
version = "2.3.0"
|
||||||
@@ -1617,6 +1632,7 @@ dependencies = [
|
|||||||
"rfd",
|
"rfd",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"symphonia",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tray-item",
|
"tray-item",
|
||||||
"winapi",
|
"winapi",
|
||||||
@@ -3508,6 +3524,152 @@ version = "2.6.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5773a4c030a19d9bfaa090f49746ff35c75dfddfa700df7a5939d5e076a57039"
|
||||||
|
dependencies = [
|
||||||
|
"lazy_static",
|
||||||
|
"symphonia-bundle-flac",
|
||||||
|
"symphonia-bundle-mp3",
|
||||||
|
"symphonia-codec-adpcm",
|
||||||
|
"symphonia-codec-pcm",
|
||||||
|
"symphonia-codec-vorbis",
|
||||||
|
"symphonia-core",
|
||||||
|
"symphonia-format-mkv",
|
||||||
|
"symphonia-format-ogg",
|
||||||
|
"symphonia-format-riff",
|
||||||
|
"symphonia-metadata",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-bundle-flac"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c91565e180aea25d9b80a910c546802526ffd0072d0b8974e3ebe59b686c9976"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"symphonia-core",
|
||||||
|
"symphonia-metadata",
|
||||||
|
"symphonia-utils-xiph",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-bundle-mp3"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4872dd6bb56bf5eac799e3e957aa1981086c3e613b27e0ac23b176054f7c57ed"
|
||||||
|
dependencies = [
|
||||||
|
"lazy_static",
|
||||||
|
"log",
|
||||||
|
"symphonia-core",
|
||||||
|
"symphonia-metadata",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-codec-adpcm"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2dddc50e2bbea4cfe027441eece77c46b9f319748605ab8f3443350129ddd07f"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"symphonia-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-codec-pcm"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4e89d716c01541ad3ebe7c91ce4c8d38a7cf266a3f7b2f090b108fb0cb031d95"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"symphonia-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-codec-vorbis"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f025837c309cd69ffef572750b4a2257b59552c5399a5e49707cc5b1b85d1c73"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"symphonia-core",
|
||||||
|
"symphonia-utils-xiph",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-core"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ea00cc4f79b7f6bb7ff87eddc065a1066f3a43fe1875979056672c9ef948c2af"
|
||||||
|
dependencies = [
|
||||||
|
"arrayvec",
|
||||||
|
"bitflags 1.3.2",
|
||||||
|
"bytemuck",
|
||||||
|
"lazy_static",
|
||||||
|
"log",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-format-mkv"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "122d786d2c43a49beb6f397551b4a050d8229eaa54c7ddf9ee4b98899b8742d0"
|
||||||
|
dependencies = [
|
||||||
|
"lazy_static",
|
||||||
|
"log",
|
||||||
|
"symphonia-core",
|
||||||
|
"symphonia-metadata",
|
||||||
|
"symphonia-utils-xiph",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-format-ogg"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2b4955c67c1ed3aa8ae8428d04ca8397fbef6a19b2b051e73b5da8b1435639cb"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"symphonia-core",
|
||||||
|
"symphonia-metadata",
|
||||||
|
"symphonia-utils-xiph",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-format-riff"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c2d7c3df0e7d94efb68401d81906eae73c02b40d5ec1a141962c592d0f11a96f"
|
||||||
|
dependencies = [
|
||||||
|
"extended",
|
||||||
|
"log",
|
||||||
|
"symphonia-core",
|
||||||
|
"symphonia-metadata",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-metadata"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "36306ff42b9ffe6e5afc99d49e121e0bd62fe79b9db7b9681d48e29fa19e6b16"
|
||||||
|
dependencies = [
|
||||||
|
"encoding_rs",
|
||||||
|
"lazy_static",
|
||||||
|
"log",
|
||||||
|
"symphonia-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "symphonia-utils-xiph"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ee27c85ab799a338446b68eec77abf42e1a6f1bb490656e121c6e27bfbab9f16"
|
||||||
|
dependencies = [
|
||||||
|
"symphonia-core",
|
||||||
|
"symphonia-metadata",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "1.0.109"
|
version = "1.0.109"
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ chrono = "0.4"
|
|||||||
raw-window-handle = "0.6"
|
raw-window-handle = "0.6"
|
||||||
rfd = "0.14"
|
rfd = "0.14"
|
||||||
reqwest = { version = "0.12", default-features = false, features = ["blocking", "rustls-tls"] }
|
reqwest = { version = "0.12", default-features = false, features = ["blocking", "rustls-tls"] }
|
||||||
|
symphonia = { version = "0.5", features = ["mp3"] }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
winresource = "0.1"
|
winresource = "0.1"
|
||||||
|
|||||||
191
HotKeet/src/audio.rs
Normal file
191
HotKeet/src/audio.rs
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
//! Prepare audio files (WAV, MP3) for transcription.
//! Parakeet expects 16 kHz, mono, 16-bit PCM.
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
use symphonia::core::audio::Signal;
|
||||||
|
|
||||||
|
const TARGET_SAMPLE_RATE: u32 = 16000;
|
||||||
|
const TARGET_CHANNELS: u16 = 1;
|
||||||
|
|
||||||
|
/// Bereitet eine Audio-Datei für die Transkription vor.
|
||||||
|
/// Gibt den Pfad zu einer temporären WAV-Datei zurück (16 kHz, Mono, 16-bit).
|
||||||
|
/// Der Aufrufer sollte die Temp-Datei nach der Transkription löschen.
|
||||||
|
pub fn prepare_for_transcription(path: &Path) -> Result<std::path::PathBuf, String> {
|
||||||
|
let ext = path
|
||||||
|
.extension()
|
||||||
|
.and_then(|e| e.to_str())
|
||||||
|
.map(|s| s.to_lowercase())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let samples = match ext.as_str() {
|
||||||
|
"wav" => decode_wav(path)?,
|
||||||
|
"mp3" => decode_mp3(path)?,
|
||||||
|
_ => return Err(format!("Unsupported format: .{} (use .wav or .mp3)", ext)),
|
||||||
|
};
|
||||||
|
|
||||||
|
let temp_path = std::env::temp_dir().join(format!(
|
||||||
|
"hotkeet-transcribe-{}.wav",
|
||||||
|
std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.map(|d| d.as_nanos())
|
||||||
|
.unwrap_or(0)
|
||||||
|
));
|
||||||
|
|
||||||
|
write_wav(&temp_path, &samples)?;
|
||||||
|
Ok(temp_path)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_wav(path: &Path) -> Result<Vec<i16>, String> {
|
||||||
|
let reader = hound::WavReader::open(path).map_err(|e| format!("WAV read: {}", e))?;
|
||||||
|
let spec = reader.spec();
|
||||||
|
let sample_rate = spec.sample_rate;
|
||||||
|
let channels = spec.channels;
|
||||||
|
|
||||||
|
let samples: Vec<i16> = match spec.sample_format {
|
||||||
|
hound::SampleFormat::Int => reader
|
||||||
|
.into_samples::<i16>()
|
||||||
|
.filter_map(|s| s.ok())
|
||||||
|
.collect(),
|
||||||
|
hound::SampleFormat::Float => reader
|
||||||
|
.into_samples::<f32>()
|
||||||
|
.filter_map(|s| s.ok())
|
||||||
|
.map(|f| (f.clamp(-1.0, 1.0) * 32767.0) as i16)
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mono = if channels > 1 {
|
||||||
|
samples
|
||||||
|
.chunks(channels as usize)
|
||||||
|
.map(|c| {
|
||||||
|
let sum: i32 = c.iter().map(|&s| s as i32).sum();
|
||||||
|
(sum / channels as i32).clamp(-32768, 32767) as i16
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
samples
|
||||||
|
};
|
||||||
|
|
||||||
|
let resampled = resample_i16(&mono, sample_rate, TARGET_SAMPLE_RATE);
|
||||||
|
Ok(resampled)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_mp3(path: &Path) -> Result<Vec<i16>, String> {
|
||||||
|
let file = std::fs::File::open(path).map_err(|e| format!("Open MP3: {}", e))?;
|
||||||
|
let source = symphonia::core::io::MediaSourceStream::new(
|
||||||
|
Box::new(file),
|
||||||
|
symphonia::core::io::MediaSourceStreamOptions::default(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let probe = symphonia::default::get_probe();
|
||||||
|
let result = probe
|
||||||
|
.format(
|
||||||
|
&Default::default(),
|
||||||
|
source,
|
||||||
|
&Default::default(),
|
||||||
|
&Default::default(),
|
||||||
|
)
|
||||||
|
.map_err(|e| format!("MP3 probe: {}", e))?;
|
||||||
|
|
||||||
|
let mut format = result.format;
|
||||||
|
|
||||||
|
let track = format
|
||||||
|
.tracks()
|
||||||
|
.iter()
|
||||||
|
.find(|t| t.codec_params.codec != symphonia::core::codecs::CODEC_TYPE_NULL)
|
||||||
|
.ok_or("No audio track in MP3")?
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
let mut decoder = symphonia::default::get_codecs()
|
||||||
|
.make(&track.codec_params, &Default::default())
|
||||||
|
.map_err(|e| format!("MP3 decoder: {}", e))?;
|
||||||
|
|
||||||
|
let sample_rate = track
|
||||||
|
.codec_params
|
||||||
|
.sample_rate
|
||||||
|
.ok_or("MP3: no sample rate")? as u32;
|
||||||
|
let channels = track
|
||||||
|
.codec_params
|
||||||
|
.channels
|
||||||
|
.ok_or("MP3: no channels")?
|
||||||
|
.count() as usize;
|
||||||
|
|
||||||
|
let mut all_samples: Vec<i16> = Vec::new();
|
||||||
|
|
||||||
|
while let Ok(packet) = format.next_packet() {
|
||||||
|
if let Ok(decoded) = decoder.decode(&packet) {
|
||||||
|
match decoded {
|
||||||
|
symphonia::core::audio::AudioBufferRef::F32(buf) => {
|
||||||
|
let n_frames = buf.frames();
|
||||||
|
for i in 0..n_frames {
|
||||||
|
let mut sum = 0.0f32;
|
||||||
|
for c in 0..channels {
|
||||||
|
sum += buf.chan(c)[i];
|
||||||
|
}
|
||||||
|
sum /= channels as f32;
|
||||||
|
all_samples.push((sum.clamp(-1.0, 1.0) * 32767.0) as i16);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
symphonia::core::audio::AudioBufferRef::S16(buf) => {
|
||||||
|
let n_frames = buf.frames();
|
||||||
|
for i in 0..n_frames {
|
||||||
|
let mut sum = 0i32;
|
||||||
|
for c in 0..channels {
|
||||||
|
sum += buf.chan(c)[i] as i32;
|
||||||
|
}
|
||||||
|
all_samples.push((sum / channels as i32).clamp(-32768, 32767) as i16);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
symphonia::core::audio::AudioBufferRef::U16(buf) => {
|
||||||
|
let n_frames = buf.frames();
|
||||||
|
for i in 0..n_frames {
|
||||||
|
let mut sum = 0i32;
|
||||||
|
for c in 0..channels {
|
||||||
|
sum += (buf.chan(c)[i] as i32) - 32768;
|
||||||
|
}
|
||||||
|
all_samples.push((sum / channels as i32).clamp(-32768, 32767) as i16);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => return Err("MP3: unsupported sample format".to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let resampled = resample_i16(&all_samples, sample_rate, TARGET_SAMPLE_RATE);
|
||||||
|
Ok(resampled)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resamples 16-bit PCM from `from_rate` to `to_rate` by linear interpolation.
///
/// Each output sample is interpolated between the two nearest input samples;
/// the last input sample is reused when the right-hand neighbour would fall
/// past the end. No low-pass filtering is applied before downsampling.
///
/// Returns a copy of `samples` when both rates are equal, and an empty vector
/// when the input is empty or either rate is 0. A source rate of 0 would
/// otherwise produce an unbounded output length.
fn resample_i16(samples: &[i16], from_rate: u32, to_rate: u32) -> Vec<i16> {
    if from_rate == to_rate {
        return samples.to_vec();
    }
    if samples.is_empty() || from_rate == 0 || to_rate == 0 {
        return Vec::new();
    }

    let ratio = f64::from(from_rate) / f64::from(to_rate);
    let out_len = (samples.len() as f64 / ratio) as usize;
    let last = samples.len() - 1;

    (0..out_len)
        .map(|i| {
            let src_idx = i as f64 * ratio;
            let idx0 = src_idx.floor() as usize;
            let idx1 = (idx0 + 1).min(last);
            let frac = src_idx - idx0 as f64;
            let s0 = f64::from(samples.get(idx0).copied().unwrap_or(0));
            let s1 = f64::from(samples.get(idx1).copied().unwrap_or(0));
            let s = s0 * (1.0 - frac) + s1 * frac;
            s.clamp(-32768.0, 32767.0) as i16
        })
        .collect()
}
|
||||||
|
|
||||||
|
fn write_wav(path: &Path, samples: &[i16]) -> Result<(), String> {
|
||||||
|
let spec = hound::WavSpec {
|
||||||
|
channels: TARGET_CHANNELS,
|
||||||
|
sample_rate: TARGET_SAMPLE_RATE,
|
||||||
|
bits_per_sample: 16,
|
||||||
|
sample_format: hound::SampleFormat::Int,
|
||||||
|
};
|
||||||
|
let mut writer = hound::WavWriter::create(path, spec)
|
||||||
|
.map_err(|e| format!("Create WAV: {}", e))?;
|
||||||
|
for &s in samples {
|
||||||
|
writer.write_sample(s).map_err(|e| format!("Write WAV: {}", e))?;
|
||||||
|
}
|
||||||
|
writer.finalize().map_err(|e| format!("Finalize WAV: {}", e))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
@@ -57,6 +57,7 @@ fn show_main_window() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
mod audio;
|
||||||
mod config;
|
mod config;
|
||||||
mod hotkey;
|
mod hotkey;
|
||||||
mod paste;
|
mod paste;
|
||||||
@@ -149,6 +150,29 @@ impl eframe::App for AppState {
|
|||||||
ctx.send_viewport_cmd(egui::ViewportCommand::Visible(false));
|
ctx.send_viewport_cmd(egui::ViewportCommand::Visible(false));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Dropped audio files: transcribe and paste
|
||||||
|
let dropped: Vec<_> = ctx.input(|i| i.raw.dropped_files.clone());
|
||||||
|
for file in dropped {
|
||||||
|
if let Some(ref path) = file.path {
|
||||||
|
let ext = path
|
||||||
|
.extension()
|
||||||
|
.and_then(|e| e.to_str())
|
||||||
|
.map(|s| s.to_lowercase())
|
||||||
|
.unwrap_or_default();
|
||||||
|
if ext == "wav" || ext == "mp3" {
|
||||||
|
let path = path.clone();
|
||||||
|
let cfg = self.config.read().unwrap().clone();
|
||||||
|
let status = self.status.clone();
|
||||||
|
let status_detail = self.status_detail.clone();
|
||||||
|
let paste_tx = self.paste_tx.clone();
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
run_file_transcription(path, cfg, status, status_detail, paste_tx);
|
||||||
|
});
|
||||||
|
break; // Nur eine Datei pro Drop verarbeiten
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Download request: spawn model download
|
// Download request: spawn model download
|
||||||
if let Ok(path) = self.download_request_rx.try_recv() {
|
if let Ok(path) = self.download_request_rx.try_recv() {
|
||||||
model::download_model_async(path, self.download_progress_tx.clone());
|
model::download_model_async(path, self.download_progress_tx.clone());
|
||||||
@@ -419,8 +443,9 @@ fn main() -> eframe::Result<()> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let mut viewport = egui::ViewportBuilder::default()
|
let mut viewport = egui::ViewportBuilder::default()
|
||||||
.with_inner_size([400.0, 400.0])
|
.with_inner_size([660.0, 520.0])
|
||||||
.with_min_inner_size([300.0, 300.0]);
|
.with_min_inner_size([500.0, 420.0])
|
||||||
|
.with_drag_and_drop(true);
|
||||||
if start_minimized {
|
if start_minimized {
|
||||||
viewport = viewport.with_visible(false);
|
viewport = viewport.with_visible(false);
|
||||||
}
|
}
|
||||||
@@ -454,6 +479,64 @@ fn main() -> eframe::Result<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn run_file_transcription(
|
||||||
|
path: std::path::PathBuf,
|
||||||
|
config: DictateConfig,
|
||||||
|
status: Arc<std::sync::RwLock<AppStatus>>,
|
||||||
|
status_detail: Arc<std::sync::RwLock<String>>,
|
||||||
|
paste_tx: Sender<PasteRequest>,
|
||||||
|
) {
|
||||||
|
set_status(&status, &status_detail, AppStatus::Transkribieren, "Converting…");
|
||||||
|
|
||||||
|
let wav_path = match audio::prepare_for_transcription(&path) {
|
||||||
|
Ok(p) => p,
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Audio prepare: {}", e);
|
||||||
|
set_status(&status, &status_detail, AppStatus::Fehler, &format!("Audio: {}", e));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let _defer = Defer(Some(|| {
|
||||||
|
let _ = std::fs::remove_file(&wav_path);
|
||||||
|
}));
|
||||||
|
|
||||||
|
set_status(&status, &status_detail, AppStatus::Transkribieren, "Transcribing…");
|
||||||
|
|
||||||
|
let text = match transcribe(
|
||||||
|
&config.parakeet_cli_path,
|
||||||
|
&config.model_path,
|
||||||
|
&wav_path,
|
||||||
|
) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Transcription: {}", e);
|
||||||
|
set_status(&status, &status_detail, AppStatus::Fehler, &format!("Transcription: {}", e));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let req = PasteRequest {
|
||||||
|
text,
|
||||||
|
method: config.paste_method_enum(),
|
||||||
|
target_hwnd: None,
|
||||||
|
debug_logging: config.debug_logging,
|
||||||
|
};
|
||||||
|
if paste_tx.send(req).is_err() {
|
||||||
|
eprintln!("Paste channel closed");
|
||||||
|
set_status(&status, &status_detail, AppStatus::Fehler, "Paste channel failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Scope guard that runs the stored closure once when the guard is dropped.
///
/// Holding `None` makes the guard a no-op.
struct Defer<F>(Option<F>)
where
    F: FnOnce();

impl<F> Drop for Defer<F>
where
    F: FnOnce(),
{
    fn drop(&mut self) {
        // `take` moves the closure out so the FnOnce can be called by value.
        let Some(cleanup) = self.0.take() else {
            return;
        };
        cleanup();
    }
}
|
||||||
|
|
||||||
fn set_status(
|
fn set_status(
|
||||||
status: &Arc<std::sync::RwLock<AppStatus>>,
|
status: &Arc<std::sync::RwLock<AppStatus>>,
|
||||||
detail: &Arc<std::sync::RwLock<String>>,
|
detail: &Arc<std::sync::RwLock<String>>,
|
||||||
|
|||||||
@@ -4,14 +4,15 @@ use std::io::Read;
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::sync::mpsc::Sender;
|
use std::sync::mpsc::Sender;
|
||||||
|
|
||||||
const HF_BASE: &str = "https://huggingface.co/nasedkinpv/parakeet-tdt-0.6b-v3-onnx-int8/resolve/main";
|
/// transcribe-rs compatible model (encoder-model.int8.onnx, decoder_joint-model.int8.onnx, nemo128.onnx)
|
||||||
|
const HF_BASE: &str = "https://huggingface.co/altunenes/parakeet-rs/resolve/main/tdt";
|
||||||
|
|
||||||
/// Required files for Parakeet INT8 model (transcribe-rs).
|
/// Required files for Parakeet INT8 model (transcribe-rs).
|
||||||
const REQUIRED_FILES: &[&str] = &[
|
const REQUIRED_FILES: &[&str] = &[
|
||||||
"vocab.txt",
|
"vocab.txt",
|
||||||
"encoder-int8.onnx",
|
"nemo128.onnx",
|
||||||
"encoder-int8.onnx.data",
|
"encoder-model.int8.onnx",
|
||||||
"decoder_joint-int8.onnx",
|
"decoder_joint-model.int8.onnx",
|
||||||
];
|
];
|
||||||
|
|
||||||
/// Progress message during download.
|
/// Progress message during download.
|
||||||
|
|||||||
@@ -318,6 +318,37 @@ impl SettingsApp {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ui.add_space(8.0);
|
||||||
|
|
||||||
|
// Drag-and-Drop-Bereich für Audio-Dateien (über der Statuszeile)
|
||||||
|
let drop_rect = ui.available_rect_before_wrap();
|
||||||
|
let drop_height = 56.0;
|
||||||
|
let drop_rect = egui::Rect::from_min_size(
|
||||||
|
drop_rect.min,
|
||||||
|
egui::vec2(drop_rect.width(), drop_height),
|
||||||
|
);
|
||||||
|
let (rect, _) = ui.allocate_exact_size(drop_rect.size(), egui::Sense::hover());
|
||||||
|
let is_hovered = ctx.input(|i| i.pointer.hover_pos())
|
||||||
|
.map(|p| rect.contains(p))
|
||||||
|
.unwrap_or(false);
|
||||||
|
let stroke = if is_hovered {
|
||||||
|
egui::Stroke::new(2.0, egui::Color32::from_rgb(100, 150, 255))
|
||||||
|
} else {
|
||||||
|
egui::Stroke::new(1.0, egui::Color32::from_rgb(120, 120, 120))
|
||||||
|
};
|
||||||
|
ui.painter().rect_stroke(rect, 4.0, stroke);
|
||||||
|
ui.allocate_new_ui(egui::UiBuilder::new().max_rect(rect), |ui| {
|
||||||
|
ui.vertical_centered(|ui| {
|
||||||
|
ui.add_space(8.0);
|
||||||
|
ui.label(
|
||||||
|
egui::RichText::new("Drop WAV or MP3 here for transcription")
|
||||||
|
.color(egui::Color32::from_rgb(140, 140, 140)),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
ui.add_space(4.0);
|
||||||
|
|
||||||
if !self.status.is_empty() {
|
if !self.status.is_empty() {
|
||||||
ui.add_space(8.0);
|
ui.add_space(8.0);
|
||||||
ui.label(&self.status);
|
ui.label(&self.status);
|
||||||
|
|||||||
Reference in New Issue
Block a user