Set STT default to oneshot; add pending_finalize and env model
This commit is contained in:
parent
acee131f72
commit
f67c18fc9e
7 changed files with 157 additions and 32 deletions
|
|
@ -39,4 +39,4 @@ set_target_properties(stt PROPERTIES PREFIX "")
|
||||||
# Install targets - use standard paths, Nix postInstall will handle fcitx5 paths
|
# Install targets - use standard paths, Nix postInstall will handle fcitx5 paths
|
||||||
install(TARGETS stt DESTINATION lib/fcitx5)
|
install(TARGETS stt DESTINATION lib/fcitx5)
|
||||||
install(FILES data/stt.conf DESTINATION share/fcitx5/addon)
|
install(FILES data/stt.conf DESTINATION share/fcitx5/addon)
|
||||||
install(FILES data/stt-im.conf DESTINATION share/fcitx5/inputmethod)
|
install(FILES data/stt-inputmethod.conf DESTINATION share/fcitx5/inputmethod RENAME stt.conf)
|
||||||
|
|
|
||||||
|
|
@ -159,7 +159,7 @@ private:
|
||||||
std::string readBuffer_;
|
std::string readBuffer_;
|
||||||
|
|
||||||
// Mode
|
// Mode
|
||||||
SttMode mode_ = SttMode::Manual;
|
SttMode mode_ = SttMode::Oneshot;
|
||||||
|
|
||||||
// Current state
|
// Current state
|
||||||
bool ready_ = false;
|
bool ready_ = false;
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,27 @@ impl ModelSize {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse(input: &str) -> Option<Self> {
|
||||||
|
let normalized = input
|
||||||
|
.trim()
|
||||||
|
.to_lowercase()
|
||||||
|
.replace('.', "-")
|
||||||
|
.replace('_', "-");
|
||||||
|
|
||||||
|
match normalized.as_str() {
|
||||||
|
"tiny" => Some(ModelSize::Tiny),
|
||||||
|
"tiny-en" => Some(ModelSize::TinyEn),
|
||||||
|
"base" => Some(ModelSize::Base),
|
||||||
|
"base-en" => Some(ModelSize::BaseEn),
|
||||||
|
"small" => Some(ModelSize::Small),
|
||||||
|
"small-en" => Some(ModelSize::SmallEn),
|
||||||
|
"medium" => Some(ModelSize::Medium),
|
||||||
|
"medium-en" => Some(ModelSize::MediumEn),
|
||||||
|
"large-v3" => Some(ModelSize::LargeV3),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn hf_repo(&self) -> &'static str {
|
fn hf_repo(&self) -> &'static str {
|
||||||
"ggerganov/whisper.cpp"
|
"ggerganov/whisper.cpp"
|
||||||
}
|
}
|
||||||
|
|
@ -210,6 +231,8 @@ struct AudioState {
|
||||||
silence_samples: usize,
|
silence_samples: usize,
|
||||||
/// Last partial emission time
|
/// Last partial emission time
|
||||||
last_partial: std::time::Instant,
|
last_partial: std::time::Instant,
|
||||||
|
/// Manual mode: stop requested, finalize next tick
|
||||||
|
pending_finalize: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AudioState {
|
impl AudioState {
|
||||||
|
|
@ -220,6 +243,7 @@ impl AudioState {
|
||||||
speech_detected: false,
|
speech_detected: false,
|
||||||
silence_samples: 0,
|
silence_samples: 0,
|
||||||
last_partial: std::time::Instant::now(),
|
last_partial: std::time::Instant::now(),
|
||||||
|
pending_finalize: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -227,6 +251,7 @@ impl AudioState {
|
||||||
self.buffer.clear();
|
self.buffer.clear();
|
||||||
self.speech_detected = false;
|
self.speech_detected = false;
|
||||||
self.silence_samples = 0;
|
self.silence_samples = 0;
|
||||||
|
self.pending_finalize = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -241,7 +266,22 @@ async fn main() -> Result<()> {
|
||||||
.with_writer(std::io::stderr)
|
.with_writer(std::io::stderr)
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
let args = Args::parse();
|
let mut args = Args::parse();
|
||||||
|
|
||||||
|
// Allow Nix/session configuration via env vars.
|
||||||
|
// Precedence: explicit CLI args > env vars > defaults.
|
||||||
|
//
|
||||||
|
// `ringofstorms.sttIme.model` uses dot notation (e.g. "tiny.en"),
|
||||||
|
// while clap's value enum expects kebab-case (e.g. "tiny-en").
|
||||||
|
let cli_has_model_flag = std::env::args().any(|a| a == "--model" || a == "-M");
|
||||||
|
if !cli_has_model_flag && args.model_path.is_none() {
|
||||||
|
if let Ok(model) = std::env::var("STT_STREAM_MODEL") {
|
||||||
|
if let Some(parsed) = ModelSize::parse(&model) {
|
||||||
|
args.model = parsed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
info!("Starting stt-stream with mode: {:?}", args.mode);
|
info!("Starting stt-stream with mode: {:?}", args.mode);
|
||||||
|
|
||||||
// Load Whisper model
|
// Load Whisper model
|
||||||
|
|
@ -380,6 +420,7 @@ async fn main() -> Result<()> {
|
||||||
SttCommand::Stop => {
|
SttCommand::Stop => {
|
||||||
if let Ok(mut state) = audio_state_stdin.lock() {
|
if let Ok(mut state) = audio_state_stdin.lock() {
|
||||||
state.is_recording = false;
|
state.is_recording = false;
|
||||||
|
state.pending_finalize = true;
|
||||||
emit_event(&SttEvent::RecordingStopped);
|
emit_event(&SttEvent::RecordingStopped);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -434,7 +475,10 @@ async fn main() -> Result<()> {
|
||||||
// Mode-specific behavior
|
// Mode-specific behavior
|
||||||
match current_mode {
|
match current_mode {
|
||||||
Mode::Manual => {
|
Mode::Manual => {
|
||||||
if !state.is_recording {
|
// In manual mode we normally ignore audio unless explicitly recording.
|
||||||
|
// Exception: after receiving a "stop" command, we need one more tick
|
||||||
|
// to finalize and emit the transcript.
|
||||||
|
if !state.is_recording && !state.pending_finalize {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -489,7 +533,7 @@ async fn main() -> Result<()> {
|
||||||
|
|
||||||
// Check for end of utterance
|
// Check for end of utterance
|
||||||
let should_finalize = match current_mode {
|
let should_finalize = match current_mode {
|
||||||
Mode::Manual => !state.is_recording && state.speech_detected,
|
Mode::Manual => state.pending_finalize && state.speech_detected,
|
||||||
Mode::Oneshot | Mode::Continuous => {
|
Mode::Oneshot | Mode::Continuous => {
|
||||||
state.speech_detected && state.silence_samples > silence_samples_threshold
|
state.speech_detected && state.silence_samples > silence_samples_threshold
|
||||||
}
|
}
|
||||||
|
|
|
||||||
115
hosts/juni/flake.lock
generated
115
hosts/juni/flake.lock
generated
|
|
@ -38,19 +38,28 @@
|
||||||
},
|
},
|
||||||
"common": {
|
"common": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"dir": "flakes/common",
|
"path": "../../flakes/common",
|
||||||
"lastModified": 1768335461,
|
"type": "path"
|
||||||
"narHash": "sha256-hYJSJlGg1t1RBdlDWfpC5VT3bzl17DIk57XaDQI5O2I=",
|
|
||||||
"ref": "refs/heads/master",
|
|
||||||
"rev": "72724ae54a50c4465d9153a03b8e445164ad08a6",
|
|
||||||
"revCount": 1135,
|
|
||||||
"type": "git",
|
|
||||||
"url": "https://git.joshuabell.xyz/ringofstorms/dotfiles"
|
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"dir": "flakes/common",
|
"path": "../../flakes/common",
|
||||||
"type": "git",
|
"type": "path"
|
||||||
"url": "https://git.joshuabell.xyz/ringofstorms/dotfiles"
|
},
|
||||||
|
"parent": []
|
||||||
|
},
|
||||||
|
"crane": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1768319649,
|
||||||
|
"narHash": "sha256-VFkNyxHxkqGp8gf8kfFMW1j6XeBy609kv6TE9uF/0Js=",
|
||||||
|
"owner": "ipetkov",
|
||||||
|
"repo": "crane",
|
||||||
|
"rev": "4b6527687cfd20da3c2ef8287e01b74c2d6c705b",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "ipetkov",
|
||||||
|
"repo": "crane",
|
||||||
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"de_plasma": {
|
"de_plasma": {
|
||||||
|
|
@ -58,19 +67,31 @@
|
||||||
"plasma-manager": "plasma-manager"
|
"plasma-manager": "plasma-manager"
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"dir": "flakes/de_plasma",
|
"path": "../../flakes/de_plasma",
|
||||||
"lastModified": 1768335461,
|
"type": "path"
|
||||||
"narHash": "sha256-hYJSJlGg1t1RBdlDWfpC5VT3bzl17DIk57XaDQI5O2I=",
|
|
||||||
"ref": "refs/heads/master",
|
|
||||||
"rev": "72724ae54a50c4465d9153a03b8e445164ad08a6",
|
|
||||||
"revCount": 1135,
|
|
||||||
"type": "git",
|
|
||||||
"url": "https://git.joshuabell.xyz/ringofstorms/dotfiles"
|
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"dir": "flakes/de_plasma",
|
"path": "../../flakes/de_plasma",
|
||||||
"type": "git",
|
"type": "path"
|
||||||
"url": "https://git.joshuabell.xyz/ringofstorms/dotfiles"
|
},
|
||||||
|
"parent": []
|
||||||
|
},
|
||||||
|
"flake-utils": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": "systems"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1731533236,
|
||||||
|
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"flatpaks": {
|
"flatpaks": {
|
||||||
|
|
@ -318,6 +339,22 @@
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"nixpkgs_7": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1768127708,
|
||||||
|
"narHash": "sha256-1Sm77VfZh3mU0F5OqKABNLWxOuDeHIlcFjsXeeiPazs=",
|
||||||
|
"owner": "nixos",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "ffbc9f8cbaacfb331b6017d5a5abb21a492c9a38",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nixos",
|
||||||
|
"ref": "nixos-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
"nvim_plugin-Almo7aya/openingh.nvim": {
|
"nvim_plugin-Almo7aya/openingh.nvim": {
|
||||||
"flake": false,
|
"flake": false,
|
||||||
"locked": {
|
"locked": {
|
||||||
|
|
@ -1232,7 +1269,8 @@
|
||||||
"nixpkgs-unstable": "nixpkgs-unstable",
|
"nixpkgs-unstable": "nixpkgs-unstable",
|
||||||
"opencode": "opencode",
|
"opencode": "opencode",
|
||||||
"ros_neovim": "ros_neovim",
|
"ros_neovim": "ros_neovim",
|
||||||
"secrets-bao": "secrets-bao"
|
"secrets-bao": "secrets-bao",
|
||||||
|
"stt_ime": "stt_ime"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ros_neovim": {
|
"ros_neovim": {
|
||||||
|
|
@ -1339,6 +1377,37 @@
|
||||||
"type": "path"
|
"type": "path"
|
||||||
},
|
},
|
||||||
"parent": []
|
"parent": []
|
||||||
|
},
|
||||||
|
"stt_ime": {
|
||||||
|
"inputs": {
|
||||||
|
"crane": "crane",
|
||||||
|
"flake-utils": "flake-utils",
|
||||||
|
"nixpkgs": "nixpkgs_7"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"path": "../../flakes/stt_ime",
|
||||||
|
"type": "path"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"path": "../../flakes/stt_ime",
|
||||||
|
"type": "path"
|
||||||
|
},
|
||||||
|
"parent": []
|
||||||
|
},
|
||||||
|
"systems": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"root": "root",
|
"root": "root",
|
||||||
|
|
|
||||||
|
|
@ -9,16 +9,18 @@
|
||||||
impermanence.url = "github:nix-community/impermanence";
|
impermanence.url = "github:nix-community/impermanence";
|
||||||
|
|
||||||
# Use relative to get current version for testing
|
# Use relative to get current version for testing
|
||||||
# common.url = "path:../../flakes/common";
|
common.url = "path:../../flakes/common";
|
||||||
common.url = "git+https://git.joshuabell.xyz/ringofstorms/dotfiles?dir=flakes/common";
|
# common.url = "git+https://git.joshuabell.xyz/ringofstorms/dotfiles?dir=flakes/common";
|
||||||
# secrets-bao.url = "path:../../flakes/secrets-bao";
|
# secrets-bao.url = "path:../../flakes/secrets-bao";
|
||||||
secrets-bao.url = "path:../../flakes/secrets-bao";
|
secrets-bao.url = "path:../../flakes/secrets-bao";
|
||||||
# flatpaks.url = "path:../../flakes/flatpaks";
|
# flatpaks.url = "path:../../flakes/flatpaks";
|
||||||
flatpaks.url = "git+https://git.joshuabell.xyz/ringofstorms/dotfiles?dir=flakes/flatpaks";
|
flatpaks.url = "git+https://git.joshuabell.xyz/ringofstorms/dotfiles?dir=flakes/flatpaks";
|
||||||
# beszel.url = "path:../../flakes/beszel";
|
# beszel.url = "path:../../flakes/beszel";
|
||||||
beszel.url = "git+https://git.joshuabell.xyz/ringofstorms/dotfiles?dir=flakes/beszel";
|
beszel.url = "git+https://git.joshuabell.xyz/ringofstorms/dotfiles?dir=flakes/beszel";
|
||||||
# de_plasma.url = "path:../../flakes/de_plasma";
|
de_plasma.url = "path:../../flakes/de_plasma";
|
||||||
de_plasma.url = "git+https://git.joshuabell.xyz/ringofstorms/dotfiles?dir=flakes/de_plasma";
|
# de_plasma.url = "git+https://git.joshuabell.xyz/ringofstorms/dotfiles?dir=flakes/de_plasma";
|
||||||
|
stt_ime.url = "path:../../flakes/stt_ime";
|
||||||
|
# stt_ime.url = "git+https://git.joshuabell.xyz/ringofstorms/dotfiles?dir=flakes/stt_ime";
|
||||||
|
|
||||||
opencode.url = "github:sst/opencode";
|
opencode.url = "github:sst/opencode";
|
||||||
ros_neovim.url = "git+https://git.joshuabell.xyz/ringofstorms/nvim";
|
ros_neovim.url = "git+https://git.joshuabell.xyz/ringofstorms/nvim";
|
||||||
|
|
@ -69,6 +71,13 @@
|
||||||
};
|
};
|
||||||
})
|
})
|
||||||
inputs.common.nixosModules.jetbrains_font
|
inputs.common.nixosModules.jetbrains_font
|
||||||
|
inputs.stt_ime.nixosModules.default
|
||||||
|
({
|
||||||
|
ringofstorms.sttIme = {
|
||||||
|
enable = true;
|
||||||
|
model = "tiny.en";
|
||||||
|
};
|
||||||
|
})
|
||||||
|
|
||||||
inputs.ros_neovim.nixosModules.default
|
inputs.ros_neovim.nixosModules.default
|
||||||
({
|
({
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,9 @@
|
||||||
|
|
||||||
".local/share/zoxide"
|
".local/share/zoxide"
|
||||||
|
|
||||||
|
# Hugging Face cache (e.g. whisper.cpp models via hf-hub)
|
||||||
|
".cache/huggingface"
|
||||||
|
|
||||||
".config/opencode"
|
".config/opencode"
|
||||||
|
|
||||||
# KDE
|
# KDE
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue