stt test
This commit is contained in:
parent
c408693861
commit
02f24bb524
15 changed files with 4184 additions and 15 deletions
42
flakes/stt_ime/fcitx5-stt/CMakeLists.txt
Normal file
42
flakes/stt_ime/fcitx5-stt/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
cmake_minimum_required(VERSION 3.16)
|
||||
project(fcitx5-stt VERSION 0.1.0 LANGUAGES CXX)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
# Find Fcitx5
|
||||
find_package(Fcitx5Core REQUIRED)
|
||||
find_package(Fcitx5Utils REQUIRED)
|
||||
|
||||
# Path to stt-stream binary (set by Nix)
|
||||
if(NOT DEFINED STT_STREAM_PATH)
|
||||
set(STT_STREAM_PATH "stt-stream")
|
||||
endif()
|
||||
|
||||
# Configure header with path
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/config.h
|
||||
)
|
||||
|
||||
# Build the addon shared library
|
||||
add_library(stt MODULE
|
||||
src/stt.cpp
|
||||
)
|
||||
|
||||
target_include_directories(stt PRIVATE
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
)
|
||||
|
||||
target_link_libraries(stt PRIVATE
|
||||
Fcitx5::Core
|
||||
Fcitx5::Utils
|
||||
)
|
||||
|
||||
# Set output name without "lib" prefix
|
||||
set_target_properties(stt PROPERTIES PREFIX "")
|
||||
|
||||
# Install targets - use standard paths, Nix postInstall will handle fcitx5 paths
|
||||
install(TARGETS stt DESTINATION lib/fcitx5)
|
||||
install(FILES data/stt.conf DESTINATION share/fcitx5/addon)
|
||||
install(FILES data/stt-im.conf DESTINATION share/fcitx5/inputmethod)
|
||||
7
flakes/stt_ime/fcitx5-stt/data/stt-im.conf
Normal file
7
flakes/stt_ime/fcitx5-stt/data/stt-im.conf
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
[InputMethod]
|
||||
Name=Speech to Text
|
||||
Icon=audio-input-microphone
|
||||
Label=STT
|
||||
LangCode=
|
||||
Addon=stt
|
||||
Configurable=False
|
||||
7
flakes/stt_ime/fcitx5-stt/data/stt.conf
Normal file
7
flakes/stt_ime/fcitx5-stt/data/stt.conf
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
[Addon]
|
||||
Name=stt
|
||||
Category=InputMethod
|
||||
Library=stt
|
||||
Type=SharedLibrary
|
||||
OnDemand=True
|
||||
Configurable=False
|
||||
4
flakes/stt_ime/fcitx5-stt/src/config.h.in
Normal file
4
flakes/stt_ime/fcitx5-stt/src/config.h.in
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
#pragma once
|
||||
|
||||
// Path to stt-stream binary
|
||||
#define STT_STREAM_PATH "@STT_STREAM_PATH@"
|
||||
533
flakes/stt_ime/fcitx5-stt/src/stt.cpp
Normal file
533
flakes/stt_ime/fcitx5-stt/src/stt.cpp
Normal file
|
|
@ -0,0 +1,533 @@
|
|||
/*
|
||||
* fcitx5-stt: Speech-to-Text Input Method Engine for Fcitx5
|
||||
*
|
||||
* This is a thin shim that spawns the stt-stream Rust binary and
|
||||
* bridges its JSON events to Fcitx5's input method API.
|
||||
*
|
||||
* Modes:
|
||||
* - Oneshot: Record until silence, commit, reset
|
||||
* - Continuous: Always listen, commit on silence
|
||||
* - Manual: Start/stop via hotkey
|
||||
*
|
||||
* UX:
|
||||
* - Partial text shown as preedit (underlined)
|
||||
* - Final text committed on stop/silence
|
||||
* - Escape cancels without committing
|
||||
* - Enter accepts current preedit
|
||||
*/
|
||||
|
||||
#include <fcitx/addonfactory.h>
|
||||
#include <fcitx/addonmanager.h>
|
||||
#include <fcitx/inputcontext.h>
|
||||
#include <fcitx/inputcontextmanager.h>
|
||||
#include <fcitx/inputmethodengine.h>
|
||||
#include <fcitx/inputpanel.h>
|
||||
#include <fcitx/instance.h>
|
||||
#include <fcitx-utils/event.h>
|
||||
#include <fcitx-utils/i18n.h>
|
||||
#include <fcitx-utils/log.h>
|
||||
#include <fcitx-utils/utf8.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <sys/wait.h>
|
||||
#include <fcntl.h>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
namespace {
|
||||
|
||||
FCITX_DEFINE_LOG_CATEGORY(stt_log, "stt");
|
||||
#define STT_DEBUG() FCITX_LOGC(stt_log, Debug)
|
||||
#define STT_INFO() FCITX_LOGC(stt_log, Info)
|
||||
#define STT_WARN() FCITX_LOGC(stt_log, Warn)
|
||||
#define STT_ERROR() FCITX_LOGC(stt_log, Error)
|
||||
|
||||
// Operating modes
|
||||
enum class SttMode {
|
||||
Oneshot,
|
||||
Continuous,
|
||||
Manual
|
||||
};
|
||||
|
||||
// Simple JSON parsing (we only need a few fields)
|
||||
struct JsonEvent {
|
||||
std::string type;
|
||||
std::string text;
|
||||
std::string message;
|
||||
|
||||
static JsonEvent parse(const std::string& line) {
|
||||
JsonEvent ev;
|
||||
// Very basic JSON parsing - find "type" and "text" fields
|
||||
auto findValue = [&line](const std::string& key) -> std::string {
|
||||
std::string search = "\"" + key + "\":\"";
|
||||
auto pos = line.find(search);
|
||||
if (pos == std::string::npos) return "";
|
||||
pos += search.length();
|
||||
auto end = line.find("\"", pos);
|
||||
if (end == std::string::npos) return "";
|
||||
return line.substr(pos, end - pos);
|
||||
};
|
||||
|
||||
ev.type = findValue("type");
|
||||
ev.text = findValue("text");
|
||||
ev.message = findValue("message");
|
||||
return ev;
|
||||
}
|
||||
};
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
class SttEngine;
|
||||
|
||||
class SttState : public fcitx::InputContextProperty {
|
||||
public:
|
||||
SttState(SttEngine* engine, fcitx::InputContext* ic)
|
||||
: engine_(engine), ic_(ic) {}
|
||||
|
||||
void setPreedit(const std::string& text);
|
||||
void commit(const std::string& text);
|
||||
void clear();
|
||||
|
||||
bool isRecording() const { return recording_; }
|
||||
void setRecording(bool r) { recording_ = r; }
|
||||
|
||||
const std::string& preeditText() const { return preedit_; }
|
||||
|
||||
private:
|
||||
SttEngine* engine_;
|
||||
fcitx::InputContext* ic_;
|
||||
std::string preedit_;
|
||||
bool recording_ = false;
|
||||
};
|
||||
|
||||
class SttEngine : public fcitx::InputMethodEngineV2 {
|
||||
public:
|
||||
SttEngine(fcitx::Instance* instance);
|
||||
~SttEngine() override;
|
||||
|
||||
// InputMethodEngine interface
|
||||
void activate(const fcitx::InputMethodEntry& entry,
|
||||
fcitx::InputContextEvent& event) override;
|
||||
void deactivate(const fcitx::InputMethodEntry& entry,
|
||||
fcitx::InputContextEvent& event) override;
|
||||
void keyEvent(const fcitx::InputMethodEntry& entry,
|
||||
fcitx::KeyEvent& keyEvent) override;
|
||||
void reset(const fcitx::InputMethodEntry& entry,
|
||||
fcitx::InputContextEvent& event) override;
|
||||
|
||||
// List input methods this engine provides
|
||||
std::vector<fcitx::InputMethodEntry> listInputMethods() override {
|
||||
std::vector<fcitx::InputMethodEntry> result;
|
||||
result.emplace_back(
|
||||
"stt", // unique name
|
||||
_("Speech to Text"), // display name
|
||||
"*", // language (any)
|
||||
"stt" // addon name
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
fcitx::Instance* instance() { return instance_; }
|
||||
|
||||
// Process management
|
||||
void startProcess();
|
||||
void stopProcess();
|
||||
void sendCommand(const std::string& cmd);
|
||||
|
||||
// Mode
|
||||
SttMode mode() const { return mode_; }
|
||||
void setMode(SttMode m);
|
||||
void cycleMode();
|
||||
|
||||
private:
|
||||
void onProcessOutput();
|
||||
void handleEvent(const JsonEvent& ev);
|
||||
|
||||
fcitx::Instance* instance_;
|
||||
fcitx::FactoryFor<SttState> factory_;
|
||||
|
||||
// Process state
|
||||
pid_t childPid_ = -1;
|
||||
int stdinFd_ = -1;
|
||||
int stdoutFd_ = -1;
|
||||
std::unique_ptr<fcitx::EventSourceIO> ioEvent_;
|
||||
std::string readBuffer_;
|
||||
|
||||
// Mode
|
||||
SttMode mode_ = SttMode::Manual;
|
||||
|
||||
// Current state
|
||||
bool ready_ = false;
|
||||
fcitx::InputContext* activeIc_ = nullptr;
|
||||
};
|
||||
|
||||
// SttState implementation
|
||||
void SttState::setPreedit(const std::string& text) {
|
||||
preedit_ = text;
|
||||
if (ic_->hasFocus()) {
|
||||
fcitx::Text preeditText;
|
||||
preeditText.append(text, fcitx::TextFormatFlag::Underline);
|
||||
preeditText.setCursor(text.length());
|
||||
ic_->inputPanel().setClientPreedit(preeditText);
|
||||
ic_->updatePreedit();
|
||||
}
|
||||
}
|
||||
|
||||
void SttState::commit(const std::string& text) {
|
||||
if (!text.empty() && ic_->hasFocus()) {
|
||||
ic_->commitString(text);
|
||||
}
|
||||
clear();
|
||||
}
|
||||
|
||||
void SttState::clear() {
|
||||
preedit_.clear();
|
||||
if (ic_->hasFocus()) {
|
||||
ic_->inputPanel().reset();
|
||||
ic_->updatePreedit();
|
||||
ic_->updateUserInterface(fcitx::UserInterfaceComponent::InputPanel);
|
||||
}
|
||||
}
|
||||
|
||||
// SttEngine implementation
|
||||
SttEngine::SttEngine(fcitx::Instance* instance)
|
||||
: instance_(instance),
|
||||
factory_([this](fcitx::InputContext& ic) {
|
||||
return new SttState(this, &ic);
|
||||
}) {
|
||||
instance_->inputContextManager().registerProperty("sttState", &factory_);
|
||||
STT_INFO() << "SttEngine initialized";
|
||||
}
|
||||
|
||||
SttEngine::~SttEngine() {
|
||||
stopProcess();
|
||||
}
|
||||
|
||||
void SttEngine::activate(const fcitx::InputMethodEntry& entry,
|
||||
fcitx::InputContextEvent& event) {
|
||||
FCITX_UNUSED(entry);
|
||||
auto* ic = event.inputContext();
|
||||
activeIc_ = ic;
|
||||
|
||||
STT_INFO() << "STT activated";
|
||||
|
||||
// Start the backend process if not running
|
||||
if (childPid_ < 0) {
|
||||
startProcess();
|
||||
}
|
||||
|
||||
// In continuous mode, start recording automatically
|
||||
if (mode_ == SttMode::Continuous && ready_) {
|
||||
sendCommand("start");
|
||||
auto* state = ic->propertyFor(&factory_);
|
||||
state->setRecording(true);
|
||||
}
|
||||
}
|
||||
|
||||
void SttEngine::deactivate(const fcitx::InputMethodEntry& entry,
|
||||
fcitx::InputContextEvent& event) {
|
||||
FCITX_UNUSED(entry);
|
||||
auto* ic = event.inputContext();
|
||||
auto* state = ic->propertyFor(&factory_);
|
||||
|
||||
// Stop recording if active
|
||||
if (state->isRecording()) {
|
||||
sendCommand("cancel");
|
||||
state->setRecording(false);
|
||||
}
|
||||
state->clear();
|
||||
|
||||
activeIc_ = nullptr;
|
||||
STT_INFO() << "STT deactivated";
|
||||
}
|
||||
|
||||
void SttEngine::keyEvent(const fcitx::InputMethodEntry& entry,
|
||||
fcitx::KeyEvent& keyEvent) {
|
||||
FCITX_UNUSED(entry);
|
||||
auto* ic = keyEvent.inputContext();
|
||||
auto* state = ic->propertyFor(&factory_);
|
||||
|
||||
// Handle special keys
|
||||
if (keyEvent.isRelease()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto key = keyEvent.key();
|
||||
|
||||
// Escape: cancel recording/preedit
|
||||
if (key.check(FcitxKey_Escape)) {
|
||||
if (state->isRecording() || !state->preeditText().empty()) {
|
||||
sendCommand("cancel");
|
||||
state->setRecording(false);
|
||||
state->clear();
|
||||
keyEvent.filterAndAccept();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Enter/Return: accept preedit
|
||||
if (key.check(FcitxKey_Return) || key.check(FcitxKey_KP_Enter)) {
|
||||
if (!state->preeditText().empty()) {
|
||||
state->commit(state->preeditText());
|
||||
sendCommand("cancel");
|
||||
state->setRecording(false);
|
||||
keyEvent.filterAndAccept();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Space or Ctrl+R: toggle recording (in manual mode)
|
||||
if (mode_ == SttMode::Manual) {
|
||||
if (key.check(FcitxKey_space, fcitx::KeyState::Ctrl) ||
|
||||
key.check(FcitxKey_r, fcitx::KeyState::Ctrl)) {
|
||||
if (state->isRecording()) {
|
||||
sendCommand("stop");
|
||||
state->setRecording(false);
|
||||
} else {
|
||||
state->clear();
|
||||
sendCommand("start");
|
||||
state->setRecording(true);
|
||||
}
|
||||
keyEvent.filterAndAccept();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Ctrl+M: cycle mode
|
||||
if (key.check(FcitxKey_m, fcitx::KeyState::Ctrl)) {
|
||||
cycleMode();
|
||||
keyEvent.filterAndAccept();
|
||||
return;
|
||||
}
|
||||
|
||||
// In recording state, absorb most keys
|
||||
if (state->isRecording()) {
|
||||
keyEvent.filterAndAccept();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void SttEngine::reset(const fcitx::InputMethodEntry& entry,
|
||||
fcitx::InputContextEvent& event) {
|
||||
FCITX_UNUSED(entry);
|
||||
auto* ic = event.inputContext();
|
||||
auto* state = ic->propertyFor(&factory_);
|
||||
state->clear();
|
||||
}
|
||||
|
||||
void SttEngine::startProcess() {
|
||||
if (childPid_ > 0) {
|
||||
return; // Already running
|
||||
}
|
||||
|
||||
int stdinPipe[2];
|
||||
int stdoutPipe[2];
|
||||
|
||||
if (pipe(stdinPipe) < 0 || pipe(stdoutPipe) < 0) {
|
||||
STT_ERROR() << "Failed to create pipes";
|
||||
return;
|
||||
}
|
||||
|
||||
pid_t pid = fork();
|
||||
if (pid < 0) {
|
||||
STT_ERROR() << "Failed to fork";
|
||||
close(stdinPipe[0]);
|
||||
close(stdinPipe[1]);
|
||||
close(stdoutPipe[0]);
|
||||
close(stdoutPipe[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
if (pid == 0) {
|
||||
// Child process
|
||||
close(stdinPipe[1]);
|
||||
close(stdoutPipe[0]);
|
||||
|
||||
dup2(stdinPipe[0], STDIN_FILENO);
|
||||
dup2(stdoutPipe[1], STDOUT_FILENO);
|
||||
|
||||
close(stdinPipe[0]);
|
||||
close(stdoutPipe[1]);
|
||||
|
||||
// Determine mode string
|
||||
const char* modeStr = "manual";
|
||||
switch (mode_) {
|
||||
case SttMode::Oneshot: modeStr = "oneshot"; break;
|
||||
case SttMode::Continuous: modeStr = "continuous"; break;
|
||||
case SttMode::Manual: modeStr = "manual"; break;
|
||||
}
|
||||
|
||||
execlp(STT_STREAM_PATH, "stt-stream", "--mode", modeStr, nullptr);
|
||||
_exit(127);
|
||||
}
|
||||
|
||||
// Parent process
|
||||
close(stdinPipe[0]);
|
||||
close(stdoutPipe[1]);
|
||||
|
||||
childPid_ = pid;
|
||||
stdinFd_ = stdinPipe[1];
|
||||
stdoutFd_ = stdoutPipe[0];
|
||||
|
||||
// Set stdout non-blocking
|
||||
int flags = fcntl(stdoutFd_, F_GETFL, 0);
|
||||
fcntl(stdoutFd_, F_SETFL, flags | O_NONBLOCK);
|
||||
|
||||
// Watch stdout for events
|
||||
ioEvent_ = instance_->eventLoop().addIOEvent(
|
||||
stdoutFd_,
|
||||
fcitx::IOEventFlag::In,
|
||||
[this](fcitx::EventSourceIO*, int, fcitx::IOEventFlags) {
|
||||
onProcessOutput();
|
||||
return true;
|
||||
}
|
||||
);
|
||||
|
||||
STT_INFO() << "Started stt-stream process (pid=" << childPid_ << ")";
|
||||
}
|
||||
|
||||
void SttEngine::stopProcess() {
|
||||
if (childPid_ < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
ioEvent_.reset();
|
||||
|
||||
sendCommand("shutdown");
|
||||
close(stdinFd_);
|
||||
close(stdoutFd_);
|
||||
|
||||
// Wait for child to exit
|
||||
int status;
|
||||
waitpid(childPid_, &status, 0);
|
||||
|
||||
stdinFd_ = -1;
|
||||
stdoutFd_ = -1;
|
||||
childPid_ = -1;
|
||||
ready_ = false;
|
||||
|
||||
STT_INFO() << "Stopped stt-stream process";
|
||||
}
|
||||
|
||||
void SttEngine::sendCommand(const std::string& cmd) {
|
||||
if (stdinFd_ < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::string line = cmd + "\n";
|
||||
write(stdinFd_, line.c_str(), line.length());
|
||||
}
|
||||
|
||||
void SttEngine::onProcessOutput() {
|
||||
char buf[4096];
|
||||
ssize_t n;
|
||||
|
||||
while ((n = read(stdoutFd_, buf, sizeof(buf) - 1)) > 0) {
|
||||
buf[n] = '\0';
|
||||
readBuffer_ += buf;
|
||||
|
||||
// Process complete lines
|
||||
size_t pos;
|
||||
while ((pos = readBuffer_.find('\n')) != std::string::npos) {
|
||||
std::string line = readBuffer_.substr(0, pos);
|
||||
readBuffer_ = readBuffer_.substr(pos + 1);
|
||||
|
||||
if (!line.empty()) {
|
||||
auto ev = JsonEvent::parse(line);
|
||||
handleEvent(ev);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SttEngine::handleEvent(const JsonEvent& ev) {
|
||||
STT_DEBUG() << "Event: type=" << ev.type << " text=" << ev.text;
|
||||
|
||||
if (ev.type == "ready") {
|
||||
ready_ = true;
|
||||
STT_INFO() << "stt-stream ready";
|
||||
} else if (ev.type == "recording_started") {
|
||||
// Update UI to show recording state
|
||||
if (activeIc_) {
|
||||
auto* state = activeIc_->propertyFor(&factory_);
|
||||
state->setRecording(true);
|
||||
}
|
||||
} else if (ev.type == "recording_stopped") {
|
||||
if (activeIc_) {
|
||||
auto* state = activeIc_->propertyFor(&factory_);
|
||||
state->setRecording(false);
|
||||
}
|
||||
} else if (ev.type == "partial") {
|
||||
if (activeIc_) {
|
||||
auto* state = activeIc_->propertyFor(&factory_);
|
||||
state->setPreedit(ev.text);
|
||||
}
|
||||
} else if (ev.type == "final") {
|
||||
if (activeIc_) {
|
||||
auto* state = activeIc_->propertyFor(&factory_);
|
||||
state->commit(ev.text);
|
||||
state->setRecording(false);
|
||||
|
||||
// In oneshot mode, we're done
|
||||
// In continuous mode, keep listening
|
||||
if (mode_ == SttMode::Continuous && ready_) {
|
||||
sendCommand("start");
|
||||
state->setRecording(true);
|
||||
}
|
||||
}
|
||||
} else if (ev.type == "error") {
|
||||
STT_ERROR() << "stt-stream error: " << ev.message;
|
||||
} else if (ev.type == "shutdown") {
|
||||
ready_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void SttEngine::setMode(SttMode m) {
|
||||
if (mode_ == m) return;
|
||||
|
||||
mode_ = m;
|
||||
|
||||
// Notify the backend
|
||||
const char* modeStr = "manual";
|
||||
switch (m) {
|
||||
case SttMode::Oneshot: modeStr = "oneshot"; break;
|
||||
case SttMode::Continuous: modeStr = "continuous"; break;
|
||||
case SttMode::Manual: modeStr = "manual"; break;
|
||||
}
|
||||
|
||||
std::string cmd = "{\"cmd\":\"set_mode\",\"mode\":\"";
|
||||
cmd += modeStr;
|
||||
cmd += "\"}";
|
||||
sendCommand(cmd);
|
||||
|
||||
STT_INFO() << "Mode changed to: " << modeStr;
|
||||
}
|
||||
|
||||
void SttEngine::cycleMode() {
|
||||
switch (mode_) {
|
||||
case SttMode::Manual:
|
||||
setMode(SttMode::Oneshot);
|
||||
break;
|
||||
case SttMode::Oneshot:
|
||||
setMode(SttMode::Continuous);
|
||||
break;
|
||||
case SttMode::Continuous:
|
||||
setMode(SttMode::Manual);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Addon factory
|
||||
class SttEngineFactory : public fcitx::AddonFactory {
|
||||
public:
|
||||
fcitx::AddonInstance* create(fcitx::AddonManager* manager) override {
|
||||
return new SttEngine(manager->instance());
|
||||
}
|
||||
};
|
||||
|
||||
FCITX_ADDON_FACTORY(SttEngineFactory);
|
||||
Loading…
Add table
Add a link
Reference in a new issue