This commit is contained in:
RingOfStorms (Joshua Bell) 2026-01-13 14:10:10 -06:00
parent c408693861
commit 02f24bb524
15 changed files with 4184 additions and 15 deletions

View file

@ -0,0 +1,4 @@
#pragma once
// Path to stt-stream binary (the engine passes this to exec when launching the backend).
// NOTE(review): "@STT_STREAM_PATH@" looks like a configure-time placeholder that the
// build system is expected to substitute — confirm against the build files.
#define STT_STREAM_PATH "@STT_STREAM_PATH@"

View file

@ -0,0 +1,533 @@
/*
* fcitx5-stt: Speech-to-Text Input Method Engine for Fcitx5
*
* This is a thin shim that spawns the stt-stream Rust binary and
* bridges its JSON events to Fcitx5's input method API.
*
* Modes:
* - Oneshot: Record until silence, commit, reset
* - Continuous: Always listen, commit on silence
* - Manual: Start/stop via hotkey
*
* UX:
* - Partial text shown as preedit (underlined)
* - Final text committed on stop/silence
* - Escape cancels without committing
* - Enter accepts current preedit
*/
#include <fcitx/addonfactory.h>
#include <fcitx/addonmanager.h>
#include <fcitx/inputcontext.h>
#include <fcitx/inputcontextmanager.h>
#include <fcitx/inputmethodengine.h>
#include <fcitx/inputpanel.h>
#include <fcitx/instance.h>
#include <fcitx-utils/event.h>
#include <fcitx-utils/i18n.h>
#include <fcitx-utils/log.h>
#include <fcitx-utils/utf8.h>
#include <cerrno>
#include <cstring>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <fcntl.h>
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>
#include "config.h"
namespace {
// Log category "stt" used by every message emitted from this file.
FCITX_DEFINE_LOG_CATEGORY(stt_log, "stt");
// Shorthand stream-style loggers, one per severity, all bound to the stt_log category.
// Usage: STT_INFO() << "message";
#define STT_DEBUG() FCITX_LOGC(stt_log, Debug)
#define STT_INFO() FCITX_LOGC(stt_log, Info)
#define STT_WARN() FCITX_LOGC(stt_log, Warn)
#define STT_ERROR() FCITX_LOGC(stt_log, Error)
// Operating modes (see the file header for the intended UX of each).
// The engine maps these to the strings "oneshot", "continuous" and "manual"
// when talking to the backend.
enum class SttMode {
// Record until silence, commit, reset.
Oneshot,
// Always listen, commit on silence.
Continuous,
// Start/stop via hotkey.
Manual
};
// Minimal JSON event decoding (we only need a few string fields).
//
// One event is one line of JSON text. Only the string members "type", "text"
// and "message" are extracted; everything else in the line is skipped.
struct JsonEvent {
    std::string type;
    std::string text;
    std::string message;

    /**
     * Extract the known string fields from one line of JSON.
     *
     * Unlike a plain substring search this tolerates whitespace around ':'
     * and decodes JSON string escapes (\" \\ \/ \b \f \n \r \t \uXXXX,
     * including surrogate pairs), so text containing quotes or newlines is
     * returned intact instead of being cut at the first escaped quote.
     *
     * @param line One line of JSON text.
     * @return Event whose fields are empty when the key is missing, its value
     *         is not a string, or the line is malformed.
     */
    static JsonEvent parse(const std::string& line) {
        JsonEvent ev;
        ev.type = stringField(line, "type");
        ev.text = stringField(line, "text");
        ev.message = stringField(line, "message");
        return ev;
    }

private:
    using Pos = std::string::size_type;

    // Advance past JSON whitespace starting at pos.
    static Pos skipSpace(const std::string& s, Pos pos) {
        while (pos < s.size() &&
               (s[pos] == ' ' || s[pos] == '\t' || s[pos] == '\r' || s[pos] == '\n')) {
            ++pos;
        }
        return pos;
    }

    // Read exactly four hex digits at pos; on success advance pos past them.
    static bool readHex4(const std::string& s, Pos& pos, unsigned long& value) {
        if (pos + 4 > s.size()) {
            return false;
        }
        value = 0;
        for (Pos i = 0; i < 4; ++i) {
            const char c = s[pos + i];
            unsigned long digit = 0;
            if (c >= '0' && c <= '9') {
                digit = static_cast<unsigned long>(c - '0');
            } else if (c >= 'a' && c <= 'f') {
                digit = static_cast<unsigned long>(c - 'a') + 10;
            } else if (c >= 'A' && c <= 'F') {
                digit = static_cast<unsigned long>(c - 'A') + 10;
            } else {
                return false;
            }
            value = (value << 4) | digit;
        }
        pos += 4;
        return true;
    }

    // Append the UTF-8 encoding of code point cp to out.
    static void appendUtf8(std::string& out, unsigned long cp) {
        if (cp < 0x80) {
            out += static_cast<char>(cp);
        } else if (cp < 0x800) {
            out += static_cast<char>(0xC0 | (cp >> 6));
            out += static_cast<char>(0x80 | (cp & 0x3F));
        } else if (cp < 0x10000) {
            out += static_cast<char>(0xE0 | (cp >> 12));
            out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            out += static_cast<char>(0x80 | (cp & 0x3F));
        } else {
            out += static_cast<char>(0xF0 | (cp >> 18));
            out += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
            out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            out += static_cast<char>(0x80 | (cp & 0x3F));
        }
    }

    // Decode the JSON string whose opening quote is at pos. On success pos is
    // left just past the closing quote. Returns false if the string is
    // unterminated or contains a malformed \u escape.
    static bool readString(const std::string& s, Pos& pos, std::string& out) {
        out.clear();
        ++pos; // skip the opening quote
        while (pos < s.size()) {
            const char c = s[pos++];
            if (c == '"') {
                return true;
            }
            if (c != '\\') {
                out += c;
                continue;
            }
            if (pos >= s.size()) {
                return false;
            }
            const char esc = s[pos++];
            switch (esc) {
            case 'b': out += '\b'; break;
            case 'f': out += '\f'; break;
            case 'n': out += '\n'; break;
            case 'r': out += '\r'; break;
            case 't': out += '\t'; break;
            case 'u': {
                unsigned long cp = 0;
                if (!readHex4(s, pos, cp)) {
                    return false;
                }
                // A high surrogate must be combined with the following \uDCxx.
                if (cp >= 0xD800 && cp <= 0xDBFF && pos + 1 < s.size() &&
                    s[pos] == '\\' && s[pos + 1] == 'u') {
                    Pos lookahead = pos + 2;
                    unsigned long low = 0;
                    if (readHex4(s, lookahead, low) && low >= 0xDC00 && low <= 0xDFFF) {
                        cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                        pos = lookahead;
                    }
                }
                if (cp >= 0xD800 && cp <= 0xDFFF) {
                    cp = 0xFFFD; // lone surrogate: emit the replacement character
                }
                appendUtf8(out, cp);
                break;
            }
            default:
                out += esc; // covers \" \\ \/ (and passes unknown escapes through)
                break;
            }
        }
        return false;
    }

    // Return the decoded value of the first `"key": "<string>"` pair in line,
    // or an empty string when there is none.
    static std::string stringField(const std::string& line, const std::string& key) {
        Pos pos = 0;
        while (pos < line.size()) {
            if (line[pos] != '"') {
                ++pos;
                continue;
            }
            std::string name;
            if (!readString(line, pos, name)) {
                return "";
            }
            const Pos colon = skipSpace(line, pos);
            if (colon >= line.size() || line[colon] != ':') {
                continue; // a string that is not in key position
            }
            pos = skipSpace(line, colon + 1);
            if (pos >= line.size() || line[pos] != '"') {
                continue; // value is not a string (number, object, null, ...)
            }
            // Always consume the value so its contents are never mistaken for a key.
            std::string value;
            if (!readString(line, pos, value)) {
                return "";
            }
            if (name == key) {
                return value;
            }
        }
        return "";
    }
};
} // anonymous namespace
class SttEngine;

/**
 * Per-input-context speech state.
 *
 * Holds the current preedit text and the recording flag for one
 * InputContext, and pushes preedit/commit updates to that context.
 */
class SttState : public fcitx::InputContextProperty {
public:
    /// Binds the state to its owning engine and to the input context it decorates.
    SttState(SttEngine* engine, fcitx::InputContext* ic)
        : engine_(engine), ic_(ic) {}

    /// Remembers `text` as the preedit and shows it on the context when focused.
    void setPreedit(const std::string& text);

    /// Commits `text` to the context (when non-empty and focused), then clears.
    void commit(const std::string& text);

    /// Drops the preedit and resets the input panel when focused.
    void clear();

    /// Whether this context is currently flagged as recording.
    bool isRecording() const {
        return recording_;
    }

    /// Sets the recording flag; does not talk to the backend.
    void setRecording(bool r) {
        recording_ = r;
    }

    /// The preedit text last stored via setPreedit().
    const std::string& preeditText() const {
        return preedit_;
    }

private:
    SttEngine* engine_;
    fcitx::InputContext* ic_;
    std::string preedit_;
    bool recording_{false};
};
// Input method engine that drives the stt-stream child process.
//
// Commands are written to the child's stdin and newline-delimited JSON events
// are read from its stdout; events are applied to the SttState of the input
// context recorded in activeIc_.
class SttEngine : public fcitx::InputMethodEngineV2 {
public:
SttEngine(fcitx::Instance* instance);
// Stops the child process (see stopProcess()).
~SttEngine() override;
// InputMethodEngine interface
// activate: records the context as active and starts the backend if needed.
void activate(const fcitx::InputMethodEntry& entry,
fcitx::InputContextEvent& event) override;
// deactivate: cancels any recording on the context and clears its preedit.
void deactivate(const fcitx::InputMethodEntry& entry,
fcitx::InputContextEvent& event) override;
// keyEvent: handles Escape, Enter, the manual-mode toggle keys and Ctrl+M.
void keyEvent(const fcitx::InputMethodEntry& entry,
fcitx::KeyEvent& keyEvent) override;
// reset: clears the context's preedit.
void reset(const fcitx::InputMethodEntry& entry,
fcitx::InputContextEvent& event) override;
// List input methods this engine provides
// (a single entry named "stt").
std::vector<fcitx::InputMethodEntry> listInputMethods() override {
std::vector<fcitx::InputMethodEntry> result;
result.emplace_back(
"stt", // unique name
_("Speech to Text"), // display name
"*", // language (any)
"stt" // addon name
);
return result;
}
fcitx::Instance* instance() { return instance_; }
// Process management
// startProcess: fork/exec the backend with pipes on its stdin/stdout.
void startProcess();
// stopProcess: ask the backend to shut down, close the pipes and reap it.
void stopProcess();
// sendCommand: write `cmd` plus a newline to the backend's stdin.
void sendCommand(const std::string& cmd);
// Mode
SttMode mode() const { return mode_; }
// setMode: store the mode and notify the backend; no-op if unchanged.
void setMode(SttMode m);
// cycleMode: Manual -> Oneshot -> Continuous -> Manual.
void cycleMode();
private:
// Drains the child's stdout and dispatches each complete line.
void onProcessOutput();
// Applies one decoded backend event to the engine / active context.
void handleEvent(const JsonEvent& ev);
fcitx::Instance* instance_;
// Creates one SttState per input context (registered as "sttState").
fcitx::FactoryFor<SttState> factory_;
// Process state
// -1 means "no child" / "no descriptor".
pid_t childPid_ = -1;
int stdinFd_ = -1;
int stdoutFd_ = -1;
// Event-loop watch on stdoutFd_.
std::unique_ptr<fcitx::EventSourceIO> ioEvent_;
// Bytes read from the child that do not yet form a complete line.
std::string readBuffer_;
// Mode
SttMode mode_ = SttMode::Manual;
// Current state
// ready_: set on the backend's "ready" event, cleared on "shutdown".
bool ready_ = false;
// Context set by activate() and cleared by deactivate(); not owned.
fcitx::InputContext* activeIc_ = nullptr;
};
// SttState implementation
void SttState::setPreedit(const std::string& text) {
preedit_ = text;
if (ic_->hasFocus()) {
fcitx::Text preeditText;
preeditText.append(text, fcitx::TextFormatFlag::Underline);
preeditText.setCursor(text.length());
ic_->inputPanel().setClientPreedit(preeditText);
ic_->updatePreedit();
}
}
/**
 * Commit `text` to the input context when it is non-empty and the context
 * has focus; the preedit is cleared afterwards in every case.
 */
void SttState::commit(const std::string& text) {
    if (!text.empty()) {
        if (ic_->hasFocus()) {
            ic_->commitString(text);
        }
    }
    // Must run after commitString: `text` may alias the stored preedit.
    clear();
}
/**
 * Forget the stored preedit and, if the context has focus, reset its input
 * panel and refresh the preedit and input-panel UI.
 */
void SttState::clear() {
    preedit_.clear();

    if (!ic_->hasFocus()) {
        return;
    }

    ic_->inputPanel().reset();
    ic_->updatePreedit();
    ic_->updateUserInterface(fcitx::UserInterfaceComponent::InputPanel);
}
// SttEngine implementation
/**
 * Construct the engine and register the per-context state factory under the
 * property name "sttState". The backend process is not started here.
 */
SttEngine::SttEngine(fcitx::Instance* instance)
    : instance_(instance),
      factory_([this](fcitx::InputContext& context) -> SttState* {
          // The property factory expects a raw pointer to a new state object.
          return new SttState(this, &context);
      }) {
    auto& contextManager = instance_->inputContextManager();
    contextManager.registerProperty("sttState", &factory_);
    STT_INFO() << "SttEngine initialized";
}
// Shuts down the backend process (a no-op when none is running) on destruction.
SttEngine::~SttEngine() {
stopProcess();
}
/**
 * Called when the input method becomes active on a context: remembers the
 * context, launches the backend if it is not running, and in continuous mode
 * starts recording right away when the backend is already ready.
 *
 * NOTE(review): when the backend was only just launched, ready_ is still
 * false here, so continuous mode does not auto-start on this call — confirm
 * whether the backend starts listening on its own in that mode.
 */
void SttEngine::activate(const fcitx::InputMethodEntry& entry,
                         fcitx::InputContextEvent& event) {
    FCITX_UNUSED(entry);

    auto* const context = event.inputContext();
    activeIc_ = context;
    STT_INFO() << "STT activated";

    // Lazily spawn the backend on first use.
    if (childPid_ < 0) {
        startProcess();
    }

    const bool autoStart = (mode_ == SttMode::Continuous) && ready_;
    if (!autoStart) {
        return;
    }

    sendCommand("start");
    context->propertyFor(&factory_)->setRecording(true);
}
/**
 * Called when the input method is deactivated on a context: cancels an
 * in-progress recording, clears the preedit and forgets the active context.
 */
void SttEngine::deactivate(const fcitx::InputMethodEntry& entry,
                           fcitx::InputContextEvent& event) {
    FCITX_UNUSED(entry);

    auto* const context = event.inputContext();
    auto* const state = context->propertyFor(&factory_);

    // Abort rather than commit: the user is leaving this context.
    const bool wasRecording = state->isRecording();
    if (wasRecording) {
        sendCommand("cancel");
        state->setRecording(false);
    }

    state->clear();
    activeIc_ = nullptr;
    STT_INFO() << "STT deactivated";
}
/**
 * Key handling while the STT input method is active (key presses only):
 *  - Escape: cancel recording / discard preedit (if there is either)
 *  - Return / keypad Enter: commit the current preedit (if non-empty)
 *  - Ctrl+Space or Ctrl+R: toggle recording (manual mode only)
 *  - Ctrl+M: cycle the operating mode
 *  - any other key while recording: swallowed
 * Keys that match none of these are left untouched for the application.
 */
void SttEngine::keyEvent(const fcitx::InputMethodEntry& entry,
                         fcitx::KeyEvent& keyEvent) {
    FCITX_UNUSED(entry);

    auto* const context = keyEvent.inputContext();
    auto* const state = context->propertyFor(&factory_);

    // Only presses are interesting; releases pass through.
    if (keyEvent.isRelease()) {
        return;
    }

    const auto pressed = keyEvent.key();

    // Escape: cancel recording/preedit
    if (pressed.check(FcitxKey_Escape) &&
        (state->isRecording() || !state->preeditText().empty())) {
        sendCommand("cancel");
        state->setRecording(false);
        state->clear();
        keyEvent.filterAndAccept();
        return;
    }

    // Enter/Return: accept preedit
    const bool isEnter =
        pressed.check(FcitxKey_Return) || pressed.check(FcitxKey_KP_Enter);
    if (isEnter && !state->preeditText().empty()) {
        state->commit(state->preeditText());
        sendCommand("cancel");
        state->setRecording(false);
        keyEvent.filterAndAccept();
        return;
    }

    // Ctrl+Space or Ctrl+R: toggle recording (in manual mode)
    if (mode_ == SttMode::Manual) {
        const bool isToggle =
            pressed.check(FcitxKey_space, fcitx::KeyState::Ctrl) ||
            pressed.check(FcitxKey_r, fcitx::KeyState::Ctrl);
        if (isToggle) {
            if (!state->isRecording()) {
                state->clear();
                sendCommand("start");
                state->setRecording(true);
            } else {
                sendCommand("stop");
                state->setRecording(false);
            }
            keyEvent.filterAndAccept();
            return;
        }
    }

    // Ctrl+M: cycle mode
    if (pressed.check(FcitxKey_m, fcitx::KeyState::Ctrl)) {
        cycleMode();
        keyEvent.filterAndAccept();
        return;
    }

    // In recording state, absorb every remaining key
    if (state->isRecording()) {
        keyEvent.filterAndAccept();
    }
}
/**
 * Reset request for a context: drops its preedit. The recording flag and the
 * backend are left untouched.
 */
void SttEngine::reset(const fcitx::InputMethodEntry& entry,
                      fcitx::InputContextEvent& event) {
    FCITX_UNUSED(entry);
    event.inputContext()->propertyFor(&factory_)->clear();
}
void SttEngine::startProcess() {
if (childPid_ > 0) {
return; // Already running
}
int stdinPipe[2];
int stdoutPipe[2];
if (pipe(stdinPipe) < 0 || pipe(stdoutPipe) < 0) {
STT_ERROR() << "Failed to create pipes";
return;
}
pid_t pid = fork();
if (pid < 0) {
STT_ERROR() << "Failed to fork";
close(stdinPipe[0]);
close(stdinPipe[1]);
close(stdoutPipe[0]);
close(stdoutPipe[1]);
return;
}
if (pid == 0) {
// Child process
close(stdinPipe[1]);
close(stdoutPipe[0]);
dup2(stdinPipe[0], STDIN_FILENO);
dup2(stdoutPipe[1], STDOUT_FILENO);
close(stdinPipe[0]);
close(stdoutPipe[1]);
// Determine mode string
const char* modeStr = "manual";
switch (mode_) {
case SttMode::Oneshot: modeStr = "oneshot"; break;
case SttMode::Continuous: modeStr = "continuous"; break;
case SttMode::Manual: modeStr = "manual"; break;
}
execlp(STT_STREAM_PATH, "stt-stream", "--mode", modeStr, nullptr);
_exit(127);
}
// Parent process
close(stdinPipe[0]);
close(stdoutPipe[1]);
childPid_ = pid;
stdinFd_ = stdinPipe[1];
stdoutFd_ = stdoutPipe[0];
// Set stdout non-blocking
int flags = fcntl(stdoutFd_, F_GETFL, 0);
fcntl(stdoutFd_, F_SETFL, flags | O_NONBLOCK);
// Watch stdout for events
ioEvent_ = instance_->eventLoop().addIOEvent(
stdoutFd_,
fcitx::IOEventFlag::In,
[this](fcitx::EventSourceIO*, int, fcitx::IOEventFlags) {
onProcessOutput();
return true;
}
);
STT_INFO() << "Started stt-stream process (pid=" << childPid_ << ")";
}
void SttEngine::stopProcess() {
if (childPid_ < 0) {
return;
}
ioEvent_.reset();
sendCommand("shutdown");
close(stdinFd_);
close(stdoutFd_);
// Wait for child to exit
int status;
waitpid(childPid_, &status, 0);
stdinFd_ = -1;
stdoutFd_ = -1;
childPid_ = -1;
ready_ = false;
STT_INFO() << "Stopped stt-stream process";
}
void SttEngine::sendCommand(const std::string& cmd) {
if (stdinFd_ < 0) {
return;
}
std::string line = cmd + "\n";
write(stdinFd_, line.c_str(), line.length());
}
void SttEngine::onProcessOutput() {
char buf[4096];
ssize_t n;
while ((n = read(stdoutFd_, buf, sizeof(buf) - 1)) > 0) {
buf[n] = '\0';
readBuffer_ += buf;
// Process complete lines
size_t pos;
while ((pos = readBuffer_.find('\n')) != std::string::npos) {
std::string line = readBuffer_.substr(0, pos);
readBuffer_ = readBuffer_.substr(pos + 1);
if (!line.empty()) {
auto ev = JsonEvent::parse(line);
handleEvent(ev);
}
}
}
}
/**
 * Apply one backend event.
 *
 *  - "ready" / "shutdown": set / clear ready_
 *  - "error": log ev.message
 *  - "recording_started" / "recording_stopped": update the recording flag
 *  - "partial": show ev.text as preedit
 *  - "final": commit ev.text; in continuous mode (and while ready) start the
 *    next recording immediately
 *
 * Events that target a context are dropped when there is no active context.
 * Unknown event types are ignored.
 */
void SttEngine::handleEvent(const JsonEvent& ev) {
    STT_DEBUG() << "Event: type=" << ev.type << " text=" << ev.text;

    const std::string& kind = ev.type;

    // Events that only concern the engine itself.
    if (kind == "ready") {
        ready_ = true;
        STT_INFO() << "stt-stream ready";
        return;
    }
    if (kind == "error") {
        STT_ERROR() << "stt-stream error: " << ev.message;
        return;
    }
    if (kind == "shutdown") {
        ready_ = false;
        return;
    }

    // Everything below acts on the active context's state.
    if (!activeIc_) {
        return;
    }
    auto* const state = activeIc_->propertyFor(&factory_);

    if (kind == "recording_started") {
        state->setRecording(true);
    } else if (kind == "recording_stopped") {
        state->setRecording(false);
    } else if (kind == "partial") {
        state->setPreedit(ev.text);
    } else if (kind == "final") {
        state->commit(ev.text);
        state->setRecording(false);
        // Oneshot/manual: done. Continuous: keep listening.
        const bool keepListening = (mode_ == SttMode::Continuous) && ready_;
        if (keepListening) {
            sendCommand("start");
            state->setRecording(true);
        }
    }
}
/**
 * Switch the operating mode and tell the backend about it. A request for the
 * mode that is already active is a no-op (nothing is sent).
 *
 * NOTE(review): this command is sent as a JSON object, while the other
 * commands in this file ("start", "stop", "cancel", "shutdown") are bare
 * words — confirm the backend accepts both forms.
 */
void SttEngine::setMode(SttMode m) {
    if (m == mode_) {
        return;
    }
    mode_ = m;

    // Backend name of the mode; anything unexpected falls back to "manual".
    const char* modeStr = (m == SttMode::Oneshot)      ? "oneshot"
                          : (m == SttMode::Continuous) ? "continuous"
                                                       : "manual";

    // Notify the backend
    const std::string cmd =
        std::string("{\"cmd\":\"set_mode\",\"mode\":\"") + modeStr + "\"}";
    sendCommand(cmd);

    STT_INFO() << "Mode changed to: " << modeStr;
}
void SttEngine::cycleMode() {
switch (mode_) {
case SttMode::Manual:
setMode(SttMode::Oneshot);
break;
case SttMode::Oneshot:
setMode(SttMode::Continuous);
break;
case SttMode::Continuous:
setMode(SttMode::Manual);
break;
}
}
// Addon factory
/**
 * Addon entry point: builds the SttEngine for the Fcitx instance that owns
 * the given addon manager.
 */
class SttEngineFactory : public fcitx::AddonFactory {
public:
    fcitx::AddonInstance* create(fcitx::AddonManager* manager) override {
        fcitx::Instance* const owner = manager->instance();
        // Returned as a raw pointer, as the factory interface requires.
        return new SttEngine(owner);
    }
};

// Exports SttEngineFactory as this shared object's addon factory.
FCITX_ADDON_FACTORY(SttEngineFactory);