From 9bf72169db38df20f0b98796a17ee3aa23929ad9 Mon Sep 17 00:00:00 2001 From: TomiEckert Date: Tue, 3 Mar 2026 12:15:52 +0100 Subject: [PATCH] refactor: modularize text injection with a factory and dedicated backend implementations, including a new Wayland clipboard option. --- Commands/OnboardCommand.cs | 2 +- Configuration/ToakConfig.cs | 2 +- Core/Constants.cs | 2 +- Core/DaemonService.cs | 2 +- Core/Interfaces/Interfaces.cs | 4 +- Core/TranscriptionOrchestrator.cs | 2 +- IO/Injectors/WlClipboardTextInjector.cs | 113 +++++++++++++++++++ IO/Injectors/WtypeTextInjector.cs | 81 ++++++++++++++ IO/Injectors/XdotoolTextInjector.cs | 81 ++++++++++++++ IO/Injectors/YdotoolTextInjector.cs | 71 ++++++++++++ IO/TextInjector.cs | 137 ------------------------ IO/TextInjectorFactory.cs | 23 ++++ README.md | 6 +- 13 files changed, 379 insertions(+), 147 deletions(-) create mode 100644 IO/Injectors/WlClipboardTextInjector.cs create mode 100644 IO/Injectors/WtypeTextInjector.cs create mode 100644 IO/Injectors/XdotoolTextInjector.cs create mode 100644 IO/Injectors/YdotoolTextInjector.cs delete mode 100644 IO/TextInjector.cs create mode 100644 IO/TextInjectorFactory.cs diff --git a/Commands/OnboardCommand.cs b/Commands/OnboardCommand.cs index 547ace8..c4a163a 100644 --- a/Commands/OnboardCommand.cs +++ b/Commands/OnboardCommand.cs @@ -134,7 +134,7 @@ public static class OnboardCommand })); var sessionType = Environment.GetEnvironmentVariable("XDG_SESSION_TYPE")?.ToLowerInvariant(); - var typingBackends = sessionType == "wayland" ? new[] { "wtype", "ydotool", "xdotool" } : new[] { "xdotool", "ydotool" }; + var typingBackends = sessionType == "wayland" ? 
new[] { "wtype", "wl-clipboard", "ydotool" } : new[] { "xdotool", "ydotool" }; config.TypingBackend = AnsiConsole.Prompt( new SelectionPrompt() diff --git a/Configuration/ToakConfig.cs b/Configuration/ToakConfig.cs index 612811d..287477e 100644 --- a/Configuration/ToakConfig.cs +++ b/Configuration/ToakConfig.cs @@ -8,7 +8,7 @@ public class ToakConfig public string FireworksApiKey { get; set; } = string.Empty; public string LlmProvider { get; set; } = "groq"; // groq, together, cerebras, or fireworks public string WhisperProvider { get; set; } = "groq"; // groq or fireworks - public string TypingBackend { get; set; } = "xdotool"; // wtype or xdotool + public string TypingBackend { get; set; } = "wtype"; // wtype, ydotool, wl-clipboard, or xdotool public string AudioBackend { get; set; } = "pw-record"; // pw-record or ffmpeg public bool ModulePunctuation { get; set; } = true; public bool ModuleTechnicalSanitization { get; set; } = true; diff --git a/Core/Constants.cs b/Core/Constants.cs index 0c06848..188cb5c 100644 --- a/Core/Constants.cs +++ b/Core/Constants.cs @@ -42,6 +42,6 @@ public static class Constants { public const string LlmModel = "openai/gpt-oss-20b"; public const string WhisperModel = "whisper-large-v3-turbo"; - public const int DefaultTypeDelayMs = 2; + public const int DefaultTypeDelayMs = 5; } } diff --git a/Core/DaemonService.cs b/Core/DaemonService.cs index 6f5c55f..40a5801 100644 --- a/Core/DaemonService.cs +++ b/Core/DaemonService.cs @@ -76,7 +76,7 @@ public static class DaemonService configManager, recorder, notifications, - new TextInjector(notifications), + TextInjectorFactory.Create(config.TypingBackend, notifications), new HistoryManager(), new ClipboardManager(notifications), stateTracker diff --git a/Core/Interfaces/Interfaces.cs b/Core/Interfaces/Interfaces.cs index c8166cf..3ba32e7 100644 --- a/Core/Interfaces/Interfaces.cs +++ b/Core/Interfaces/Interfaces.cs @@ -34,8 +34,8 @@ public interface INotifications public interface 
ITextInjector { - Task InjectStreamAsync(IAsyncEnumerable textStream, string backend = "xdotool"); - Task InjectTextAsync(string text, string backend = "xdotool"); + Task InjectStreamAsync(IAsyncEnumerable tokenStream); + Task InjectTextAsync(string text); } public interface IHistoryManager diff --git a/Core/TranscriptionOrchestrator.cs b/Core/TranscriptionOrchestrator.cs index 053ba39..7446704 100644 --- a/Core/TranscriptionOrchestrator.cs +++ b/Core/TranscriptionOrchestrator.cs @@ -121,7 +121,7 @@ public class TranscriptionOrchestrator( } else { - var fullText = await _textInjector.InjectStreamAsync(tokenStream, config.TypingBackend); + var fullText = await _textInjector.InjectStreamAsync(tokenStream); stopWatch.Stop(); _historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds); _notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms"); diff --git a/IO/Injectors/WlClipboardTextInjector.cs b/IO/Injectors/WlClipboardTextInjector.cs new file mode 100644 index 0000000..ae1ab98 --- /dev/null +++ b/IO/Injectors/WlClipboardTextInjector.cs @@ -0,0 +1,113 @@ +using System.Diagnostics; +using Toak.Core; +using Toak.Core.Interfaces; + +namespace Toak.IO.Injectors; + +/// +/// Text injector that writes text to the Wayland clipboard via wl-copy +/// and then pastes it using Shift+Insert via wtype. +/// Empty or whitespace-only text passed to InjectTextAsync is skipped without touching the clipboard; +/// streamed tokens are buffered in full and then pasted in a single copy-and-paste operation. 
+/// +public class WlClipboardTextInjector(INotifications notifications) : ITextInjector +{ + private readonly INotifications _notifications = notifications; + + public Task InjectTextAsync(string text) + { + Logger.LogDebug("Injecting text using wl-clipboard..."); + if (string.IsNullOrWhiteSpace(text)) return Task.CompletedTask; + + try + { + // Write the full text to wl-copy via stdin + var copyInfo = new ProcessStartInfo + { + FileName = Constants.Commands.ClipboardWayland, + UseShellExecute = false, + CreateNoWindow = true, + RedirectStandardInput = true + }; + using var copyProcess = Process.Start(copyInfo); + if (copyProcess != null) + { + copyProcess.StandardInput.Write(text); + copyProcess.StandardInput.Close(); + copyProcess.WaitForExit(); + } + + Task.Delay(100).Wait(); + + // Simulate Shift+Insert to paste into the focused window + var pasteInfo = new ProcessStartInfo + { + FileName = Constants.Commands.TypeWayland, + Arguments = "-M shift -k Insert -m shift", + UseShellExecute = false, + CreateNoWindow = true + }; + var pasteProcess = Process.Start(pasteInfo); + pasteProcess?.WaitForExit(); + } + catch (Exception ex) + { + Console.WriteLine($"[WlClipboardTextInjector] Error injecting text: {ex.Message}"); + _notifications.Notify("Injection Error", "Could not type text into window."); + } + + return Task.CompletedTask; + } + + public async Task InjectStreamAsync(IAsyncEnumerable tokenStream) + { + Logger.LogDebug("Setting up stream injection using wl-clipboard..."); + var fullText = string.Empty; + + try + { + // Collect all tokens first + await foreach (var token in tokenStream) + { + Logger.LogDebug($"Buffering token: '{token}'"); + fullText += token; + } + + // Write the full text to wl-copy via stdin + var copyInfo = new ProcessStartInfo + { + FileName = Constants.Commands.ClipboardWayland, + UseShellExecute = false, + CreateNoWindow = true, + RedirectStandardInput = true + }; + using var copyProcess = Process.Start(copyInfo); + if (copyProcess != 
null) + { + await copyProcess.StandardInput.WriteAsync(fullText); + copyProcess.StandardInput.Close(); + await copyProcess.WaitForExitAsync(); + } + + await Task.Delay(100); + + // Simulate Shift+Insert to paste into the focused window + var pasteInfo = new ProcessStartInfo + { + FileName = Constants.Commands.TypeWayland, + Arguments = "-M shift -k Insert -m shift", + UseShellExecute = false, + CreateNoWindow = true + }; + using var pasteProcess = Process.Start(pasteInfo); + if (pasteProcess != null) await pasteProcess.WaitForExitAsync(); + } + catch (Exception ex) + { + Console.WriteLine($"[WlClipboardTextInjector] Error injecting text stream: {ex.Message}"); + _notifications.Notify("Injection Error", "Could not type text stream into window."); + } + + return fullText; + } +} diff --git a/IO/Injectors/WtypeTextInjector.cs b/IO/Injectors/WtypeTextInjector.cs new file mode 100644 index 0000000..31d0430 --- /dev/null +++ b/IO/Injectors/WtypeTextInjector.cs @@ -0,0 +1,81 @@ +using System.Diagnostics; +using Toak.Core; +using Toak.Core.Interfaces; + +namespace Toak.IO.Injectors; + +/// +/// Text injector that uses wtype to type text on Wayland. 
+/// +public class WtypeTextInjector(INotifications notifications) : ITextInjector +{ + private readonly INotifications _notifications = notifications; + + public Task InjectTextAsync(string text) + { + Logger.LogDebug("Injecting text using wtype..."); + if (string.IsNullOrWhiteSpace(text)) return Task.CompletedTask; + + try + { + var pInfo = new ProcessStartInfo + { + FileName = Constants.Commands.TypeWayland, + Arguments = $"-d {Constants.Defaults.DefaultTypeDelayMs} \"{text.Replace("\"", "\\\"")}\"", + UseShellExecute = false, + CreateNoWindow = true + }; + var p = Process.Start(pInfo); + p?.WaitForExit(); + } + catch (Exception ex) + { + Console.WriteLine($"[WtypeTextInjector] Error injecting text: {ex.Message}"); + _notifications.Notify("Injection Error", "Could not type text into window."); + } + + return Task.CompletedTask; + } + + public async Task InjectStreamAsync(IAsyncEnumerable tokenStream) + { + Logger.LogDebug("Setting up stream injection using wtype..."); + var fullText = string.Empty; + + try + { + var pInfo = new ProcessStartInfo + { + FileName = Constants.Commands.TypeWayland, + Arguments = $"-d {Constants.Defaults.DefaultTypeDelayMs} -", + UseShellExecute = false, + CreateNoWindow = true, + RedirectStandardInput = true + }; + + using var process = Process.Start(pInfo); + if (process == null) return string.Empty; + + Logger.LogDebug("Started wtype stream process, waiting for tokens..."); + + await foreach (var token in tokenStream) + { + Logger.LogDebug($"Injecting token: '{token}'"); + fullText += token; + await process.StandardInput.WriteAsync(token); + await process.StandardInput.FlushAsync(); + } + + Logger.LogDebug("Stream injection complete. 
Closing standard input."); + process.StandardInput.Close(); + await process.WaitForExitAsync(); + } + catch (Exception ex) + { + Console.WriteLine($"[WtypeTextInjector] Error injecting text stream: {ex.Message}"); + _notifications.Notify("Injection Error", "Could not type text stream into window."); + } + + return fullText; + } +} diff --git a/IO/Injectors/XdotoolTextInjector.cs b/IO/Injectors/XdotoolTextInjector.cs new file mode 100644 index 0000000..8abeac9 --- /dev/null +++ b/IO/Injectors/XdotoolTextInjector.cs @@ -0,0 +1,81 @@ +using System.Diagnostics; +using Toak.Core; +using Toak.Core.Interfaces; + +namespace Toak.IO.Injectors; + +/// +/// Text injector that uses xdotool to type text on X11. +/// +public class XdotoolTextInjector(INotifications notifications) : ITextInjector +{ + private readonly INotifications _notifications = notifications; + + public Task InjectTextAsync(string text) + { + Logger.LogDebug("Injecting text using xdotool..."); + if (string.IsNullOrWhiteSpace(text)) return Task.CompletedTask; + + try + { + var pInfo = new ProcessStartInfo + { + FileName = Constants.Commands.TypeX11, + Arguments = $"type --clearmodifiers --delay {Constants.Defaults.DefaultTypeDelayMs} \"{text.Replace("\"", "\\\"")}\"", + UseShellExecute = false, + CreateNoWindow = true + }; + var p = Process.Start(pInfo); + p?.WaitForExit(); + } + catch (Exception ex) + { + Console.WriteLine($"[XdotoolTextInjector] Error injecting text: {ex.Message}"); + _notifications.Notify("Injection Error", "Could not type text into window."); + } + + return Task.CompletedTask; + } + + public async Task InjectStreamAsync(IAsyncEnumerable tokenStream) + { + Logger.LogDebug("Setting up stream injection using xdotool..."); + var fullText = string.Empty; + + try + { + var pInfo = new ProcessStartInfo + { + FileName = Constants.Commands.TypeX11, + Arguments = $"type --clearmodifiers --delay {Constants.Defaults.DefaultTypeDelayMs} --file -", + UseShellExecute = false, + CreateNoWindow = true, + 
RedirectStandardInput = true + }; + + using var process = Process.Start(pInfo); + if (process == null) return string.Empty; + + Logger.LogDebug("Started xdotool stream process, waiting for tokens..."); + + await foreach (var token in tokenStream) + { + Logger.LogDebug($"Injecting token: '{token}'"); + fullText += token; + await process.StandardInput.WriteAsync(token); + await process.StandardInput.FlushAsync(); + } + + Logger.LogDebug("Stream injection complete. Closing standard input."); + process.StandardInput.Close(); + await process.WaitForExitAsync(); + } + catch (Exception ex) + { + Console.WriteLine($"[XdotoolTextInjector] Error injecting text stream: {ex.Message}"); + _notifications.Notify("Injection Error", "Could not type text stream into window."); + } + + return fullText; + } +} diff --git a/IO/Injectors/YdotoolTextInjector.cs b/IO/Injectors/YdotoolTextInjector.cs new file mode 100644 index 0000000..7e5cb90 --- /dev/null +++ b/IO/Injectors/YdotoolTextInjector.cs @@ -0,0 +1,71 @@ +using System.Diagnostics; +using Toak.Core; +using Toak.Core.Interfaces; + +namespace Toak.IO.Injectors; + +/// +/// Text injector that uses ydotool to type text via virtual input (works on both X11 and Wayland). 
+/// +public class YdotoolTextInjector(INotifications notifications) : ITextInjector +{ + private readonly INotifications _notifications = notifications; + + public Task InjectTextAsync(string text) + { + Logger.LogDebug("Injecting text using ydotool..."); + if (string.IsNullOrWhiteSpace(text)) return Task.CompletedTask; + + try + { + var pInfo = new ProcessStartInfo + { + FileName = Constants.Commands.TypeYdotool, + Arguments = $"type \"{text.Replace("\"", "\\\"")}\"", + UseShellExecute = false, + CreateNoWindow = true + }; + var p = Process.Start(pInfo); + p?.WaitForExit(); + } + catch (Exception ex) + { + Console.WriteLine($"[YdotoolTextInjector] Error injecting text: {ex.Message}"); + _notifications.Notify("Injection Error", "Could not type text into window."); + } + + return Task.CompletedTask; + } + + public async Task InjectStreamAsync(IAsyncEnumerable tokenStream) + { + Logger.LogDebug("Setting up stream injection using ydotool (chunked)..."); + var fullText = string.Empty; + + try + { + await foreach (var token in tokenStream) + { + Logger.LogDebug($"Injecting token: '{token}'"); + fullText += token; + + var chunkInfo = new ProcessStartInfo + { + FileName = Constants.Commands.TypeYdotool, + Arguments = $"type \"{token.Replace("\"", "\\\"")}\"", + UseShellExecute = false, + CreateNoWindow = true + }; + var chunkP = Process.Start(chunkInfo); + if (chunkP != null) await chunkP.WaitForExitAsync(); + } + } + catch (Exception ex) + { + Console.WriteLine($"[YdotoolTextInjector] Error injecting text stream: {ex.Message}"); + _notifications.Notify("Injection Error", "Could not type text stream into window."); + } + + return fullText; + } +} diff --git a/IO/TextInjector.cs b/IO/TextInjector.cs deleted file mode 100644 index 5caa422..0000000 --- a/IO/TextInjector.cs +++ /dev/null @@ -1,137 +0,0 @@ -using System.Diagnostics; -using Toak.Core; -using Toak.Core.Interfaces; - -namespace Toak.IO; - -public class TextInjector(INotifications notifications) : ITextInjector 
-{ - private readonly INotifications _notifications = notifications; - - public Task InjectTextAsync(string text, string backend = "xdotool") - { - Logger.LogDebug($"Injecting text: '{text}' with {backend}"); - if (string.IsNullOrWhiteSpace(text)) return Task.CompletedTask; - - try - { - ProcessStartInfo pInfo; - if (backend.ToLowerInvariant() == "wtype") - { - Logger.LogDebug($"Injecting text using wtype..."); - pInfo = new ProcessStartInfo - { - FileName = Constants.Commands.TypeWayland, - Arguments = $"-d {Constants.Defaults.DefaultTypeDelayMs} \"{text.Replace("\"", "\\\"")}\"", - UseShellExecute = false, - CreateNoWindow = true - }; - } - else if (backend.ToLowerInvariant() == "ydotool") - { - Logger.LogDebug($"Injecting text using ydotool..."); - pInfo = new ProcessStartInfo - { - FileName = Constants.Commands.TypeYdotool, - Arguments = $"type \"{text.Replace("\"", "\\\"")}\"", - UseShellExecute = false, - CreateNoWindow = true - }; - } - else // xdotool - { - Logger.LogDebug($"Injecting text using xdotool..."); - pInfo = new ProcessStartInfo - { - FileName = Constants.Commands.TypeX11, - Arguments = $"type --clearmodifiers --delay {Constants.Defaults.DefaultTypeDelayMs} \"{text.Replace("\"", "\\\"")}\"", - UseShellExecute = false, - CreateNoWindow = true - }; - } - var process = Process.Start(pInfo); - process?.WaitForExit(); - } - catch (Exception ex) - { - Console.WriteLine($"[TextInjector] Error injecting text: {ex.Message}"); - _notifications.Notify("Injection Error", "Could not type text into window."); - } - return Task.CompletedTask; - } - - public async Task InjectStreamAsync(IAsyncEnumerable tokenStream, string backend) - { - var fullText = string.Empty; - try - { - ProcessStartInfo pInfo; - if (backend.ToLowerInvariant() == "wtype") - { - Logger.LogDebug($"Setting up stream injection using wtype..."); - pInfo = new ProcessStartInfo - { - FileName = Constants.Commands.TypeWayland, - Arguments = $"-d {Constants.Defaults.DefaultTypeDelayMs} -", - 
UseShellExecute = false, - CreateNoWindow = true, - RedirectStandardInput = true - }; - } - else if (backend.ToLowerInvariant() == "ydotool") - { - Logger.LogDebug($"Setting up stream injection using ydotool (chunked)..."); - await foreach (var token in tokenStream) - { - Logger.LogDebug($"Injecting token: '{token}'"); - fullText += token; - var chunkInfo = new ProcessStartInfo - { - FileName = Constants.Commands.TypeYdotool, - Arguments = $"type \"{token.Replace("\"", "\\\"")}\"", - UseShellExecute = false, - CreateNoWindow = true - }; - var chunkP = Process.Start(chunkInfo); - if (chunkP != null) await chunkP.WaitForExitAsync(); - } - return fullText; - } - else // xdotool - { - Logger.LogDebug($"Setting up stream injection using xdotool..."); - pInfo = new ProcessStartInfo - { - FileName = Constants.Commands.TypeX11, - Arguments = $"type --clearmodifiers --delay {Constants.Defaults.DefaultTypeDelayMs} --file -", - UseShellExecute = false, - CreateNoWindow = true, - RedirectStandardInput = true - }; - } - - using var process = Process.Start(pInfo); - if (process == null) return string.Empty; - - Logger.LogDebug("Started stream injection process, waiting for tokens..."); - - await foreach (var token in tokenStream) - { - Logger.LogDebug($"Injecting token: '{token}'"); - fullText += token; - await process.StandardInput.WriteAsync(token); - await process.StandardInput.FlushAsync(); - } - - Logger.LogDebug("Stream injection complete. 
Closing standard input."); - process.StandardInput.Close(); - await process.WaitForExitAsync(); - } - catch (Exception ex) - { - Console.WriteLine($"[TextInjector] Error injecting text stream: {ex.Message}"); - _notifications.Notify("Injection Error", "Could not type text stream into window."); - } - return fullText; - } -} diff --git a/IO/TextInjectorFactory.cs b/IO/TextInjectorFactory.cs new file mode 100644 index 0000000..9eacc37 --- /dev/null +++ b/IO/TextInjectorFactory.cs @@ -0,0 +1,23 @@ +using Toak.Core.Interfaces; +using Toak.IO.Injectors; + +namespace Toak.IO; + +/// <summary> +/// Resolves the correct <see cref="ITextInjector"/> implementation based on the configured backend name. +/// </summary> +public static class TextInjectorFactory +{ + /// <summary> + /// Creates the appropriate <see cref="ITextInjector"/> for the given <paramref name="backend"/> string. + /// Supported values: wtype, wl-clipboard, ydotool; any other value falls back to xdotool. + /// </summary> + public static ITextInjector Create(string backend, INotifications notifications) => + backend.ToLowerInvariant() switch + { + "wtype" => new WtypeTextInjector(notifications), + "wl-clipboard" => new WlClipboardTextInjector(notifications), + "ydotool" => new YdotoolTextInjector(notifications), + _ => new XdotoolTextInjector(notifications) + }; +} diff --git a/README.md b/README.md index d72b94f..f4307ad 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Built with **.NET 10** and compiled to **Native AOT**, Toak runs as a lightning- - **Multi-Provider Whisper**: Easily swap between Groq and Fireworks AI for state-of-the-art STT APIs. - **Reasoning Capabilities**: Optional reasoning effort settings for complex text processing. - **Modular Skills**: Actionable "System" commands for translation, terminal execution, professional rewriting, and summarization. -- **Multiple Backends**: Types directly into your active window (`wtype`, `xdotool`, or `ydotool`), copies to clipboard, or pipes to stdout. 
+- **Multiple Backends**: Types directly into your active window (`wtype`, `xdotool`, or `ydotool`), uses `wl-clipboard` (`wl-copy` + Shift+Insert via `wtype`) for Wayland paste injection, copies to clipboard, or pipes to stdout. - **High-Quality Audio**: Native support for **PipeWire** (`pw-record`) and **FFmpeg** for universal compatibility. - **Beautiful CLI**: Interactive onboarding and configuration powered by `Spectre.Console`. @@ -25,7 +25,7 @@ Built with **.NET 10** and compiled to **Native AOT**, Toak runs as a lightning- - **.NET 10 SDK** (for building from source) - **Audio Capture**: `pipewire` / `pw-record` (recommended) or `ffmpeg` -- **Typing Backend**: `wtype` (Wayland), `xdotool` (X11), or `ydotool` (Virtual Input) +- **Typing Backend**: `wtype` (Wayland), `wl-clipboard` (`wl-copy` + Shift+Insert via `wtype`, Wayland), `xdotool` (X11), or `ydotool` (Virtual Input) - **Clipboard**: `wl-copy` (Wayland) or `xclip` (X11) - **API Keys**: API Keys for your chosen providers (Groq, Together AI, Cerebras, or Fireworks). @@ -109,7 +109,7 @@ Key settings in `ToakConfig.cs` (managed via `toak onboard` or `toak config`): - `WhisperProvider`: Choice of `groq` (default) or `fireworks`. - `WhisperModel`: The STT model (default: `whisper-large-v3-turbo`). - `WhisperLanguage`: Set spoken language (e.g., `en`, `es`, `fr`). -- `TypingBackend`: Choose between `wtype`, `xdotool`, or `ydotool`. +- `TypingBackend`: Choose between `wtype`, `wl-clipboard`, `xdotool`, or `ydotool`. - `AudioBackend`: Choose between `pw-record` (PipeWire) or `ffmpeg`. - `MinRecordingDuration`: Set the minimum recording duration in ms (default: `500`). - `ModulePunctuation`: Toggle automatic grammar and punctuation fixing.