1
0

refactor: modularize text injection with a factory and dedicated backend implementations, including a new Wayland clipboard option.

This commit is contained in:
2026-03-03 12:15:52 +01:00
parent ffba480d28
commit 9bf72169db
13 changed files with 379 additions and 147 deletions

View File

@@ -134,7 +134,7 @@ public static class OnboardCommand
}));
var sessionType = Environment.GetEnvironmentVariable("XDG_SESSION_TYPE")?.ToLowerInvariant();
var typingBackends = sessionType == "wayland" ? new[] { "wtype", "ydotool", "xdotool" } : new[] { "xdotool", "ydotool" };
var typingBackends = sessionType == "wayland" ? new[] { "wtype", "wl-clipboard", "ydotool" } : new[] { "xdotool", "ydotool" };
config.TypingBackend = AnsiConsole.Prompt(
new SelectionPrompt<string>()

View File

@@ -8,7 +8,7 @@ public class ToakConfig
public string FireworksApiKey { get; set; } = string.Empty;
public string LlmProvider { get; set; } = "groq"; // groq, together, cerebras, or fireworks
public string WhisperProvider { get; set; } = "groq"; // groq or fireworks
public string TypingBackend { get; set; } = "xdotool"; // wtype or xdotool
public string TypingBackend { get; set; } = "wtype"; // wtype, ydotool, wl-clipboard, or xdotool
public string AudioBackend { get; set; } = "pw-record"; // pw-record or ffmpeg
public bool ModulePunctuation { get; set; } = true;
public bool ModuleTechnicalSanitization { get; set; } = true;

View File

@@ -42,6 +42,6 @@ public static class Constants
{
public const string LlmModel = "openai/gpt-oss-20b";
public const string WhisperModel = "whisper-large-v3-turbo";
public const int DefaultTypeDelayMs = 2;
public const int DefaultTypeDelayMs = 5;
}
}

View File

@@ -76,7 +76,7 @@ public static class DaemonService
configManager,
recorder,
notifications,
new TextInjector(notifications),
TextInjectorFactory.Create(config.TypingBackend, notifications),
new HistoryManager(),
new ClipboardManager(notifications),
stateTracker

View File

@@ -34,8 +34,8 @@ public interface INotifications
public interface ITextInjector
{
Task<string> InjectStreamAsync(IAsyncEnumerable<string> textStream, string backend = "xdotool");
Task InjectTextAsync(string text, string backend = "xdotool");
Task<string> InjectStreamAsync(IAsyncEnumerable<string> tokenStream);
Task InjectTextAsync(string text);
}
public interface IHistoryManager

View File

@@ -121,7 +121,7 @@ public class TranscriptionOrchestrator(
}
else
{
var fullText = await _textInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
var fullText = await _textInjector.InjectStreamAsync(tokenStream);
stopWatch.Stop();
_historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds);
_notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");

View File

@@ -0,0 +1,113 @@
using System.Diagnostics;
using Toak.Core;
using Toak.Core.Interfaces;
namespace Toak.IO.Injectors;
/// <summary>
/// Text injector that writes text to the Wayland clipboard via <c>wl-copy</c>
/// and then pastes it by simulating Shift+Insert with <c>wtype</c>.
/// </summary>
/// <remarks>
/// <see cref="InjectTextAsync"/> ignores null, empty, and whitespace-only text.
/// <see cref="InjectStreamAsync"/> buffers the whole stream and pastes it once;
/// an empty stream is not pasted, because the paste keystroke would otherwise
/// insert whatever happened to be on the clipboard already.
/// </remarks>
public class WlClipboardTextInjector(INotifications notifications) : ITextInjector
{
    /// <summary>Pause between filling the clipboard and sending the paste keystroke.</summary>
    private const int ClipboardSettleDelayMs = 100;

    private readonly INotifications _notifications = notifications;

    /// <summary>
    /// Copies <paramref name="text"/> to the clipboard and pastes it.
    /// </summary>
    /// <param name="text">Text to inject; null, empty, or whitespace-only text is skipped.</param>
    /// <returns>A task that completes once the paste command has exited (or the error was reported).</returns>
    public async Task InjectTextAsync(string text)
    {
        Logger.LogDebug("Injecting text using wl-clipboard...");
        if (string.IsNullOrWhiteSpace(text)) return;

        try
        {
            await CopyAndPasteAsync(text);
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[WlClipboardTextInjector] Error injecting text: {ex.Message}");
            _notifications.Notify("Injection Error", "Could not type text into window.");
        }
    }

    /// <summary>
    /// Buffers every token from <paramref name="tokenStream"/>, then copies the combined
    /// text to the clipboard and pastes it in a single operation.
    /// </summary>
    /// <param name="tokenStream">Tokens to concatenate and inject.</param>
    /// <returns>
    /// The concatenation of all tokens received, including the partial text gathered
    /// before a failure.
    /// </returns>
    public async Task<string> InjectStreamAsync(IAsyncEnumerable<string> tokenStream)
    {
        Logger.LogDebug("Setting up stream injection using wl-clipboard...");
        // Declared outside the try block so partial text survives an exception.
        var buffer = new System.Text.StringBuilder();

        try
        {
            // Collect all tokens first; the clipboard can only be pasted as a whole.
            await foreach (var token in tokenStream)
            {
                Logger.LogDebug($"Buffering token: '{token}'");
                buffer.Append(token);
            }

            // Nothing to copy means the paste would insert stale clipboard content.
            if (buffer.Length > 0)
            {
                await CopyAndPasteAsync(buffer.ToString());
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[WlClipboardTextInjector] Error injecting text stream: {ex.Message}");
            _notifications.Notify("Injection Error", "Could not type text stream into window.");
        }

        return buffer.ToString();
    }

    /// <summary>
    /// Pipes <paramref name="text"/> into the clipboard command and then sends Shift+Insert.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The clipboard command could not be started or exited with a non-zero code.
    /// </exception>
    private static async Task CopyAndPasteAsync(string text)
    {
        // Write the full text to wl-copy via stdin
        var copyInfo = new ProcessStartInfo
        {
            FileName = Constants.Commands.ClipboardWayland,
            UseShellExecute = false,
            CreateNoWindow = true,
            RedirectStandardInput = true
        };

        using (var copyProcess = Process.Start(copyInfo)
            ?? throw new InvalidOperationException($"Could not start {Constants.Commands.ClipboardWayland}."))
        {
            await copyProcess.StandardInput.WriteAsync(text);
            copyProcess.StandardInput.Close();
            await copyProcess.WaitForExitAsync();

            // Do not paste if the copy failed: the clipboard still holds unrelated content.
            if (copyProcess.ExitCode != 0)
            {
                throw new InvalidOperationException(
                    $"{Constants.Commands.ClipboardWayland} exited with code {copyProcess.ExitCode}.");
            }
        }

        await Task.Delay(ClipboardSettleDelayMs);

        // Simulate Shift+Insert to paste into the focused window
        var pasteInfo = new ProcessStartInfo
        {
            FileName = Constants.Commands.TypeWayland,
            Arguments = "-M shift -k Insert -m shift",
            UseShellExecute = false,
            CreateNoWindow = true
        };

        using var pasteProcess = Process.Start(pasteInfo);
        if (pasteProcess is not null) await pasteProcess.WaitForExitAsync();
    }
}

View File

@@ -0,0 +1,81 @@
using System.Diagnostics;
using Toak.Core;
using Toak.Core.Interfaces;
namespace Toak.IO.Injectors;
/// <summary>
/// Text injector that uses <c>wtype</c> to type text on Wayland.
/// </summary>
public class WtypeTextInjector(INotifications notifications) : ITextInjector
{
    private readonly INotifications _notifications = notifications;

    /// <summary>
    /// Types <paramref name="text"/> by passing it to the typing command as a single argument.
    /// </summary>
    /// <param name="text">Text to type; null, empty, or whitespace-only text is skipped.</param>
    /// <returns>A task that completes once the command has exited (or the error was reported).</returns>
    public async Task InjectTextAsync(string text)
    {
        Logger.LogDebug("Injecting text using wtype...");
        if (string.IsNullOrWhiteSpace(text)) return;

        try
        {
            // ArgumentList passes the text verbatim; a hand-quoted Arguments string
            // breaks on backslashes (e.g. text ending in '\' swallows the closing quote).
            // NOTE(review): text starting with '-' may still be read as an option by the tool — confirm.
            var pInfo = new ProcessStartInfo
            {
                FileName = Constants.Commands.TypeWayland,
                UseShellExecute = false,
                CreateNoWindow = true,
                ArgumentList = { "-d", FormatDelay(), text }
            };

            using var p = Process.Start(pInfo);
            if (p is not null) await p.WaitForExitAsync();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[WtypeTextInjector] Error injecting text: {ex.Message}");
            _notifications.Notify("Injection Error", "Could not type text into window.");
        }
    }

    /// <summary>
    /// Starts one typing process reading from stdin and forwards each token to it as it arrives.
    /// </summary>
    /// <param name="tokenStream">Tokens to type in order.</param>
    /// <returns>
    /// The concatenation of all tokens received (partial if an error interrupted the stream),
    /// or an empty string when the process could not be started.
    /// </returns>
    public async Task<string> InjectStreamAsync(IAsyncEnumerable<string> tokenStream)
    {
        Logger.LogDebug("Setting up stream injection using wtype...");
        // Declared outside the try block so partial text survives an exception.
        var typed = new System.Text.StringBuilder();

        try
        {
            var pInfo = new ProcessStartInfo
            {
                FileName = Constants.Commands.TypeWayland,
                UseShellExecute = false,
                CreateNoWindow = true,
                RedirectStandardInput = true,
                ArgumentList = { "-d", FormatDelay(), "-" }
            };

            using var process = Process.Start(pInfo);
            if (process == null) return string.Empty;

            Logger.LogDebug("Started wtype stream process, waiting for tokens...");
            await foreach (var token in tokenStream)
            {
                Logger.LogDebug($"Injecting token: '{token}'");
                typed.Append(token);
                await process.StandardInput.WriteAsync(token);
                // Flush per token so text appears as it is produced rather than at the end.
                await process.StandardInput.FlushAsync();
            }

            Logger.LogDebug("Stream injection complete. Closing standard input.");
            process.StandardInput.Close();
            await process.WaitForExitAsync();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[WtypeTextInjector] Error injecting text stream: {ex.Message}");
            _notifications.Notify("Injection Error", "Could not type text stream into window.");
        }

        return typed.ToString();
    }

    /// <summary>Formats the configured per-key delay as a culture-independent argument.</summary>
    private static string FormatDelay() =>
        Constants.Defaults.DefaultTypeDelayMs.ToString(System.Globalization.CultureInfo.InvariantCulture);
}

View File

@@ -0,0 +1,81 @@
using System.Diagnostics;
using Toak.Core;
using Toak.Core.Interfaces;
namespace Toak.IO.Injectors;
/// <summary>
/// Text injector that uses <c>xdotool</c> to type text on X11.
/// </summary>
public class XdotoolTextInjector(INotifications notifications) : ITextInjector
{
    private readonly INotifications _notifications = notifications;

    /// <summary>
    /// Types <paramref name="text"/> by passing it to the <c>type</c> subcommand as a single argument.
    /// </summary>
    /// <param name="text">Text to type; null, empty, or whitespace-only text is skipped.</param>
    /// <returns>A task that completes once the command has exited (or the error was reported).</returns>
    public async Task InjectTextAsync(string text)
    {
        Logger.LogDebug("Injecting text using xdotool...");
        if (string.IsNullOrWhiteSpace(text)) return;

        try
        {
            // ArgumentList passes the text verbatim; a hand-quoted Arguments string
            // breaks on backslashes (e.g. text ending in '\' swallows the closing quote).
            // NOTE(review): text starting with '-' may still be read as an option by the tool — confirm.
            var pInfo = new ProcessStartInfo
            {
                FileName = Constants.Commands.TypeX11,
                UseShellExecute = false,
                CreateNoWindow = true,
                ArgumentList = { "type", "--clearmodifiers", "--delay", FormatDelay(), text }
            };

            using var p = Process.Start(pInfo);
            if (p is not null) await p.WaitForExitAsync();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[XdotoolTextInjector] Error injecting text: {ex.Message}");
            _notifications.Notify("Injection Error", "Could not type text into window.");
        }
    }

    /// <summary>
    /// Starts one typing process reading from stdin (<c>--file -</c>) and forwards each token
    /// to it as it arrives.
    /// </summary>
    /// <param name="tokenStream">Tokens to type in order.</param>
    /// <returns>
    /// The concatenation of all tokens received (partial if an error interrupted the stream),
    /// or an empty string when the process could not be started.
    /// </returns>
    public async Task<string> InjectStreamAsync(IAsyncEnumerable<string> tokenStream)
    {
        Logger.LogDebug("Setting up stream injection using xdotool...");
        // Declared outside the try block so partial text survives an exception.
        var typed = new System.Text.StringBuilder();

        try
        {
            var pInfo = new ProcessStartInfo
            {
                FileName = Constants.Commands.TypeX11,
                UseShellExecute = false,
                CreateNoWindow = true,
                RedirectStandardInput = true,
                ArgumentList = { "type", "--clearmodifiers", "--delay", FormatDelay(), "--file", "-" }
            };

            using var process = Process.Start(pInfo);
            if (process == null) return string.Empty;

            Logger.LogDebug("Started xdotool stream process, waiting for tokens...");
            await foreach (var token in tokenStream)
            {
                Logger.LogDebug($"Injecting token: '{token}'");
                typed.Append(token);
                await process.StandardInput.WriteAsync(token);
                // Flush per token so text appears as it is produced rather than at the end.
                await process.StandardInput.FlushAsync();
            }

            Logger.LogDebug("Stream injection complete. Closing standard input.");
            process.StandardInput.Close();
            await process.WaitForExitAsync();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[XdotoolTextInjector] Error injecting text stream: {ex.Message}");
            _notifications.Notify("Injection Error", "Could not type text stream into window.");
        }

        return typed.ToString();
    }

    /// <summary>Formats the configured per-key delay as a culture-independent argument.</summary>
    private static string FormatDelay() =>
        Constants.Defaults.DefaultTypeDelayMs.ToString(System.Globalization.CultureInfo.InvariantCulture);
}

View File

@@ -0,0 +1,71 @@
using System.Diagnostics;
using Toak.Core;
using Toak.Core.Interfaces;
namespace Toak.IO.Injectors;
/// <summary>
/// Text injector that uses <c>ydotool</c> to type text via virtual input (works on both X11 and Wayland).
/// </summary>
public class YdotoolTextInjector(INotifications notifications) : ITextInjector
{
    private readonly INotifications _notifications = notifications;

    /// <summary>
    /// Types <paramref name="text"/> with a single <c>type</c> invocation.
    /// </summary>
    /// <param name="text">Text to type; null, empty, or whitespace-only text is skipped.</param>
    /// <returns>A task that completes once the command has exited (or the error was reported).</returns>
    public async Task InjectTextAsync(string text)
    {
        Logger.LogDebug("Injecting text using ydotool...");
        if (string.IsNullOrWhiteSpace(text)) return;

        try
        {
            using var p = Process.Start(CreateTypeCommand(text));
            if (p is not null) await p.WaitForExitAsync();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[YdotoolTextInjector] Error injecting text: {ex.Message}");
            _notifications.Notify("Injection Error", "Could not type text into window.");
        }
    }

    /// <summary>
    /// Types the stream chunk by chunk, launching one <c>type</c> invocation per non-empty token
    /// and waiting for it to finish before handling the next token.
    /// </summary>
    /// <param name="tokenStream">Tokens to type in order.</param>
    /// <returns>
    /// The concatenation of all tokens received, including the partial text gathered
    /// before a failure.
    /// </returns>
    public async Task<string> InjectStreamAsync(IAsyncEnumerable<string> tokenStream)
    {
        Logger.LogDebug("Setting up stream injection using ydotool (chunked)...");
        // Declared outside the try block so partial text survives an exception.
        var typed = new System.Text.StringBuilder();

        try
        {
            await foreach (var token in tokenStream)
            {
                Logger.LogDebug($"Injecting token: '{token}'");
                typed.Append(token);

                // An empty token types nothing; skip the process launch.
                if (string.IsNullOrEmpty(token)) continue;

                // Scoped to the iteration so each per-token process handle is released promptly.
                using var chunkP = Process.Start(CreateTypeCommand(token));
                if (chunkP is not null) await chunkP.WaitForExitAsync();
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[YdotoolTextInjector] Error injecting text stream: {ex.Message}");
            _notifications.Notify("Injection Error", "Could not type text stream into window.");
        }

        return typed.ToString();
    }

    /// <summary>
    /// Builds the start info for typing <paramref name="text"/>.
    /// </summary>
    /// <remarks>
    /// ArgumentList passes the text verbatim; a hand-quoted Arguments string breaks on
    /// backslashes (e.g. text ending in '\' swallows the closing quote).
    /// NOTE(review): text starting with '-' may still be read as an option by the tool — confirm.
    /// </remarks>
    private static ProcessStartInfo CreateTypeCommand(string text) => new()
    {
        FileName = Constants.Commands.TypeYdotool,
        UseShellExecute = false,
        CreateNoWindow = true,
        ArgumentList = { "type", text }
    };
}

View File

@@ -1,137 +0,0 @@
using System.Diagnostics;
using Toak.Core;
using Toak.Core.Interfaces;
namespace Toak.IO;
/// <summary>
/// Injects text by launching an external typing command chosen from a backend name:
/// <c>wtype</c>, <c>ydotool</c>, or (for any other value) <c>xdotool</c>.
/// Failures are written to the console and reported through <see cref="INotifications"/>.
/// </summary>
public class TextInjector(INotifications notifications) : ITextInjector
{
private readonly INotifications _notifications = notifications;
/// <summary>
/// Types <paramref name="text"/> with one invocation of the selected backend command.
/// </summary>
/// <param name="text">Text to type; null, empty, or whitespace-only text is skipped.</param>
/// <param name="backend">Backend name, compared case-insensitively; unrecognized values use xdotool.</param>
/// <returns>An already-completed task; the process is waited on synchronously before returning.</returns>
public Task InjectTextAsync(string text, string backend = "xdotool")
{
Logger.LogDebug($"Injecting text: '{text}' with {backend}");
if (string.IsNullOrWhiteSpace(text)) return Task.CompletedTask;
try
{
ProcessStartInfo pInfo;
// Each branch wraps the text in double quotes and backslash-escapes embedded double quotes.
if (backend.ToLowerInvariant() == "wtype")
{
Logger.LogDebug($"Injecting text using wtype...");
pInfo = new ProcessStartInfo
{
FileName = Constants.Commands.TypeWayland,
Arguments = $"-d {Constants.Defaults.DefaultTypeDelayMs} \"{text.Replace("\"", "\\\"")}\"",
UseShellExecute = false,
CreateNoWindow = true
};
}
else if (backend.ToLowerInvariant() == "ydotool")
{
Logger.LogDebug($"Injecting text using ydotool...");
pInfo = new ProcessStartInfo
{
FileName = Constants.Commands.TypeYdotool,
Arguments = $"type \"{text.Replace("\"", "\\\"")}\"",
UseShellExecute = false,
CreateNoWindow = true
};
}
else // xdotool
{
Logger.LogDebug($"Injecting text using xdotool...");
pInfo = new ProcessStartInfo
{
FileName = Constants.Commands.TypeX11,
Arguments = $"type --clearmodifiers --delay {Constants.Defaults.DefaultTypeDelayMs} \"{text.Replace("\"", "\\\"")}\"",
UseShellExecute = false,
CreateNoWindow = true
};
}
// Blocks the calling thread until the typing command exits.
var process = Process.Start(pInfo);
process?.WaitForExit();
}
catch (Exception ex)
{
// Errors are reported rather than rethrown, so the returned task never faults from here.
Console.WriteLine($"[TextInjector] Error injecting text: {ex.Message}");
_notifications.Notify("Injection Error", "Could not type text into window.");
}
return Task.CompletedTask;
}
/// <summary>
/// Types a token stream with the selected backend. wtype and xdotool get a single process
/// fed through standard input; ydotool gets one process per token.
/// </summary>
/// <param name="tokenStream">Tokens to type in order.</param>
/// <param name="backend">Backend name, compared case-insensitively; unrecognized values use xdotool.</param>
/// <returns>
/// The concatenation of the tokens received (partial if an exception interrupted the stream),
/// or an empty string when the streaming process could not be started.
/// </returns>
public async Task<string> InjectStreamAsync(IAsyncEnumerable<string> tokenStream, string backend)
{
var fullText = string.Empty;
try
{
ProcessStartInfo pInfo;
if (backend.ToLowerInvariant() == "wtype")
{
Logger.LogDebug($"Setting up stream injection using wtype...");
pInfo = new ProcessStartInfo
{
FileName = Constants.Commands.TypeWayland,
Arguments = $"-d {Constants.Defaults.DefaultTypeDelayMs} -",
UseShellExecute = false,
CreateNoWindow = true,
RedirectStandardInput = true
};
}
else if (backend.ToLowerInvariant() == "ydotool")
{
// This branch handles the whole stream itself and returns early:
// it starts a separate process per token instead of piping to stdin.
Logger.LogDebug($"Setting up stream injection using ydotool (chunked)...");
await foreach (var token in tokenStream)
{
Logger.LogDebug($"Injecting token: '{token}'");
fullText += token;
var chunkInfo = new ProcessStartInfo
{
FileName = Constants.Commands.TypeYdotool,
Arguments = $"type \"{token.Replace("\"", "\\\"")}\"",
UseShellExecute = false,
CreateNoWindow = true
};
var chunkP = Process.Start(chunkInfo);
if (chunkP != null) await chunkP.WaitForExitAsync();
}
return fullText;
}
else // xdotool
{
Logger.LogDebug($"Setting up stream injection using xdotool...");
pInfo = new ProcessStartInfo
{
FileName = Constants.Commands.TypeX11,
Arguments = $"type --clearmodifiers --delay {Constants.Defaults.DefaultTypeDelayMs} --file -",
UseShellExecute = false,
CreateNoWindow = true,
RedirectStandardInput = true
};
}
// Shared path for wtype and xdotool: one process, tokens written to its standard input.
using var process = Process.Start(pInfo);
if (process == null) return string.Empty;
Logger.LogDebug("Started stream injection process, waiting for tokens...");
await foreach (var token in tokenStream)
{
Logger.LogDebug($"Injecting token: '{token}'");
fullText += token;
await process.StandardInput.WriteAsync(token);
// Flushed after every token so each one reaches the process immediately.
await process.StandardInput.FlushAsync();
}
Logger.LogDebug("Stream injection complete. Closing standard input.");
// Standard input is closed before waiting for the process to exit.
process.StandardInput.Close();
await process.WaitForExitAsync();
}
catch (Exception ex)
{
// Errors are reported rather than rethrown; whatever was accumulated is still returned.
Console.WriteLine($"[TextInjector] Error injecting text stream: {ex.Message}");
_notifications.Notify("Injection Error", "Could not type text stream into window.");
}
return fullText;
}
}

23
IO/TextInjectorFactory.cs Normal file
View File

@@ -0,0 +1,23 @@
using Toak.Core.Interfaces;
using Toak.IO.Injectors;
namespace Toak.IO;
/// <summary>
/// Resolves the correct <see cref="ITextInjector"/> implementation based on the configured backend name.
/// </summary>
public static class TextInjectorFactory
{
    /// <summary>
    /// Creates the appropriate <see cref="ITextInjector"/> for the given <paramref name="backend"/> string.
    /// Supported values: <c>wtype</c>, <c>wl-clipboard</c>, <c>ydotool</c>, <c>xdotool</c> (default).
    /// </summary>
    /// <param name="backend">
    /// Backend name; matched case-insensitively with surrounding whitespace ignored.
    /// A null, empty, or unrecognized value selects the xdotool injector.
    /// </param>
    /// <param name="notifications">Notification sink handed to the created injector.</param>
    /// <returns>A new injector instance for the requested backend.</returns>
    public static ITextInjector Create(string backend, INotifications notifications) =>
        // The null-conditional keeps a missing config value from throwing; null falls through to the default arm.
        backend?.Trim().ToLowerInvariant() switch
        {
            "wtype" => new WtypeTextInjector(notifications),
            "wl-clipboard" => new WlClipboardTextInjector(notifications),
            "ydotool" => new YdotoolTextInjector(notifications),
            _ => new XdotoolTextInjector(notifications)
        };
}

View File

@@ -15,7 +15,7 @@ Built with **.NET 10** and compiled to **Native AOT**, Toak runs as a lightning-
- **Multi-Provider Whisper**: Easily swap between Groq and Fireworks AI for state-of-the-art STT APIs.
- **Reasoning Capabilities**: Optional reasoning effort settings for complex text processing.
- **Modular Skills**: Actionable "System" commands for translation, terminal execution, professional rewriting, and summarization.
- **Multiple Backends**: Types directly into your active window (`wtype`, `xdotool`, or `ydotool`), copies to clipboard, or pipes to stdout.
- **Multiple Backends**: Types directly into your active window (`wtype`, `xdotool`, or `ydotool`), pastes through the Wayland clipboard (`wl-clipboard`: `wl-copy` followed by a Shift+Insert keystroke sent with `wtype`), copies to clipboard, or pipes to stdout.
- **High-Quality Audio**: Native support for **PipeWire** (`pw-record`) and **FFmpeg** for universal compatibility.
- **Beautiful CLI**: Interactive onboarding and configuration powered by `Spectre.Console`.
@@ -25,7 +25,7 @@ Built with **.NET 10** and compiled to **Native AOT**, Toak runs as a lightning-
- **.NET 10 SDK** (for building from source)
- **Audio Capture**: `pipewire` / `pw-record` (recommended) or `ffmpeg`
- **Typing Backend**: `wtype` (Wayland), `xdotool` (X11), or `ydotool` (Virtual Input)
- **Typing Backend**: `wtype` (Wayland), `wl-clipboard` (`wl-copy` + Shift+Insert via `wtype`, Wayland), `xdotool` (X11), or `ydotool` (Virtual Input)
- **Clipboard**: `wl-copy` (Wayland) or `xclip` (X11)
- **API Keys**: API Keys for your chosen providers (Groq, Together AI, Cerebras, or Fireworks).
@@ -109,7 +109,7 @@ Key settings in `ToakConfig.cs` (managed via `toak onboard` or `toak config`):
- `WhisperProvider`: Choice of `groq` (default) or `fireworks`.
- `WhisperModel`: The STT model (default: `whisper-large-v3-turbo`).
- `WhisperLanguage`: Set spoken language (e.g., `en`, `es`, `fr`).
- `TypingBackend`: Choose between `wtype`, `xdotool`, or `ydotool`.
- `TypingBackend`: Choose between `wtype`, `wl-clipboard`, `xdotool`, or `ydotool`.
- `AudioBackend`: Choose between `pw-record` (PipeWire) or `ffmpeg`.
- `MinRecordingDuration`: Set the minimum recording duration in ms (default: `500`).
- `ModulePunctuation`: Toggle automatic grammar and punctuation fixing.