From 3ceecbe5ee4f9c361bf730c9d78b002e3bb5488c Mon Sep 17 00:00:00 2001
From: TomiEckert
Date: Sat, 28 Feb 2026 15:58:45 +0100
Subject: [PATCH] feat: Add FFmpeg audio recording and ydotool typing backends, making them configurable.

---
Note: this patch was edited by hand after export; the blob ids on the
"index" lines below still refer to the originally exported content.

 Audio/FfmpegAudioRecorder.cs | 128 +++++++++++++++++++++++++++++++++++
 Commands/OnboardCommand.cs   |  38 ++++++++++-
 Configuration/ToakConfig.cs  |   3 +-
 Core/Constants.cs            |   1 +
 Core/DaemonService.cs        |   7 +-
 IO/TextInjector.cs           |  38 +++++++++++
 6 files changed, 210 insertions(+), 5 deletions(-)
 create mode 100644 Audio/FfmpegAudioRecorder.cs

diff --git a/Audio/FfmpegAudioRecorder.cs b/Audio/FfmpegAudioRecorder.cs
new file mode 100644
index 0000000..95c168b
--- /dev/null
+++ b/Audio/FfmpegAudioRecorder.cs
@@ -0,0 +1,128 @@
+using System;
+using System.Diagnostics;
+using System.IO;
+
+using Toak.Core;
+using Toak.IO;
+using Toak.Core.Interfaces;
+
+namespace Toak.Audio;
+
+/// <summary>
+/// Records microphone audio to a WAV file by running ffmpeg against the default
+/// PulseAudio source (mono, 16 kHz).
+/// </summary>
+public class FfmpegAudioRecorder : IAudioRecorder
+{
+    private readonly string WavPath = Constants.Paths.RecordingWavFile;
+    private readonly IRecordingStateTracker _stateTracker;
+    private readonly INotifications _notifications;
+
+    public FfmpegAudioRecorder(IRecordingStateTracker stateTracker, INotifications notifications)
+    {
+        _stateTracker = stateTracker;
+        _notifications = notifications;
+    }
+
+    /// <summary>Returns the path of the WAV file this recorder writes to.</summary>
+    public string GetWavPath() => WavPath;
+
+    /// <summary>
+    /// Deletes any previous recording, launches ffmpeg and stores its PID in the state
+    /// tracker so that a later <see cref="StopRecording"/> call can stop it.
+    /// </summary>
+    public void StartRecording()
+    {
+        if (File.Exists(WavPath))
+        {
+            Logger.LogDebug($"Deleting old audio file: {WavPath}");
+            File.Delete(WavPath);
+        }
+
+        Logger.LogDebug("Starting ffmpeg to record audio...");
+
+        // stdout/stderr are deliberately NOT redirected: nothing ever reads them, and a
+        // full pipe buffer would block ffmpeg mid-recording. Output is limited to errors
+        // instead, and -nostdin keeps ffmpeg from reading the parent's stdin.
+        var pInfo = new ProcessStartInfo
+        {
+            FileName = Constants.Commands.AudioFfmpeg,
+            UseShellExecute = false,
+            CreateNoWindow = true
+        };
+
+        // ArgumentList passes each value as its own argv entry, so the WAV path needs
+        // no manual quoting.
+        string[] ffmpegArgs =
+        {
+            "-nostdin", "-hide_banner", "-loglevel", "error", "-nostats", "-y",
+            "-f", "pulse", "-i", "default", "-ac", "1", "-ar", "16000", WavPath
+        };
+        foreach (var arg in ffmpegArgs)
+        {
+            pInfo.ArgumentList.Add(arg);
+        }
+
+        using var process = Process.Start(pInfo);
+        if (process == null)
+        {
+            Logger.LogDebug("ffmpeg process could not be started.");
+            return;
+        }
+
+        _stateTracker.SetRecording(process.Id);
+        _notifications.Notify("Recording Started (FFmpeg)");
+    }
+
+    /// <summary>
+    /// Stops the ffmpeg process whose PID is stored in the state tracker, if any, and
+    /// always clears the recording state afterwards.
+    /// </summary>
+    public void StopRecording()
+    {
+        var pid = _stateTracker.GetRecordingPid();
+        if (!pid.HasValue)
+        {
+            return;
+        }
+
+        Logger.LogDebug($"Found active ffmpeg process with PID {pid.Value}. Attempting to stop...");
+        try
+        {
+            using var process = Process.GetProcessById(pid.Value);
+            if (process.HasExited)
+            {
+                return;
+            }
+
+            // Gracefully stop ffmpeg using SIGINT so it can finalize the WAV header cleanly
+            using (var signal = Process.Start(new ProcessStartInfo
+            {
+                FileName = Constants.Commands.ProcessKill,
+                Arguments = $"-INT {pid.Value}",
+                CreateNoWindow = true,
+                UseShellExecute = false
+            }))
+            {
+                signal?.WaitForExit();
+            }
+
+            // Give ffmpeg a moment to flush. If it ignores SIGINT, kill it rather than
+            // leave an orphaned process recording after the state has been cleared.
+            if (!process.WaitForExit(2000))
+            {
+                Logger.LogDebug($"ffmpeg (PID {pid.Value}) did not exit after SIGINT; killing it.");
+                process.Kill();
+            }
+        }
+        catch (Exception ex)
+        {
+            // Process might already be dead
+            Logger.LogDebug($"[FfmpegAudioRecorder] Error stopping ffmpeg: {ex.Message}");
+        }
+        finally
+        {
+            _stateTracker.ClearRecording();
+        }
+    }
+}
diff --git a/Commands/OnboardCommand.cs b/Commands/OnboardCommand.cs
index f488a9a..7ba087f 100644
--- a/Commands/OnboardCommand.cs
+++ b/Commands/OnboardCommand.cs
@@ -1,4 +1,5 @@
 using System;
+using System.Diagnostics;
 using System.Linq;
 using System.Threading.Tasks;
 using Spectre.Console;
@@ -52,13 +53,18 @@ public static class OnboardCommand
                 }));
 
         var sessionType = Environment.GetEnvironmentVariable("XDG_SESSION_TYPE")?.ToLowerInvariant();
-        var defaultBackend = sessionType == "wayland" ? "wtype" : "xdotool";
-        var otherBackend = defaultBackend == "wtype" ? "xdotool" : "wtype";
+        var typingBackends = sessionType == "wayland" ? new[] { "wtype", "ydotool", "xdotool" } : new[] { "xdotool", "ydotool" };
 
         config.TypingBackend = AnsiConsole.Prompt(
             new SelectionPrompt<string>()
                 .Title($"Select [green]Typing Backend[/] (Detected: {sessionType}):")
-                .AddChoices(new[] { defaultBackend, otherBackend }));
+                .AddChoices(typingBackends));
+
+        config.AudioBackend = AnsiConsole.Prompt(
+            new SelectionPrompt<string>()
+                .Title("Select [green]Audio Recording Backend[/]:")
+                .AddChoices(new[] { "pw-record", "ffmpeg" })
+                .UseConverter(c => c == "pw-record" ? "pw-record (Default PipeWire)" : "ffmpeg (Universal PulseAudio)"));
 
         var availableSkills = SkillRegistry.AllSkills.Select(s => s.Name).ToList();
 
@@ -75,5 +81,31 @@ public static class OnboardCommand
         configManager.SaveConfig(config);
 
         AnsiConsole.MarkupLine("\n[bold green]Configuration saved successfully![/]");
+
+        // The daemon picks its audio backend when it starts, so restart it to apply the new configuration.
+        try
+        {
+            var processInfo = new ProcessStartInfo("systemctl", "--user restart toak.service")
+            {
+                CreateNoWindow = true,
+                UseShellExecute = false
+            };
+            using var restart = Process.Start(processInfo);
+            restart?.WaitForExit();
+
+            // Only report success when systemctl actually succeeded.
+            if (restart is { ExitCode: 0 })
+            {
+                AnsiConsole.MarkupLine("[grey]Restarted Toak daemon service.[/]");
+            }
+            else
+            {
+                AnsiConsole.MarkupLine("[yellow]Could not restart the Toak daemon service; restart it manually to apply the new configuration.[/]");
+            }
+        }
+        catch (Exception ex)
+        {
+            Toak.Core.Logger.LogDebug($"Failed to restart toak service: {ex.Message}");
+        }
     }
 }
diff --git a/Configuration/ToakConfig.cs b/Configuration/ToakConfig.cs
index f1e110d..2fe7f2d 100644
--- a/Configuration/ToakConfig.cs
+++ b/Configuration/ToakConfig.cs
@@ -4,6 +4,7 @@ public class ToakConfig
 {
     public string GroqApiKey { get; set; } = string.Empty;
-    public string TypingBackend { get; set; } = "xdotool"; // wtype or xdotool
+    public string TypingBackend { get; set; } = "xdotool"; // wtype, ydotool or xdotool
+    public string AudioBackend { get; set; } = "pw-record"; // pw-record or ffmpeg
     public bool ModulePunctuation { get; set; } = true;
     public bool ModuleTechnicalSanitization { get; set; } = true;
 
diff --git a/Core/Constants.cs b/Core/Constants.cs
index a15f90b..e63c267 100644
--- a/Core/Constants.cs
+++ b/Core/Constants.cs
@@ -34,6 +34,7 @@ public static class Constants
         public const string ProcessKill = "kill";
         public const string TypeX11 = "xdotool";
         public const string TypeWayland = "wtype";
+        public const string TypeYdotool = "ydotool";
         public const string Notify = "notify-send";
         public const string PlaySound = "paplay";
         public const string ClipboardX11 = "xclip";
diff --git a/Core/DaemonService.cs b/Core/DaemonService.cs
index 041684a..c48ca49 100644
--- a/Core/DaemonService.cs
+++ b/Core/DaemonService.cs
@@ -54,11 +54,16 @@ public static class DaemonService
         var notifications = new Notifications();
         var groqClient = new GroqApiClient(config.GroqApiKey);
 
+        // Compare case-insensitively, like TextInjector does for the typing backend.
+        IAudioRecorder recorder = string.Equals(config.AudioBackend, "ffmpeg", System.StringComparison.OrdinalIgnoreCase)
+            ? new FfmpegAudioRecorder(stateTracker, notifications)
+            : new AudioRecorder(stateTracker, notifications);
+
         var orchestrator = new TranscriptionOrchestrator(
             groqClient,
             groqClient,
             configManager,
-            new AudioRecorder(stateTracker, notifications),
+            recorder,
             notifications,
             new TextInjector(notifications),
             new HistoryManager(),
diff --git a/IO/TextInjector.cs b/IO/TextInjector.cs
index 31324ae..d414424 100644
--- a/IO/TextInjector.cs
+++ b/IO/TextInjector.cs
@@ -34,6 +34,19 @@ public class TextInjector : ITextInjector
                     CreateNoWindow = true
                 };
             }
+            else if (backend.ToLowerInvariant() == "ydotool")
+            {
+                Logger.LogDebug($"Injecting text using ydotool...");
+                // ArgumentList hands the text to ydotool verbatim (no quote/backslash escaping
+                // needed); "--" stops option parsing so text starting with '-' is still typed.
+                pInfo = new ProcessStartInfo
+                {
+                    FileName = Toak.Core.Constants.Commands.TypeYdotool,
+                    ArgumentList = { "type", "--", text },
+                    UseShellExecute = false,
+                    CreateNoWindow = true
+                };
+            }
             else // xdotool
             {
                 Logger.LogDebug($"Injecting text using xdotool...");
@@ -74,6 +87,31 @@ public class TextInjector : ITextInjector
                     RedirectStandardInput = true
                 };
             }
+            else if (backend.ToLowerInvariant() == "ydotool")
+            {
+                Logger.LogDebug($"Setting up stream injection using ydotool (chunked)...");
+                await foreach (var token in tokenStream)
+                {
+                    Logger.LogDebug($"Injecting token: '{token}'");
+                    fullText += token;
+                    var chunkInfo = new ProcessStartInfo
+                    {
+                        FileName = Toak.Core.Constants.Commands.TypeYdotool,
+                        ArgumentList = { "type", "--", token },
+                        UseShellExecute = false,
+                        CreateNoWindow = true
+                    };
+
+                    // One ydotool process per token; wait for it so tokens are typed in order,
+                    // and dispose the handle so a long stream does not leak process objects.
+                    using var chunkP = Process.Start(chunkInfo);
+                    if (chunkP != null)
+                    {
+                        await chunkP.WaitForExitAsync();
+                    }
+                }
+                return fullText;
+            }
             else // xdotool
             {
                 Logger.LogDebug($"Setting up stream injection using xdotool...");