1
0

feat: Add FFmpeg audio recording and ydotool typing backends, making them configurable.

This commit is contained in:
2026-02-28 15:58:45 +01:00
parent 96ccf0ea9a
commit 3ceecbe5ee
6 changed files with 149 additions and 4 deletions

View File

@@ -0,0 +1,87 @@
using System;
using System.Diagnostics;
using System.IO;
using Toak.Core;
using Toak.IO;
using Toak.Core.Interfaces;
namespace Toak.Audio;
/// <summary>
/// <see cref="IAudioRecorder"/> implementation that captures audio by spawning an
/// <c>ffmpeg</c> child process reading from the default PulseAudio source and
/// writing a mono 16 kHz WAV file.
/// </summary>
public class FfmpegAudioRecorder : IAudioRecorder
{
    // How long ffmpeg gets to finalize the WAV file after SIGINT before it is force-killed.
    private const int StopTimeoutMs = 2000;

    private readonly string _wavPath = Constants.Paths.RecordingWavFile;
    private readonly IRecordingStateTracker _stateTracker;
    private readonly INotifications _notifications;

    /// <summary>Creates a recorder that reports its state through the given collaborators.</summary>
    /// <param name="stateTracker">Stores and clears the PID of the running ffmpeg process.</param>
    /// <param name="notifications">Used to tell the user that recording has started.</param>
    public FfmpegAudioRecorder(IRecordingStateTracker stateTracker, INotifications notifications)
    {
        _stateTracker = stateTracker;
        _notifications = notifications;
    }

    /// <summary>Returns the path of the WAV file this recorder writes to.</summary>
    public string GetWavPath() => _wavPath;

    /// <summary>
    /// Deletes any previous recording and starts ffmpeg in the background.
    /// The PID of the spawned process is handed to the state tracker so that
    /// <see cref="StopRecording"/> can find it later.
    /// </summary>
    public void StartRecording()
    {
        if (File.Exists(_wavPath))
        {
            Logger.LogDebug($"Deleting old audio file: {_wavPath}");
            File.Delete(_wavPath);
        }

        Logger.LogDebug("Starting ffmpeg to record audio...");
        var pInfo = new ProcessStartInfo
        {
            FileName = Constants.Commands.AudioFfmpeg,
            // -nostdin: ffmpeg must not read the daemon's stdin (it treats it as interactive input).
            // -y: never block on an "overwrite?" prompt if the file reappears.
            // -hide_banner/-loglevel error/-nostats: keep the redirected pipes nearly silent.
            Arguments = $"-hide_banner -loglevel error -nostats -nostdin -y -f pulse -i default -ac 1 -ar 16000 \"{_wavPath}\"",
            UseShellExecute = false,
            CreateNoWindow = true,
            RedirectStandardOutput = true,
            RedirectStandardError = true
        };

        var process = Process.Start(pInfo);
        if (process is null)
        {
            Logger.LogDebug("ffmpeg process could not be started.");
            return;
        }

        _stateTracker.SetRecording(process.Id);

        // The redirected pipes must be drained: if nobody reads them, ffmpeg blocks
        // as soon as the OS pipe buffer is full and the recording silently stalls.
        process.OutputDataReceived += (_, e) =>
        {
            if (!string.IsNullOrEmpty(e.Data))
            {
                Logger.LogDebug($"[ffmpeg] {e.Data}");
            }
        };
        process.ErrorDataReceived += (_, e) =>
        {
            if (!string.IsNullOrEmpty(e.Data))
            {
                Logger.LogDebug($"[ffmpeg] {e.Data}");
            }
        };
        process.BeginOutputReadLine();
        process.BeginErrorReadLine();

        _notifications.Notify("Recording Started (FFmpeg)");
    }

    /// <summary>
    /// Stops the ffmpeg process recorded in the state tracker, if any.
    /// ffmpeg is first asked to stop with SIGINT so it can finalize the WAV header;
    /// if it has not exited after <see cref="StopTimeoutMs"/> milliseconds it is killed.
    /// The tracked recording state is always cleared, even when stopping fails.
    /// </summary>
    public void StopRecording()
    {
        var pid = _stateTracker.GetRecordingPid();
        if (!pid.HasValue)
        {
            return;
        }

        Logger.LogDebug($"Found active ffmpeg process with PID {pid.Value}. Attempting to stop...");
        try
        {
            using var process = Process.GetProcessById(pid.Value);
            if (!process.HasExited)
            {
                // Gracefully stop ffmpeg using SIGINT to ensure WAV headers are finalized cleanly
                using (var killer = Process.Start(new ProcessStartInfo
                {
                    FileName = Constants.Commands.ProcessKill,
                    Arguments = $"-INT {pid.Value}",
                    CreateNoWindow = true,
                    UseShellExecute = false
                }))
                {
                    killer?.WaitForExit();
                }

                // Give it a moment to flush; do not leave an untracked ffmpeg
                // capturing the microphone if it ignores the signal.
                if (!process.WaitForExit(StopTimeoutMs))
                {
                    Logger.LogDebug($"ffmpeg (PID {pid.Value}) did not exit within {StopTimeoutMs} ms; killing it.");
                    process.Kill();
                }
            }
        }
        catch (Exception ex)
        {
            // Process might already be dead
            Logger.LogDebug($"[FfmpegAudioRecorder] Error stopping ffmpeg: {ex.Message}");
        }
        finally
        {
            _stateTracker.ClearRecording();
        }
    }
}

View File

@@ -1,4 +1,5 @@
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
using Spectre.Console;
@@ -52,13 +53,18 @@ public static class OnboardCommand
}));
var sessionType = Environment.GetEnvironmentVariable("XDG_SESSION_TYPE")?.ToLowerInvariant();
var defaultBackend = sessionType == "wayland" ? "wtype" : "xdotool";
var otherBackend = defaultBackend == "wtype" ? "xdotool" : "wtype";
var typingBackends = sessionType == "wayland" ? new[] { "wtype", "ydotool", "xdotool" } : new[] { "xdotool", "ydotool" };
config.TypingBackend = AnsiConsole.Prompt(
new SelectionPrompt<string>()
.Title($"Select [green]Typing Backend[/] (Detected: {sessionType}):")
.AddChoices(new[] { defaultBackend, otherBackend }));
.AddChoices(typingBackends));
config.AudioBackend = AnsiConsole.Prompt(
new SelectionPrompt<string>()
.Title("Select [green]Audio Recording Backend[/]:")
.AddChoices(new[] { "pw-record", "ffmpeg" })
.UseConverter(c => c == "pw-record" ? "pw-record (Default PipeWire)" : "ffmpeg (Universal PulseAudio)"));
var availableSkills = SkillRegistry.AllSkills.Select(s => s.Name).ToList();
@@ -75,5 +81,21 @@ public static class OnboardCommand
configManager.SaveConfig(config);
AnsiConsole.MarkupLine("\n[bold green]Configuration saved successfully![/]");
try
{
var processInfo = new ProcessStartInfo("systemctl", "--user restart toak.service")
{
CreateNoWindow = true,
UseShellExecute = false
};
Process.Start(processInfo)?.WaitForExit();
AnsiConsole.MarkupLine("[grey]Restarted Toak daemon service.[/]");
}
catch (Exception ex)
{
Toak.Core.Logger.LogDebug($"Failed to restart toak service: {ex.Message}");
}
}
}

View File

@@ -4,6 +4,7 @@ public class ToakConfig
{
public string GroqApiKey { get; set; } = string.Empty;
// Typing backend used for text injection; the onboarding prompt offers wtype, ydotool and xdotool.
public string TypingBackend { get; set; } = "xdotool"; // wtype, ydotool or xdotool
// Audio recording backend; "ffmpeg" selects FfmpegAudioRecorder, any other value falls back to AudioRecorder.
public string AudioBackend { get; set; } = "pw-record"; // pw-record or ffmpeg
public bool ModulePunctuation { get; set; } = true;
public bool ModuleTechnicalSanitization { get; set; } = true;

View File

@@ -34,6 +34,7 @@ public static class Constants
public const string ProcessKill = "kill";
public const string TypeX11 = "xdotool";
public const string TypeWayland = "wtype";
public const string TypeYdotool = "ydotool";
public const string Notify = "notify-send";
public const string PlaySound = "paplay";
public const string ClipboardX11 = "xclip";

View File

@@ -54,11 +54,15 @@ public static class DaemonService
var notifications = new Notifications();
var groqClient = new GroqApiClient(config.GroqApiKey);
IAudioRecorder recorder = config.AudioBackend == "ffmpeg"
? new FfmpegAudioRecorder(stateTracker, notifications)
: new AudioRecorder(stateTracker, notifications);
var orchestrator = new TranscriptionOrchestrator(
groqClient,
groqClient,
configManager,
new AudioRecorder(stateTracker, notifications),
recorder,
notifications,
new TextInjector(notifications),
new HistoryManager(),

View File

@@ -34,6 +34,17 @@ public class TextInjector : ITextInjector
CreateNoWindow = true
};
}
else if (backend.ToLowerInvariant() == "ydotool")
{
Logger.LogDebug($"Injecting text using ydotool...");
pInfo = new ProcessStartInfo
{
FileName = Toak.Core.Constants.Commands.TypeYdotool,
Arguments = $"type \"{text.Replace("\"", "\\\"")}\"",
UseShellExecute = false,
CreateNoWindow = true
};
}
else // xdotool
{
Logger.LogDebug($"Injecting text using xdotool...");
@@ -74,6 +85,25 @@ public class TextInjector : ITextInjector
RedirectStandardInput = true
};
}
else if (backend.ToLowerInvariant() == "ydotool")
{
Logger.LogDebug($"Setting up stream injection using ydotool (chunked)...");
await foreach (var token in tokenStream)
{
Logger.LogDebug($"Injecting token: '{token}'");
fullText += token;
var chunkInfo = new ProcessStartInfo
{
FileName = Toak.Core.Constants.Commands.TypeYdotool,
Arguments = $"type \"{token.Replace("\"", "\\\"")}\"",
UseShellExecute = false,
CreateNoWindow = true
};
var chunkP = Process.Start(chunkInfo);
if (chunkP != null) await chunkP.WaitForExitAsync();
}
return fullText;
}
else // xdotool
{
Logger.LogDebug($"Setting up stream injection using xdotool...");