feat: Add FFmpeg audio recording and ydotool typing backends, making them configurable.
This commit is contained in:
87
Audio/FfmpegAudioRecorder.cs
Normal file
87
Audio/FfmpegAudioRecorder.cs
Normal file
@@ -0,0 +1,87 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
|
||||
using Toak.Core;
|
||||
using Toak.IO;
|
||||
using Toak.Core.Interfaces;
|
||||
|
||||
namespace Toak.Audio;
|
||||
|
||||
/// <summary>
/// <see cref="IAudioRecorder"/> implementation that captures audio by spawning ffmpeg,
/// reading from the default PulseAudio source and writing a mono, 16 kHz WAV file.
/// The PID of the spawned ffmpeg process is stored in the recording state tracker so that
/// <see cref="StopRecording"/> can locate and stop it later.
/// </summary>
public class FfmpegAudioRecorder : IAudioRecorder
{
    /// <summary>How long to wait for ffmpeg to exit after SIGINT before force-killing it.</summary>
    private const int GracefulStopTimeoutMs = 2000;

    private readonly string _wavPath = Constants.Paths.RecordingWavFile;
    private readonly IRecordingStateTracker _stateTracker;
    private readonly INotifications _notifications;

    /// <summary>Creates a recorder that reports through the given tracker and notifier.</summary>
    /// <param name="stateTracker">Stores and clears the PID of the running ffmpeg process.</param>
    /// <param name="notifications">Used to notify the user that recording has started.</param>
    public FfmpegAudioRecorder(IRecordingStateTracker stateTracker, INotifications notifications)
    {
        _stateTracker = stateTracker;
        _notifications = notifications;
    }

    /// <summary>Returns the path of the WAV file that ffmpeg records into.</summary>
    public string GetWavPath() => _wavPath;

    /// <summary>
    /// Deletes any previous recording, starts ffmpeg, and registers its PID with the state tracker.
    /// </summary>
    public void StartRecording()
    {
        if (File.Exists(_wavPath))
        {
            Logger.LogDebug($"Deleting old audio file: {_wavPath}");
            File.Delete(_wavPath);
        }

        Logger.LogDebug("Starting ffmpeg to record audio...");

        var pInfo = new ProcessStartInfo
        {
            FileName = Constants.Commands.AudioFfmpeg,
            // -nostdin: ffmpeg runs unattended, so it must not read the inherited stdin.
            // -loglevel error: suppress the banner and periodic progress stats on stderr.
            Arguments = $"-nostdin -loglevel error -f pulse -i default -ac 1 -ar 16000 \"{_wavPath}\"",
            UseShellExecute = false,
            CreateNoWindow = true
            // stdout/stderr are deliberately NOT redirected: nothing in this class reads them,
            // and an undrained redirected pipe blocks the child once the pipe buffer is full.
        };

        // Disposing the Process object only releases the local handle; ffmpeg keeps running.
        using var process = Process.Start(pInfo);
        if (process is null)
        {
            Logger.LogDebug("ffmpeg process could not be started.");
            return;
        }

        _stateTracker.SetRecording(process.Id);
        _notifications.Notify("Recording Started (FFmpeg)");
    }

    /// <summary>
    /// Stops the ffmpeg process registered in the state tracker, if any, and clears the
    /// recording state. ffmpeg is first asked to stop with SIGINT; if it has not exited after
    /// <see cref="GracefulStopTimeoutMs"/> milliseconds it is killed. Errors while stopping are
    /// reported to the console and do not propagate.
    /// </summary>
    public void StopRecording()
    {
        var pid = _stateTracker.GetRecordingPid();
        if (!pid.HasValue)
        {
            return;
        }

        Logger.LogDebug($"Found active ffmpeg process with PID {pid.Value}. Attempting to stop...");
        try
        {
            using var process = Process.GetProcessById(pid.Value);
            if (!process.HasExited)
            {
                // Gracefully stop ffmpeg using SIGINT to ensure WAV headers are finalized cleanly
                using var signal = Process.Start(new ProcessStartInfo
                {
                    FileName = Constants.Commands.ProcessKill,
                    Arguments = $"-INT {pid.Value}",
                    CreateNoWindow = true,
                    UseShellExecute = false
                });
                signal?.WaitForExit();

                // Give it a moment to flush; fall back to a hard kill so that a stuck ffmpeg
                // is not left running after the recording state has been cleared.
                if (!process.WaitForExit(GracefulStopTimeoutMs))
                {
                    Logger.LogDebug($"ffmpeg (PID {pid.Value}) did not exit after SIGINT; killing it.");
                    process.Kill();
                }
            }
        }
        catch (Exception ex)
        {
            // Process might already be dead
            Console.WriteLine($"[FfmpegAudioRecorder] Error stopping ffmpeg: {ex.Message}");
        }
        finally
        {
            _stateTracker.ClearRecording();
        }
    }
}
|
||||
@@ -1,4 +1,5 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
using Spectre.Console;
|
||||
@@ -52,13 +53,18 @@ public static class OnboardCommand
|
||||
}));
|
||||
|
||||
var sessionType = Environment.GetEnvironmentVariable("XDG_SESSION_TYPE")?.ToLowerInvariant();
|
||||
var defaultBackend = sessionType == "wayland" ? "wtype" : "xdotool";
|
||||
var otherBackend = defaultBackend == "wtype" ? "xdotool" : "wtype";
|
||||
var typingBackends = sessionType == "wayland" ? new[] { "wtype", "ydotool", "xdotool" } : new[] { "xdotool", "ydotool" };
|
||||
|
||||
config.TypingBackend = AnsiConsole.Prompt(
|
||||
new SelectionPrompt<string>()
|
||||
.Title($"Select [green]Typing Backend[/] (Detected: {sessionType}):")
|
||||
.AddChoices(new[] { defaultBackend, otherBackend }));
|
||||
.AddChoices(typingBackends));
|
||||
|
||||
config.AudioBackend = AnsiConsole.Prompt(
|
||||
new SelectionPrompt<string>()
|
||||
.Title("Select [green]Audio Recording Backend[/]:")
|
||||
.AddChoices(new[] { "pw-record", "ffmpeg" })
|
||||
.UseConverter(c => c == "pw-record" ? "pw-record (Default PipeWire)" : "ffmpeg (Universal PulseAudio)"));
|
||||
|
||||
var availableSkills = SkillRegistry.AllSkills.Select(s => s.Name).ToList();
|
||||
|
||||
@@ -75,5 +81,21 @@ public static class OnboardCommand
|
||||
configManager.SaveConfig(config);
|
||||
|
||||
AnsiConsole.MarkupLine("\n[bold green]Configuration saved successfully![/]");
|
||||
|
||||
try
|
||||
{
|
||||
var processInfo = new ProcessStartInfo("systemctl", "--user restart toak.service")
|
||||
{
|
||||
CreateNoWindow = true,
|
||||
UseShellExecute = false
|
||||
};
|
||||
Process.Start(processInfo)?.WaitForExit();
|
||||
AnsiConsole.MarkupLine("[grey]Restarted Toak daemon service.[/]");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Toak.Core.Logger.LogDebug($"Failed to restart toak service: {ex.Message}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ public class ToakConfig
|
||||
{
|
||||
public string GroqApiKey { get; set; } = string.Empty;
|
||||
public string TypingBackend { get; set; } = "xdotool"; // wtype, ydotool, or xdotool
|
||||
public string AudioBackend { get; set; } = "pw-record"; // pw-record or ffmpeg
|
||||
public bool ModulePunctuation { get; set; } = true;
|
||||
public bool ModuleTechnicalSanitization { get; set; } = true;
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ public static class Constants
|
||||
public const string ProcessKill = "kill";
|
||||
public const string TypeX11 = "xdotool";
|
||||
public const string TypeWayland = "wtype";
|
||||
public const string TypeYdotool = "ydotool";
|
||||
public const string Notify = "notify-send";
|
||||
public const string PlaySound = "paplay";
|
||||
public const string ClipboardX11 = "xclip";
|
||||
|
||||
@@ -54,11 +54,15 @@ public static class DaemonService
|
||||
var notifications = new Notifications();
|
||||
|
||||
var groqClient = new GroqApiClient(config.GroqApiKey);
|
||||
IAudioRecorder recorder = config.AudioBackend == "ffmpeg"
|
||||
? new FfmpegAudioRecorder(stateTracker, notifications)
|
||||
: new AudioRecorder(stateTracker, notifications);
|
||||
|
||||
var orchestrator = new TranscriptionOrchestrator(
|
||||
groqClient,
|
||||
groqClient,
|
||||
configManager,
|
||||
new AudioRecorder(stateTracker, notifications),
|
||||
recorder,
|
||||
notifications,
|
||||
new TextInjector(notifications),
|
||||
new HistoryManager(),
|
||||
|
||||
@@ -34,6 +34,17 @@ public class TextInjector : ITextInjector
|
||||
CreateNoWindow = true
|
||||
};
|
||||
}
|
||||
else if (backend.ToLowerInvariant() == "ydotool")
|
||||
{
|
||||
Logger.LogDebug($"Injecting text using ydotool...");
|
||||
pInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = Toak.Core.Constants.Commands.TypeYdotool,
|
||||
Arguments = $"type \"{text.Replace("\"", "\\\"")}\"",
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true
|
||||
};
|
||||
}
|
||||
else // xdotool
|
||||
{
|
||||
Logger.LogDebug($"Injecting text using xdotool...");
|
||||
@@ -74,6 +85,25 @@ public class TextInjector : ITextInjector
|
||||
RedirectStandardInput = true
|
||||
};
|
||||
}
|
||||
else if (backend.ToLowerInvariant() == "ydotool")
|
||||
{
|
||||
Logger.LogDebug($"Setting up stream injection using ydotool (chunked)...");
|
||||
await foreach (var token in tokenStream)
|
||||
{
|
||||
Logger.LogDebug($"Injecting token: '{token}'");
|
||||
fullText += token;
|
||||
var chunkInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = Toak.Core.Constants.Commands.TypeYdotool,
|
||||
Arguments = $"type \"{token.Replace("\"", "\\\"")}\"",
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true
|
||||
};
|
||||
var chunkP = Process.Start(chunkInfo);
|
||||
if (chunkP != null) await chunkP.WaitForExitAsync();
|
||||
}
|
||||
return fullText;
|
||||
}
|
||||
else // xdotool
|
||||
{
|
||||
Logger.LogDebug($"Setting up stream injection using xdotool...");
|
||||
|
||||
Reference in New Issue
Block a user