feat: Add FFmpeg audio recording and ydotool typing backends, making them configurable.
This commit is contained in:
87
Audio/FfmpegAudioRecorder.cs
Normal file
87
Audio/FfmpegAudioRecorder.cs
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
using System;
|
||||||
|
using System.Diagnostics;
|
||||||
|
using System.IO;
|
||||||
|
|
||||||
|
using Toak.Core;
|
||||||
|
using Toak.IO;
|
||||||
|
using Toak.Core.Interfaces;
|
||||||
|
|
||||||
|
namespace Toak.Audio;
|
||||||
|
|
||||||
|
/// <summary>
/// <see cref="IAudioRecorder"/> implementation that records by spawning an ffmpeg
/// child process which reads the default PulseAudio source and writes a mono,
/// 16 kHz file to <see cref="Constants.Paths.RecordingWavFile"/>.
/// </summary>
public class FfmpegAudioRecorder : IAudioRecorder
{
    private readonly string _wavPath = Constants.Paths.RecordingWavFile;
    private readonly IRecordingStateTracker _stateTracker;
    private readonly INotifications _notifications;

    /// <summary>Creates a recorder bound to the given state tracker and notifier.</summary>
    /// <param name="stateTracker">Stores the PID of the running ffmpeg process between start and stop.</param>
    /// <param name="notifications">Used to tell the user that recording has started.</param>
    public FfmpegAudioRecorder(IRecordingStateTracker stateTracker, INotifications notifications)
    {
        _stateTracker = stateTracker;
        _notifications = notifications;
    }

    /// <summary>Returns the path of the file that ffmpeg records into.</summary>
    public string GetWavPath() => _wavPath;

    /// <summary>
    /// Deletes any previous recording, launches ffmpeg, and registers its PID with
    /// the state tracker so that <see cref="StopRecording"/> can find it later.
    /// </summary>
    public void StartRecording()
    {
        if (File.Exists(_wavPath))
        {
            Logger.LogDebug($"Deleting old audio file: {_wavPath}");
            File.Delete(_wavPath);
        }

        Logger.LogDebug("Starting ffmpeg to record audio...");

        var pInfo = new ProcessStartInfo
        {
            FileName = Constants.Commands.AudioFfmpeg,
            // -nostdin: ffmpeg must not read the stdin it inherits from this process.
            // -hide_banner/-loglevel error/-nostats: keep ffmpeg quiet apart from real errors.
            Arguments = $"-nostdin -hide_banner -loglevel error -nostats -f pulse -i default -ac 1 -ar 16000 \"{_wavPath}\"",
            UseShellExecute = false,
            CreateNoWindow = true
            // stdout/stderr are intentionally NOT redirected: nothing in this class
            // reads them, and an undrained redirected pipe eventually fills up and
            // blocks the child process in the middle of a recording.
        };

        // Disposing the Process object only releases our handle; ffmpeg keeps running.
        using var process = Process.Start(pInfo);
        if (process is null)
        {
            return;
        }

        _stateTracker.SetRecording(process.Id);
        _notifications.Notify("Recording Started (FFmpeg)");
    }

    /// <summary>
    /// Stops the ffmpeg process registered in the state tracker, if any, by sending it
    /// SIGINT, waits briefly for it to exit, and always clears the tracked recording state.
    /// </summary>
    public void StopRecording()
    {
        var pid = _stateTracker.GetRecordingPid();
        if (!pid.HasValue)
        {
            return;
        }

        Logger.LogDebug($"Found active ffmpeg process with PID {pid.Value}. Attempting to stop...");
        try
        {
            using var process = Process.GetProcessById(pid.Value);
            if (!process.HasExited)
            {
                // Stop ffmpeg gracefully with SIGINT so that it finalizes the WAV header cleanly.
                using var killer = Process.Start(new ProcessStartInfo
                {
                    FileName = Constants.Commands.ProcessKill,
                    Arguments = $"-INT {pid.Value}",
                    CreateNoWindow = true,
                    UseShellExecute = false
                });
                killer?.WaitForExit();

                // Give ffmpeg a moment to flush and exit.
                if (!process.WaitForExit(2000))
                {
                    Logger.LogDebug($"ffmpeg process {pid.Value} did not exit within 2000 ms after SIGINT.");
                }
            }
        }
        catch (Exception ex)
        {
            // The process might already be dead (GetProcessById throws in that case).
            Logger.LogDebug($"[FfmpegAudioRecorder] Error stopping ffmpeg: {ex.Message}");
        }
        finally
        {
            _stateTracker.ClearRecording();
        }
    }
}
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
using System;
|
using System;
|
||||||
|
using System.Diagnostics;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using Spectre.Console;
|
using Spectre.Console;
|
||||||
@@ -52,13 +53,18 @@ public static class OnboardCommand
|
|||||||
}));
|
}));
|
||||||
|
|
||||||
var sessionType = Environment.GetEnvironmentVariable("XDG_SESSION_TYPE")?.ToLowerInvariant();
|
var sessionType = Environment.GetEnvironmentVariable("XDG_SESSION_TYPE")?.ToLowerInvariant();
|
||||||
var defaultBackend = sessionType == "wayland" ? "wtype" : "xdotool";
|
var typingBackends = sessionType == "wayland" ? new[] { "wtype", "ydotool", "xdotool" } : new[] { "xdotool", "ydotool" };
|
||||||
var otherBackend = defaultBackend == "wtype" ? "xdotool" : "wtype";
|
|
||||||
|
|
||||||
config.TypingBackend = AnsiConsole.Prompt(
|
config.TypingBackend = AnsiConsole.Prompt(
|
||||||
new SelectionPrompt<string>()
|
new SelectionPrompt<string>()
|
||||||
.Title($"Select [green]Typing Backend[/] (Detected: {sessionType}):")
|
.Title($"Select [green]Typing Backend[/] (Detected: {sessionType}):")
|
||||||
.AddChoices(new[] { defaultBackend, otherBackend }));
|
.AddChoices(typingBackends));
|
||||||
|
|
||||||
|
config.AudioBackend = AnsiConsole.Prompt(
|
||||||
|
new SelectionPrompt<string>()
|
||||||
|
.Title("Select [green]Audio Recording Backend[/]:")
|
||||||
|
.AddChoices(new[] { "pw-record", "ffmpeg" })
|
||||||
|
.UseConverter(c => c == "pw-record" ? "pw-record (Default PipeWire)" : "ffmpeg (Universal PulseAudio)"));
|
||||||
|
|
||||||
var availableSkills = SkillRegistry.AllSkills.Select(s => s.Name).ToList();
|
var availableSkills = SkillRegistry.AllSkills.Select(s => s.Name).ToList();
|
||||||
|
|
||||||
@@ -75,5 +81,21 @@ public static class OnboardCommand
|
|||||||
configManager.SaveConfig(config);
|
configManager.SaveConfig(config);
|
||||||
|
|
||||||
AnsiConsole.MarkupLine("\n[bold green]Configuration saved successfully![/]");
|
AnsiConsole.MarkupLine("\n[bold green]Configuration saved successfully![/]");
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var processInfo = new ProcessStartInfo("systemctl", "--user restart toak.service")
|
||||||
|
{
|
||||||
|
CreateNoWindow = true,
|
||||||
|
UseShellExecute = false
|
||||||
|
};
|
||||||
|
Process.Start(processInfo)?.WaitForExit();
|
||||||
|
AnsiConsole.MarkupLine("[grey]Restarted Toak daemon service.[/]");
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
Toak.Core.Logger.LogDebug($"Failed to restart toak service: {ex.Message}");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ public class ToakConfig
|
|||||||
{
|
{
|
||||||
public string GroqApiKey { get; set; } = string.Empty;
|
public string GroqApiKey { get; set; } = string.Empty;
|
||||||
public string TypingBackend { get; set; } = "xdotool"; // wtype or xdotool
|
public string TypingBackend { get; set; } = "xdotool"; // wtype or xdotool
|
||||||
|
public string AudioBackend { get; set; } = "pw-record"; // pw-record or ffmpeg
|
||||||
public bool ModulePunctuation { get; set; } = true;
|
public bool ModulePunctuation { get; set; } = true;
|
||||||
public bool ModuleTechnicalSanitization { get; set; } = true;
|
public bool ModuleTechnicalSanitization { get; set; } = true;
|
||||||
|
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ public static class Constants
|
|||||||
public const string ProcessKill = "kill";
|
public const string ProcessKill = "kill";
|
||||||
public const string TypeX11 = "xdotool";
|
public const string TypeX11 = "xdotool";
|
||||||
public const string TypeWayland = "wtype";
|
public const string TypeWayland = "wtype";
|
||||||
|
public const string TypeYdotool = "ydotool";
|
||||||
public const string Notify = "notify-send";
|
public const string Notify = "notify-send";
|
||||||
public const string PlaySound = "paplay";
|
public const string PlaySound = "paplay";
|
||||||
public const string ClipboardX11 = "xclip";
|
public const string ClipboardX11 = "xclip";
|
||||||
|
|||||||
@@ -54,11 +54,15 @@ public static class DaemonService
|
|||||||
var notifications = new Notifications();
|
var notifications = new Notifications();
|
||||||
|
|
||||||
var groqClient = new GroqApiClient(config.GroqApiKey);
|
var groqClient = new GroqApiClient(config.GroqApiKey);
|
||||||
|
IAudioRecorder recorder = config.AudioBackend == "ffmpeg"
|
||||||
|
? new FfmpegAudioRecorder(stateTracker, notifications)
|
||||||
|
: new AudioRecorder(stateTracker, notifications);
|
||||||
|
|
||||||
var orchestrator = new TranscriptionOrchestrator(
|
var orchestrator = new TranscriptionOrchestrator(
|
||||||
groqClient,
|
groqClient,
|
||||||
groqClient,
|
groqClient,
|
||||||
configManager,
|
configManager,
|
||||||
new AudioRecorder(stateTracker, notifications),
|
recorder,
|
||||||
notifications,
|
notifications,
|
||||||
new TextInjector(notifications),
|
new TextInjector(notifications),
|
||||||
new HistoryManager(),
|
new HistoryManager(),
|
||||||
|
|||||||
@@ -34,6 +34,17 @@ public class TextInjector : ITextInjector
|
|||||||
CreateNoWindow = true
|
CreateNoWindow = true
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
else if (backend.ToLowerInvariant() == "ydotool")
|
||||||
|
{
|
||||||
|
Logger.LogDebug($"Injecting text using ydotool...");
|
||||||
|
pInfo = new ProcessStartInfo
|
||||||
|
{
|
||||||
|
FileName = Toak.Core.Constants.Commands.TypeYdotool,
|
||||||
|
Arguments = $"type \"{text.Replace("\"", "\\\"")}\"",
|
||||||
|
UseShellExecute = false,
|
||||||
|
CreateNoWindow = true
|
||||||
|
};
|
||||||
|
}
|
||||||
else // xdotool
|
else // xdotool
|
||||||
{
|
{
|
||||||
Logger.LogDebug($"Injecting text using xdotool...");
|
Logger.LogDebug($"Injecting text using xdotool...");
|
||||||
@@ -74,6 +85,25 @@ public class TextInjector : ITextInjector
|
|||||||
RedirectStandardInput = true
|
RedirectStandardInput = true
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
else if (backend.ToLowerInvariant() == "ydotool")
|
||||||
|
{
|
||||||
|
Logger.LogDebug($"Setting up stream injection using ydotool (chunked)...");
|
||||||
|
await foreach (var token in tokenStream)
|
||||||
|
{
|
||||||
|
Logger.LogDebug($"Injecting token: '{token}'");
|
||||||
|
fullText += token;
|
||||||
|
var chunkInfo = new ProcessStartInfo
|
||||||
|
{
|
||||||
|
FileName = Toak.Core.Constants.Commands.TypeYdotool,
|
||||||
|
Arguments = $"type \"{token.Replace("\"", "\\\"")}\"",
|
||||||
|
UseShellExecute = false,
|
||||||
|
CreateNoWindow = true
|
||||||
|
};
|
||||||
|
var chunkP = Process.Start(chunkInfo);
|
||||||
|
if (chunkP != null) await chunkP.WaitForExitAsync();
|
||||||
|
}
|
||||||
|
return fullText;
|
||||||
|
}
|
||||||
else // xdotool
|
else // xdotool
|
||||||
{
|
{
|
||||||
Logger.LogDebug($"Setting up stream injection using xdotool...");
|
Logger.LogDebug($"Setting up stream injection using xdotool...");
|
||||||
|
|||||||
Reference in New Issue
Block a user