diff --git a/Commands/DiscardCommand.cs b/Commands/DiscardCommand.cs index da6d181..5fbe0bd 100644 --- a/Commands/DiscardCommand.cs +++ b/Commands/DiscardCommand.cs @@ -1,9 +1,8 @@ -using System.IO; +using System; +using System.Net.Sockets; using System.Threading.Tasks; using Spectre.Console; -using Toak.Audio; using Toak.Core; -using Toak.IO; namespace Toak.Commands; @@ -13,23 +12,34 @@ public static class DiscardCommand { Logger.Verbose = verbose; - if (StateTracker.IsRecording()) + var socketPath = DaemonService.GetSocketPath(); + + try { - AudioRecorder.StopRecording(); - var wavPath = AudioRecorder.GetWavPath(); - if (File.Exists(wavPath)) File.Delete(wavPath); - Notifications.Notify("Toak", "Recording discarded"); - if (!pipeToStdout) + using var socket = new Socket(AddressFamily.Unix, SocketType.Stream, ProtocolType.Unspecified); + var endPoint = new UnixDomainSocketEndPoint(socketPath); + await socket.ConnectAsync(endPoint); + + // Send ABORT (cmd == 3) + await socket.SendAsync(new byte[] { 3 }, SocketFlags.None); + + if (verbose) { - AnsiConsole.MarkupLine("[yellow]Recording discarded.[/]"); + Console.WriteLine("Sent ABORT command to daemon."); } } - else + catch (SocketException) { - if (!pipeToStdout) + if (!pipeToStdout) { - AnsiConsole.MarkupLine("[grey]No active recording to discard.[/]"); + AnsiConsole.MarkupLine("[red]Failed to connect to Toak daemon.[/]"); + AnsiConsole.MarkupLine("Please ensure the daemon is running in the background:"); + AnsiConsole.MarkupLine(" [dim]toak daemon[/]"); } } + catch (Exception ex) + { + if (!pipeToStdout) AnsiConsole.MarkupLine($"[red]Error:[/] {ex.Message}"); + } } } diff --git a/Commands/ToggleCommand.cs b/Commands/ToggleCommand.cs index 886c701..c5060fb 100644 --- a/Commands/ToggleCommand.cs +++ b/Commands/ToggleCommand.cs @@ -1,13 +1,8 @@ using System; -using System.Diagnostics; -using System.IO; +using System.Net.Sockets; using System.Threading.Tasks; using Spectre.Console; -using Toak.Audio; -using Toak.Configuration; -using Toak.Api; using Toak.Core; -using Toak.IO; namespace Toak.Commands; @@ -17,143 +12,31 @@ public static class ToggleCommand { Logger.Verbose = verbose; - if (StateTracker.IsRecording()) + var socketPath = DaemonService.GetSocketPath(); + + try { - var config = ConfigManager.LoadConfig(); - Notifications.PlaySound(config.StopSoundPath); + using var socket = new Socket(AddressFamily.Unix, SocketType.Stream, ProtocolType.Unspecified); + var endPoint = new UnixDomainSocketEndPoint(socketPath); + await socket.ConnectAsync(endPoint); - if (!pipeToStdout) AnsiConsole.MarkupLine("[yellow]Stopping recording and transcribing...[/]"); - if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing..."); + // Send TOGGLE (cmd == 4) + await socket.SendAsync(new byte[] { 4 }, SocketFlags.None); - AudioRecorder.StopRecording(); - - Logger.LogDebug($"Loaded configuration: LLM={config.LlmModel}, Whisper={config.WhisperModel}, Typing={config.TypingBackend}"); - - if (string.IsNullOrWhiteSpace(config.GroqApiKey)) + if (verbose) { - Notifications.Notify("Toak Error", "Groq API Key is not configured. Run 'toak onboard'."); - AnsiConsole.MarkupLine("[red]Groq API Key is not configured.[/] Run 'toak onboard'."); - return; - } - - var groq = new GroqApiClient(config.GroqApiKey); - var wavPath = AudioRecorder.GetWavPath(); - - if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0) - { - if (!pipeToStdout) Notifications.Notify("Toak", "No audio recorded."); - return; - } - - try - { - var stopWatch = Stopwatch.StartNew(); - - // 1. STT - Logger.LogDebug($"Starting STT transcription via Whisper for {wavPath}..."); - - string transcript = string.Empty; - - if (!pipeToStdout) - { - await AnsiConsole.Status().StartAsync("Transcribing...", async ctx => { - transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel); - }); - } - else - { - transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel); - } - - Logger.LogDebug($"Raw transcript received: '{transcript}'"); - - if (string.IsNullOrWhiteSpace(transcript)) - { - if (!pipeToStdout) Notifications.Notify("Toak", "No speech detected."); - return; - } - - // 2. LLM Refinement - var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills); - string systemPrompt; - if (detectedSkill != null) - { - Logger.LogDebug($"Skill detected: {detectedSkill.Name}"); - if (!pipeToStdout) Notifications.Notify("Toak Skill Detected", detectedSkill.Name); - systemPrompt = detectedSkill.GetSystemPrompt(transcript); - } - else - { - systemPrompt = PromptBuilder.BuildPrompt(config); - } - - bool isExecutionSkill = detectedSkill != null && detectedSkill.HandlesExecution; - - // 3. Output - if (isExecutionSkill || pipeToStdout || copyToClipboard) - { - Logger.LogDebug("Starting LLM text refinement (synchronous)..."); - - string finalText = string.Empty; - if (!pipeToStdout) { - await AnsiConsole.Status().StartAsync("Refining text...", async ctx => { - finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel); - }); - } else { - finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel); - } - - Logger.LogDebug($"Refined text received: '{finalText}'"); - if (string.IsNullOrWhiteSpace(finalText)) - { - if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio."); - return; - } - - if (isExecutionSkill) - { - detectedSkill!.Execute(finalText); - stopWatch.Stop(); - Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms"); - } - else if (pipeToStdout) - { - Console.WriteLine(finalText); - } - else - { - ClipboardManager.Copy(finalText); - stopWatch.Stop(); - Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms"); - } - } - else - { - Logger.LogDebug("Starting LLM text refinement (streaming)..."); - var tokenStream = groq.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel); - Logger.LogDebug("Starting to inject text..."); - await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend); - stopWatch.Stop(); - Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms"); - } - } - catch (Exception ex) - { - if (!pipeToStdout) Notifications.Notify("Toak Error", ex.Message); - if (!pipeToStdout) AnsiConsole.MarkupLine($"[red]Error:[/] {ex.Message}"); - } - finally - { - if (File.Exists(wavPath)) File.Delete(wavPath); + Console.WriteLine("Sent TOGGLE command to daemon."); } } - else + catch (SocketException) { - // Start recording - if (!pipeToStdout) AnsiConsole.MarkupLine("[green]Starting recording...[/]"); - var config = ConfigManager.LoadConfig(); - Notifications.PlaySound(config.StartSoundPath); - AudioRecorder.StartRecording(); + AnsiConsole.MarkupLine("[red]Failed to connect to Toak daemon.[/]"); + AnsiConsole.MarkupLine("Please ensure the daemon is running in the background:"); + AnsiConsole.MarkupLine(" [dim]toak daemon[/]"); + } + catch (Exception ex) + { + AnsiConsole.MarkupLine($"[red]Error:[/] {ex.Message}"); } } } diff --git a/Core/DaemonService.cs b/Core/DaemonService.cs new file mode 100644 index 0000000..2c3c4bc --- /dev/null +++ b/Core/DaemonService.cs @@ -0,0 +1,200 @@ +using System; +using System.Diagnostics; +using System.IO; +using System.Net.Sockets; +using System.Threading; +using System.Threading.Tasks; +using Toak.Audio; +using Toak.Configuration; +using Toak.Api; +using Toak.IO; + +namespace Toak.Core; + +public static class DaemonService +{ + private static GroqApiClient? _groqClient; + + public static string GetSocketPath() + { + var runtimeDir = Environment.GetEnvironmentVariable("XDG_RUNTIME_DIR"); + if (string.IsNullOrEmpty(runtimeDir)) + { + runtimeDir = Path.GetTempPath(); + } + return Path.Combine(runtimeDir, "toak.sock"); + } + + public static async Task StartAsync(bool verbose) + { + Logger.Verbose = verbose; + var socketPath = GetSocketPath(); + + if (File.Exists(socketPath)) + { + try { File.Delete(socketPath); } catch { } + } + + var config = ConfigManager.LoadConfig(); + if (string.IsNullOrWhiteSpace(config.GroqApiKey)) + { + Console.WriteLine("Groq API Key is not configured. Run 'toak onboard'."); + return; + } + + _groqClient = new GroqApiClient(config.GroqApiKey); + + using var socket = new Socket(AddressFamily.Unix, SocketType.Stream, ProtocolType.Unspecified); + var endPoint = new UnixDomainSocketEndPoint(socketPath); + + try + { + socket.Bind(endPoint); + socket.Listen(10); + Logger.LogDebug($"Daemon listening on {socketPath}"); + Console.WriteLine($"Toak daemon started, listening on {socketPath}"); + + while (true) + { + var client = await socket.AcceptAsync(); + _ = Task.Run(() => HandleClientAsync(client)); + } + } + catch (Exception ex) + { + Logger.LogDebug($"Daemon error: {ex.Message}"); + } + finally + { + if (File.Exists(socketPath)) + { + File.Delete(socketPath); + } + } + } + + private static async Task HandleClientAsync(Socket client) + { + try + { + var buffer = new byte[1]; + int bytesRead = await client.ReceiveAsync(buffer, SocketFlags.None); + if (bytesRead > 0) + { + byte cmd = buffer[0]; + if (cmd == 1) // START + { + await ProcessStartRecordingAsync(); + } + else if (cmd == 2) // STOP + { + await ProcessStopRecordingAsync(); + } + else if (cmd == 3) // ABORT + { + ProcessAbortAsync(); + } + else if (cmd == 4) // TOGGLE + { + if (StateTracker.IsRecording()) + await ProcessStopRecordingAsync(); + else + await ProcessStartRecordingAsync(); + } + } + } + catch (Exception ex) + { + Logger.LogDebug($"HandleClient error: {ex.Message}"); + } + finally + { + client.Close(); + } + } + + private static async Task ProcessStartRecordingAsync() + { + if (StateTracker.IsRecording()) return; + + Logger.LogDebug("Received START command"); + var config = ConfigManager.LoadConfig(); + Notifications.PlaySound(config.StartSoundPath); + AudioRecorder.StartRecording(); + } + + private static async Task ProcessStopRecordingAsync() + { + if (!StateTracker.IsRecording()) return; + + Logger.LogDebug("Received STOP command"); + var config = ConfigManager.LoadConfig(); + Notifications.PlaySound(config.StopSoundPath); + Notifications.Notify("Toak", "Transcribing..."); + + AudioRecorder.StopRecording(); + + var wavPath = AudioRecorder.GetWavPath(); + if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0) + { + Notifications.Notify("Toak", "No audio recorded."); + return; + } + + try + { + var stopWatch = Stopwatch.StartNew(); + + Logger.LogDebug($"Starting STT via Whisper for {wavPath}..."); + var transcript = await _groqClient!.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel); + + if (string.IsNullOrWhiteSpace(transcript)) + { + Notifications.Notify("Toak", "No speech detected."); + return; + } + + // LLM Refinement + var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills); + string systemPrompt = detectedSkill != null ? detectedSkill.GetSystemPrompt(transcript) : PromptBuilder.BuildPrompt(config); + bool isExecutionSkill = detectedSkill != null && detectedSkill.HandlesExecution; + + if (isExecutionSkill) + { + var finalText = await _groqClient.RefineTextAsync(transcript, systemPrompt, config.LlmModel); + if (!string.IsNullOrWhiteSpace(finalText)) + { + detectedSkill!.Execute(finalText); + stopWatch.Stop(); + Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms"); + } + } + else + { + Logger.LogDebug("Starting LLM text refinement (streaming)..."); + var tokenStream = _groqClient.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel); + await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend); + stopWatch.Stop(); + Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms"); + } + } + catch (Exception ex) + { + Notifications.Notify("Toak Error", ex.Message); + Logger.LogDebug($"Error during processing: {ex.Message}"); + } + finally + { + if (File.Exists(wavPath)) File.Delete(wavPath); + } + } + + private static void ProcessAbortAsync() + { + Logger.LogDebug("Received ABORT command"); + AudioRecorder.StopRecording(); + var wavPath = AudioRecorder.GetWavPath(); + if (File.Exists(wavPath)) File.Delete(wavPath); + Notifications.Notify("Toak", "Recording Aborted."); + } +} diff --git a/DAEMON_PLAN.md b/DAEMON_PLAN.md new file mode 100644 index 0000000..d0913a2 --- /dev/null +++ b/DAEMON_PLAN.md @@ -0,0 +1,57 @@ +# Toak: Client-Server & PipeWire Architecture Specification + +This document outlines the transition of Toak from a monolithic, ephemeral CLI application to a persistent, low-latency background daemon utilizing Linux Inter-Process Communication (IPC) and PipeWire. + +## 1. System Architecture Overview + +The system is divided into two distinct binaries to separate the heavy runtime environment from the instant-trigger mechanism. + +* **Toak Daemon (`toakd`):** A persistent C# background service. It holds the API connections, memory buffers, and audio routing open. +* **Toak Client (`toak`):** A lightweight, ephemeral trigger executed by the window manager that simply sends signals to the daemon. + +## 2. The Toak Daemon (Server) + +Built as a C# `.NET Hosted Service`, this component runs continuously in the background and manages three primary responsibilities: + +### A. Unix Domain Socket Listener + +* Listens on a secure, user-space socket (e.g., `/run/user/1000/toak.sock`). +* Awaits basic byte-sized instructions from the client (e.g., `START_RECORDING`, `STOP_RECORDING`, `ABORT`). +* Ensures single-instance execution and rejects unauthorized cross-user connections. + +### B. PipeWire Audio Node + +* Connects to the PipeWire graph as a native audio sink. +* Dynamically links to the default system microphone *only* upon receiving the `START_RECORDING` signal. +* Reads the audio stream directly into a pre-allocated C# `MemoryStream` via memory-mapped buffers (zero-copy), requesting the exact format required by the Groq Whisper API (e.g., 16kHz, mono). +* Unlinks from the microphone instantly upon receiving the `STOP_RECORDING` signal, freeing the hardware device. + +### C. State & API Management + +* Maintains a persistent `HttpClient` connection pool to Groq, eliminating TLS handshake overhead for each dictation. +* Triggers the Wayland (`wtype`) or X11 (`xdotool`) typing backend as a child process once the refined transcription is returned. + +## 3. The Toak Client (Trigger) + +A minimal executable designed to be fired by global window manager hotkeys (e.g., Sway, Hyprland, KDE). + +* **Stateless:** Contains no audio logic, API keys, or large library dependencies. +* **Execution:** Connects to the daemon's Unix socket, writes a specific control byte, and exits immediately. +* **Latency:** Execution time is measured in microseconds, preventing any blocking of the desktop compositor's input thread. + +## 4. Deployment & Lifecycle Management + +The daemon is managed by the host's native init system to ensure uptime and clean restarts. + +* **Systemd User Service:** Installed as `~/.config/systemd/user/toak.service`. +* **Lifecycle:** Starts automatically on user login (`default.target`), restarts automatically on failure, and manages its own logging via `journalctl`. +* **Environment:** Inherits the active Wayland/X11 display variables necessary for the typing backends to inject keystrokes into the active window. + +## 5. Execution Flow (The PTT Lifecycle) + +1. **Init:** User logs in. Systemd starts `toakd`. It allocates memory, opens API connections, and begins listening on the Unix socket. +2. **KeyDown:** User holds the Push-to-Talk hotkey. Window manager executes `toak --start`. +3. **Link:** `toakd` receives the signal over the socket and tells PipeWire to link the microphone to its internal buffer. +4. **Dictation:** User speaks. Audio fills the C# `MemoryStream`. +5. **KeyUp:** User releases the hotkey. Window manager executes `toak --stop`. +6. **Unlink & Send:** `toakd` unlinks the microphone, flushes the memory buffer directly to the Groq API, receives the transcription, and executes the typing backend. \ No newline at end of file diff --git a/IO/Notifications.cs b/IO/Notifications.cs index f7113ba..32f0573 100644 --- a/IO/Notifications.cs +++ b/IO/Notifications.cs @@ -4,8 +4,12 @@ namespace Toak.IO; public static class Notifications { + private static bool _notifySendAvailable = true; + public static void Notify(string summary, string body = "") { + if (!_notifySendAvailable) return; + try { var pInfo = new ProcessStartInfo @@ -17,15 +21,22 @@ public static class Notifications }; Process.Start(pInfo); } + catch (System.ComponentModel.Win32Exception) + { + Console.WriteLine("[Notifications] 'notify-send' executable not found. Notifications will be disabled."); + _notifySendAvailable = false; + } catch (Exception ex) { Console.WriteLine($"[Notifications] Failed to send notification: {ex.Message}"); } } + private static bool _paplayAvailable = true; + public static void PlaySound(string soundPath) { - if (string.IsNullOrWhiteSpace(soundPath)) return; + if (!_paplayAvailable || string.IsNullOrWhiteSpace(soundPath)) return; try { var absolutePath = soundPath; @@ -60,6 +71,11 @@ public static class Notifications }; Process.Start(pInfo); } + catch (System.ComponentModel.Win32Exception) + { + Console.WriteLine("[Notifications] 'paplay' executable not found. Sound effects will be disabled."); + _paplayAvailable = false; + } catch (Exception ex) { Console.WriteLine($"[Notifications] Failed to play sound: {ex.Message}"); diff --git a/Program.cs b/Program.cs index 64bff35..31cf216 100644 --- a/Program.cs +++ b/Program.cs @@ -23,6 +23,11 @@ public class Program toggleCmd.SetHandler(ToggleCommand.ExecuteAsync, pipeOption, copyOption, verboseOption); rootCommand.AddCommand(toggleCmd); + // Daemon Command + var daemonCmd = new Command("daemon", "Starts the background background service"); + daemonCmd.SetHandler(Toak.Core.DaemonService.StartAsync, verboseOption); + rootCommand.AddCommand(daemonCmd); + // Discard Command var discardCmd = new Command("discard", "Abort current recording without transcribing"); discardCmd.AddOption(pipeOption); diff --git a/_toak b/_toak index 20fa6de..b4963f8 100644 --- a/_toak +++ b/_toak @@ -13,6 +13,7 @@ _toak() { commands=( 'toggle:Starts or stops the recording' + 'daemon:Starts the background background service' 'discard:Abort current recording without transcribing' 'onboard:Configure the application' 'latency-test:Benchmark full pipeline without recording'