diff --git a/Commands/ConfigUpdaterCommand.cs b/Commands/ConfigUpdaterCommand.cs
new file mode 100644
index 0000000..b128ded
--- /dev/null
+++ b/Commands/ConfigUpdaterCommand.cs
@@ -0,0 +1,63 @@
+using System.Threading.Tasks;
+using Spectre.Console;
+using Toak.Configuration;
+
+namespace Toak.Commands;
+
+/// <summary>
+/// Handles <c>toak config</c>: changes a single configuration setting and saves it.
+/// </summary>
+public static class ConfigUpdaterCommand
+{
+    /// <summary>
+    /// Sets the setting named by <paramref name="key"/> to <paramref name="val"/>.
+    /// Key and value are lower-cased before use; an unknown key or an invalid value
+    /// prints an error and returns without saving.
+    /// </summary>
+    /// <param name="key">Setting name: llm, whisper, style, language/lang, backend, punctuation, tech, bullets or paragraphs.</param>
+    /// <param name="val">New value. style accepts professional, concise or casual; punctuation, tech, bullets and paragraphs accept true or false.</param>
+    /// <param name="verbose">Assigned to <c>Logger.Verbose</c>.</param>
+    public static async Task ExecuteAsync(string key, string val, bool verbose)
+    {
+        Toak.Core.Logger.Verbose = verbose;
+        var config = ConfigManager.LoadConfig();
+        key = key.ToLowerInvariant();
+        val = val.ToLowerInvariant();
+
+        switch (key)
+        {
+            case "llm": config.LlmModel = val; break;
+            case "whisper": config.WhisperModel = val; break;
+            case "style":
+                if (val == "professional" || val == "concise" || val == "casual") { config.StyleMode = val; }
+                else { AnsiConsole.MarkupLine("[red]Invalid style.[/] Use: professional, concise, casual"); return; }
+                break;
+            case "language":
+            case "lang": config.WhisperLanguage = val; break;
+            case "backend": config.TypingBackend = val; break;
+            case "punctuation":
+                if (bool.TryParse(val, out var p)) { config.ModulePunctuation = p; }
+                else { AnsiConsole.MarkupLine("[red]Invalid value. Use true or false.[/]"); return; }
+                break;
+            case "tech":
+                if (bool.TryParse(val, out var t)) { config.ModuleTechnicalSanitization = t; }
+                else { AnsiConsole.MarkupLine("[red]Invalid value. Use true or false.[/]"); return; }
+                break;
+            case "bullets":
+                if (bool.TryParse(val, out var b)) { config.StructureBulletPoints = b; }
+                else { AnsiConsole.MarkupLine("[red]Invalid value. Use true or false.[/]"); return; }
+                break;
+            case "paragraphs":
+                if (bool.TryParse(val, out var sp)) { config.StructureSmartParagraphing = sp; }
+                else { AnsiConsole.MarkupLine("[red]Invalid value. Use true or false.[/]"); return; }
+                break;
+            default:
+                // Escaped: '[' or ']' in user input is otherwise parsed as Spectre markup.
+                AnsiConsole.MarkupLine($"[red]Unknown config key: {Markup.Escape(key)}[/]");
+                return;
+        }
+
+        ConfigManager.SaveConfig(config);
+        AnsiConsole.MarkupLine($"[green]Successfully[/] set {key} to [blue]{Markup.Escape(val)}[/].");
+    }
+}
diff --git a/Commands/DiscardCommand.cs b/Commands/DiscardCommand.cs
new file mode 100644
index 0000000..da6d181
--- /dev/null
+++ b/Commands/DiscardCommand.cs
@@ -0,0 +1,43 @@
+using System.IO;
+using System.Threading.Tasks;
+using Spectre.Console;
+using Toak.Audio;
+using Toak.Core;
+using Toak.IO;
+
+namespace Toak.Commands;
+
+/// <summary>
+/// Handles <c>toak discard</c>: stops an active recording and deletes its audio without transcribing.
+/// </summary>
+public static class DiscardCommand
+{
+    /// <summary>
+    /// Stops the recorder and deletes the WAV file when a recording is active; otherwise only reports that there is none.
+    /// </summary>
+    /// <param name="pipeToStdout">When true, console messages are suppressed.</param>
+    /// <param name="verbose">Assigned to <c>Logger.Verbose</c>.</param>
+    public static async Task ExecuteAsync(bool pipeToStdout, bool verbose)
+    {
+        Logger.Verbose = verbose;
+
+        if (StateTracker.IsRecording())
+        {
+            AudioRecorder.StopRecording();
+            var wavPath = AudioRecorder.GetWavPath();
+            if (File.Exists(wavPath)) File.Delete(wavPath);
+            Notifications.Notify("Toak", "Recording discarded");
+            if (!pipeToStdout)
+            {
+                AnsiConsole.MarkupLine("[yellow]Recording discarded.[/]");
+            }
+        }
+        else
+        {
+            if (!pipeToStdout)
+            {
+                AnsiConsole.MarkupLine("[grey]No active recording to discard.[/]");
+            }
+        }
+    }
+}
diff --git a/Commands/LatencyTestCommand.cs b/Commands/LatencyTestCommand.cs
new file mode 100644
index 0000000..bd19c29
--- /dev/null
+++ b/Commands/LatencyTestCommand.cs
@@ -0,0 +1,122 @@
+using System;
+using System.Diagnostics;
+using System.IO;
+using System.Threading.Tasks;
+using Spectre.Console;
+using Toak.Api;
+using Toak.Configuration;
+using Toak.Core;
+
+namespace Toak.Commands;
+
+/// <summary>
+/// Handles <c>toak latency-test</c>: times one transcription request and one text-refinement request.
+/// </summary>
+public static class LatencyTestCommand
+{
+    /// <summary>
+    /// Generates a one-second silent WAV with ffmpeg, sends it through the STT call and a fixed
+    /// sentence through the LLM call, then prints both latencies and their sum against a 1.5 s target.
+    /// </summary>
+    /// <param name="verbose">Assigned to <c>Logger.Verbose</c>.</param>
+    public static async Task ExecuteAsync(bool verbose)
+    {
+        Logger.Verbose = verbose;
+        var config = ConfigManager.LoadConfig();
+        if (string.IsNullOrWhiteSpace(config.GroqApiKey))
+        {
+            AnsiConsole.MarkupLine("[red]Groq API Key is not configured.[/] Run 'toak onboard'.");
+            return;
+        }
+
+        AnsiConsole.MarkupLine("Generating 1-second silent audio file for testing...");
+        var testWavPath = Path.Combine(Path.GetTempPath(), "toak_latency_test.wav");
+
+        var pInfo = new ProcessStartInfo
+        {
+            FileName = "ffmpeg",
+            // The path is quoted so a temp directory containing spaces stays a single argument.
+            Arguments = $"-f lavfi -i anullsrc=r=44100:cl=mono -t 1 -y \"{testWavPath}\"",
+            UseShellExecute = false,
+            CreateNoWindow = true,
+            RedirectStandardError = true,
+            RedirectStandardOutput = true
+        };
+
+        try
+        {
+            using var proc = Process.Start(pInfo);
+            if (proc != null)
+            {
+                // Drain both redirected pipes while waiting so ffmpeg cannot stall on a full pipe buffer.
+                var stdoutTask = proc.StandardOutput.ReadToEndAsync();
+                var stderrTask = proc.StandardError.ReadToEndAsync();
+                await proc.WaitForExitAsync();
+                await Task.WhenAll(stdoutTask, stderrTask);
+            }
+        }
+        catch (System.ComponentModel.Win32Exception)
+        {
+            // Thrown by Process.Start when the ffmpeg executable cannot be launched (for example, not installed).
+            AnsiConsole.MarkupLine("[red]Failed to generate test audio file using ffmpeg.[/]");
+            return;
+        }
+
+        if (!File.Exists(testWavPath))
+        {
+            AnsiConsole.MarkupLine("[red]Failed to generate test audio file using ffmpeg.[/]");
+            return;
+        }
+
+        var groq = new GroqApiClient(config.GroqApiKey);
+
+        try
+        {
+            await AnsiConsole.Status()
+                .StartAsync("Running latency test...", async ctx =>
+                {
+                    ctx.Status("Testing STT (Whisper)...");
+                    var sttWatch = Stopwatch.StartNew();
+                    await groq.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel);
+                    sttWatch.Stop();
+
+                    ctx.Status("Testing LLM (Llama)...");
+                    var systemPrompt = PromptBuilder.BuildPrompt(config);
+                    var llmWatch = Stopwatch.StartNew();
+                    await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel);
+                    llmWatch.Stop();
+
+                    var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds;
+
+                    AnsiConsole.WriteLine();
+                    var table = new Table();
+                    table.AddColumn("Operation");
+                    table.AddColumn("Latency (ms)");
+
+                    table.AddRow("STT", sttWatch.ElapsedMilliseconds.ToString());
+                    table.AddRow("LLM", llmWatch.ElapsedMilliseconds.ToString());
+                    table.AddRow("[bold]Total[/]", $"[bold]{total}ms[/]");
+
+                    AnsiConsole.Write(table);
+
+                    if (total < 1500)
+                    {
+                        AnsiConsole.MarkupLine($"[green]Status: OK (under 1.5s target). Total time: {(total / 1000.0):0.0}s.[/]");
+                    }
+                    else
+                    {
+                        AnsiConsole.MarkupLine($"[yellow]Status: SLOW (over 1.5s target). Total time: {(total / 1000.0):0.0}s.[/]");
+                    }
+                });
+        }
+        catch (Exception ex)
+        {
+            // Escaped: brackets in an exception message are otherwise parsed as Spectre markup.
+            AnsiConsole.MarkupLine($"[red]Error during test: {Markup.Escape(ex.Message)}[/]");
+        }
+        finally
+        {
+            if (File.Exists(testWavPath)) File.Delete(testWavPath);
+        }
+    }
+}
diff --git a/Commands/OnboardCommand.cs b/Commands/OnboardCommand.cs
new file mode 100644
index 0000000..985811f
--- /dev/null
+++ b/Commands/OnboardCommand.cs
@@ -0,0 +1,81 @@
+using System;
+using System.Linq;
+using System.Threading.Tasks;
+using Spectre.Console;
+using Toak.Configuration;
+using Toak.Core.Skills;
+
+namespace Toak.Commands;
+
+/// <summary>
+/// Handles <c>toak onboard</c>: an interactive wizard that fills in and saves the configuration.
+/// </summary>
+public static class OnboardCommand
+{
+    /// <summary>
+    /// Prompts for the API key, models, spoken language, typing backend and active skills, then saves the result.
+    /// </summary>
+    /// <param name="verbose">Assigned to <c>Logger.Verbose</c>.</param>
+    public static async Task ExecuteAsync(bool verbose)
+    {
+        Toak.Core.Logger.Verbose = verbose;
+        var config = ConfigManager.LoadConfig();
+
+        AnsiConsole.Write(new FigletText("Toak").Color(Color.Green));
+        AnsiConsole.MarkupLine("[grey]Welcome to the Toak configuration wizard.[/]");
+        AnsiConsole.WriteLine();
+
+        config.GroqApiKey = AnsiConsole.Prompt(
+            new TextPrompt<string>("Groq API Key:")
+                .DefaultValue(string.IsNullOrWhiteSpace(config.GroqApiKey) ? "" : config.GroqApiKey)
+                .AllowEmpty());
+
+        config.LlmModel = AnsiConsole.Prompt(
+            new SelectionPrompt<string>()
+                .Title("Select [green]LLM Model[/]:")
+                .AddChoices(new[] { "openai/gpt-oss-20b", "llama-3.1-8b-instant" })
+                .UseConverter(c => c == "openai/gpt-oss-20b" ? "openai/gpt-oss-20b (Fastest)" : "llama-3.1-8b-instant (Cheapest)"));
+
+        if (config.LlmModel.Contains(" ")) config.LlmModel = config.LlmModel.Split(' ')[0];
+
+        config.WhisperModel = AnsiConsole.Prompt(
+            new SelectionPrompt<string>()
+                .Title("Select [green]Whisper Model[/]:")
+                .AddChoices(new[] { "whisper-large-v3", "whisper-large-v3-turbo" })
+                .UseConverter(c => c == "whisper-large-v3" ? "whisper-large-v3 (Accurate)" : "whisper-large-v3-turbo (Fast)"));
+
+        config.WhisperLanguage = AnsiConsole.Prompt(
+            new TextPrompt<string>("Microphone Spoken Language (e.g. en, es, zh):")
+                .DefaultValue(string.IsNullOrWhiteSpace(config.WhisperLanguage) ? "en" : config.WhisperLanguage)
+                .AllowEmpty()
+                .Validate(lang =>
+                {
+                    if (string.IsNullOrWhiteSpace(lang)) return ValidationResult.Success();
+                    if (lang.Contains(",") || lang.Contains(" "))
+                        return ValidationResult.Error("[red]Please provide only one language code (e.g., 'en' not 'en, es')[/]");
+
+                    return ValidationResult.Success();
+                }));
+
+        config.TypingBackend = AnsiConsole.Prompt(
+            new SelectionPrompt<string>()
+                .Title("Select [green]Typing Backend[/]:")
+                .AddChoices(new[] { "wtype", "xdotool" }));
+
+        var availableSkills = SkillRegistry.AllSkills.Select(s => s.Name).ToList();
+
+        if (availableSkills.Any())
+        {
+            config.ActiveSkills = AnsiConsole.Prompt(
+                new MultiSelectionPrompt<string>()
+                    .Title("Select [green]Active Skills[/]:")
+                    .NotRequired()
+                    .InstructionsText("[grey](Press [blue]<space>[/] to toggle a skill, [green]<enter>[/] to accept)[/]")
+                    .AddChoices(availableSkills));
+        }
+
+        ConfigManager.SaveConfig(config);
+
+        AnsiConsole.MarkupLine("\n[bold green]Configuration saved successfully![/]");
+    }
+}
diff --git a/Commands/ShowCommand.cs b/Commands/ShowCommand.cs
new file mode 100644
index 0000000..2621372
--- /dev/null
+++ b/Commands/ShowCommand.cs
@@ -0,0 +1,43 @@
+using System.Threading.Tasks;
+using Spectre.Console;
+using Toak.Configuration;
+
+namespace Toak.Commands;
+
+/// <summary>
+/// Handles <c>toak show</c>: prints the current configuration as a table.
+/// </summary>
+public static class ShowCommand
+{
+    /// <summary>
+    /// Loads the configuration and writes one table row per setting. The API key itself is never
+    /// printed, only whether it is set. Stored values are escaped because table cells are parsed as markup.
+    /// </summary>
+    /// <param name="verbose">Assigned to <c>Logger.Verbose</c>.</param>
+    public static async Task ExecuteAsync(bool verbose)
+    {
+        Toak.Core.Logger.Verbose = verbose;
+        var config = ConfigManager.LoadConfig();
+
+        // Escapes '[' and ']' so a stored value is shown literally; null is shown as empty text.
+        static string Safe(string? value) => Markup.Escape(value ?? string.Empty);
+
+        var table = new Table();
+        table.AddColumn("Setting");
+        table.AddColumn("Value");
+
+        table.AddRow("Groq API Key", string.IsNullOrEmpty(config.GroqApiKey) ? "[red]Not Set[/]" : "[green]Set[/]");
+        table.AddRow("LLM Model", $"[blue]{Safe(config.LlmModel)}[/]");
+        table.AddRow("Whisper Model", $"[blue]{Safe(config.WhisperModel)}[/]");
+        table.AddRow("Spoken Language", $"[yellow]{(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : Safe(config.WhisperLanguage))}[/]");
+        table.AddRow("Typing Backend", Safe(config.TypingBackend));
+        table.AddRow("Active Skills", Safe(string.Join(", ", config.ActiveSkills)));
+        table.AddRow("Style Mode", Safe(config.StyleMode));
+        table.AddRow("Punctuation Module", config.ModulePunctuation.ToString());
+        table.AddRow("Technical Sanitization", config.ModuleTechnicalSanitization.ToString());
+        table.AddRow("Bullet Points", config.StructureBulletPoints.ToString());
+        table.AddRow("Smart Paragraphing", config.StructureSmartParagraphing.ToString());
+
+        AnsiConsole.Write(table);
+    }
+}
diff --git a/Commands/ToggleCommand.cs b/Commands/ToggleCommand.cs
new file mode 100644
index 0000000..886c701
--- /dev/null
+++ b/Commands/ToggleCommand.cs
@@ -0,0 +1,172 @@
+using System;
+using System.Diagnostics;
+using System.IO;
+using System.Threading.Tasks;
+using Spectre.Console;
+using Toak.Audio;
+using Toak.Configuration;
+using Toak.Api;
+using Toak.Core;
+using Toak.IO;
+
+namespace Toak.Commands;
+
+/// <summary>
+/// Handles <c>toak toggle</c>: starts a recording, or stops the active one and processes it.
+/// </summary>
+public static class ToggleCommand
+{
+    /// <summary>
+    /// When no recording is active, plays the start sound and begins recording. Otherwise stops the
+    /// recording, transcribes it, refines the transcript with the LLM and delivers the result: to a
+    /// detected execution skill, to stdout, to the clipboard, or streamed into the typing backend.
+    /// </summary>
+    /// <param name="pipeToStdout">Write the refined text to stdout and suppress console messages and most notifications.</param>
+    /// <param name="copyToClipboard">Copy the refined text to the clipboard instead of typing it.</param>
+    /// <param name="verbose">Assigned to <c>Logger.Verbose</c>.</param>
+    public static async Task ExecuteAsync(bool pipeToStdout, bool copyToClipboard, bool verbose)
+    {
+        Logger.Verbose = verbose;
+
+        if (StateTracker.IsRecording())
+        {
+            var config = ConfigManager.LoadConfig();
+            Notifications.PlaySound(config.StopSoundPath);
+
+            if (!pipeToStdout) AnsiConsole.MarkupLine("[yellow]Stopping recording and transcribing...[/]");
+            if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing...");
+
+            AudioRecorder.StopRecording();
+
+            Logger.LogDebug($"Loaded configuration: LLM={config.LlmModel}, Whisper={config.WhisperModel}, Typing={config.TypingBackend}");
+
+            if (string.IsNullOrWhiteSpace(config.GroqApiKey))
+            {
+                Notifications.Notify("Toak Error", "Groq API Key is not configured. Run 'toak onboard'.");
+                // Kept off stdout in pipe mode so a consumer of the piped text never receives this message.
+                if (!pipeToStdout) AnsiConsole.MarkupLine("[red]Groq API Key is not configured.[/] Run 'toak onboard'.");
+                return;
+            }
+
+            var groq = new GroqApiClient(config.GroqApiKey);
+            var wavPath = AudioRecorder.GetWavPath();
+
+            if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0)
+            {
+                if (!pipeToStdout) Notifications.Notify("Toak", "No audio recorded.");
+                return;
+            }
+
+            try
+            {
+                var stopWatch = Stopwatch.StartNew();
+
+                // 1. STT
+                Logger.LogDebug($"Starting STT transcription via Whisper for {wavPath}...");
+
+                string transcript = string.Empty;
+
+                if (!pipeToStdout)
+                {
+                    await AnsiConsole.Status().StartAsync("Transcribing...", async ctx => {
+                        transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
+                    });
+                }
+                else
+                {
+                    transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
+                }
+
+                Logger.LogDebug($"Raw transcript received: '{transcript}'");
+
+                if (string.IsNullOrWhiteSpace(transcript))
+                {
+                    if (!pipeToStdout) Notifications.Notify("Toak", "No speech detected.");
+                    return;
+                }
+
+                // 2. LLM Refinement
+                var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
+                string systemPrompt;
+                if (detectedSkill != null)
+                {
+                    Logger.LogDebug($"Skill detected: {detectedSkill.Name}");
+                    if (!pipeToStdout) Notifications.Notify("Toak Skill Detected", detectedSkill.Name);
+                    systemPrompt = detectedSkill.GetSystemPrompt(transcript);
+                }
+                else
+                {
+                    systemPrompt = PromptBuilder.BuildPrompt(config);
+                }
+
+                bool isExecutionSkill = detectedSkill != null && detectedSkill.HandlesExecution;
+
+                // 3. Output
+                if (isExecutionSkill || pipeToStdout || copyToClipboard)
+                {
+                    Logger.LogDebug("Starting LLM text refinement (synchronous)...");
+
+                    string finalText = string.Empty;
+                    if (!pipeToStdout) {
+                        await AnsiConsole.Status().StartAsync("Refining text...", async ctx => {
+                            finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
+                        });
+                    } else {
+                        finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
+                    }
+
+                    Logger.LogDebug($"Refined text received: '{finalText}'");
+                    if (string.IsNullOrWhiteSpace(finalText))
+                    {
+                        if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
+                        return;
+                    }
+
+                    if (isExecutionSkill)
+                    {
+                        detectedSkill!.Execute(finalText);
+                        stopWatch.Stop();
+                        Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
+                    }
+                    else if (pipeToStdout)
+                    {
+                        Console.WriteLine(finalText);
+                    }
+                    else
+                    {
+                        ClipboardManager.Copy(finalText);
+                        stopWatch.Stop();
+                        Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
+                    }
+                }
+                else
+                {
+                    Logger.LogDebug("Starting LLM text refinement (streaming)...");
+                    var tokenStream = groq.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);
+                    Logger.LogDebug("Starting to inject text...");
+                    await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
+                    stopWatch.Stop();
+                    Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
+                }
+            }
+            catch (Exception ex)
+            {
+                if (!pipeToStdout) Notifications.Notify("Toak Error", ex.Message);
+                // Escaped: brackets in an exception message are otherwise parsed as Spectre markup.
+                if (!pipeToStdout) AnsiConsole.MarkupLine($"[red]Error:[/] {Markup.Escape(ex.Message)}");
+            }
+            finally
+            {
+                if (File.Exists(wavPath)) File.Delete(wavPath);
+            }
+        }
+        else
+        {
+            // Start recording
+            if (!pipeToStdout) AnsiConsole.MarkupLine("[green]Starting recording...[/]");
+            var config = ConfigManager.LoadConfig();
+            Notifications.PlaySound(config.StartSoundPath);
+            AudioRecorder.StartRecording();
+        }
+    }
+}
diff --git
a/Program.cs b/Program.cs index 8dd017c..64bff35 100644 --- a/Program.cs +++ b/Program.cs @@ -1,383 +1,62 @@ -using System.Diagnostics; -using Toak.Audio; -using Toak.Configuration; -using Toak.Api; -using Toak.Core; -using Toak.IO; +using System.CommandLine; +using System.Threading.Tasks; +using Toak.Commands; -bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p"); -bool copyToClipboard = args.Contains("--copy"); -bool verbose = args.Contains("-v") || args.Contains("--verbose"); +namespace Toak; -Logger.Verbose = verbose; - - -string command = ""; -if (args.Length > 0 && !args[0].StartsWith("-")) +public class Program { - command = args[0]; -} - -if (args.Contains("-h") || args.Contains("--help") || (string.IsNullOrEmpty(command) && args.Length == 0)) -{ - Console.WriteLine("Toak: High-speed Linux Dictation"); - Console.WriteLine("Usage:"); - Console.WriteLine(" toak toggle - Starts or stops the recording"); - Console.WriteLine(" toak discard - Abort current recording without transcribing"); - Console.WriteLine(" toak onboard - Configure the application"); - Console.WriteLine(" toak latency-test - Benchmark full pipeline without recording"); - Console.WriteLine(" toak config - Update a specific configuration setting"); - Console.WriteLine(" toak show - Show current configuration"); - Console.WriteLine("Flags:"); - Console.WriteLine(" -h, --help - Show this help message"); - Console.WriteLine(" -p, --pipe - Output transcription to stdout instead of typing"); - Console.WriteLine(" --copy - Copy to clipboard instead of typing"); - Console.WriteLine(" -v, --verbose - Enable detailed debug logging"); - return; -} - -if (string.IsNullOrEmpty(command)) -{ - Console.WriteLine("Error: Please specify a command (e.g. 'toggle'). 
Use 'toak --help' for usage."); - return; -} -if (command == "onboard") -{ - var config = ConfigManager.LoadConfig(); - Console.Write($"Groq API Key [{config.GroqApiKey}]: "); - var key = Console.ReadLine(); - if (!string.IsNullOrWhiteSpace(key)) config.GroqApiKey = key; - - Console.WriteLine(); - Console.WriteLine("LLM Model:"); - Console.WriteLine(" 1) openai/gpt-oss-20b -- fastest"); - Console.WriteLine(" 2) llama-3.1-8b-instant -- cheapest, but dumb"); - Console.Write($"Select 1 or 2 [{config.LlmModel}]: "); - var llmSelection = Console.ReadLine()?.Trim(); - if (llmSelection == "1" || llmSelection == "openai/gpt-oss-20b") config.LlmModel = "openai/gpt-oss-20b"; - else if (llmSelection == "2" || llmSelection == "llama-3.1-8b-instant") config.LlmModel = "llama-3.1-8b-instant"; - - Console.WriteLine(); - Console.WriteLine("Whisper Model:"); - Console.WriteLine(" 1) whisper-large-v3 -- large model, very accurate"); - Console.WriteLine(" 2) whisper-large-v3-turbo -- very fast, a bit less accurate"); - Console.Write($"Select 1 or 2 [{config.WhisperModel}]: "); - var whisperSelection = Console.ReadLine()?.Trim(); - if (whisperSelection == "1" || whisperSelection == "whisper-large-v3") config.WhisperModel = "whisper-large-v3"; - else if (whisperSelection == "2" || whisperSelection == "whisper-large-v3-turbo") config.WhisperModel = "whisper-large-v3-turbo"; - - Console.WriteLine(); - Console.Write($"Microphone Spoken Language (e.g. 
en, es, zh) [{config.WhisperLanguage}]: "); - var lang = Console.ReadLine(); - if (!string.IsNullOrWhiteSpace(lang)) config.WhisperLanguage = lang.ToLowerInvariant(); - - Console.Write($"Typing Backend (xdotool or wtype) [{config.TypingBackend}]: "); - var backend = Console.ReadLine(); - if (!string.IsNullOrWhiteSpace(backend)) config.TypingBackend = backend.ToLowerInvariant(); - - Console.WriteLine(); - var availableSkills = Toak.Core.Skills.SkillRegistry.AllSkills.Select(s => s.Name); - Console.WriteLine($"Active Skills (comma separated) [{string.Join(", ", config.ActiveSkills)}]:"); - Console.WriteLine($" Available: {string.Join(", ", availableSkills)}"); - Console.Write("Selection: "); - var skillsInput = Console.ReadLine(); - if (!string.IsNullOrWhiteSpace(skillsInput)) + /// <summary> + /// Registers the toggle, discard, onboard, latency-test, show and config commands and invokes the one selected by <paramref name="args"/>. + /// </summary> + /// <returns>The value returned by <c>rootCommand.InvokeAsync</c>.</returns> + public static async Task<int> Main(string[] args) { - config.ActiveSkills = skillsInput.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList(); - } + var rootCommand = new RootCommand("Toak: High-speed Linux Dictation"); - ConfigManager.SaveConfig(config); - Console.WriteLine("Configuration saved."); - return; -} + var pipeOption = new Option<bool>(new[] { "--pipe", "-p" }, "Output transcription to stdout instead of typing"); + var copyOption = new Option<bool>("--copy", "Copy to clipboard instead of typing"); + var verboseOption = new Option<bool>(new[] { "--verbose", "-v" }, "Enable detailed debug logging"); -if (command == "show") -{ - var config = ConfigManager.LoadConfig(); - Console.WriteLine("Current Configuration:"); - Console.WriteLine($" Groq API Key: {(string.IsNullOrEmpty(config.GroqApiKey) ? 
"Not Set" : "Set")}"); - Console.WriteLine($" LLM Model: {config.LlmModel}"); - Console.WriteLine($" Whisper Model: {config.WhisperModel}"); - Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? 
"Auto" : config.WhisperLanguage)}"); - Console.WriteLine($" Typing Backend: {config.TypingBackend}"); - Console.WriteLine($" Active Skills: {string.Join(", ", config.ActiveSkills)}"); - Console.WriteLine($" Style Mode: {config.StyleMode}"); - Console.WriteLine($" Punctuation Module: {config.ModulePunctuation}"); - Console.WriteLine($" Technical Sanitization: {config.ModuleTechnicalSanitization}"); - Console.WriteLine($" Bullet Points: {config.StructureBulletPoints}"); - Console.WriteLine($" Smart Paragraphing: {config.StructureSmartParagraphing}"); - return; -} + rootCommand.AddGlobalOption(verboseOption); -if (command == "config") -{ - var argsNoFlags = args.Where(a => !a.StartsWith("--")).ToArray(); - if (argsNoFlags.Length < 3) - { - Console.WriteLine("Usage: toak config "); - Console.WriteLine("Keys: llm, whisper, style, language, backend, punctuation, tech, bullets, paragraphs"); - return; - } - - var key = argsNoFlags[1].ToLowerInvariant(); - var val = argsNoFlags[2].ToLowerInvariant(); - var config = ConfigManager.LoadConfig(); - - switch (key) - { - case "llm": - config.LlmModel = val; - Console.WriteLine($"LLM Model set to {val}"); - break; - case "whisper": - config.WhisperModel = val; - Console.WriteLine($"Whisper Model set to {val}"); - break; - case "style": - if (val == "professional" || val == "concise" || val == "casual") { - config.StyleMode = val; - Console.WriteLine($"StyleMode set to {val}"); - } else { - Console.WriteLine("Invalid style. Use: professional, concise, casual"); - } - break; - case "language": - case "lang": - config.WhisperLanguage = val; - Console.WriteLine($"Spoken Language set to {val}"); - break; - case "backend": - config.TypingBackend = val; - Console.WriteLine($"TypingBackend set to {val}"); - break; - case "punctuation": - if (bool.TryParse(val, out var p)) { config.ModulePunctuation = p; Console.WriteLine($"Punctuation set to {p}"); } - else Console.WriteLine("Invalid value. 
Use true or false."); - break; - case "tech": - if (bool.TryParse(val, out var t)) { config.ModuleTechnicalSanitization = t; Console.WriteLine($"TechnicalSanitization set to {t}"); } - else Console.WriteLine("Invalid value. Use true or false."); - break; - case "bullets": - if (bool.TryParse(val, out var b)) { config.StructureBulletPoints = b; Console.WriteLine($"BulletPoints set to {b}"); } - else Console.WriteLine("Invalid value. Use true or false."); - break; - case "paragraphs": - if (bool.TryParse(val, out var sp)) { config.StructureSmartParagraphing = sp; Console.WriteLine($"SmartParagraphing set to {sp}"); } - else Console.WriteLine("Invalid value. Use true or false."); - break; - default: - Console.WriteLine($"Unknown config key: {key}"); - return; - } - ConfigManager.SaveConfig(config); - return; -} + // Toggle Command + var toggleCmd = new Command("toggle", "Starts or stops the recording"); + toggleCmd.AddOption(pipeOption); + toggleCmd.AddOption(copyOption); + toggleCmd.SetHandler(ToggleCommand.ExecuteAsync, pipeOption, copyOption, verboseOption); + rootCommand.AddCommand(toggleCmd); -if (command == "discard") -{ - if (StateTracker.IsRecording()) - { - AudioRecorder.StopRecording(); - var wavPath = AudioRecorder.GetWavPath(); - if (File.Exists(wavPath)) File.Delete(wavPath); - Notifications.Notify("Toak", "Recording discarded"); - if (!pipeToStdout) Console.WriteLine("Recording discarded."); - } - else - { - if (!pipeToStdout) Console.WriteLine("No active recording to discard."); - } - return; -} + // Discard Command + var discardCmd = new Command("discard", "Abort current recording without transcribing"); + discardCmd.AddOption(pipeOption); + discardCmd.SetHandler(DiscardCommand.ExecuteAsync, pipeOption, verboseOption); + rootCommand.AddCommand(discardCmd); -if (command == "latency-test") -{ - var config = ConfigManager.LoadConfig(); - if (string.IsNullOrWhiteSpace(config.GroqApiKey)) - { - Console.WriteLine("Groq API Key is not configured. 
Run 'toak onboard'."); - return; - } + // Onboard Command + var onboardCmd = new Command("onboard", "Configure the application"); + onboardCmd.SetHandler(OnboardCommand.ExecuteAsync, verboseOption); + rootCommand.AddCommand(onboardCmd); - Console.WriteLine("Generating 1-second silent audio file for testing..."); - var testWavPath = Path.Combine(Path.GetTempPath(), "toak_latency_test.wav"); - - var pInfo = new ProcessStartInfo - { - FileName = "ffmpeg", - Arguments = $"-f lavfi -i anullsrc=r=44100:cl=mono -t 1 -y {testWavPath}", - UseShellExecute = false, - CreateNoWindow = true, - RedirectStandardError = true, - RedirectStandardOutput = true - }; - var proc = Process.Start(pInfo); - proc?.WaitForExit(); + // Latency Test Command + var latencyCmd = new Command("latency-test", "Benchmark full pipeline without recording"); + latencyCmd.SetHandler(LatencyTestCommand.ExecuteAsync, verboseOption); + rootCommand.AddCommand(latencyCmd); - if (!File.Exists(testWavPath)) - { - Console.WriteLine("Failed to generate test audio file using ffmpeg."); - return; - } + // Show Command + var showCmd = new Command("show", "Show current configuration"); + showCmd.SetHandler(ShowCommand.ExecuteAsync, verboseOption); + rootCommand.AddCommand(showCmd); - var groq = new GroqApiClient(config.GroqApiKey); - - try - { - Console.WriteLine("Testing STT (Whisper)..."); - var sttWatch = Stopwatch.StartNew(); - var transcript = await groq.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel); - sttWatch.Stop(); - - Console.WriteLine("Testing LLM (Llama)..."); - var systemPrompt = PromptBuilder.BuildPrompt(config); - var llmWatch = Stopwatch.StartNew(); - var refinedText = await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel); - llmWatch.Stop(); + // Config Command + var configCmd = new Command("config", "Update a specific configuration setting"); + var keyArg = new Argument<string>("key", "Configuration key (e.g., llm, whisper, lang)"); + 
var valArg = new Argument<string>("value", "Configuration value"); + configCmd.AddArgument(keyArg); + configCmd.AddArgument(valArg); + configCmd.SetHandler(ConfigUpdaterCommand.ExecuteAsync, keyArg, valArg, verboseOption); + rootCommand.AddCommand(configCmd); - var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds; - - Console.WriteLine(); - Console.WriteLine($"STT latency: {sttWatch.ElapsedMilliseconds}ms"); - Console.WriteLine($"LLM latency: {llmWatch.ElapsedMilliseconds}ms"); - Console.WriteLine($"Total: {(total / 1000.0):0.0}s ({total}ms)"); - Console.WriteLine($"Status: {(total < 1500 ? "OK (under 1.5s target)" : "SLOW (over 1.5s target)")}"); - } - catch (Exception ex) - { - Console.WriteLine($"Error during test: {ex.Message}"); - } - finally - { - if (File.Exists(testWavPath)) File.Delete(testWavPath); - } - - return; -} - -if (command == "toggle") -{ - if (StateTracker.IsRecording()) - { - var config = ConfigManager.LoadConfig(); - Notifications.PlaySound(config.StopSoundPath); - - if (!pipeToStdout) Console.WriteLine("Stopping recording and transcribing..."); - if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing..."); - - AudioRecorder.StopRecording(); - - Logger.LogDebug($"Loaded configuration: LLM={config.LlmModel}, Whisper={config.WhisperModel}, Typing={config.TypingBackend}"); - - if (string.IsNullOrWhiteSpace(config.GroqApiKey)) - { - Notifications.Notify("Toak Error", "Groq API Key is not configured. Run 'toak onboard'."); - return; - } - - var groq = new GroqApiClient(config.GroqApiKey); - var wavPath = AudioRecorder.GetWavPath(); - - if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0) - { - if (!pipeToStdout) Notifications.Notify("Toak", "No audio recorded."); - return; - } - - try - { - var stopWatch = Stopwatch.StartNew(); - - // 1. 
STT - Logger.LogDebug($"Starting STT transcription via Whisper for {wavPath}..."); - var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel); - Logger.LogDebug($"Raw transcript received: '{transcript}'"); - - if (string.IsNullOrWhiteSpace(transcript)) - { - if (!pipeToStdout) Notifications.Notify("Toak", "No speech detected."); - return; - } - - // 2. LLM Refinement - var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills); - string systemPrompt; - if (detectedSkill != null) - { - Logger.LogDebug($"Skill detected: {detectedSkill.Name}"); - if (!pipeToStdout) Notifications.Notify("Toak Skill Detected", detectedSkill.Name); - systemPrompt = detectedSkill.GetSystemPrompt(transcript); - } - else - { - systemPrompt = PromptBuilder.BuildPrompt(config); - } - - // 3. Output - if (detectedSkill != null && detectedSkill.HandlesExecution) - { - Logger.LogDebug($"Executing skill synchronously: {detectedSkill.Name}"); - string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel); - Logger.LogDebug($"Skill refined text: '{finalText}'"); - if (string.IsNullOrWhiteSpace(finalText)) - { - if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio."); - return; - } - - detectedSkill.Execute(finalText); - stopWatch.Stop(); - Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms"); - } - else if (pipeToStdout || copyToClipboard) - { - Logger.LogDebug("Starting LLM text refinement (synchronous)..."); - string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel); - Logger.LogDebug($"Refined text received: '{finalText}'"); - if (string.IsNullOrWhiteSpace(finalText)) - { - if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio."); - return; - } - - if (pipeToStdout) - { - Console.WriteLine(finalText); - } - else - { - ClipboardManager.Copy(finalText); - stopWatch.Stop(); - 
Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms"); - } - } - else - { - Logger.LogDebug("Starting LLM text refinement (streaming)..."); - var tokenStream = groq.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel); - Logger.LogDebug("Starting to inject text..."); - await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend); - stopWatch.Stop(); - Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms"); - } - } - catch (Exception ex) - { - if (!pipeToStdout) Notifications.Notify("Toak Error", ex.Message); - if (!pipeToStdout) Console.WriteLine(ex.ToString()); - } - finally - { - if (File.Exists(wavPath)) File.Delete(wavPath); - } - } - else - { - // Start recording - if (!pipeToStdout) Console.WriteLine("Starting recording..."); - var config = ConfigManager.LoadConfig(); - Notifications.PlaySound(config.StartSoundPath); - AudioRecorder.StartRecording(); + return await rootCommand.InvokeAsync(args); } } diff --git a/Toak.csproj b/Toak.csproj index 1b7c082..c806a24 100644 --- a/Toak.csproj +++ b/Toak.csproj @@ -12,4 +12,9 @@ + + + + +