using System.Diagnostics;
using Toak.Api;
using Toak.Audio;
using Toak.Configuration;
using Toak.Core;
using Toak.IO;

// Toak command-line entry point (top-level statements).
//
// Parses the global flags and the command name from `args`, then dispatches to
// one local function per command:
//   toggle        - start recording, or stop + transcribe + refine + output
//   discard       - stop the current recording and delete its audio file
//   onboard       - interactive configuration prompts
//   latency-test  - time one STT call and one LLM call against a generated clip
//   config        - set a single configuration value
//   show          - print the current configuration
//
// When output is piped (explicitly or because stdout is redirected), stdout is
// reserved for the final text and status chatter is suppressed.

bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p") || Console.IsOutputRedirected;
bool copyToClipboard = args.Contains("--copy");

// The command is the first argument that is not a flag, so flags may appear
// before or after it ("toak --copy toggle" and "toak toggle --copy" both work).
string command = args.FirstOrDefault(a => !a.StartsWith('-')) ?? "";

if (args.Contains("-h") || args.Contains("--help") || args.Length == 0)
{
    PrintHelp();
    return;
}

if (string.IsNullOrEmpty(command))
{
    Console.WriteLine("Error: Please specify a command (e.g. 'toggle'). Use 'toak --help' for usage.");
    return;
}

switch (command)
{
    case "onboard":
        RunOnboard();
        break;
    case "show":
        RunShow();
        break;
    case "config":
        RunConfig();
        break;
    case "discard":
        RunDiscard();
        break;
    case "latency-test":
        await RunLatencyTestAsync();
        break;
    case "toggle":
        await RunToggleAsync();
        break;
    default:
        // Previously an unrecognised command exited silently; tell the user instead.
        Console.Error.WriteLine($"Error: Unknown command '{command}'. Use 'toak --help' for usage.");
        Environment.ExitCode = 1;
        break;
}

// Prints the usage banner.
static void PrintHelp()
{
    string[] helpLines =
    {
        "Toak: High-speed Linux Dictation",
        "Usage:",
        " toak toggle - Starts or stops the recording",
        " toak discard - Abort current recording without transcribing",
        " toak onboard - Configure the application",
        " toak latency-test - Benchmark full pipeline without recording",
        " toak config - Update a specific configuration setting",
        " toak show - Show current configuration",
        "Flags:",
        " -h, --help - Show this help message",
        " -p, --pipe - Output transcription to stdout instead of typing",
        " --copy - Copy to clipboard instead of typing",
    };

    foreach (var line in helpLines)
    {
        Console.WriteLine(line);
    }
}

// Deletes `path` if it exists; a missing file is not an error.
static void DeleteIfExists(string path)
{
    if (File.Exists(path))
    {
        File.Delete(path);
    }
}

// Interactive setup: prompts for each setting, keeps the current value when the
// user just presses Enter (or gives an unrecognised model choice), then saves.
void RunOnboard()
{
    var config = ConfigManager.LoadConfig();

    // Never echo the stored secret back to the terminal; only show whether one
    // exists, matching what the `show` command prints.
    var keyStatus = string.IsNullOrEmpty(config.GroqApiKey) ? "Not Set" : "Set";
    Console.Write($"Groq API Key [{keyStatus}]: ");
    var key = Console.ReadLine();
    if (!string.IsNullOrWhiteSpace(key))
    {
        // Trim so pasted whitespace does not end up inside the stored key.
        config.GroqApiKey = key.Trim();
    }

    Console.WriteLine();
    Console.WriteLine("LLM Model:");
    Console.WriteLine(" 1) openai/gpt-oss-20b -- fastest");
    Console.WriteLine(" 2) llama-3.1-8b-instant -- cheapest, but dumb");
    Console.Write($"Select 1 or 2 [{config.LlmModel}]: ");
    var llmSelection = Console.ReadLine()?.Trim();
    if (llmSelection is "1" or "openai/gpt-oss-20b")
    {
        config.LlmModel = "openai/gpt-oss-20b";
    }
    else if (llmSelection is "2" or "llama-3.1-8b-instant")
    {
        config.LlmModel = "llama-3.1-8b-instant";
    }

    Console.WriteLine();
    Console.WriteLine("Whisper Model:");
    Console.WriteLine(" 1) whisper-large-v3 -- large model, very accurate");
    Console.WriteLine(" 2) whisper-large-v3-turbo -- very fast, a bit less accurate");
    Console.Write($"Select 1 or 2 [{config.WhisperModel}]: ");
    var whisperSelection = Console.ReadLine()?.Trim();
    if (whisperSelection is "1" or "whisper-large-v3")
    {
        config.WhisperModel = "whisper-large-v3";
    }
    else if (whisperSelection is "2" or "whisper-large-v3-turbo")
    {
        config.WhisperModel = "whisper-large-v3-turbo";
    }

    Console.WriteLine();
    Console.Write($"Microphone Spoken Language (e.g. en, es, zh) [{config.WhisperLanguage}]: ");
    var lang = Console.ReadLine();
    if (!string.IsNullOrWhiteSpace(lang))
    {
        config.WhisperLanguage = lang.Trim().ToLowerInvariant();
    }

    Console.Write($"Typing Backend (xdotool or wtype) [{config.TypingBackend}]: ");
    var backend = Console.ReadLine();
    if (!string.IsNullOrWhiteSpace(backend))
    {
        config.TypingBackend = backend.Trim().ToLowerInvariant();
    }

    ConfigManager.SaveConfig(config);
    Console.WriteLine("Configuration saved.");
}

// Prints the current configuration; the API key is reported only as Set / Not Set.
void RunShow()
{
    var config = ConfigManager.LoadConfig();
    Console.WriteLine("Current Configuration:");
    Console.WriteLine($" Groq API Key: {(string.IsNullOrEmpty(config.GroqApiKey) ? "Not Set" : "Set")}");
    Console.WriteLine($" LLM Model: {config.LlmModel}");
    Console.WriteLine($" Whisper Model: {config.WhisperModel}");
    Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage)}");
    Console.WriteLine($" Typing Backend: {config.TypingBackend}");
    Console.WriteLine($" Style Mode: {config.StyleMode}");
    Console.WriteLine($" Punctuation Module: {config.ModulePunctuation}");
    Console.WriteLine($" Technical Sanitization: {config.ModuleTechnicalSanitization}");
    Console.WriteLine($" Bullet Points: {config.StructureBulletPoints}");
    Console.WriteLine($" Smart Paragraphing: {config.StructureSmartParagraphing}");
}

// `toak config <key> <value>`: updates one setting and saves the configuration.
// Both key and value are lower-cased before use.
void RunConfig()
{
    // Drop every flag ("-p" as well as "--pipe") so a flag can never be mistaken
    // for the key or the value; this mirrors how the command name is detected.
    var positional = args.Where(a => !a.StartsWith('-')).ToArray();
    if (positional.Length < 3)
    {
        Console.WriteLine("Usage: toak config <key> <value>");
        Console.WriteLine("Keys: llm, whisper, style, language, backend, punctuation, tech, bullets, paragraphs");
        return;
    }

    var key = positional[1].ToLowerInvariant();
    var val = positional[2].ToLowerInvariant();
    var config = ConfigManager.LoadConfig();

    switch (key)
    {
        case "llm":
            config.LlmModel = val;
            Console.WriteLine($"LLM Model set to {val}");
            break;
        case "whisper":
            config.WhisperModel = val;
            Console.WriteLine($"Whisper Model set to {val}");
            break;
        case "style":
            if (val is "professional" or "concise" or "casual")
            {
                config.StyleMode = val;
                Console.WriteLine($"StyleMode set to {val}");
            }
            else
            {
                Console.WriteLine("Invalid style. Use: professional, concise, casual");
            }
            break;
        case "language":
        case "lang":
            config.WhisperLanguage = val;
            Console.WriteLine($"Spoken Language set to {val}");
            break;
        case "backend":
            config.TypingBackend = val;
            Console.WriteLine($"TypingBackend set to {val}");
            break;
        case "punctuation":
            SetFlag(val, "Punctuation", flag => config.ModulePunctuation = flag);
            break;
        case "tech":
            SetFlag(val, "TechnicalSanitization", flag => config.ModuleTechnicalSanitization = flag);
            break;
        case "bullets":
            SetFlag(val, "BulletPoints", flag => config.StructureBulletPoints = flag);
            break;
        case "paragraphs":
            SetFlag(val, "SmartParagraphing", flag => config.StructureSmartParagraphing = flag);
            break;
        default:
            // Unknown key: nothing changed, so do not save.
            Console.WriteLine($"Unknown config key: {key}");
            return;
    }

    ConfigManager.SaveConfig(config);

    // Parses `raw` as a boolean and applies it through `assign`, or reports the bad value.
    static void SetFlag(string raw, string label, Action<bool> assign)
    {
        if (bool.TryParse(raw, out var flag))
        {
            assign(flag);
            Console.WriteLine($"{label} set to {flag}");
        }
        else
        {
            Console.WriteLine("Invalid value. Use true or false.");
        }
    }
}

// Stops an active recording and deletes its audio file without transcribing.
void RunDiscard()
{
    if (!StateTracker.IsRecording())
    {
        if (!pipeToStdout)
        {
            Console.WriteLine("No active recording to discard.");
        }
        return;
    }

    AudioRecorder.StopRecording();
    DeleteIfExists(AudioRecorder.GetWavPath());
    Notifications.Notify("Toak", "Recording discarded");
    if (!pipeToStdout)
    {
        Console.WriteLine("Recording discarded.");
    }
}

// Generates a one-second silent clip with ffmpeg, then times one transcription
// call and one text-refinement call and reports the latencies against a 1.5 s target.
async Task RunLatencyTestAsync()
{
    var config = ConfigManager.LoadConfig();
    if (string.IsNullOrWhiteSpace(config.GroqApiKey))
    {
        Console.WriteLine("Groq API Key is not configured. Run 'toak onboard'.");
        return;
    }

    Console.WriteLine("Generating 1-second silent audio file for testing...");
    var testWavPath = Path.Combine(Path.GetTempPath(), "toak_latency_test.wav");

    try
    {
        if (!await TryGenerateSilentWavAsync(testWavPath))
        {
            Console.WriteLine("Failed to generate test audio file using ffmpeg.");
            return;
        }

        var groq = new GroqApiClient(config.GroqApiKey);

        Console.WriteLine("Testing STT (Whisper)...");
        var sttWatch = Stopwatch.StartNew();
        var transcript = await groq.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel);
        sttWatch.Stop();

        Console.WriteLine("Testing LLM (Llama)...");
        var systemPrompt = PromptBuilder.BuildPrompt(config);
        var llmWatch = Stopwatch.StartNew();
        var refinedText = await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel);
        llmWatch.Stop();

        var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds;
        var status = total < 1500 ? "OK (under 1.5s target)" : "SLOW (over 1.5s target)";

        Console.WriteLine();
        Console.WriteLine($"STT latency: {sttWatch.ElapsedMilliseconds}ms");
        Console.WriteLine($"LLM latency: {llmWatch.ElapsedMilliseconds}ms");
        Console.WriteLine($"Total: {(total / 1000.0):0.0}s ({total}ms)");
        Console.WriteLine($"Status: {status}");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error during test: {ex.Message}");
    }
    finally
    {
        // Also removes a partial file left behind by a failed ffmpeg run.
        DeleteIfExists(testWavPath);
    }
}

// Runs ffmpeg to write one second of silence to `outputPath`.
// Returns false when ffmpeg cannot be started, exits non-zero, or produces no file.
static async Task<bool> TryGenerateSilentWavAsync(string outputPath)
{
    var startInfo = new ProcessStartInfo
    {
        FileName = "ffmpeg",
        UseShellExecute = false,
        CreateNoWindow = true,
        RedirectStandardError = true,
        RedirectStandardOutput = true,
    };

    // ArgumentList passes each argument verbatim, so an output path containing
    // spaces cannot be split into several arguments.
    foreach (var argument in new[] { "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono", "-t", "1", "-y", outputPath })
    {
        startInfo.ArgumentList.Add(argument);
    }

    try
    {
        using var process = Process.Start(startInfo);
        if (process is null)
        {
            return false;
        }

        // Both streams are redirected, so they must be drained while waiting;
        // otherwise the child can block on a full pipe and never exit.
        var stdoutTask = process.StandardOutput.ReadToEndAsync();
        var stderrTask = process.StandardError.ReadToEndAsync();
        await Task.WhenAll(stdoutTask, stderrTask);
        await process.WaitForExitAsync();

        return process.ExitCode == 0 && File.Exists(outputPath);
    }
    catch (System.ComponentModel.Win32Exception)
    {
        // Thrown when the ffmpeg executable cannot be found or launched.
        return false;
    }
}

// Starts a recording when none is active; otherwise stops it, transcribes the
// audio, refines the text, and delivers it via stdout, clipboard, or typing.
async Task RunToggleAsync()
{
    if (!StateTracker.IsRecording())
    {
        // Start recording
        if (!pipeToStdout)
        {
            Console.WriteLine("Starting recording...");
        }
        AudioRecorder.StartRecording();
        return;
    }

    if (!pipeToStdout)
    {
        Console.WriteLine("Stopping recording and transcribing...");
        Notifications.Notify("Toak", "Transcribing...");
    }
    AudioRecorder.StopRecording();

    var wavPath = AudioRecorder.GetWavPath();
    try
    {
        var config = ConfigManager.LoadConfig();
        if (string.IsNullOrWhiteSpace(config.GroqApiKey))
        {
            Notifications.Notify("Toak Error", "Groq API Key is not configured. Run 'toak onboard'.");
            return;
        }

        var groq = new GroqApiClient(config.GroqApiKey);
        if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0)
        {
            if (!pipeToStdout)
            {
                Notifications.Notify("Toak", "No audio recorded.");
            }
            return;
        }

        var stopWatch = Stopwatch.StartNew();

        // 1. STT
        var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
        if (string.IsNullOrWhiteSpace(transcript))
        {
            if (!pipeToStdout)
            {
                Notifications.Notify("Toak", "No speech detected.");
            }
            return;
        }

        // 2. LLM Refinement
        var systemPrompt = PromptBuilder.BuildPrompt(config);
        var finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
        if (string.IsNullOrWhiteSpace(finalText))
        {
            if (!pipeToStdout)
            {
                Notifications.Notify("Toak", "Dropped short or empty audio.");
            }
            return;
        }

        // 3. Output
        if (pipeToStdout)
        {
            Console.WriteLine(finalText);
        }
        else if (copyToClipboard)
        {
            ClipboardManager.Copy(finalText);
            stopWatch.Stop();
            Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
        }
        else
        {
            TextInjector.Inject(finalText, config.TypingBackend);
            stopWatch.Stop();
            Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
        }
    }
    catch (Exception ex)
    {
        if (pipeToStdout)
        {
            // stdout is reserved for the transcript; report on stderr so the
            // failure is not silently lost.
            Console.Error.WriteLine(ex.ToString());
        }
        else
        {
            Notifications.Notify("Toak Error", ex.Message);
            Console.WriteLine(ex.ToString());
        }
        Environment.ExitCode = 1;
    }
    finally
    {
        // Runs on every path after the recording was stopped, including the
        // early returns above, so the audio file is never left behind.
        DeleteIfExists(wavPath);
    }
}