diff --git a/Api/GroqApiClient.cs b/Api/GroqApiClient.cs index 077eaff..ba33606 100644 --- a/Api/GroqApiClient.cs +++ b/Api/GroqApiClient.cs @@ -4,6 +4,7 @@ using System.Text.Json.Serialization; using Toak.Api.Models; using Toak.Serialization; +using Toak.Core; namespace Toak.Api; @@ -39,7 +40,9 @@ public class GroqApiClient content.Add(new StringContent(firstLang), "language"); } + Logger.LogDebug($"Sending Whisper API request ({modelToUse})..."); var response = await _httpClient.PostAsync("audio/transcriptions", content); + Logger.LogDebug($"Whisper API response status: {response.StatusCode}"); if (!response.IsSuccessStatusCode) { @@ -67,7 +70,9 @@ public class GroqApiClient var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json"); + Logger.LogDebug($"Sending Llama API request (model: {requestBody.Model})..."); var response = await _httpClient.PostAsync("chat/completions", jsonContent); + Logger.LogDebug($"Llama API response status: {response.StatusCode}"); if (!response.IsSuccessStatusCode) { @@ -80,4 +85,65 @@ public class GroqApiClient return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty; }
+
+    /// <summary>
+    /// Refines a raw transcript through the "chat/completions" endpoint with streaming
+    /// enabled and yields the generated text chunk by chunk as the response is read.
+    /// </summary>
+    /// <param name="rawTranscript">Transcript text sent as the user message.</param>
+    /// <param name="systemPrompt">Text sent as the system message.</param>
+    /// <param name="model">Model id; null or blank falls back to "openai/gpt-oss-20b".</param>
+    /// <returns>The non-empty content deltas, in the order they are received.</returns>
+    /// <exception cref="HttpRequestException">The API answered with a non-success status code.</exception>
+    public async IAsyncEnumerable<string> RefineTextStreamAsync(string rawTranscript, string systemPrompt, string model = "openai/gpt-oss-20b")
+    {
+        var requestBody = new LlamaRequest
+        {
+            Model = string.IsNullOrWhiteSpace(model) ? "openai/gpt-oss-20b" : model,
+            Temperature = 0.0,
+            Stream = true,
+            Messages = new[]
+            {
+                new LlamaRequestMessage { Role = "system", Content = systemPrompt },
+                new LlamaRequestMessage { Role = "user", Content = $"{rawTranscript}" }
+            }
+        };
+
+        var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json");
+
+        using var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions") { Content = jsonContent };
+        request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/event-stream"));
+
+        Logger.LogDebug($"Sending Llama Stream API request (model: {requestBody.Model})...");
+        // ResponseHeadersRead: return once the headers are in so the body can be read line by line below.
+        using var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
+        Logger.LogDebug($"Llama Stream API response status: {response.StatusCode}");
+
+        if (!response.IsSuccessStatusCode)
+        {
+            var error = await response.Content.ReadAsStringAsync();
+            throw new HttpRequestException($"Llama API Error: {response.StatusCode} - {error}", null, response.StatusCode);
+        }
+
+        using var stream = await response.Content.ReadAsStreamAsync();
+        using var reader = new StreamReader(stream);
+
+        const string dataPrefix = "data: ";
+        string? line;
+        while ((line = await reader.ReadLineAsync()) != null)
+        {
+            // Ordinal match: the prefix is protocol text, so culture rules must not apply.
+            if (!line.StartsWith(dataPrefix, StringComparison.Ordinal)) continue;
+
+            var data = line.Substring(dataPrefix.Length).Trim();
+            if (data == "[DONE]") break; // stop reading once the end marker arrives
+
+            var chunk = JsonSerializer.Deserialize(data, AppJsonSerializerContext.Default.LlamaStreamResponse);
+            var content = chunk?.Choices?.FirstOrDefault()?.Delta?.Content;
+            if (!string.IsNullOrEmpty(content))
+            {
+                yield return content;
+            }
+        }
+    }
} diff --git a/Api/Models/LlamaModels.cs b/Api/Models/LlamaModels.cs index 405bfac..a84cbac 100644 --- a/Api/Models/LlamaModels.cs +++ b/Api/Models/LlamaModels.cs @@ -18,6 +18,8 @@ public class LlamaRequest public LlamaRequestMessage[] Messages { get; set; } = Array.Empty(); [JsonPropertyName("temperature")] public double Temperature { get; set; } = 0.0; + [JsonPropertyName("stream")] + public bool? Stream { get; set; } } public class LlamaResponse @@ -31,3 +33,22 @@ public class LlamaChoice [JsonPropertyName("message")] public LlamaRequestMessage Message { get; set; } = new(); } + +public class LlamaStreamResponse +{ + [JsonPropertyName("choices")] + public LlamaStreamChoice[] Choices { get; set; } = Array.Empty(); +} + +public class LlamaStreamChoice +{ + [JsonPropertyName("delta")] + public LlamaStreamDelta Delta { get; set; } = new(); +} + +public class LlamaStreamDelta +{ + [JsonPropertyName("content")] + public string? 
Content { get; set; } +} + diff --git a/Assets/Audio/beep.wav b/Assets/Audio/beep.wav new file mode 100644 index 0000000..aab932d Binary files /dev/null and b/Assets/Audio/beep.wav differ diff --git a/Audio/AudioRecorder.cs b/Audio/AudioRecorder.cs index 2c02142..0a76180 100644 --- a/Audio/AudioRecorder.cs +++ b/Audio/AudioRecorder.cs @@ -15,9 +15,12 @@ public static class AudioRecorder { if (File.Exists(WavPath)) { + Logger.LogDebug($"Deleting old audio file: {WavPath}"); File.Delete(WavPath); } + Logger.LogDebug("Starting ffmpeg to record audio..."); + var pInfo = new ProcessStartInfo { FileName = "ffmpeg", @@ -41,6 +44,7 @@ public static class AudioRecorder var pid = StateTracker.GetRecordingPid(); if (pid.HasValue) { + Logger.LogDebug($"Found active recording process with PID {pid.Value}. Attempting to stop..."); try { var process = Process.GetProcessById(pid.Value); diff --git a/Configuration/ToakConfig.cs b/Configuration/ToakConfig.cs index 2c54f2c..108009e 100644 --- a/Configuration/ToakConfig.cs +++ b/Configuration/ToakConfig.cs @@ -13,4 +13,7 @@ public class ToakConfig public string WhisperLanguage { get; set; } = string.Empty; public string LlmModel { get; set; } = "openai/gpt-oss-20b"; public string WhisperModel { get; set; } = "whisper-large-v3-turbo"; + public string StartSoundPath { get; set; } = "Assets/Audio/beep.wav"; + public string StopSoundPath { get; set; } = "Assets/Audio/beep.wav"; + public List ActiveSkills { get; set; } = new List { "Terminal", "Translate" }; } diff --git a/Core/Logger.cs b/Core/Logger.cs new file mode 100644 index 0000000..a7ae4dc --- /dev/null +++ b/Core/Logger.cs @@ -0,0 +1,26 @@
+namespace Toak.Core;
+
+/// <summary>
+/// Minimal process-wide debug logger, switched on through <see cref="Verbose"/>.
+/// </summary>
+public static class Logger
+{
+    /// <summary>When false (the default), <see cref="LogDebug"/> writes nothing.</summary>
+    public static bool Verbose { get; set; } = false;
+
+    /// <summary>
+    /// Writes a "[DEBUG]" line with a local-time stamp when <see cref="Verbose"/> is enabled.
+    /// </summary>
+    /// <param name="message">Text to log.</param>
+    public static void LogDebug(string message)
+    {
+        if (!Verbose)
+        {
+            return;
+        }
+
+        // Diagnostics go to stderr: with --pipe the program prints the refined text on
+        // stdout, and debug lines written there would end up in the piped output.
+        Console.Error.WriteLine($"[DEBUG] {DateTime.Now:HH:mm:ss.fff} - {message}");
+    }
+}
diff --git a/Core/PromptBuilder.cs b/Core/PromptBuilder.cs 
index eafe4f0..f4366ec 100644 --- a/Core/PromptBuilder.cs +++ b/Core/PromptBuilder.cs @@ -23,6 +23,7 @@ public static class PromptBuilder sb.AppendLine(); sb.AppendLine("FORMATTING RULES:"); sb.AppendLine("- CRITICAL: If the contains nothing, or very short gibberish, output NOTHING AT ALL (an empty string)."); + sb.AppendLine("- LANGUAGE DETECT: The transcript may be in English or a different language (e.g., Hungarian, Spanish). Detect the language and ensure your output and grammar corrections are STRICTLY in that same language."); diff --git a/Core/Skills/ISkill.cs b/Core/Skills/ISkill.cs new file mode 100644 index 0000000..6a8f911 --- /dev/null +++ b/Core/Skills/ISkill.cs @@ -0,0 +1,13 @@ +namespace Toak.Core.Skills; + +public interface ISkill +{ + string Name { get; } + string Description { get; } + string[] Hotwords { get; } + + bool HandlesExecution { get; } + + string GetSystemPrompt(string rawTranscript); + void Execute(string llmResult); +} diff --git a/Core/Skills/SkillRegistry.cs b/Core/Skills/SkillRegistry.cs new file mode 100644 index 0000000..3798946 --- /dev/null +++ b/Core/Skills/SkillRegistry.cs @@ -0,0 +1,29 @@ +namespace Toak.Core.Skills; + +public static class SkillRegistry +{ + public static readonly ISkill[] AllSkills = new ISkill[] + { + new TerminalSkill(), + new TranslateSkill() + }; + + public static ISkill? 
DetectSkill(string transcript, IEnumerable activeSkillNames) + { + var activeSkills = AllSkills.Where(s => activeSkillNames.Contains(s.Name, StringComparer.OrdinalIgnoreCase)).ToList(); + + string normalizedTranscript = transcript.Trim(); + + foreach (var skill in activeSkills) + { + foreach (var hotword in skill.Hotwords) + { + if (normalizedTranscript.StartsWith(hotword, StringComparison.OrdinalIgnoreCase)) + { + return skill; + } + } + } + return null; + } +} diff --git a/Core/Skills/TerminalSkill.cs b/Core/Skills/TerminalSkill.cs new file mode 100644 index 0000000..2e5c476 --- /dev/null +++ b/Core/Skills/TerminalSkill.cs @@ -0,0 +1,56 @@
+using System.Diagnostics;
+
+namespace Toak.Core.Skills;
+
+/// <summary>
+/// Skill that asks the LLM for a single bash command and launches it with <c>bash -c</c>.
+/// </summary>
+public class TerminalSkill : ISkill
+{
+    public string Name => "Terminal";
+    public string Description => "Translates an intent into a bash command and runs it in the background.";
+    public string[] Hotwords => new[] { "System terminal", "System command" };
+
+    public bool HandlesExecution => true;
+
+    /// <summary>Returns the fixed system prompt; <paramref name="rawTranscript"/> is not used.</summary>
+    public string GetSystemPrompt(string rawTranscript)
+    {
+        return @"You are a command-line assistant. The user will ask you to perform a task.
+Translate the request into a single bash command.
+Output ONLY the raw bash command to achieve this task. Do not include markdown formatting, backticks, or explanations.";
+    }
+
+    /// <summary>
+    /// Starts <c>bash -c</c> with <paramref name="llmResult"/> as the command and then calls
+    /// <c>IO.Notifications.Notify</c>. Does not wait for the command to finish; exceptions
+    /// are written to the console instead of being rethrown.
+    /// </summary>
+    /// <param name="llmResult">The bash command text produced by the LLM.</param>
+    public void Execute(string llmResult)
+    {
+        // NOTE(review): the command is model output and runs without user confirmation.
+        try
+        {
+            Console.WriteLine($"[TerminalSkill] Executing: {llmResult}");
+            var pInfo = new ProcessStartInfo
+            {
+                FileName = "bash",
+                UseShellExecute = false,
+                CreateNoWindow = true
+            };
+            // One argv entry per ArgumentList item: no manual quoting, so commands that
+            // contain quotes or backslashes reach bash unchanged.
+            pInfo.ArgumentList.Add("-c");
+            pInfo.ArgumentList.Add(llmResult);
+
+            // Disposing only releases our handle; the launched command keeps running.
+            using var process = Process.Start(pInfo);
+            IO.Notifications.Notify("Toak Terminal Executed", llmResult);
+        }
+        catch (Exception ex)
+        {
+            Console.WriteLine($"[TerminalSkill Error] {ex.Message}");
+        }
+    }
+}
diff --git a/Core/Skills/TranslateSkill.cs b/Core/Skills/TranslateSkill.cs new file mode 100644 index 0000000..3017b31 --- /dev/null +++ b/Core/Skills/TranslateSkill.cs @@ -0,0 +1,23 @@ +namespace Toak.Core.Skills; + +public class TranslateSkill : ISkill +{ + public string Name => "Translate"; + public string Description => "Translates the spoken text into another language on the fly."; + public string[] Hotwords => new[] { "System translate to", "System translate into" }; + + public bool HandlesExecution => false; + + public string GetSystemPrompt(string rawTranscript) + { + return @"You are an expert translator. The user wants to translate the following text. +The first few words identify the target language (e.g. 'Translate to Spanish:', 'Translate into Hungarian:'). +Translate the REST of the transcript into that target language. +Output ONLY the final translated text. 
Do not include markdown, explanations, or quotes."; + } + + public void Execute(string llmResult) + { + // Not used since HandlesExecution is false + } +} diff --git a/Core/StateTracker.cs b/Core/StateTracker.cs index 5d22d44..8cc55b3 100644 --- a/Core/StateTracker.cs +++ b/Core/StateTracker.cs @@ -11,6 +11,7 @@ public static class StateTracker public static void SetRecording(int ffmpegPid) { + Logger.LogDebug($"Setting recording state with PID {ffmpegPid}"); File.WriteAllText(StateFilePath, ffmpegPid.ToString()); } @@ -21,6 +22,7 @@ public static class StateTracker var content = File.ReadAllText(StateFilePath).Trim(); if (int.TryParse(content, out var pid)) { + Logger.LogDebug($"Read recording PID {pid} from state file"); return pid; } } @@ -31,6 +33,7 @@ public static class StateTracker { if (File.Exists(StateFilePath)) { + Logger.LogDebug("Clearing recording state file"); File.Delete(StateFilePath); } } diff --git a/IDEAS.md b/IDEAS.md index c99bb69..98d04f9 100644 --- a/IDEAS.md +++ b/IDEAS.md @@ -197,6 +197,61 @@ toak status # Check if daemon is running --- +## Future Innovations + +### Hotword Commands (LLM Routing) +Instruct the LLM in `PromptBuilder` to output a specific JSON structure if given a command phrase. If a specific hotword like "System command" or "Computer dictate" is detected at the start of the audio, Toak parses the JSON, skips typing it out via `xdotool`/`wtype`, and instead executes a pre-defined background action. + +If it doesn't hear a command phrase, it simply returns the text normally and types it. + +**How it works (Under the Hood):** +The LLM is prompted to always return JSON in the background when a command is directed at the assistant. +```json +{ + "is_command": true, + "action": "append_to_notes", + "content": "Buy milk and eggs", + "meta": {} +} +``` + +**Alternative Hotword Ideas:** +Since "Toak" is not a real English word, Whisper might transcribe it as "talk", "toke", or "oak." 
It is highly recommended to use distinct, phonetically clear hotwords such as: +- **"System..."** (e.g. "System note:") +- **"Computer..."** (e.g. "Computer search:") +- **"Action..."** (e.g. "Action commit:") +- **"Dictate..."** (e.g. "Dictate terminal:") +- **"Listen up..."** (e.g. "Listen up translate...") + +**Prompt Ideas & Use Cases:** + +1. **Quick Notes / Brainstorming:** + - *Hotword:* `"System note:"` or `"Drop this in my notes:"` + - *Action:* Appends the spoken text to a configured `~/notes.md` file in the background without interrupting your current window. + - *Example:* "System note: I need to remember to check the database migrations later today." + +2. **Terminal / CLI Execution:** + - *Hotword:* `"Computer terminal:"` or `"Command:"` + - *Action:* Takes the natural language command, asks the LLM to translate it into a bash command, and types it into a new tmux window or background process. + - *Example:* "Computer terminal: find all python files modified in the last 2 days." + +3. **Git Commit Messages:** + - *Hotword:* `"Action commit:"` + - *Action:* Automatically formats the dictated text into a standard conventional commit message, stages all files, and commits them. + - *Example:* "Action commit: I refactored the audio recorder to use native processes instead of the old library." -> LLM outputs `refactor(audio): migrate to native processes` and runs `git commit -am "..."`. + +4. **Web Search / Lookup:** + - *Hotword:* `"System search:"` or `"Look up:"` + - *Action:* Opens your default browser and performs a search for the spoken phrase. + - *Example:* "System search: MDN documentation for grid layout." + +5. **Translating on the fly:** + - *Hotword:* `"Translate to Spanish:"` + - *Action:* Instead of typing English, it types the translated version of the rest of the sentence. + - *Example:* "Translate to Spanish: Hello, how are you today?" -> Types out `Hola, ¿cómo estás hoy?`. 
+ +--- + ## Implementation Priority ### Tier 1: High Impact, Low Effort diff --git a/IO/Notifications.cs b/IO/Notifications.cs index 2397b40..f7113ba 100644 --- a/IO/Notifications.cs +++ b/IO/Notifications.cs @@ -22,4 +22,57 @@ public static class Notifications Console.WriteLine($"[Notifications] Failed to send notification: {ex.Message}"); } }
+
+    /// <summary>
+    /// Starts <c>paplay</c> on a sound file without waiting for it to finish.
+    /// </summary>
+    /// <remarks>
+    /// A non-rooted path is resolved against the application base directory. When no file
+    /// exists there, an embedded resource named "Toak." + the path (separators replaced
+    /// by dots) is copied to the temp directory and used instead. Nothing is started
+    /// when neither exists; exceptions are written to the console rather than rethrown.
+    /// </remarks>
+    /// <param name="soundPath">Rooted or relative path of the sound; null or blank is ignored.</param>
+    public static void PlaySound(string soundPath)
+    {
+        if (string.IsNullOrWhiteSpace(soundPath)) return;
+        try
+        {
+            var absolutePath = soundPath;
+            if (!Path.IsPathRooted(absolutePath))
+                absolutePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, absolutePath);
+
+            if (!File.Exists(absolutePath))
+            {
+                var resourceName = "Toak." + soundPath.Replace("/", ".").Replace("\\", ".");
+                using var stream = System.Reflection.Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName);
+                if (stream == null) return;
+
+                // Extract only when the temp copy is missing; an existing copy is reused.
+                absolutePath = Path.Combine(Path.GetTempPath(), "toak_" + Path.GetFileName(soundPath));
+                if (!File.Exists(absolutePath))
+                {
+                    using var fileStream = File.Create(absolutePath);
+                    stream.CopyTo(fileStream);
+                }
+            }
+
+            var pInfo = new ProcessStartInfo
+            {
+                FileName = "paplay",
+                UseShellExecute = false,
+                CreateNoWindow = true
+            };
+            // ArgumentList passes the path as a single argv entry, so paths containing
+            // quotes or a trailing backslash need no manual escaping.
+            pInfo.ArgumentList.Add(absolutePath);
+
+            // Disposing releases only our handle; the started process keeps running.
+            using var process = Process.Start(pInfo);
+        }
+        catch (Exception ex)
+        {
+            Console.WriteLine($"[Notifications] Failed to play sound: {ex.Message}");
+        }
+    }
} diff --git a/IO/TextInjector.cs b/IO/TextInjector.cs index 1844358..33678c7 100644 --- a/IO/TextInjector.cs +++ b/IO/TextInjector.cs @@ -1,11 +1,14 @@ using System.Diagnostics; +using Toak.Core; + namespace Toak.IO; public static class TextInjector { public static void Inject(string text, string backend) { + Logger.LogDebug($"Injecting text: '{text}' with {backend}"); if (string.IsNullOrWhiteSpace(text)) return; try @@ -13,6 +16,7 @@ public static class TextInjector ProcessStartInfo pInfo; if (backend.ToLowerInvariant() == "wtype") { + Logger.LogDebug($"Injecting text 
using wtype..."); pInfo = new ProcessStartInfo { FileName = "wtype", @@ -23,6 +27,7 @@ public static class TextInjector } else // xdotool { + Logger.LogDebug($"Injecting text using xdotool..."); pInfo = new ProcessStartInfo { FileName = "xdotool", @@ -40,4 +45,62 @@ public static class TextInjector Notifications.Notify("Injection Error", "Could not type text into window."); } }
+
+    /// <summary>
+    /// Pipes a stream of text chunks, as they arrive, to the standard input of a
+    /// <c>wtype</c> or <c>xdotool</c> process and waits for that process to exit.
+    /// </summary>
+    /// <param name="tokenStream">Text chunks to write, in order.</param>
+    /// <param name="backend">"wtype" (case-insensitive) selects wtype; any other value selects xdotool.</param>
+    /// <returns>A task that completes when the process has exited or an error has been reported.</returns>
+    public static async Task InjectStreamAsync(IAsyncEnumerable<string> tokenStream, string backend)
+    {
+        try
+        {
+            bool useWtype = string.Equals(backend, "wtype", StringComparison.OrdinalIgnoreCase);
+            Logger.LogDebug(useWtype
+                ? "Setting up stream injection using wtype..."
+                : "Setting up stream injection using xdotool...");
+
+            var pInfo = new ProcessStartInfo
+            {
+                FileName = useWtype ? "wtype" : "xdotool",
+                // The text itself is supplied on the redirected standard input below.
+                Arguments = useWtype ? "-" : "type --clearmodifiers --delay 0 --file -",
+                UseShellExecute = false,
+                CreateNoWindow = true,
+                RedirectStandardInput = true
+            };
+
+            using var process = Process.Start(pInfo);
+            if (process == null) return;
+
+            Logger.LogDebug("Started stream injection process, waiting for tokens...");
+
+            try
+            {
+                await foreach (var token in tokenStream)
+                {
+                    Logger.LogDebug($"Injecting token: '{token}'");
+                    await process.StandardInput.WriteAsync(token);
+                    await process.StandardInput.FlushAsync();
+                }
+
+                Logger.LogDebug("Stream injection complete. Closing standard input.");
+            }
+            finally
+            {
+                // Always signal end-of-input, even when the token stream fails part-way,
+                // so the child process is not left waiting on an open pipe.
+                process.StandardInput.Close();
+            }
+
+            await process.WaitForExitAsync();
+        }
+        catch (Exception ex)
+        {
+            Console.WriteLine($"[TextInjector] Error injecting text stream: {ex.Message}");
+            Notifications.Notify("Injection Error", "Could not type text stream into window.");
+        }
+    }
} diff --git a/Program.cs b/Program.cs index 3eb41cc..8dd017c 100644 --- a/Program.cs +++ b/Program.cs @@ -5,8 +5,11 @@ using Toak.Api; using Toak.Core; using Toak.IO; -bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p") || Console.IsOutputRedirected; +bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p"); bool copyToClipboard = args.Contains("--copy"); +bool verbose = args.Contains("-v") || args.Contains("--verbose"); + +Logger.Verbose = verbose; string command = ""; @@ -29,6 +32,7 @@ if (args.Contains("-h") || args.Contains("--help") || (string.IsNullOrEmpty(comm Console.WriteLine(" -h, --help - Show this help message"); Console.WriteLine(" -p, --pipe - Output transcription to stdout instead of typing"); Console.WriteLine(" --copy - Copy to clipboard instead of typing"); + Console.WriteLine(" -v, --verbose - Enable detailed debug logging"); return; } @@ -71,6 +75,17 @@ if (command == "onboard") var backend = Console.ReadLine(); if (!string.IsNullOrWhiteSpace(backend)) config.TypingBackend = backend.ToLowerInvariant(); + Console.WriteLine(); + var availableSkills = Toak.Core.Skills.SkillRegistry.AllSkills.Select(s => s.Name); + Console.WriteLine($"Active Skills (comma separated) [{string.Join(", ", config.ActiveSkills)}]:"); + Console.WriteLine($" Available: {string.Join(", ", availableSkills)}"); + Console.Write("Selection: "); + var skillsInput = Console.ReadLine(); + if (!string.IsNullOrWhiteSpace(skillsInput)) + { + config.ActiveSkills = skillsInput.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList(); + } + ConfigManager.SaveConfig(config); Console.WriteLine("Configuration saved."); return; @@ 
-85,6 +100,7 @@ if (command == "show") Console.WriteLine($" Whisper Model: {config.WhisperModel}"); Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage)}"); Console.WriteLine($" Typing Backend: {config.TypingBackend}"); + Console.WriteLine($" Active Skills: {string.Join(", ", config.ActiveSkills)}"); Console.WriteLine($" Style Mode: {config.StyleMode}"); Console.WriteLine($" Punctuation Module: {config.ModulePunctuation}"); Console.WriteLine($" Technical Sanitization: {config.ModuleTechnicalSanitization}"); @@ -244,12 +260,15 @@ if (command == "toggle") { if (StateTracker.IsRecording()) { + var config = ConfigManager.LoadConfig(); + Notifications.PlaySound(config.StopSoundPath); + if (!pipeToStdout) Console.WriteLine("Stopping recording and transcribing..."); if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing..."); AudioRecorder.StopRecording(); - var config = ConfigManager.LoadConfig(); + Logger.LogDebug($"Loaded configuration: LLM={config.LlmModel}, Whisper={config.WhisperModel}, Typing={config.TypingBackend}"); if (string.IsNullOrWhiteSpace(config.GroqApiKey)) { @@ -271,7 +290,9 @@ if (command == "toggle") var stopWatch = Stopwatch.StartNew(); // 1. STT + Logger.LogDebug($"Starting STT transcription via Whisper for {wavPath}..."); var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel); + Logger.LogDebug($"Raw transcript received: '{transcript}'"); if (string.IsNullOrWhiteSpace(transcript)) { @@ -279,32 +300,64 @@ if (command == "toggle") return; } - string finalText = transcript; - // 2. 
LLM Refinement - var systemPrompt = PromptBuilder.BuildPrompt(config); - finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel); - - if (string.IsNullOrWhiteSpace(finalText)) + var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills); + string systemPrompt; + if (detectedSkill != null) { - if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio."); - return; - } - - // 3. Output - if (pipeToStdout) - { - Console.WriteLine(finalText); - } - else if (copyToClipboard) - { - ClipboardManager.Copy(finalText); - stopWatch.Stop(); - Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms"); + Logger.LogDebug($"Skill detected: {detectedSkill.Name}"); + if (!pipeToStdout) Notifications.Notify("Toak Skill Detected", detectedSkill.Name); + systemPrompt = detectedSkill.GetSystemPrompt(transcript); } else { - TextInjector.Inject(finalText, config.TypingBackend); + systemPrompt = PromptBuilder.BuildPrompt(config); + } + + // 3. 
Output + if (detectedSkill != null && detectedSkill.HandlesExecution) + { + Logger.LogDebug($"Executing skill synchronously: {detectedSkill.Name}"); + string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel); + Logger.LogDebug($"Skill refined text: '{finalText}'"); + if (string.IsNullOrWhiteSpace(finalText)) + { + if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio."); + return; + } + + detectedSkill.Execute(finalText); + stopWatch.Stop(); + Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms"); + } + else if (pipeToStdout || copyToClipboard) + { + Logger.LogDebug("Starting LLM text refinement (synchronous)..."); + string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel); + Logger.LogDebug($"Refined text received: '{finalText}'"); + if (string.IsNullOrWhiteSpace(finalText)) + { + if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio."); + return; + } + + if (pipeToStdout) + { + Console.WriteLine(finalText); + } + else + { + ClipboardManager.Copy(finalText); + stopWatch.Stop(); + Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms"); + } + } + else + { + Logger.LogDebug("Starting LLM text refinement (streaming)..."); + var tokenStream = groq.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel); + Logger.LogDebug("Starting to inject text..."); + await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend); stopWatch.Stop(); Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms"); } @@ -323,6 +376,8 @@ if (command == "toggle") { // Start recording if (!pipeToStdout) Console.WriteLine("Starting recording..."); + var config = ConfigManager.LoadConfig(); + Notifications.PlaySound(config.StartSoundPath); AudioRecorder.StartRecording(); } } diff --git a/Serialization/AppJsonSerializerContext.cs b/Serialization/AppJsonSerializerContext.cs index 
b06ab97..f47df36 100644 --- a/Serialization/AppJsonSerializerContext.cs +++ b/Serialization/AppJsonSerializerContext.cs @@ -14,6 +14,10 @@ namespace Toak.Serialization; [JsonSerializable(typeof(LlamaChoice))] [JsonSerializable(typeof(LlamaRequestMessage[]))] [JsonSerializable(typeof(LlamaChoice[]))] +[JsonSerializable(typeof(LlamaStreamResponse))] +[JsonSerializable(typeof(LlamaStreamChoice))] +[JsonSerializable(typeof(LlamaStreamDelta))] +[JsonSerializable(typeof(LlamaStreamChoice[]))] internal partial class AppJsonSerializerContext : JsonSerializerContext { } diff --git a/Toak.csproj b/Toak.csproj index 218df61..1b7c082 100644 --- a/Toak.csproj +++ b/Toak.csproj @@ -8,4 +8,8 @@ true + + + +