1
0

feat: Implement a modular skill system with hotword detection, streaming text output, and enhanced logging.

This commit is contained in:
2026-02-27 00:39:32 +01:00
parent 4ee4bc5457
commit a365448399
18 changed files with 451 additions and 23 deletions

View File

@@ -4,6 +4,7 @@ using System.Text.Json.Serialization;
using Toak.Api.Models; using Toak.Api.Models;
using Toak.Serialization; using Toak.Serialization;
using Toak.Core;
namespace Toak.Api; namespace Toak.Api;
@@ -39,7 +40,9 @@ public class GroqApiClient
content.Add(new StringContent(firstLang), "language"); content.Add(new StringContent(firstLang), "language");
} }
Logger.LogDebug($"Sending Whisper API request ({modelToUse})...");
var response = await _httpClient.PostAsync("audio/transcriptions", content); var response = await _httpClient.PostAsync("audio/transcriptions", content);
Logger.LogDebug($"Whisper API response status: {response.StatusCode}");
if (!response.IsSuccessStatusCode) if (!response.IsSuccessStatusCode)
{ {
@@ -67,7 +70,9 @@ public class GroqApiClient
var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json"); var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json");
Logger.LogDebug($"Sending Llama API request (model: {requestBody.Model})...");
var response = await _httpClient.PostAsync("chat/completions", jsonContent); var response = await _httpClient.PostAsync("chat/completions", jsonContent);
Logger.LogDebug($"Llama API response status: {response.StatusCode}");
if (!response.IsSuccessStatusCode) if (!response.IsSuccessStatusCode)
{ {
@@ -80,4 +85,55 @@ public class GroqApiClient
return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty; return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty;
} }
/// <summary>
/// Sends the transcript to the chat-completions endpoint with streaming enabled and
/// yields each non-empty content delta as soon as it is received.
/// </summary>
/// <param name="rawTranscript">Transcript text; it is wrapped in &lt;transcript&gt; tags as the user message.</param>
/// <param name="systemPrompt">System prompt sent as the first message.</param>
/// <param name="model">Model identifier; a null or blank value falls back to "openai/gpt-oss-20b".</param>
/// <param name="cancellationToken">Optional token that aborts the request and the read loop.</param>
/// <returns>An asynchronous sequence of text fragments in arrival order.</returns>
/// <exception cref="Exception">Thrown when the API answers with a non-success status code.</exception>
public async IAsyncEnumerable<string> RefineTextStreamAsync(
    string rawTranscript,
    string systemPrompt,
    string model = "openai/gpt-oss-20b",
    [System.Runtime.CompilerServices.EnumeratorCancellation] System.Threading.CancellationToken cancellationToken = default)
{
    // Server-sent events allow "data:" with or without a following space,
    // so match the bare field name and trim the remainder.
    const string DataFieldPrefix = "data:";
    const string EndOfStreamMarker = "[DONE]";

    var requestBody = new LlamaRequest
    {
        Model = string.IsNullOrWhiteSpace(model) ? "openai/gpt-oss-20b" : model,
        Temperature = 0.0,
        Stream = true,
        Messages = new[]
        {
            new LlamaRequestMessage { Role = "system", Content = systemPrompt },
            new LlamaRequestMessage { Role = "user", Content = $"<transcript>{rawTranscript}</transcript>" }
        }
    };

    var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json");
    using var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions") { Content = jsonContent };
    request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/event-stream"));

    Logger.LogDebug($"Sending Llama Stream API request (model: {requestBody.Model})...");
    // ResponseHeadersRead returns as soon as headers arrive so the body can be consumed incrementally.
    using var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
    Logger.LogDebug($"Llama Stream API response status: {response.StatusCode}");

    if (!response.IsSuccessStatusCode)
    {
        var error = await response.Content.ReadAsStringAsync(cancellationToken);
        throw new Exception($"Llama API Error: {response.StatusCode} - {error}");
    }

    using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
    using var reader = new StreamReader(stream);

    string? line;
    while ((line = await reader.ReadLineAsync(cancellationToken)) != null)
    {
        // Blank separator lines and any non-data fields are ignored.
        if (!line.StartsWith(DataFieldPrefix, StringComparison.Ordinal)) continue;

        var data = line.Substring(DataFieldPrefix.Length).Trim();
        if (data.Length == 0) continue;
        if (data == EndOfStreamMarker) break;

        var chunk = JsonSerializer.Deserialize(data, AppJsonSerializerContext.Default.LlamaStreamResponse);
        var content = chunk?.Choices?.FirstOrDefault()?.Delta?.Content;
        if (!string.IsNullOrEmpty(content))
        {
            yield return content;
        }
    }
}
} }

View File

@@ -18,6 +18,8 @@ public class LlamaRequest
public LlamaRequestMessage[] Messages { get; set; } = Array.Empty<LlamaRequestMessage>(); public LlamaRequestMessage[] Messages { get; set; } = Array.Empty<LlamaRequestMessage>();
[JsonPropertyName("temperature")] [JsonPropertyName("temperature")]
public double Temperature { get; set; } = 0.0; public double Temperature { get; set; } = 0.0;
[JsonPropertyName("stream")]
public bool? Stream { get; set; }
} }
public class LlamaResponse public class LlamaResponse
@@ -31,3 +33,22 @@ public class LlamaChoice
[JsonPropertyName("message")] [JsonPropertyName("message")]
public LlamaRequestMessage Message { get; set; } = new(); public LlamaRequestMessage Message { get; set; } = new();
} }
/// <summary>
/// One chunk of a streamed chat-completion response.
/// </summary>
public class LlamaStreamResponse
{
    /// <summary>Choices carried by this chunk; defaults to an empty array.</summary>
    [JsonPropertyName("choices")]
    public LlamaStreamChoice[] Choices { get; set; } = Array.Empty<LlamaStreamChoice>();
}

/// <summary>
/// A single choice inside a streamed chunk.
/// </summary>
public class LlamaStreamChoice
{
    /// <summary>Incremental payload for this choice; defaults to an empty delta.</summary>
    [JsonPropertyName("delta")]
    public LlamaStreamDelta Delta { get; set; } = new LlamaStreamDelta();
}

/// <summary>
/// The incremental part of a streamed choice.
/// </summary>
public class LlamaStreamDelta
{
    /// <summary>Text fragment for this delta, or null when the chunk carries no text.</summary>
    [JsonPropertyName("content")]
    public string? Content { get; set; }
}

BIN
Assets/Audio/beep.wav Normal file

Binary file not shown.

View File

@@ -15,9 +15,12 @@ public static class AudioRecorder
{ {
if (File.Exists(WavPath)) if (File.Exists(WavPath))
{ {
Logger.LogDebug($"Deleting old audio file: {WavPath}");
File.Delete(WavPath); File.Delete(WavPath);
} }
Logger.LogDebug("Starting ffmpeg to record audio...");
var pInfo = new ProcessStartInfo var pInfo = new ProcessStartInfo
{ {
FileName = "ffmpeg", FileName = "ffmpeg",
@@ -41,6 +44,7 @@ public static class AudioRecorder
var pid = StateTracker.GetRecordingPid(); var pid = StateTracker.GetRecordingPid();
if (pid.HasValue) if (pid.HasValue)
{ {
Logger.LogDebug($"Found active recording process with PID {pid.Value}. Attempting to stop...");
try try
{ {
var process = Process.GetProcessById(pid.Value); var process = Process.GetProcessById(pid.Value);

View File

@@ -13,4 +13,7 @@ public class ToakConfig
public string WhisperLanguage { get; set; } = string.Empty; public string WhisperLanguage { get; set; } = string.Empty;
public string LlmModel { get; set; } = "openai/gpt-oss-20b"; public string LlmModel { get; set; } = "openai/gpt-oss-20b";
public string WhisperModel { get; set; } = "whisper-large-v3-turbo"; public string WhisperModel { get; set; } = "whisper-large-v3-turbo";
public string StartSoundPath { get; set; } = "Assets/Audio/beep.wav";
public string StopSoundPath { get; set; } = "Assets/Audio/beep.wav";
public List<string> ActiveSkills { get; set; } = new List<string> { "Terminal", "Translate" };
} }

15
Core/Logger.cs Normal file
View File

@@ -0,0 +1,15 @@
namespace Toak.Core;
/// <summary>
/// Minimal process-wide debug logger that is switched on through <see cref="Verbose"/>.
/// </summary>
public static class Logger
{
    /// <summary>
    /// When true, <see cref="LogDebug"/> writes its messages; when false it does nothing.
    /// </summary>
    public static bool Verbose { get; set; } = false;

    /// <summary>
    /// Writes a timestamped "[DEBUG]" line when verbose logging is enabled.
    /// </summary>
    /// <param name="message">Text to append after the timestamp.</param>
    public static void LogDebug(string message)
    {
        if (!Verbose) return;

        // Diagnostics go to stderr so that stdout carries only real program output
        // (for example a transcript that is being piped to another command).
        var logLine = $"[DEBUG] {DateTime.Now:HH:mm:ss.fff} - {message}";
        Console.Error.WriteLine(logLine);
    }
}

View File

@@ -23,6 +23,7 @@ public static class PromptBuilder
sb.AppendLine(); sb.AppendLine();
sb.AppendLine("FORMATTING RULES:"); sb.AppendLine("FORMATTING RULES:");
sb.AppendLine("- CRITICAL: If the <transcript> contains nothing, or very short gibberish, output NOTHING AT ALL (an empty string)."); sb.AppendLine("- CRITICAL: If the <transcript> contains nothing, or very short gibberish, output NOTHING AT ALL (an empty string).");
sb.AppendLine("- LANGUAGE DETECT: The transcript may be in English or a different language (e.g., Hungarian, Spanish). Detect the language and ensure your output and grammar corrections are STRICTLY in that same language.");

13
Core/Skills/ISkill.cs Normal file
View File

@@ -0,0 +1,13 @@
namespace Toak.Core.Skills;
/// <summary>
/// Contract for a voice-triggered skill: a set of hotwords that select it, the
/// system prompt it wants the LLM to run with, and an optional action on the result.
/// </summary>
public interface ISkill
{
/// <summary>Name of the skill; SkillRegistry compares it case-insensitively against the configured active skill names.</summary>
string Name { get; }
/// <summary>Short human-readable explanation of what the skill does.</summary>
string Description { get; }
/// <summary>Phrases that select this skill when the trimmed transcript starts with one of them (case-insensitive).</summary>
string[] Hotwords { get; }
/// <summary>
/// True when the refined LLM text should be handed to <see cref="Execute"/>;
/// false when it should go through the normal text output path instead.
/// </summary>
bool HandlesExecution { get; }
/// <summary>Returns the system prompt to use for the LLM request when this skill is selected.</summary>
/// <param name="rawTranscript">The transcript that triggered the skill.</param>
string GetSystemPrompt(string rawTranscript);
/// <summary>Performs the skill's action on the LLM output.</summary>
/// <param name="llmResult">Text produced by the LLM for this skill's prompt.</param>
void Execute(string llmResult);
}

View File

@@ -0,0 +1,29 @@
namespace Toak.Core.Skills;
/// <summary>
/// Holds the built-in skills and picks the one whose hotword starts a transcript.
/// </summary>
public static class SkillRegistry
{
    /// <summary>Every skill known to the application, in detection priority order.</summary>
    public static readonly ISkill[] AllSkills = new ISkill[]
    {
        new TerminalSkill(),
        new TranslateSkill()
    };

    /// <summary>
    /// Finds the first active skill that has a hotword matching the start of the transcript.
    /// </summary>
    /// <param name="transcript">Raw transcript; surrounding whitespace is ignored.</param>
    /// <param name="activeSkillNames">Names of the enabled skills, compared case-insensitively.</param>
    /// <returns>The matching skill, or null when nothing matches or either argument is null/blank.</returns>
    public static ISkill? DetectSkill(string transcript, IEnumerable<string> activeSkillNames)
    {
        if (string.IsNullOrWhiteSpace(transcript) || activeSkillNames is null)
        {
            return null;
        }

        // Materialize once: the incoming sequence may be lazy, and it is probed once per skill.
        var enabledNames = new HashSet<string>(
            activeSkillNames.Where(name => name is not null),
            StringComparer.OrdinalIgnoreCase);

        string normalizedTranscript = transcript.Trim();

        foreach (var skill in AllSkills)
        {
            if (!enabledNames.Contains(skill.Name)) continue;

            foreach (var hotword in skill.Hotwords)
            {
                // An empty hotword would be a prefix of every transcript, so it never counts as a match.
                if (string.IsNullOrEmpty(hotword)) continue;

                if (normalizedTranscript.StartsWith(hotword, StringComparison.OrdinalIgnoreCase))
                {
                    return skill;
                }
            }
        }

        return null;
    }
}

View File

@@ -0,0 +1,41 @@
using System.Diagnostics;
namespace Toak.Core.Skills;
/// <summary>
/// Skill that asks the LLM for a single bash command and launches it with <c>bash -c</c>.
/// </summary>
public class TerminalSkill : ISkill
{
    public string Name => "Terminal";
    public string Description => "Translates an intent into a bash command and runs it in the background.";
    public string[] Hotwords => new[] { "System terminal", "System command" };
    public bool HandlesExecution => true;

    /// <summary>Returns the prompt that instructs the LLM to answer with one raw bash command.</summary>
    /// <param name="rawTranscript">Not used by this skill; the prompt is fixed.</param>
    public string GetSystemPrompt(string rawTranscript)
    {
        return @"You are a command-line assistant. The user will ask you to perform a task.
Translate the request into a single bash command.
Output ONLY the raw bash command to achieve this task. Do not include markdown formatting, backticks, or explanations.";
    }

    /// <summary>
    /// Starts <c>bash -c</c> with the LLM output as the command and sends a desktop notification.
    /// Failures are written to the console instead of being thrown.
    /// </summary>
    /// <param name="llmResult">Command text produced by the LLM; blank input is ignored.</param>
    public void Execute(string llmResult)
    {
        if (string.IsNullOrWhiteSpace(llmResult)) return;

        try
        {
            // SECURITY NOTE(review): this runs model-generated text as a shell command without
            // confirmation or sandboxing. Consider an allow-list or a confirmation step.
            Console.WriteLine($"[TerminalSkill] Executing: {llmResult}");

            var pInfo = new ProcessStartInfo
            {
                FileName = "bash",
                UseShellExecute = false,
                CreateNoWindow = true
            };
            // ArgumentList passes the command as one argv entry verbatim, so quotes and
            // backslashes inside it need no manual escaping.
            pInfo.ArgumentList.Add("-c");
            pInfo.ArgumentList.Add(llmResult);

            // Disposing the Process object releases the handle only; the command keeps running.
            using var process = Process.Start(pInfo);
            IO.Notifications.Notify("Toak Terminal Executed", llmResult);
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[TerminalSkill Error] {ex.Message}");
        }
    }
}

View File

@@ -0,0 +1,23 @@
namespace Toak.Core.Skills;
/// <summary>
/// Skill that has the LLM translate the spoken text; the result is output as ordinary text.
/// </summary>
public class TranslateSkill : ISkill
{
    public string Name
    {
        get { return "Translate"; }
    }

    public string Description
    {
        get { return "Translates the spoken text into another language on the fly."; }
    }

    public string[] Hotwords
    {
        get { return new[] { "System translate to", "System translate into" }; }
    }

    public bool HandlesExecution
    {
        get { return false; }
    }

    /// <summary>Returns the fixed translator prompt; the transcript argument is not inspected.</summary>
    public string GetSystemPrompt(string rawTranscript) => @"You are an expert translator. The user wants to translate the following text.
The first few words identify the target language (e.g. 'Translate to Spanish:', 'Translate into Hungarian:').
Translate the REST of the transcript into that target language.
Output ONLY the final translated text. Do not include markdown, explanations, or quotes.";

    /// <summary>Intentionally empty: this skill reports HandlesExecution as false.</summary>
    public void Execute(string llmResult)
    {
        // Not used since HandlesExecution is false
    }
}

View File

@@ -11,6 +11,7 @@ public static class StateTracker
public static void SetRecording(int ffmpegPid) public static void SetRecording(int ffmpegPid)
{ {
Logger.LogDebug($"Setting recording state with PID {ffmpegPid}");
File.WriteAllText(StateFilePath, ffmpegPid.ToString()); File.WriteAllText(StateFilePath, ffmpegPid.ToString());
} }
@@ -21,6 +22,7 @@ public static class StateTracker
var content = File.ReadAllText(StateFilePath).Trim(); var content = File.ReadAllText(StateFilePath).Trim();
if (int.TryParse(content, out var pid)) if (int.TryParse(content, out var pid))
{ {
Logger.LogDebug($"Read recording PID {pid} from state file");
return pid; return pid;
} }
} }
@@ -31,6 +33,7 @@ public static class StateTracker
{ {
if (File.Exists(StateFilePath)) if (File.Exists(StateFilePath))
{ {
Logger.LogDebug("Clearing recording state file");
File.Delete(StateFilePath); File.Delete(StateFilePath);
} }
} }

View File

@@ -197,6 +197,61 @@ toak status # Check if daemon is running
--- ---
## Future Innovations
### Hotword Commands (LLM Routing)
Instruct the LLM in `PromptBuilder` to output a specific JSON structure if given a command phrase. If a specific hotword like "System command" or "Computer dictate" is detected at the start of the audio, Toak parses the JSON, skips typing it out via `xdotool`/`wtype`, and instead executes a pre-defined background action.
If it doesn't hear a command phrase, it simply returns the text normally and types it.
**How it works (Under the Hood):**
The LLM is prompted to always return JSON in the background when a command is directed at the assistant.
```json
{
"is_command": true,
"action": "append_to_notes",
"content": "Buy milk and eggs",
"meta": {}
}
```
**Alternative Hotword Ideas:**
Since "Toak" is not a real English word, Whisper might transcribe it as "talk", "toke", or "oak." It is highly recommended to use distinct, phonetically clear hotwords such as:
- **"System..."** (e.g. "System note:")
- **"Computer..."** (e.g. "Computer search:")
- **"Action..."** (e.g. "Action commit:")
- **"Dictate..."** (e.g. "Dictate terminal:")
- **"Listen up..."** (e.g. "Listen up translate...")
**Prompt Ideas & Use Cases:**
1. **Quick Notes / Brainstorming:**
- *Hotword:* `"System note:"` or `"Drop this in my notes:"`
- *Action:* Appends the spoken text to a configured `~/notes.md` file in the background without interrupting your current window.
- *Example:* "System note: I need to remember to check the database migrations later today."
2. **Terminal / CLI Execution:**
- *Hotword:* `"Computer terminal:"` or `"Command:"`
- *Action:* Takes the natural language command, asks the LLM to translate it into a bash command, and types it into a new tmux window or background process.
- *Example:* "Computer terminal: find all python files modified in the last 2 days."
3. **Git Commit Messages:**
- *Hotword:* `"Action commit:"`
- *Action:* Automatically formats the dictated text into a standard conventional commit message, stages all files, and commits them.
- *Example:* "Action commit: I refactored the audio recorder to use native processes instead of the old library." -> LLM outputs `refactor(audio): migrate to native processes` and runs `git commit -am "..."`.
4. **Web Search / Lookup:**
- *Hotword:* `"System search:"` or `"Look up:"`
- *Action:* Opens your default browser and performs a search for the spoken phrase.
- *Example:* "System search: MDN documentation for grid layout."
5. **Translating on the fly:**
- *Hotword:* `"Translate to Spanish:"`
- *Action:* Instead of typing English, it types the translated version of the rest of the sentence.
- *Example:* "Translate to Spanish: Hello, how are you today?" -> Types out `Hola, ¿cómo estás hoy?`.
---
## Implementation Priority ## Implementation Priority
### Tier 1: High Impact, Low Effort ### Tier 1: High Impact, Low Effort

View File

@@ -22,4 +22,47 @@ public static class Notifications
Console.WriteLine($"[Notifications] Failed to send notification: {ex.Message}"); Console.WriteLine($"[Notifications] Failed to send notification: {ex.Message}");
} }
} }
/// <summary>
/// Plays a sound file through <c>paplay</c>. A relative path is resolved against the
/// application base directory; if no file exists there, an embedded resource of the same
/// name is extracted to the temp directory and played instead.
/// Failures are written to the console instead of being thrown.
/// </summary>
/// <param name="soundPath">Absolute or relative path of the sound; blank values are ignored.</param>
public static void PlaySound(string soundPath)
{
    if (string.IsNullOrWhiteSpace(soundPath)) return;

    try
    {
        var absolutePath = Path.IsPathRooted(soundPath)
            ? soundPath
            : Path.Combine(AppDomain.CurrentDomain.BaseDirectory, soundPath);

        if (!File.Exists(absolutePath))
        {
            // Embedded resource names use dots where the path uses directory separators.
            var resourceName = "Toak." + soundPath.Replace("/", ".").Replace("\\", ".");
            using var stream = System.Reflection.Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName);
            if (stream == null) return;

            // Extract once and reuse the temp copy on later calls.
            absolutePath = Path.Combine(Path.GetTempPath(), "toak_" + Path.GetFileName(soundPath));
            if (!File.Exists(absolutePath))
            {
                using var fileStream = File.Create(absolutePath);
                stream.CopyTo(fileStream);
            }
        }

        var pInfo = new ProcessStartInfo
        {
            FileName = "paplay",
            UseShellExecute = false,
            CreateNoWindow = true
        };
        // ArgumentList hands the path over as a single argument, so quotes or spaces
        // in the path cannot break the command line.
        pInfo.ArgumentList.Add(absolutePath);

        // Disposing the Process object releases the handle only; playback continues.
        using var player = Process.Start(pInfo);
    }
    catch (Exception ex)
    {
        Console.WriteLine($"[Notifications] Failed to play sound: {ex.Message}");
    }
}
} }

View File

@@ -1,11 +1,14 @@
using System.Diagnostics; using System.Diagnostics;
using Toak.Core;
namespace Toak.IO; namespace Toak.IO;
public static class TextInjector public static class TextInjector
{ {
public static void Inject(string text, string backend) public static void Inject(string text, string backend)
{ {
Logger.LogDebug($"Injecting text: '{text}' with {backend}");
if (string.IsNullOrWhiteSpace(text)) return; if (string.IsNullOrWhiteSpace(text)) return;
try try
@@ -13,6 +16,7 @@ public static class TextInjector
ProcessStartInfo pInfo; ProcessStartInfo pInfo;
if (backend.ToLowerInvariant() == "wtype") if (backend.ToLowerInvariant() == "wtype")
{ {
Logger.LogDebug($"Injecting text using wtype...");
pInfo = new ProcessStartInfo pInfo = new ProcessStartInfo
{ {
FileName = "wtype", FileName = "wtype",
@@ -23,6 +27,7 @@ public static class TextInjector
} }
else // xdotool else // xdotool
{ {
Logger.LogDebug($"Injecting text using xdotool...");
pInfo = new ProcessStartInfo pInfo = new ProcessStartInfo
{ {
FileName = "xdotool", FileName = "xdotool",
@@ -40,4 +45,57 @@ public static class TextInjector
Notifications.Notify("Injection Error", "Could not type text into window."); Notifications.Notify("Injection Error", "Could not type text into window.");
} }
} }
/// <summary>
/// Starts the typing tool (<c>wtype</c> or <c>xdotool</c>) reading from standard input and
/// forwards every token of the stream to it as soon as the token arrives.
/// Failures are written to the console and reported through a notification instead of being thrown.
/// </summary>
/// <param name="tokenStream">Asynchronous sequence of text fragments to type.</param>
/// <param name="backend">"wtype" (case-insensitive) selects wtype; any other value selects xdotool.</param>
public static async Task InjectStreamAsync(IAsyncEnumerable<string> tokenStream, string backend)
{
    try
    {
        ProcessStartInfo pInfo;
        if (string.Equals(backend, "wtype", StringComparison.OrdinalIgnoreCase))
        {
            Logger.LogDebug($"Setting up stream injection using wtype...");
            pInfo = new ProcessStartInfo
            {
                FileName = "wtype",
                Arguments = "-",
                UseShellExecute = false,
                CreateNoWindow = true,
                RedirectStandardInput = true
            };
        }
        else // xdotool
        {
            Logger.LogDebug($"Setting up stream injection using xdotool...");
            pInfo = new ProcessStartInfo
            {
                FileName = "xdotool",
                Arguments = "type --clearmodifiers --delay 0 --file -",
                UseShellExecute = false,
                CreateNoWindow = true,
                RedirectStandardInput = true
            };
        }

        using var process = Process.Start(pInfo);
        if (process == null) return;

        Logger.LogDebug("Started stream injection process, waiting for tokens...");

        try
        {
            await foreach (var token in tokenStream)
            {
                Logger.LogDebug($"Injecting token: '{token}'");
                await process.StandardInput.WriteAsync(token);
                // Flush per token so the text is typed as it streams in rather than at the end.
                await process.StandardInput.FlushAsync();
            }

            Logger.LogDebug("Stream injection complete. Closing standard input.");
        }
        finally
        {
            // Always signal end-of-input, even when the token stream fails part-way,
            // so the typing tool is not left waiting on an open pipe.
            process.StandardInput.Close();
        }

        await process.WaitForExitAsync();
    }
    catch (Exception ex)
    {
        Console.WriteLine($"[TextInjector] Error injecting text stream: {ex.Message}");
        Notifications.Notify("Injection Error", "Could not type text stream into window.");
    }
}
} }

View File

@@ -5,8 +5,11 @@ using Toak.Api;
using Toak.Core; using Toak.Core;
using Toak.IO; using Toak.IO;
bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p") || Console.IsOutputRedirected; bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p");
bool copyToClipboard = args.Contains("--copy"); bool copyToClipboard = args.Contains("--copy");
bool verbose = args.Contains("-v") || args.Contains("--verbose");
Logger.Verbose = verbose;
string command = ""; string command = "";
@@ -29,6 +32,7 @@ if (args.Contains("-h") || args.Contains("--help") || (string.IsNullOrEmpty(comm
Console.WriteLine(" -h, --help - Show this help message"); Console.WriteLine(" -h, --help - Show this help message");
Console.WriteLine(" -p, --pipe - Output transcription to stdout instead of typing"); Console.WriteLine(" -p, --pipe - Output transcription to stdout instead of typing");
Console.WriteLine(" --copy - Copy to clipboard instead of typing"); Console.WriteLine(" --copy - Copy to clipboard instead of typing");
Console.WriteLine(" -v, --verbose - Enable detailed debug logging");
return; return;
} }
@@ -71,6 +75,17 @@ if (command == "onboard")
var backend = Console.ReadLine(); var backend = Console.ReadLine();
if (!string.IsNullOrWhiteSpace(backend)) config.TypingBackend = backend.ToLowerInvariant(); if (!string.IsNullOrWhiteSpace(backend)) config.TypingBackend = backend.ToLowerInvariant();
Console.WriteLine();
var availableSkills = Toak.Core.Skills.SkillRegistry.AllSkills.Select(s => s.Name);
Console.WriteLine($"Active Skills (comma separated) [{string.Join(", ", config.ActiveSkills)}]:");
Console.WriteLine($" Available: {string.Join(", ", availableSkills)}");
Console.Write("Selection: ");
var skillsInput = Console.ReadLine();
if (!string.IsNullOrWhiteSpace(skillsInput))
{
config.ActiveSkills = skillsInput.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList();
}
ConfigManager.SaveConfig(config); ConfigManager.SaveConfig(config);
Console.WriteLine("Configuration saved."); Console.WriteLine("Configuration saved.");
return; return;
@@ -85,6 +100,7 @@ if (command == "show")
Console.WriteLine($" Whisper Model: {config.WhisperModel}"); Console.WriteLine($" Whisper Model: {config.WhisperModel}");
Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage)}"); Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage)}");
Console.WriteLine($" Typing Backend: {config.TypingBackend}"); Console.WriteLine($" Typing Backend: {config.TypingBackend}");
Console.WriteLine($" Active Skills: {string.Join(", ", config.ActiveSkills)}");
Console.WriteLine($" Style Mode: {config.StyleMode}"); Console.WriteLine($" Style Mode: {config.StyleMode}");
Console.WriteLine($" Punctuation Module: {config.ModulePunctuation}"); Console.WriteLine($" Punctuation Module: {config.ModulePunctuation}");
Console.WriteLine($" Technical Sanitization: {config.ModuleTechnicalSanitization}"); Console.WriteLine($" Technical Sanitization: {config.ModuleTechnicalSanitization}");
@@ -244,12 +260,15 @@ if (command == "toggle")
{ {
if (StateTracker.IsRecording()) if (StateTracker.IsRecording())
{ {
var config = ConfigManager.LoadConfig();
Notifications.PlaySound(config.StopSoundPath);
if (!pipeToStdout) Console.WriteLine("Stopping recording and transcribing..."); if (!pipeToStdout) Console.WriteLine("Stopping recording and transcribing...");
if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing..."); if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing...");
AudioRecorder.StopRecording(); AudioRecorder.StopRecording();
var config = ConfigManager.LoadConfig(); Logger.LogDebug($"Loaded configuration: LLM={config.LlmModel}, Whisper={config.WhisperModel}, Typing={config.TypingBackend}");
if (string.IsNullOrWhiteSpace(config.GroqApiKey)) if (string.IsNullOrWhiteSpace(config.GroqApiKey))
{ {
@@ -271,7 +290,9 @@ if (command == "toggle")
var stopWatch = Stopwatch.StartNew(); var stopWatch = Stopwatch.StartNew();
// 1. STT // 1. STT
Logger.LogDebug($"Starting STT transcription via Whisper for {wavPath}...");
var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel); var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
Logger.LogDebug($"Raw transcript received: '{transcript}'");
if (string.IsNullOrWhiteSpace(transcript)) if (string.IsNullOrWhiteSpace(transcript))
{ {
@@ -279,32 +300,64 @@ if (command == "toggle")
return; return;
} }
string finalText = transcript;
// 2. LLM Refinement // 2. LLM Refinement
var systemPrompt = PromptBuilder.BuildPrompt(config); var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel); string systemPrompt;
if (detectedSkill != null)
if (string.IsNullOrWhiteSpace(finalText))
{ {
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio."); Logger.LogDebug($"Skill detected: {detectedSkill.Name}");
return; if (!pipeToStdout) Notifications.Notify("Toak Skill Detected", detectedSkill.Name);
} systemPrompt = detectedSkill.GetSystemPrompt(transcript);
// 3. Output
if (pipeToStdout)
{
Console.WriteLine(finalText);
}
else if (copyToClipboard)
{
ClipboardManager.Copy(finalText);
stopWatch.Stop();
Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
} }
else else
{ {
TextInjector.Inject(finalText, config.TypingBackend); systemPrompt = PromptBuilder.BuildPrompt(config);
}
// 3. Output
if (detectedSkill != null && detectedSkill.HandlesExecution)
{
Logger.LogDebug($"Executing skill synchronously: {detectedSkill.Name}");
string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
Logger.LogDebug($"Skill refined text: '{finalText}'");
if (string.IsNullOrWhiteSpace(finalText))
{
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
return;
}
detectedSkill.Execute(finalText);
stopWatch.Stop();
Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
}
else if (pipeToStdout || copyToClipboard)
{
Logger.LogDebug("Starting LLM text refinement (synchronous)...");
string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
Logger.LogDebug($"Refined text received: '{finalText}'");
if (string.IsNullOrWhiteSpace(finalText))
{
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
return;
}
if (pipeToStdout)
{
Console.WriteLine(finalText);
}
else
{
ClipboardManager.Copy(finalText);
stopWatch.Stop();
Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
}
}
else
{
Logger.LogDebug("Starting LLM text refinement (streaming)...");
var tokenStream = groq.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);
Logger.LogDebug("Starting to inject text...");
await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
stopWatch.Stop(); stopWatch.Stop();
Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms"); Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
} }
@@ -323,6 +376,8 @@ if (command == "toggle")
{ {
// Start recording // Start recording
if (!pipeToStdout) Console.WriteLine("Starting recording..."); if (!pipeToStdout) Console.WriteLine("Starting recording...");
var config = ConfigManager.LoadConfig();
Notifications.PlaySound(config.StartSoundPath);
AudioRecorder.StartRecording(); AudioRecorder.StartRecording();
} }
} }

View File

@@ -14,6 +14,10 @@ namespace Toak.Serialization;
[JsonSerializable(typeof(LlamaChoice))] [JsonSerializable(typeof(LlamaChoice))]
[JsonSerializable(typeof(LlamaRequestMessage[]))] [JsonSerializable(typeof(LlamaRequestMessage[]))]
[JsonSerializable(typeof(LlamaChoice[]))] [JsonSerializable(typeof(LlamaChoice[]))]
[JsonSerializable(typeof(LlamaStreamResponse))]
[JsonSerializable(typeof(LlamaStreamChoice))]
[JsonSerializable(typeof(LlamaStreamDelta))]
[JsonSerializable(typeof(LlamaStreamChoice[]))]
internal partial class AppJsonSerializerContext : JsonSerializerContext internal partial class AppJsonSerializerContext : JsonSerializerContext
{ {
} }

View File

@@ -8,4 +8,8 @@
<PublishAot>true</PublishAot> <PublishAot>true</PublishAot>
</PropertyGroup> </PropertyGroup>
<ItemGroup>
<EmbeddedResource Include="Assets\Audio\**" />
</ItemGroup>
</Project> </Project>