1
0

feat: Implement a modular skill system with hotword detection, streaming text output, and enhanced logging.

This commit is contained in:
2026-02-27 00:39:32 +01:00
parent 4ee4bc5457
commit a365448399
18 changed files with 451 additions and 23 deletions

View File

@@ -5,8 +5,11 @@ using Toak.Api;
using Toak.Core;
using Toak.IO;
bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p") || Console.IsOutputRedirected;
bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p");
bool copyToClipboard = args.Contains("--copy");
bool verbose = args.Contains("-v") || args.Contains("--verbose");
Logger.Verbose = verbose;
string command = "";
@@ -29,6 +32,7 @@ if (args.Contains("-h") || args.Contains("--help") || (string.IsNullOrEmpty(comm
Console.WriteLine(" -h, --help - Show this help message");
Console.WriteLine(" -p, --pipe - Output transcription to stdout instead of typing");
Console.WriteLine(" --copy - Copy to clipboard instead of typing");
Console.WriteLine(" -v, --verbose - Enable detailed debug logging");
return;
}
@@ -71,6 +75,17 @@ if (command == "onboard")
var backend = Console.ReadLine();
if (!string.IsNullOrWhiteSpace(backend)) config.TypingBackend = backend.ToLowerInvariant();
Console.WriteLine();
var availableSkills = Toak.Core.Skills.SkillRegistry.AllSkills.Select(s => s.Name);
Console.WriteLine($"Active Skills (comma separated) [{string.Join(", ", config.ActiveSkills)}]:");
Console.WriteLine($" Available: {string.Join(", ", availableSkills)}");
Console.Write("Selection: ");
var skillsInput = Console.ReadLine();
if (!string.IsNullOrWhiteSpace(skillsInput))
{
config.ActiveSkills = skillsInput.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList();
}
ConfigManager.SaveConfig(config);
Console.WriteLine("Configuration saved.");
return;
@@ -85,6 +100,7 @@ if (command == "show")
Console.WriteLine($" Whisper Model: {config.WhisperModel}");
Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage)}");
Console.WriteLine($" Typing Backend: {config.TypingBackend}");
Console.WriteLine($" Active Skills: {string.Join(", ", config.ActiveSkills)}");
Console.WriteLine($" Style Mode: {config.StyleMode}");
Console.WriteLine($" Punctuation Module: {config.ModulePunctuation}");
Console.WriteLine($" Technical Sanitization: {config.ModuleTechnicalSanitization}");
@@ -244,12 +260,15 @@ if (command == "toggle")
{
if (StateTracker.IsRecording())
{
var config = ConfigManager.LoadConfig();
Notifications.PlaySound(config.StopSoundPath);
if (!pipeToStdout) Console.WriteLine("Stopping recording and transcribing...");
if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing...");
AudioRecorder.StopRecording();
var config = ConfigManager.LoadConfig();
Logger.LogDebug($"Loaded configuration: LLM={config.LlmModel}, Whisper={config.WhisperModel}, Typing={config.TypingBackend}");
if (string.IsNullOrWhiteSpace(config.GroqApiKey))
{
@@ -271,7 +290,9 @@ if (command == "toggle")
var stopWatch = Stopwatch.StartNew();
// 1. STT
Logger.LogDebug($"Starting STT transcription via Whisper for {wavPath}...");
var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
Logger.LogDebug($"Raw transcript received: '{transcript}'");
if (string.IsNullOrWhiteSpace(transcript))
{
@@ -279,32 +300,64 @@ if (command == "toggle")
return;
}
string finalText = transcript;
// 2. LLM Refinement
var systemPrompt = PromptBuilder.BuildPrompt(config);
finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
if (string.IsNullOrWhiteSpace(finalText))
var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
string systemPrompt;
if (detectedSkill != null)
{
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
return;
}
// 3. Output
if (pipeToStdout)
{
Console.WriteLine(finalText);
}
else if (copyToClipboard)
{
ClipboardManager.Copy(finalText);
stopWatch.Stop();
Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
Logger.LogDebug($"Skill detected: {detectedSkill.Name}");
if (!pipeToStdout) Notifications.Notify("Toak Skill Detected", detectedSkill.Name);
systemPrompt = detectedSkill.GetSystemPrompt(transcript);
}
else
{
TextInjector.Inject(finalText, config.TypingBackend);
systemPrompt = PromptBuilder.BuildPrompt(config);
}
// 3. Output
if (detectedSkill != null && detectedSkill.HandlesExecution)
{
Logger.LogDebug($"Executing skill synchronously: {detectedSkill.Name}");
string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
Logger.LogDebug($"Skill refined text: '{finalText}'");
if (string.IsNullOrWhiteSpace(finalText))
{
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
return;
}
detectedSkill.Execute(finalText);
stopWatch.Stop();
Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
}
else if (pipeToStdout || copyToClipboard)
{
Logger.LogDebug("Starting LLM text refinement (synchronous)...");
string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
Logger.LogDebug($"Refined text received: '{finalText}'");
if (string.IsNullOrWhiteSpace(finalText))
{
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
return;
}
if (pipeToStdout)
{
Console.WriteLine(finalText);
}
else
{
ClipboardManager.Copy(finalText);
stopWatch.Stop();
Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
}
}
else
{
Logger.LogDebug("Starting LLM text refinement (streaming)...");
var tokenStream = groq.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);
Logger.LogDebug("Starting to inject text...");
await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
stopWatch.Stop();
Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
}
@@ -323,6 +376,8 @@ if (command == "toggle")
{
// Start recording
if (!pipeToStdout) Console.WriteLine("Starting recording...");
var config = ConfigManager.LoadConfig();
Notifications.PlaySound(config.StartSoundPath);
AudioRecorder.StartRecording();
}
}