feat: Implement a modular skill system with hotword detection, streaming text output, and enhanced logging.
This commit is contained in:
101
Program.cs
101
Program.cs
@@ -5,8 +5,11 @@ using Toak.Api;
|
||||
using Toak.Core;
|
||||
using Toak.IO;
|
||||
|
||||
bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p") || Console.IsOutputRedirected;
|
||||
bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p");
|
||||
bool copyToClipboard = args.Contains("--copy");
|
||||
bool verbose = args.Contains("-v") || args.Contains("--verbose");
|
||||
|
||||
Logger.Verbose = verbose;
|
||||
|
||||
|
||||
string command = "";
|
||||
@@ -29,6 +32,7 @@ if (args.Contains("-h") || args.Contains("--help") || (string.IsNullOrEmpty(comm
|
||||
Console.WriteLine(" -h, --help - Show this help message");
|
||||
Console.WriteLine(" -p, --pipe - Output transcription to stdout instead of typing");
|
||||
Console.WriteLine(" --copy - Copy to clipboard instead of typing");
|
||||
Console.WriteLine(" -v, --verbose - Enable detailed debug logging");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -71,6 +75,17 @@ if (command == "onboard")
|
||||
var backend = Console.ReadLine();
|
||||
if (!string.IsNullOrWhiteSpace(backend)) config.TypingBackend = backend.ToLowerInvariant();
|
||||
|
||||
Console.WriteLine();
|
||||
var availableSkills = Toak.Core.Skills.SkillRegistry.AllSkills.Select(s => s.Name);
|
||||
Console.WriteLine($"Active Skills (comma separated) [{string.Join(", ", config.ActiveSkills)}]:");
|
||||
Console.WriteLine($" Available: {string.Join(", ", availableSkills)}");
|
||||
Console.Write("Selection: ");
|
||||
var skillsInput = Console.ReadLine();
|
||||
if (!string.IsNullOrWhiteSpace(skillsInput))
|
||||
{
|
||||
config.ActiveSkills = skillsInput.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList();
|
||||
}
|
||||
|
||||
ConfigManager.SaveConfig(config);
|
||||
Console.WriteLine("Configuration saved.");
|
||||
return;
|
||||
@@ -85,6 +100,7 @@ if (command == "show")
|
||||
Console.WriteLine($" Whisper Model: {config.WhisperModel}");
|
||||
Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage)}");
|
||||
Console.WriteLine($" Typing Backend: {config.TypingBackend}");
|
||||
Console.WriteLine($" Active Skills: {string.Join(", ", config.ActiveSkills)}");
|
||||
Console.WriteLine($" Style Mode: {config.StyleMode}");
|
||||
Console.WriteLine($" Punctuation Module: {config.ModulePunctuation}");
|
||||
Console.WriteLine($" Technical Sanitization: {config.ModuleTechnicalSanitization}");
|
||||
@@ -244,12 +260,15 @@ if (command == "toggle")
|
||||
{
|
||||
if (StateTracker.IsRecording())
|
||||
{
|
||||
var config = ConfigManager.LoadConfig();
|
||||
Notifications.PlaySound(config.StopSoundPath);
|
||||
|
||||
if (!pipeToStdout) Console.WriteLine("Stopping recording and transcribing...");
|
||||
if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing...");
|
||||
|
||||
AudioRecorder.StopRecording();
|
||||
|
||||
var config = ConfigManager.LoadConfig();
|
||||
Logger.LogDebug($"Loaded configuration: LLM={config.LlmModel}, Whisper={config.WhisperModel}, Typing={config.TypingBackend}");
|
||||
|
||||
if (string.IsNullOrWhiteSpace(config.GroqApiKey))
|
||||
{
|
||||
@@ -271,7 +290,9 @@ if (command == "toggle")
|
||||
var stopWatch = Stopwatch.StartNew();
|
||||
|
||||
// 1. STT
|
||||
Logger.LogDebug($"Starting STT transcription via Whisper for {wavPath}...");
|
||||
var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
|
||||
Logger.LogDebug($"Raw transcript received: '{transcript}'");
|
||||
|
||||
if (string.IsNullOrWhiteSpace(transcript))
|
||||
{
|
||||
@@ -279,32 +300,64 @@ if (command == "toggle")
|
||||
return;
|
||||
}
|
||||
|
||||
string finalText = transcript;
|
||||
|
||||
// 2. LLM Refinement
|
||||
var systemPrompt = PromptBuilder.BuildPrompt(config);
|
||||
finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
|
||||
|
||||
if (string.IsNullOrWhiteSpace(finalText))
|
||||
var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
|
||||
string systemPrompt;
|
||||
if (detectedSkill != null)
|
||||
{
|
||||
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
|
||||
return;
|
||||
}
|
||||
|
||||
// 3. Output
|
||||
if (pipeToStdout)
|
||||
{
|
||||
Console.WriteLine(finalText);
|
||||
}
|
||||
else if (copyToClipboard)
|
||||
{
|
||||
ClipboardManager.Copy(finalText);
|
||||
stopWatch.Stop();
|
||||
Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
|
||||
Logger.LogDebug($"Skill detected: {detectedSkill.Name}");
|
||||
if (!pipeToStdout) Notifications.Notify("Toak Skill Detected", detectedSkill.Name);
|
||||
systemPrompt = detectedSkill.GetSystemPrompt(transcript);
|
||||
}
|
||||
else
|
||||
{
|
||||
TextInjector.Inject(finalText, config.TypingBackend);
|
||||
systemPrompt = PromptBuilder.BuildPrompt(config);
|
||||
}
|
||||
|
||||
// 3. Output
|
||||
if (detectedSkill != null && detectedSkill.HandlesExecution)
|
||||
{
|
||||
Logger.LogDebug($"Executing skill synchronously: {detectedSkill.Name}");
|
||||
string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
|
||||
Logger.LogDebug($"Skill refined text: '{finalText}'");
|
||||
if (string.IsNullOrWhiteSpace(finalText))
|
||||
{
|
||||
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
|
||||
return;
|
||||
}
|
||||
|
||||
detectedSkill.Execute(finalText);
|
||||
stopWatch.Stop();
|
||||
Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
|
||||
}
|
||||
else if (pipeToStdout || copyToClipboard)
|
||||
{
|
||||
Logger.LogDebug("Starting LLM text refinement (synchronous)...");
|
||||
string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
|
||||
Logger.LogDebug($"Refined text received: '{finalText}'");
|
||||
if (string.IsNullOrWhiteSpace(finalText))
|
||||
{
|
||||
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (pipeToStdout)
|
||||
{
|
||||
Console.WriteLine(finalText);
|
||||
}
|
||||
else
|
||||
{
|
||||
ClipboardManager.Copy(finalText);
|
||||
stopWatch.Stop();
|
||||
Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Logger.LogDebug("Starting LLM text refinement (streaming)...");
|
||||
var tokenStream = groq.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);
|
||||
Logger.LogDebug("Starting to inject text...");
|
||||
await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
|
||||
stopWatch.Stop();
|
||||
Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
|
||||
}
|
||||
@@ -323,6 +376,8 @@ if (command == "toggle")
|
||||
{
|
||||
// Start recording
|
||||
if (!pipeToStdout) Console.WriteLine("Starting recording...");
|
||||
var config = ConfigManager.LoadConfig();
|
||||
Notifications.PlaySound(config.StartSoundPath);
|
||||
AudioRecorder.StartRecording();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user