1
0

feat: Implement new commands for recording control, configuration management, latency testing, and status display, updating program entry and project references.

This commit is contained in:
2026-02-27 01:12:23 +01:00
parent d910fe1441
commit 482fe84eb1
8 changed files with 494 additions and 368 deletions

View File

@@ -0,0 +1,51 @@
using System.Threading.Tasks;
using Spectre.Console;
using Toak.Configuration;
namespace Toak.Commands;
/// <summary>
/// Implements the <c>config</c> command: changes a single setting in the stored configuration.
/// </summary>
public static class ConfigUpdaterCommand
{
    /// <summary>
    /// Applies one key/value change to the configuration and saves it.
    /// Nothing is saved when the key is unknown or the value fails validation.
    /// </summary>
    /// <param name="key">
    /// Setting to change (case-insensitive): llm, whisper, style, language/lang, backend,
    /// punctuation, tech, bullets or paragraphs.
    /// </param>
    /// <param name="val">New value. It is lower-cased before being validated and stored.</param>
    /// <param name="verbose">Value assigned to <c>Toak.Core.Logger.Verbose</c>.</param>
    public static async Task ExecuteAsync(string key, string val, bool verbose)
    {
        Toak.Core.Logger.Verbose = verbose;
        var config = ConfigManager.LoadConfig();

        key = key.ToLowerInvariant();
        // NOTE(review): this also lower-cases model identifiers; confirm the API treats them case-insensitively.
        val = val.ToLowerInvariant();

        // Parses the (already lower-cased) value as a boolean and reports the problem when it is not one.
        bool TryReadFlag(out bool flag)
        {
            if (bool.TryParse(val, out flag))
            {
                return true;
            }

            AnsiConsole.MarkupLine("[red]Invalid value. Use true or false.[/]");
            return false;
        }

        switch (key)
        {
            case "llm":
                config.LlmModel = val;
                break;

            case "whisper":
                config.WhisperModel = val;
                break;

            case "style":
                if (val != "professional" && val != "concise" && val != "casual")
                {
                    AnsiConsole.MarkupLine("[red]Invalid style.[/] Use: professional, concise, casual");
                    return;
                }

                config.StyleMode = val;
                break;

            case "language":
            case "lang":
                config.WhisperLanguage = val;
                break;

            case "backend":
                config.TypingBackend = val;
                break;

            case "punctuation":
                if (!TryReadFlag(out var punctuation))
                {
                    return;
                }

                config.ModulePunctuation = punctuation;
                break;

            case "tech":
                if (!TryReadFlag(out var technical))
                {
                    return;
                }

                config.ModuleTechnicalSanitization = technical;
                break;

            case "bullets":
                if (!TryReadFlag(out var bullets))
                {
                    return;
                }

                config.StructureBulletPoints = bullets;
                break;

            case "paragraphs":
                if (!TryReadFlag(out var paragraphs))
                {
                    return;
                }

                config.StructureSmartParagraphing = paragraphs;
                break;

            default:
                // The key comes straight from the command line; escape it so '[' or ']' is printed
                // literally instead of being parsed (and rejected) as console markup.
                AnsiConsole.MarkupLine($"[red]Unknown config key: {Markup.Escape(key)}[/]");
                return;
        }

        ConfigManager.SaveConfig(config);

        // Both pieces are user input and must be escaped before being embedded in markup.
        AnsiConsole.MarkupLine($"[green]Successfully[/] set {Markup.Escape(key)} to [blue]{Markup.Escape(val)}[/].");
    }
}

View File

@@ -0,0 +1,35 @@
using System.IO;
using System.Threading.Tasks;
using Spectre.Console;
using Toak.Audio;
using Toak.Core;
using Toak.IO;
namespace Toak.Commands;
/// <summary>
/// Implements the <c>discard</c> command: aborts the current recording without transcribing it.
/// </summary>
public static class DiscardCommand
{
    /// <summary>
    /// Stops an active recording, deletes its WAV file when one exists and sends a notification.
    /// When no recording is active only a console message is produced.
    /// </summary>
    /// <param name="pipeToStdout">When <c>true</c>, console messages are suppressed.</param>
    /// <param name="verbose">Value assigned to <c>Logger.Verbose</c>.</param>
    public static async Task ExecuteAsync(bool pipeToStdout, bool verbose)
    {
        Logger.Verbose = verbose;

        // Console feedback is skipped entirely in pipe mode.
        void Report(string markup)
        {
            if (pipeToStdout)
            {
                return;
            }

            AnsiConsole.MarkupLine(markup);
        }

        if (!StateTracker.IsRecording())
        {
            Report("[grey]No active recording to discard.[/]");
            return;
        }

        AudioRecorder.StopRecording();

        var recordingPath = AudioRecorder.GetWavPath();
        if (File.Exists(recordingPath))
        {
            File.Delete(recordingPath);
        }

        Notifications.Notify("Toak", "Recording discarded");
        Report("[yellow]Recording discarded.[/]");
    }
}

View File

@@ -0,0 +1,95 @@
using System;
using System.Diagnostics;
using System.IO;
using System.Threading.Tasks;
using Spectre.Console;
using Toak.Api;
using Toak.Configuration;
using Toak.Core;
namespace Toak.Commands;
/// <summary>
/// Implements the <c>latency-test</c> command: times one transcription request and one
/// text-refinement request and prints the results.
/// </summary>
public static class LatencyTestCommand
{
    /// <summary>
    /// Generates a one-second silent WAV file with ffmpeg, sends it through the speech-to-text
    /// call, sends a fixed sentence through the refinement call, and prints both latencies plus
    /// their total against a 1.5 s target. The temporary WAV file is removed afterwards.
    /// </summary>
    /// <param name="verbose">Value assigned to <c>Logger.Verbose</c>.</param>
    public static async Task ExecuteAsync(bool verbose)
    {
        Logger.Verbose = verbose;

        var config = ConfigManager.LoadConfig();
        if (string.IsNullOrWhiteSpace(config.GroqApiKey))
        {
            AnsiConsole.MarkupLine("[red]Groq API Key is not configured.[/] Run 'toak onboard'.");
            return;
        }

        AnsiConsole.MarkupLine("Generating 1-second silent audio file for testing...");
        var testWavPath = Path.Combine(Path.GetTempPath(), "toak_latency_test.wav");

        try
        {
            if (!await TryGenerateSilentWavAsync(testWavPath))
            {
                AnsiConsole.MarkupLine("[red]Failed to generate test audio file using ffmpeg.[/]");
                return;
            }

            var groq = new GroqApiClient(config.GroqApiKey);

            await AnsiConsole.Status()
                .StartAsync("Running latency test...", async ctx =>
                {
                    ctx.Status("Testing STT (Whisper)...");
                    var sttWatch = Stopwatch.StartNew();
                    // Only the elapsed time matters; the transcript itself is discarded.
                    _ = await groq.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel);
                    sttWatch.Stop();

                    ctx.Status("Testing LLM (Llama)...");
                    var systemPrompt = PromptBuilder.BuildPrompt(config);
                    var llmWatch = Stopwatch.StartNew();
                    _ = await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel);
                    llmWatch.Stop();

                    var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds;

                    AnsiConsole.WriteLine();
                    var table = new Table();
                    table.AddColumn("Operation");
                    table.AddColumn("Latency (ms)");
                    table.AddRow("STT", sttWatch.ElapsedMilliseconds.ToString());
                    table.AddRow("LLM", llmWatch.ElapsedMilliseconds.ToString());
                    table.AddRow("[bold]Total[/]", $"[bold]{total}ms[/]");
                    AnsiConsole.Write(table);

                    if (total < 1500)
                    {
                        AnsiConsole.MarkupLine($"[green]Status: OK (under 1.5s target). Total time: {(total / 1000.0):0.0}s.[/]");
                    }
                    else
                    {
                        AnsiConsole.MarkupLine($"[yellow]Status: SLOW (over 1.5s target). Total time: {(total / 1000.0):0.0}s.[/]");
                    }
                });
        }
        catch (Exception ex)
        {
            // Exception text is arbitrary (it may contain '[' or ']'); escape it so that printing
            // the error cannot itself fail while parsing markup.
            AnsiConsole.MarkupLine($"[red]Error during test: {Markup.Escape(ex.Message)}[/]");
        }
        finally
        {
            if (File.Exists(testWavPath))
            {
                File.Delete(testWavPath);
            }
        }
    }

    /// <summary>
    /// Runs ffmpeg to write one second of silence to <paramref name="outputPath"/>.
    /// </summary>
    /// <param name="outputPath">Destination WAV path; an existing file is overwritten (<c>-y</c>).</param>
    /// <returns>
    /// <c>true</c> when ffmpeg exited with code 0 and the file exists; <c>false</c> when ffmpeg
    /// could not be started, reported a failure, or produced no file.
    /// </returns>
    private static async Task<bool> TryGenerateSilentWavAsync(string outputPath)
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = "ffmpeg",
            UseShellExecute = false,
            CreateNoWindow = true,
            RedirectStandardError = true,
            RedirectStandardOutput = true
        };

        // ArgumentList passes every entry as a separate argument, so a temp path that contains
        // spaces is not split the way it would be inside a single argument string.
        foreach (var argument in new[] { "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono", "-t", "1", "-y", outputPath })
        {
            startInfo.ArgumentList.Add(argument);
        }

        try
        {
            using var ffmpeg = Process.Start(startInfo);
            if (ffmpeg is null)
            {
                return false;
            }

            // Redirected pipes must be drained, otherwise the child can block once a pipe buffer fills.
            var stdoutTask = ffmpeg.StandardOutput.ReadToEndAsync();
            var stderrTask = ffmpeg.StandardError.ReadToEndAsync();
            await ffmpeg.WaitForExitAsync();
            await Task.WhenAll(stdoutTask, stderrTask);

            if (ffmpeg.ExitCode != 0)
            {
                Logger.LogDebug($"ffmpeg exited with code {ffmpeg.ExitCode}: {stderrTask.Result}");
                return false;
            }
        }
        catch (System.ComponentModel.Win32Exception ex)
        {
            // Raised when the executable cannot be started, e.g. ffmpeg is not installed or not on PATH.
            Logger.LogDebug($"Could not start ffmpeg: {ex.Message}");
            return false;
        }

        return File.Exists(outputPath);
    }
}

View File

@@ -0,0 +1,74 @@
using System;
using System.Linq;
using System.Threading.Tasks;
using Spectre.Console;
using Toak.Configuration;
using Toak.Core.Skills;
namespace Toak.Commands;
/// <summary>
/// Implements the <c>onboard</c> command: an interactive wizard that fills in the configuration.
/// </summary>
public static class OnboardCommand
{
    /// <summary>
    /// Prompts for the API key, LLM model, Whisper model, spoken language, typing backend and
    /// (when any skills are registered) the active skills, then saves the configuration.
    /// </summary>
    /// <param name="verbose">Value assigned to <c>Toak.Core.Logger.Verbose</c>.</param>
    public static async Task ExecuteAsync(bool verbose)
    {
        Toak.Core.Logger.Verbose = verbose;
        var config = ConfigManager.LoadConfig();

        AnsiConsole.Write(new FigletText("Toak").Color(Color.Green));
        AnsiConsole.MarkupLine("[grey]Welcome to the Toak configuration wizard.[/]");
        AnsiConsole.WriteLine();

        config.GroqApiKey = AnsiConsole.Prompt(
            new TextPrompt<string>("Groq API Key:")
                .DefaultValue(string.IsNullOrWhiteSpace(config.GroqApiKey) ? "" : config.GroqApiKey)
                .AllowEmpty());

        // A selection prompt returns the chosen item itself; the converter only affects how the
        // item is displayed, so the result needs no label stripping.
        config.LlmModel = AnsiConsole.Prompt(
            new SelectionPrompt<string>()
                .Title("Select [green]LLM Model[/]:")
                .AddChoices(new[] { "openai/gpt-oss-20b", "llama-3.1-8b-instant" })
                .UseConverter(c => c == "openai/gpt-oss-20b" ? "openai/gpt-oss-20b (Fastest)" : "llama-3.1-8b-instant (Cheapest)"));

        config.WhisperModel = AnsiConsole.Prompt(
            new SelectionPrompt<string>()
                .Title("Select [green]Whisper Model[/]:")
                .AddChoices(new[] { "whisper-large-v3", "whisper-large-v3-turbo" })
                .UseConverter(c => c == "whisper-large-v3" ? "whisper-large-v3 (Accurate)" : "whisper-large-v3-turbo (Fast)"));

        var language = AnsiConsole.Prompt(
            new TextPrompt<string>("Microphone Spoken Language (e.g. en, es, zh):")
                .DefaultValue(string.IsNullOrWhiteSpace(config.WhisperLanguage) ? "en" : config.WhisperLanguage)
                .AllowEmpty()
                .Validate(lang =>
                {
                    if (string.IsNullOrWhiteSpace(lang))
                    {
                        return ValidationResult.Success();
                    }

                    if (lang.Contains(",") || lang.Contains(" "))
                    {
                        return ValidationResult.Error("[red]Please provide only one language code (e.g., 'en' not 'en, es')[/]");
                    }

                    return ValidationResult.Success();
                }));

        // Store the code lower-cased, matching what the 'config' command stores for the same setting.
        config.WhisperLanguage = language.ToLowerInvariant();

        config.TypingBackend = AnsiConsole.Prompt(
            new SelectionPrompt<string>()
                .Title("Select [green]Typing Backend[/]:")
                .AddChoices(new[] { "wtype", "xdotool" }));

        var availableSkills = SkillRegistry.AllSkills.Select(s => s.Name).ToList();
        if (availableSkills.Any())
        {
            config.ActiveSkills = AnsiConsole.Prompt(
                new MultiSelectionPrompt<string>()
                    .Title("Select [green]Active Skills[/]:")
                    .NotRequired()
                    .InstructionsText("[grey](Press [blue]<space>[/] to toggle a skill, [green]<enter>[/] to accept)[/]")
                    .AddChoices(availableSkills));
        }

        ConfigManager.SaveConfig(config);
        AnsiConsole.MarkupLine("\n[bold green]Configuration saved successfully![/]");
    }
}

32
Commands/ShowCommand.cs Normal file
View File

@@ -0,0 +1,32 @@
using System.Threading.Tasks;
using Spectre.Console;
using Toak.Configuration;
namespace Toak.Commands;
/// <summary>
/// Implements the <c>show</c> command: prints the current configuration as a table.
/// </summary>
public static class ShowCommand
{
    /// <summary>
    /// Loads the configuration and renders one table row per setting. The API key itself is
    /// never printed, only whether it is set.
    /// </summary>
    /// <param name="verbose">Value assigned to <c>Toak.Core.Logger.Verbose</c>.</param>
    public static async Task ExecuteAsync(bool verbose)
    {
        Toak.Core.Logger.Verbose = verbose;
        var config = ConfigManager.LoadConfig();

        var table = new Table();
        table.AddColumn("Setting");
        table.AddColumn("Value");

        // Table cells given as strings are parsed as console markup. Free-form configuration
        // values are escaped so that a '[' or ']' stored in the config is shown literally
        // instead of making the command fail.
        var language = string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage;

        table.AddRow("Groq API Key", string.IsNullOrEmpty(config.GroqApiKey) ? "[red]Not Set[/]" : "[green]Set[/]");
        table.AddRow("LLM Model", $"[blue]{Markup.Escape(config.LlmModel)}[/]");
        table.AddRow("Whisper Model", $"[blue]{Markup.Escape(config.WhisperModel)}[/]");
        table.AddRow("Spoken Language", $"[yellow]{Markup.Escape(language)}[/]");
        table.AddRow("Typing Backend", Markup.Escape(config.TypingBackend));
        table.AddRow("Active Skills", Markup.Escape(string.Join(", ", config.ActiveSkills)));
        table.AddRow("Style Mode", Markup.Escape(config.StyleMode));
        table.AddRow("Punctuation Module", config.ModulePunctuation.ToString());
        table.AddRow("Technical Sanitization", config.ModuleTechnicalSanitization.ToString());
        table.AddRow("Bullet Points", config.StructureBulletPoints.ToString());
        table.AddRow("Smart Paragraphing", config.StructureSmartParagraphing.ToString());

        AnsiConsole.Write(table);
    }
}

159
Commands/ToggleCommand.cs Normal file
View File

@@ -0,0 +1,159 @@
using System;
using System.Diagnostics;
using System.IO;
using System.Threading.Tasks;
using Spectre.Console;
using Toak.Audio;
using Toak.Configuration;
using Toak.Api;
using Toak.Core;
using Toak.IO;
namespace Toak.Commands;
/// <summary>
/// Implements the <c>toggle</c> command: starts a recording when none is active, otherwise
/// stops the active recording and runs it through transcription, refinement and output.
/// </summary>
public static class ToggleCommand
{
    /// <summary>
    /// Starts recording, or stops the current recording and processes it.
    /// The refined text is handed to a detected execution skill, written to stdout, copied to
    /// the clipboard, or streamed to the typing backend, in that order of precedence.
    /// The recorded WAV file is deleted once processing has been attempted.
    /// </summary>
    /// <param name="pipeToStdout">
    /// Write the refined text to stdout; console status output and most notifications are suppressed.
    /// </param>
    /// <param name="copyToClipboard">Copy the refined text to the clipboard instead of typing it.</param>
    /// <param name="verbose">Value assigned to <c>Logger.Verbose</c>.</param>
    public static async Task ExecuteAsync(bool pipeToStdout, bool copyToClipboard, bool verbose)
    {
        Logger.Verbose = verbose;

        if (!StateTracker.IsRecording())
        {
            // Start recording
            if (!pipeToStdout)
            {
                AnsiConsole.MarkupLine("[green]Starting recording...[/]");
            }

            var startConfig = ConfigManager.LoadConfig();
            Notifications.PlaySound(startConfig.StartSoundPath);
            AudioRecorder.StartRecording();
            return;
        }

        var config = ConfigManager.LoadConfig();
        Notifications.PlaySound(config.StopSoundPath);
        if (!pipeToStdout)
        {
            AnsiConsole.MarkupLine("[yellow]Stopping recording and transcribing...[/]");
            Notifications.Notify("Toak", "Transcribing...");
        }

        AudioRecorder.StopRecording();
        Logger.LogDebug($"Loaded configuration: LLM={config.LlmModel}, Whisper={config.WhisperModel}, Typing={config.TypingBackend}");

        if (string.IsNullOrWhiteSpace(config.GroqApiKey))
        {
            Notifications.Notify("Toak Error", "Groq API Key is not configured. Run 'toak onboard'.");

            // In pipe mode stdout carries the transcription for the consumer, so status text
            // must not be written there; the notification above still reports the problem.
            if (!pipeToStdout)
            {
                AnsiConsole.MarkupLine("[red]Groq API Key is not configured.[/] Run 'toak onboard'.");
            }

            return;
        }

        var groq = new GroqApiClient(config.GroqApiKey);
        var wavPath = AudioRecorder.GetWavPath();
        if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0)
        {
            if (!pipeToStdout)
            {
                Notifications.Notify("Toak", "No audio recorded.");
            }

            return;
        }

        try
        {
            var stopWatch = Stopwatch.StartNew();

            // 1. STT
            Logger.LogDebug($"Starting STT transcription via Whisper for {wavPath}...");
            string transcript = string.Empty;
            if (!pipeToStdout)
            {
                // Show a spinner while waiting; skipped in pipe mode to keep stdout clean.
                await AnsiConsole.Status().StartAsync("Transcribing...", async _ =>
                {
                    transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
                });
            }
            else
            {
                transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
            }

            Logger.LogDebug($"Raw transcript received: '{transcript}'");
            if (string.IsNullOrWhiteSpace(transcript))
            {
                if (!pipeToStdout)
                {
                    Notifications.Notify("Toak", "No speech detected.");
                }

                return;
            }

            // 2. LLM Refinement
            var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
            string systemPrompt;
            if (detectedSkill != null)
            {
                Logger.LogDebug($"Skill detected: {detectedSkill.Name}");
                if (!pipeToStdout)
                {
                    Notifications.Notify("Toak Skill Detected", detectedSkill.Name);
                }

                systemPrompt = detectedSkill.GetSystemPrompt(transcript);
            }
            else
            {
                systemPrompt = PromptBuilder.BuildPrompt(config);
            }

            bool isExecutionSkill = detectedSkill != null && detectedSkill.HandlesExecution;

            // 3. Output
            if (isExecutionSkill || pipeToStdout || copyToClipboard)
            {
                // These targets need the complete text, so refinement is awaited as a whole.
                Logger.LogDebug("Starting LLM text refinement (synchronous)...");
                string finalText = string.Empty;
                if (!pipeToStdout)
                {
                    await AnsiConsole.Status().StartAsync("Refining text...", async _ =>
                    {
                        finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
                    });
                }
                else
                {
                    finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
                }

                Logger.LogDebug($"Refined text received: '{finalText}'");
                if (string.IsNullOrWhiteSpace(finalText))
                {
                    if (!pipeToStdout)
                    {
                        Notifications.Notify("Toak", "Dropped short or empty audio.");
                    }

                    return;
                }

                if (isExecutionSkill)
                {
                    detectedSkill!.Execute(finalText);
                    stopWatch.Stop();
                    Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
                }
                else if (pipeToStdout)
                {
                    Console.WriteLine(finalText);
                }
                else
                {
                    ClipboardManager.Copy(finalText);
                    stopWatch.Stop();
                    Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
                }
            }
            else
            {
                // Default target: tokens are passed to the typing backend as they arrive.
                Logger.LogDebug("Starting LLM text refinement (streaming)...");
                var tokenStream = groq.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);
                Logger.LogDebug("Starting to inject text...");
                await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
                stopWatch.Stop();
                Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
            }
        }
        catch (Exception ex)
        {
            if (!pipeToStdout)
            {
                Notifications.Notify("Toak Error", ex.Message);

                // Exception text is arbitrary (it may contain '[' or ']'); escape it so that
                // reporting the error cannot itself fail while parsing markup.
                AnsiConsole.MarkupLine($"[red]Error:[/] {Markup.Escape(ex.Message)}");
            }
        }
        finally
        {
            if (File.Exists(wavPath))
            {
                File.Delete(wavPath);
            }
        }
    }
}

View File

@@ -1,383 +1,58 @@
using System.Diagnostics; using System.CommandLine;
using Toak.Audio; using System.Threading.Tasks;
using Toak.Configuration; using Toak.Commands;
using Toak.Api;
using Toak.Core;
using Toak.IO;
bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p"); namespace Toak;
bool copyToClipboard = args.Contains("--copy");
bool verbose = args.Contains("-v") || args.Contains("--verbose");
Logger.Verbose = verbose; public class Program
string command = "";
if (args.Length > 0 && !args[0].StartsWith("-"))
{ {
command = args[0]; public static async Task<int> Main(string[] args)
}
if (args.Contains("-h") || args.Contains("--help") || (string.IsNullOrEmpty(command) && args.Length == 0))
{ {
Console.WriteLine("Toak: High-speed Linux Dictation"); var rootCommand = new RootCommand("Toak: High-speed Linux Dictation");
Console.WriteLine("Usage:");
Console.WriteLine(" toak toggle - Starts or stops the recording");
Console.WriteLine(" toak discard - Abort current recording without transcribing");
Console.WriteLine(" toak onboard - Configure the application");
Console.WriteLine(" toak latency-test - Benchmark full pipeline without recording");
Console.WriteLine(" toak config <key> <value> - Update a specific configuration setting");
Console.WriteLine(" toak show - Show current configuration");
Console.WriteLine("Flags:");
Console.WriteLine(" -h, --help - Show this help message");
Console.WriteLine(" -p, --pipe - Output transcription to stdout instead of typing");
Console.WriteLine(" --copy - Copy to clipboard instead of typing");
Console.WriteLine(" -v, --verbose - Enable detailed debug logging");
return;
}
if (string.IsNullOrEmpty(command)) var pipeOption = new Option<bool>(new[] { "--pipe", "-p" }, "Output transcription to stdout instead of typing");
{ var copyOption = new Option<bool>("--copy", "Copy to clipboard instead of typing");
Console.WriteLine("Error: Please specify a command (e.g. 'toggle'). Use 'toak --help' for usage."); var verboseOption = new Option<bool>(new[] { "--verbose", "-v" }, "Enable detailed debug logging");
return;
}
if (command == "onboard")
{
var config = ConfigManager.LoadConfig();
Console.Write($"Groq API Key [{config.GroqApiKey}]: ");
var key = Console.ReadLine();
if (!string.IsNullOrWhiteSpace(key)) config.GroqApiKey = key;
Console.WriteLine(); rootCommand.AddGlobalOption(verboseOption);
Console.WriteLine("LLM Model:");
Console.WriteLine(" 1) openai/gpt-oss-20b -- fastest");
Console.WriteLine(" 2) llama-3.1-8b-instant -- cheapest, but dumb");
Console.Write($"Select 1 or 2 [{config.LlmModel}]: ");
var llmSelection = Console.ReadLine()?.Trim();
if (llmSelection == "1" || llmSelection == "openai/gpt-oss-20b") config.LlmModel = "openai/gpt-oss-20b";
else if (llmSelection == "2" || llmSelection == "llama-3.1-8b-instant") config.LlmModel = "llama-3.1-8b-instant";
Console.WriteLine(); // Toggle Command
Console.WriteLine("Whisper Model:"); var toggleCmd = new Command("toggle", "Starts or stops the recording");
Console.WriteLine(" 1) whisper-large-v3 -- large model, very accurate"); toggleCmd.AddOption(pipeOption);
Console.WriteLine(" 2) whisper-large-v3-turbo -- very fast, a bit less accurate"); toggleCmd.AddOption(copyOption);
Console.Write($"Select 1 or 2 [{config.WhisperModel}]: "); toggleCmd.SetHandler(ToggleCommand.ExecuteAsync, pipeOption, copyOption, verboseOption);
var whisperSelection = Console.ReadLine()?.Trim(); rootCommand.AddCommand(toggleCmd);
if (whisperSelection == "1" || whisperSelection == "whisper-large-v3") config.WhisperModel = "whisper-large-v3";
else if (whisperSelection == "2" || whisperSelection == "whisper-large-v3-turbo") config.WhisperModel = "whisper-large-v3-turbo";
Console.WriteLine(); // Discard Command
Console.Write($"Microphone Spoken Language (e.g. en, es, zh) [{config.WhisperLanguage}]: "); var discardCmd = new Command("discard", "Abort current recording without transcribing");
var lang = Console.ReadLine(); discardCmd.AddOption(pipeOption);
if (!string.IsNullOrWhiteSpace(lang)) config.WhisperLanguage = lang.ToLowerInvariant(); discardCmd.SetHandler(DiscardCommand.ExecuteAsync, pipeOption, verboseOption);
rootCommand.AddCommand(discardCmd);
Console.Write($"Typing Backend (xdotool or wtype) [{config.TypingBackend}]: "); // Onboard Command
var backend = Console.ReadLine(); var onboardCmd = new Command("onboard", "Configure the application");
if (!string.IsNullOrWhiteSpace(backend)) config.TypingBackend = backend.ToLowerInvariant(); onboardCmd.SetHandler(OnboardCommand.ExecuteAsync, verboseOption);
rootCommand.AddCommand(onboardCmd);
Console.WriteLine(); // Latency Test Command
var availableSkills = Toak.Core.Skills.SkillRegistry.AllSkills.Select(s => s.Name); var latencyCmd = new Command("latency-test", "Benchmark full pipeline without recording");
Console.WriteLine($"Active Skills (comma separated) [{string.Join(", ", config.ActiveSkills)}]:"); latencyCmd.SetHandler(LatencyTestCommand.ExecuteAsync, verboseOption);
Console.WriteLine($" Available: {string.Join(", ", availableSkills)}"); rootCommand.AddCommand(latencyCmd);
Console.Write("Selection: ");
var skillsInput = Console.ReadLine();
if (!string.IsNullOrWhiteSpace(skillsInput))
{
config.ActiveSkills = skillsInput.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList();
}
ConfigManager.SaveConfig(config); // Show Command
Console.WriteLine("Configuration saved."); var showCmd = new Command("show", "Show current configuration");
return; showCmd.SetHandler(ShowCommand.ExecuteAsync, verboseOption);
} rootCommand.AddCommand(showCmd);
if (command == "show") // Config Command
{ var configCmd = new Command("config", "Update a specific configuration setting");
var config = ConfigManager.LoadConfig(); var keyArg = new Argument<string>("key", "Configuration key (e.g., llm, whisper, lang)");
Console.WriteLine("Current Configuration:"); var valArg = new Argument<string>("value", "Configuration value");
Console.WriteLine($" Groq API Key: {(string.IsNullOrEmpty(config.GroqApiKey) ? "Not Set" : "Set")}"); configCmd.AddArgument(keyArg);
Console.WriteLine($" LLM Model: {config.LlmModel}"); configCmd.AddArgument(valArg);
Console.WriteLine($" Whisper Model: {config.WhisperModel}"); configCmd.SetHandler(ConfigUpdaterCommand.ExecuteAsync, keyArg, valArg, verboseOption);
Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage)}"); rootCommand.AddCommand(configCmd);
Console.WriteLine($" Typing Backend: {config.TypingBackend}");
Console.WriteLine($" Active Skills: {string.Join(", ", config.ActiveSkills)}");
Console.WriteLine($" Style Mode: {config.StyleMode}");
Console.WriteLine($" Punctuation Module: {config.ModulePunctuation}");
Console.WriteLine($" Technical Sanitization: {config.ModuleTechnicalSanitization}");
Console.WriteLine($" Bullet Points: {config.StructureBulletPoints}");
Console.WriteLine($" Smart Paragraphing: {config.StructureSmartParagraphing}");
return;
}
if (command == "config") return await rootCommand.InvokeAsync(args);
{
var argsNoFlags = args.Where(a => !a.StartsWith("--")).ToArray();
if (argsNoFlags.Length < 3)
{
Console.WriteLine("Usage: toak config <key> <value>");
Console.WriteLine("Keys: llm, whisper, style, language, backend, punctuation, tech, bullets, paragraphs");
return;
}
var key = argsNoFlags[1].ToLowerInvariant();
var val = argsNoFlags[2].ToLowerInvariant();
var config = ConfigManager.LoadConfig();
switch (key)
{
case "llm":
config.LlmModel = val;
Console.WriteLine($"LLM Model set to {val}");
break;
case "whisper":
config.WhisperModel = val;
Console.WriteLine($"Whisper Model set to {val}");
break;
case "style":
if (val == "professional" || val == "concise" || val == "casual") {
config.StyleMode = val;
Console.WriteLine($"StyleMode set to {val}");
} else {
Console.WriteLine("Invalid style. Use: professional, concise, casual");
}
break;
case "language":
case "lang":
config.WhisperLanguage = val;
Console.WriteLine($"Spoken Language set to {val}");
break;
case "backend":
config.TypingBackend = val;
Console.WriteLine($"TypingBackend set to {val}");
break;
case "punctuation":
if (bool.TryParse(val, out var p)) { config.ModulePunctuation = p; Console.WriteLine($"Punctuation set to {p}"); }
else Console.WriteLine("Invalid value. Use true or false.");
break;
case "tech":
if (bool.TryParse(val, out var t)) { config.ModuleTechnicalSanitization = t; Console.WriteLine($"TechnicalSanitization set to {t}"); }
else Console.WriteLine("Invalid value. Use true or false.");
break;
case "bullets":
if (bool.TryParse(val, out var b)) { config.StructureBulletPoints = b; Console.WriteLine($"BulletPoints set to {b}"); }
else Console.WriteLine("Invalid value. Use true or false.");
break;
case "paragraphs":
if (bool.TryParse(val, out var sp)) { config.StructureSmartParagraphing = sp; Console.WriteLine($"SmartParagraphing set to {sp}"); }
else Console.WriteLine("Invalid value. Use true or false.");
break;
default:
Console.WriteLine($"Unknown config key: {key}");
return;
}
ConfigManager.SaveConfig(config);
return;
}
if (command == "discard")
{
if (StateTracker.IsRecording())
{
AudioRecorder.StopRecording();
var wavPath = AudioRecorder.GetWavPath();
if (File.Exists(wavPath)) File.Delete(wavPath);
Notifications.Notify("Toak", "Recording discarded");
if (!pipeToStdout) Console.WriteLine("Recording discarded.");
}
else
{
if (!pipeToStdout) Console.WriteLine("No active recording to discard.");
}
return;
}
if (command == "latency-test")
{
var config = ConfigManager.LoadConfig();
if (string.IsNullOrWhiteSpace(config.GroqApiKey))
{
Console.WriteLine("Groq API Key is not configured. Run 'toak onboard'.");
return;
}
Console.WriteLine("Generating 1-second silent audio file for testing...");
var testWavPath = Path.Combine(Path.GetTempPath(), "toak_latency_test.wav");
var pInfo = new ProcessStartInfo
{
FileName = "ffmpeg",
Arguments = $"-f lavfi -i anullsrc=r=44100:cl=mono -t 1 -y {testWavPath}",
UseShellExecute = false,
CreateNoWindow = true,
RedirectStandardError = true,
RedirectStandardOutput = true
};
var proc = Process.Start(pInfo);
proc?.WaitForExit();
if (!File.Exists(testWavPath))
{
Console.WriteLine("Failed to generate test audio file using ffmpeg.");
return;
}
var groq = new GroqApiClient(config.GroqApiKey);
try
{
Console.WriteLine("Testing STT (Whisper)...");
var sttWatch = Stopwatch.StartNew();
var transcript = await groq.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel);
sttWatch.Stop();
Console.WriteLine("Testing LLM (Llama)...");
var systemPrompt = PromptBuilder.BuildPrompt(config);
var llmWatch = Stopwatch.StartNew();
var refinedText = await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel);
llmWatch.Stop();
var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds;
Console.WriteLine();
Console.WriteLine($"STT latency: {sttWatch.ElapsedMilliseconds}ms");
Console.WriteLine($"LLM latency: {llmWatch.ElapsedMilliseconds}ms");
Console.WriteLine($"Total: {(total / 1000.0):0.0}s ({total}ms)");
Console.WriteLine($"Status: {(total < 1500 ? "OK (under 1.5s target)" : "SLOW (over 1.5s target)")}");
}
catch (Exception ex)
{
Console.WriteLine($"Error during test: {ex.Message}");
}
finally
{
if (File.Exists(testWavPath)) File.Delete(testWavPath);
}
return;
}
if (command == "toggle")
{
if (StateTracker.IsRecording())
{
var config = ConfigManager.LoadConfig();
Notifications.PlaySound(config.StopSoundPath);
if (!pipeToStdout) Console.WriteLine("Stopping recording and transcribing...");
if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing...");
AudioRecorder.StopRecording();
Logger.LogDebug($"Loaded configuration: LLM={config.LlmModel}, Whisper={config.WhisperModel}, Typing={config.TypingBackend}");
if (string.IsNullOrWhiteSpace(config.GroqApiKey))
{
Notifications.Notify("Toak Error", "Groq API Key is not configured. Run 'toak onboard'.");
return;
}
var groq = new GroqApiClient(config.GroqApiKey);
var wavPath = AudioRecorder.GetWavPath();
if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0)
{
if (!pipeToStdout) Notifications.Notify("Toak", "No audio recorded.");
return;
}
try
{
var stopWatch = Stopwatch.StartNew();
// 1. STT
Logger.LogDebug($"Starting STT transcription via Whisper for {wavPath}...");
var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
Logger.LogDebug($"Raw transcript received: '{transcript}'");
if (string.IsNullOrWhiteSpace(transcript))
{
if (!pipeToStdout) Notifications.Notify("Toak", "No speech detected.");
return;
}
// 2. LLM Refinement
var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
string systemPrompt;
if (detectedSkill != null)
{
Logger.LogDebug($"Skill detected: {detectedSkill.Name}");
if (!pipeToStdout) Notifications.Notify("Toak Skill Detected", detectedSkill.Name);
systemPrompt = detectedSkill.GetSystemPrompt(transcript);
}
else
{
systemPrompt = PromptBuilder.BuildPrompt(config);
}
// 3. Output
if (detectedSkill != null && detectedSkill.HandlesExecution)
{
Logger.LogDebug($"Executing skill synchronously: {detectedSkill.Name}");
string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
Logger.LogDebug($"Skill refined text: '{finalText}'");
if (string.IsNullOrWhiteSpace(finalText))
{
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
return;
}
detectedSkill.Execute(finalText);
stopWatch.Stop();
Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
}
else if (pipeToStdout || copyToClipboard)
{
Logger.LogDebug("Starting LLM text refinement (synchronous)...");
string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
Logger.LogDebug($"Refined text received: '{finalText}'");
if (string.IsNullOrWhiteSpace(finalText))
{
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
return;
}
if (pipeToStdout)
{
Console.WriteLine(finalText);
}
else
{
ClipboardManager.Copy(finalText);
stopWatch.Stop();
Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
}
}
else
{
Logger.LogDebug("Starting LLM text refinement (streaming)...");
var tokenStream = groq.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);
Logger.LogDebug("Starting to inject text...");
await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
stopWatch.Stop();
Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
}
}
catch (Exception ex)
{
if (!pipeToStdout) Notifications.Notify("Toak Error", ex.Message);
if (!pipeToStdout) Console.WriteLine(ex.ToString());
}
finally
{
if (File.Exists(wavPath)) File.Delete(wavPath);
}
}
else
{
// Start recording
if (!pipeToStdout) Console.WriteLine("Starting recording...");
var config = ConfigManager.LoadConfig();
Notifications.PlaySound(config.StartSoundPath);
AudioRecorder.StartRecording();
} }
} }

View File

@@ -12,4 +12,9 @@
<EmbeddedResource Include="Assets\Audio\**" /> <EmbeddedResource Include="Assets\Audio\**" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<PackageReference Include="Spectre.Console" Version="0.54.0" />
<PackageReference Include="System.CommandLine" Version="2.0.0-beta4.22272.1" />
</ItemGroup>
</Project> </Project>