1
0

initial commit

This commit is contained in:
2026-02-25 21:51:27 +01:00
commit 863063f124
15 changed files with 1330 additions and 0 deletions

299
Program.cs Normal file
View File

@@ -0,0 +1,299 @@
using System.Diagnostics;
using Toak;
// ---- Command-line parsing ----
// Boolean flags may appear anywhere on the command line.
// Redirected stdout is treated the same as an explicit --pipe.
bool pipeToStdout = args.Contains("--pipe") || Console.IsOutputRedirected;
bool rawOutput = args.Contains("--raw");
bool copyToClipboard = args.Contains("--copy");

// --translate consumes the argument that follows it. A following flag
// (anything starting with "--") is not accepted as a language, so
// `--translate --raw` leaves translateTo empty instead of "--raw".
string translateTo = "";
int translateIndex = Array.IndexOf(args, "--translate");
int translateValueIndex = -1;
if (translateIndex >= 0
    && translateIndex < args.Length - 1
    && !args[translateIndex + 1].StartsWith("--", StringComparison.Ordinal))
{
    translateValueIndex = translateIndex + 1;
    translateTo = args[translateValueIndex];
}

// The command is the first positional argument. The value consumed by
// --translate is skipped so that `toak --translate es toggle` resolves to
// "toggle" rather than "es".
string command = FindCommand(args, translateValueIndex);

// Returns the first element of argv that does not start with "--" and is not
// located at consumedIndex (pass -1 when no element was consumed).
// Returns "" when no such element exists.
static string FindCommand(string[] argv, int consumedIndex)
{
    for (int position = 0; position < argv.Length; position++)
    {
        if (position == consumedIndex)
        {
            continue;
        }

        if (!argv[position].StartsWith("--", StringComparison.Ordinal))
        {
            return argv[position];
        }
    }

    return "";
}
// Print usage when invoked without arguments or with an explicit help flag.
// Without this check `--help` would fall through to the default "toggle"
// command below, and "-h" (no "--" prefix) would be parsed as a command name.
if (args.Length == 0 || args.Contains("--help") || args.Contains("-h"))
{
    Console.WriteLine("Toak: High-speed Linux Dictation");
    Console.WriteLine("Usage:");
    Console.WriteLine(" toak toggle - Starts or stops the recording");
    Console.WriteLine(" toak discard - Abort current recording without transcribing");
    Console.WriteLine(" toak onboard - Configure the application");
    Console.WriteLine(" toak latency-test - Benchmark full pipeline without recording");
    Console.WriteLine(" toak config <key> <value> - Update a specific configuration setting");
    Console.WriteLine(" toak show - Show current configuration");
    Console.WriteLine("Flags:");
    Console.WriteLine(" --pipe - Output transcription to stdout instead of typing");
    Console.WriteLine(" --raw - Skip LLM refinement, output raw transcript");
    Console.WriteLine(" --copy - Copy to clipboard instead of typing");
    Console.WriteLine(" --translate <lang> - Translate output to the specified language");
    Console.WriteLine(" --help - Show this help");
    return;
}

// Only flags were given (e.g. `toak --pipe`): fall back to the toggle command.
if (string.IsNullOrEmpty(command))
{
    command = "toggle";
}
// onboard: interactive setup. Each prompt shows the current state in brackets;
// an empty or whitespace-only answer keeps the existing value.
if (command == "onboard")
{
    var config = ConfigManager.LoadConfig();

    // Do not echo the stored secret to the terminal; only report whether one
    // exists, using the same "Set"/"Not Set" wording as the `show` command.
    var keyStatus = string.IsNullOrEmpty(config.GroqApiKey) ? "Not Set" : "Set";
    Console.Write($"Groq API Key [{keyStatus}]: ");
    var key = Console.ReadLine();
    if (!string.IsNullOrWhiteSpace(key))
    {
        // Trim so that whitespace picked up while pasting is not stored as part of the key.
        config.GroqApiKey = key.Trim();
    }

    Console.Write($"Microphone Spoken Language (e.g. en, es, zh) [{config.WhisperLanguage}]: ");
    var lang = Console.ReadLine();
    if (!string.IsNullOrWhiteSpace(lang))
    {
        config.WhisperLanguage = lang.Trim().ToLowerInvariant();
    }

    Console.Write($"Typing Backend (xdotool or wtype) [{config.TypingBackend}]: ");
    var backend = Console.ReadLine();
    if (!string.IsNullOrWhiteSpace(backend))
    {
        config.TypingBackend = backend.Trim().ToLowerInvariant();
    }

    ConfigManager.SaveConfig(config);
    Console.WriteLine("Configuration saved.");
    return;
}
// show: print the loaded configuration. The API key itself is never printed,
// only whether one is present; an empty spoken language is reported as "Auto".
if (command == "show")
{
    var current = ConfigManager.LoadConfig();

    string apiKeyState = "Set";
    if (string.IsNullOrEmpty(current.GroqApiKey))
    {
        apiKeyState = "Not Set";
    }

    var spokenLanguage = current.WhisperLanguage;
    if (string.IsNullOrEmpty(spokenLanguage))
    {
        spokenLanguage = "Auto";
    }

    Console.WriteLine("Current Configuration:");
    Console.WriteLine($" Groq API Key: {apiKeyState}");
    Console.WriteLine($" Spoken Language: {spokenLanguage}");
    Console.WriteLine($" Typing Backend: {current.TypingBackend}");
    Console.WriteLine($" Style Mode: {current.StyleMode}");
    Console.WriteLine($" Punctuation Module: {current.ModulePunctuation}");
    Console.WriteLine($" Technical Sanitization: {current.ModuleTechnicalSanitization}");
    Console.WriteLine($" Bullet Points: {current.StructureBulletPoints}");
    Console.WriteLine($" Smart Paragraphing: {current.StructureSmartParagraphing}");
    return;
}
// config: `toak config <key> <value>` updates one setting and saves it.
// Key and value are lower-cased before matching. The configuration is only
// written back when the value was accepted.
if (command == "config")
{
    // Positional arguments only: [0] is "config", [1] the key, [2] the value.
    var argsNoFlags = args.Where(a => !a.StartsWith("--", StringComparison.Ordinal)).ToArray();
    if (argsNoFlags.Length < 3)
    {
        Console.WriteLine("Usage: toak config <key> <value>");
        Console.WriteLine("Keys: style, language, backend, punctuation, tech, bullets, paragraphs");
        return;
    }

    var key = argsNoFlags[1].ToLowerInvariant();
    var val = argsNoFlags[2].ToLowerInvariant();
    var config = ConfigManager.LoadConfig();

    // Cleared by any branch that rejects the value, so an unchanged
    // configuration is not rewritten.
    bool accepted = true;

    switch (key)
    {
        case "style":
            if (val is "professional" or "concise" or "casual")
            {
                config.StyleMode = val;
                Console.WriteLine($"StyleMode set to {val}");
            }
            else
            {
                Console.WriteLine("Invalid style. Use: professional, concise, casual");
                accepted = false;
            }
            break;

        case "language":
        case "lang":
            config.WhisperLanguage = val;
            Console.WriteLine($"Spoken Language set to {val}");
            break;

        case "backend":
            config.TypingBackend = val;
            Console.WriteLine($"TypingBackend set to {val}");
            break;

        case "punctuation":
            if (TryReadSwitch(val, out var p))
            {
                config.ModulePunctuation = p;
                Console.WriteLine($"Punctuation set to {p}");
            }
            else
            {
                accepted = false;
            }
            break;

        case "tech":
            if (TryReadSwitch(val, out var t))
            {
                config.ModuleTechnicalSanitization = t;
                Console.WriteLine($"TechnicalSanitization set to {t}");
            }
            else
            {
                accepted = false;
            }
            break;

        case "bullets":
            if (TryReadSwitch(val, out var b))
            {
                config.StructureBulletPoints = b;
                Console.WriteLine($"BulletPoints set to {b}");
            }
            else
            {
                accepted = false;
            }
            break;

        case "paragraphs":
            if (TryReadSwitch(val, out var sp))
            {
                config.StructureSmartParagraphing = sp;
                Console.WriteLine($"SmartParagraphing set to {sp}");
            }
            else
            {
                accepted = false;
            }
            break;

        default:
            Console.WriteLine($"Unknown config key: {key}");
            return;
    }

    if (accepted)
    {
        ConfigManager.SaveConfig(config);
    }
    return;

    // Parses a true/false value; on failure prints the shared error message
    // and returns false.
    static bool TryReadSwitch(string raw, out bool enabled)
    {
        if (bool.TryParse(raw, out enabled))
        {
            return true;
        }

        Console.WriteLine("Invalid value. Use true or false.");
        return false;
    }
}
// discard: abort the recording in progress without transcribing it.
// Console messages are suppressed when output is being piped.
if (command == "discard")
{
    // Nothing is being recorded: report it and stop.
    if (!StateTracker.IsRecording())
    {
        if (!pipeToStdout)
        {
            Console.WriteLine("No active recording to discard.");
        }
        return;
    }

    AudioRecorder.StopRecording();

    // Remove the wav file if one exists at the recorder's path.
    var recordingPath = AudioRecorder.GetWavPath();
    if (File.Exists(recordingPath))
    {
        File.Delete(recordingPath);
    }

    Notifications.Notify("Toak", "Recording discarded");
    if (!pipeToStdout)
    {
        Console.WriteLine("Recording discarded.");
    }
    return;
}
// latency-test: time one transcription call and one refinement call against
// a generated 1-second silent wav file, then print the individual and combined
// latencies compared with a 1500 ms target.
if (command == "latency-test")
{
    var config = ConfigManager.LoadConfig();
    if (string.IsNullOrWhiteSpace(config.GroqApiKey))
    {
        Console.WriteLine("Groq API Key is not configured. Run 'toak onboard'.");
        return;
    }

    Console.WriteLine("Generating 1-second silent audio file for testing...");
    var testWavPath = Path.Combine(Path.GetTempPath(), "toak_latency_test.wav");

    var pInfo = new ProcessStartInfo
    {
        FileName = "ffmpeg",
        UseShellExecute = false,
        CreateNoWindow = true,
        RedirectStandardError = true,
        RedirectStandardOutput = true
    };

    // ArgumentList passes every entry as a separate argument, so a temp path
    // containing spaces is not split the way an interpolated argument string would be.
    foreach (var ffmpegArg in new[] { "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono", "-t", "1", "-y", testWavPath })
    {
        pInfo.ArgumentList.Add(ffmpegArg);
    }

    bool generated = false;
    try
    {
        using var proc = Process.Start(pInfo);
        if (proc is not null)
        {
            // Both streams are redirected, so both must be drained; otherwise
            // the child can block on a full pipe while we wait for it to exit.
            var stdoutDrain = proc.StandardOutput.ReadToEndAsync();
            var stderrDrain = proc.StandardError.ReadToEndAsync();
            await proc.WaitForExitAsync();
            await stdoutDrain;
            await stderrDrain;

            generated = proc.ExitCode == 0 && File.Exists(testWavPath);
        }
    }
    catch (System.ComponentModel.Win32Exception)
    {
        // Process.Start could not launch ffmpeg (for example, it is not
        // installed or not on PATH). Reported through the failure message below.
    }

    if (!generated)
    {
        // Do not leave a partial or stale file behind.
        if (File.Exists(testWavPath))
        {
            File.Delete(testWavPath);
        }
        Console.WriteLine("Failed to generate test audio file using ffmpeg.");
        return;
    }

    var groq = new GroqApiClient(config.GroqApiKey);
    try
    {
        Console.WriteLine("Testing STT (Whisper)...");
        var sttWatch = Stopwatch.StartNew();
        // Only the elapsed time matters here; the transcript is discarded.
        _ = await groq.TranscribeAsync(testWavPath, config.WhisperLanguage);
        sttWatch.Stop();

        Console.WriteLine("Testing LLM (Llama)...");
        // Build the prompt before starting the timer so only the API call is measured.
        var systemPrompt = PromptBuilder.BuildPrompt(config);
        var llmWatch = Stopwatch.StartNew();
        _ = await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt);
        llmWatch.Stop();

        var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds;
        Console.WriteLine();
        Console.WriteLine($"STT latency: {sttWatch.ElapsedMilliseconds}ms");
        Console.WriteLine($"LLM latency: {llmWatch.ElapsedMilliseconds}ms");
        Console.WriteLine($"Total: {(total / 1000.0):0.0}s ({total}ms)");
        Console.WriteLine($"Status: {(total < 1500 ? "OK (under 1.5s target)" : "SLOW (over 1.5s target)")}");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error during test: {ex.Message}");
    }
    finally
    {
        if (File.Exists(testWavPath))
        {
            File.Delete(testWavPath);
        }
    }
    return;
}
// toggle: if a recording is in progress, stop it and run the pipeline
// (transcribe -> optional LLM refinement -> output); otherwise start recording.
// Output goes to stdout (--pipe / redirected output), the clipboard (--copy),
// or is typed through the configured backend.
if (command == "toggle")
{
    if (StateTracker.IsRecording())
    {
        if (!pipeToStdout) Console.WriteLine("Stopping recording and transcribing...");
        if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing...");
        AudioRecorder.StopRecording();

        var config = ConfigManager.LoadConfig();
        // --translate applies to this run only: it is set on the loaded
        // config object, which is not saved here.
        if (!string.IsNullOrWhiteSpace(translateTo))
        {
            config.TargetLanguage = translateTo;
        }

        if (string.IsNullOrWhiteSpace(config.GroqApiKey))
        {
            Notifications.Notify("Toak Error", "Groq API Key is not configured. Run 'toak onboard'.");
            return;
        }

        var groq = new GroqApiClient(config.GroqApiKey);
        var wavPath = AudioRecorder.GetWavPath();
        if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0)
        {
            if (!pipeToStdout) Notifications.Notify("Toak", "No audio recorded.");
            return;
        }

        try
        {
            var stopWatch = Stopwatch.StartNew();

            // 1. STT
            var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage);
            if (string.IsNullOrWhiteSpace(transcript))
            {
                if (!pipeToStdout) Notifications.Notify("Toak", "Could not transcribe audio.");
                return;
            }

            string finalText = transcript;

            // 2. LLM Refinement (skipped with --raw)
            if (!rawOutput)
            {
                var systemPrompt = PromptBuilder.BuildPrompt(config);
                finalText = await groq.RefineTextAsync(transcript, systemPrompt);
            }

            // 3. Output
            if (pipeToStdout)
            {
                Console.WriteLine(finalText);
            }
            else if (copyToClipboard)
            {
                ClipboardManager.Copy(finalText);
                stopWatch.Stop();
                Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
            }
            else
            {
                TextInjector.Inject(finalText, config.TypingBackend);
                stopWatch.Stop();
                Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
            }
        }
        catch (Exception ex)
        {
            if (!pipeToStdout) Notifications.Notify("Toak Error", ex.Message);
            // Report on stderr in every mode: it is separate from the piped
            // transcript on stdout, so pipe-mode failures are no longer silent.
            Console.Error.WriteLine(ex.ToString());
            Environment.ExitCode = 1;
        }
        finally
        {
            // The recording is removed whether or not the pipeline succeeded.
            if (File.Exists(wavPath)) File.Delete(wavPath);
        }
    }
    else
    {
        // Start recording
        if (!pipeToStdout) Console.WriteLine("Starting recording...");
        AudioRecorder.StartRecording();
    }
}
else
{
    // Every other command handled above returns, so reaching this branch
    // means the command was not recognised. Report it instead of exiting
    // silently with a success status.
    Console.Error.WriteLine($"Unknown command: {command}");
    Console.Error.WriteLine("Run 'toak' without arguments to see usage.");
    Environment.ExitCode = 1;
}