feat: Implement a modular skill system with hotword detection, streaming text output, and enhanced logging.
This commit is contained in:
@@ -4,6 +4,7 @@ using System.Text.Json.Serialization;
|
||||
|
||||
using Toak.Api.Models;
|
||||
using Toak.Serialization;
|
||||
using Toak.Core;
|
||||
|
||||
namespace Toak.Api;
|
||||
|
||||
@@ -39,7 +40,9 @@ public class GroqApiClient
|
||||
content.Add(new StringContent(firstLang), "language");
|
||||
}
|
||||
|
||||
Logger.LogDebug($"Sending Whisper API request ({modelToUse})...");
|
||||
var response = await _httpClient.PostAsync("audio/transcriptions", content);
|
||||
Logger.LogDebug($"Whisper API response status: {response.StatusCode}");
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
@@ -67,7 +70,9 @@ public class GroqApiClient
|
||||
|
||||
var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json");
|
||||
|
||||
Logger.LogDebug($"Sending Llama API request (model: {requestBody.Model})...");
|
||||
var response = await _httpClient.PostAsync("chat/completions", jsonContent);
|
||||
Logger.LogDebug($"Llama API response status: {response.StatusCode}");
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
@@ -80,4 +85,55 @@ public class GroqApiClient
|
||||
|
||||
return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty;
|
||||
}
|
||||
|
||||
/// <summary>
/// Sends the transcript to the "chat/completions" endpoint with streaming enabled and
/// yields every non-empty content fragment as soon as it is read from the response.
/// </summary>
/// <param name="rawTranscript">Transcript text; sent as the user message wrapped in &lt;transcript&gt; tags.</param>
/// <param name="systemPrompt">Text sent as the system message.</param>
/// <param name="model">Model identifier; a null or blank value falls back to "openai/gpt-oss-20b".</param>
/// <returns>An async sequence of content fragments in the order they arrive.</returns>
/// <exception cref="Exception">The API answered with a non-success status code.</exception>
public async IAsyncEnumerable<string> RefineTextStreamAsync(string rawTranscript, string systemPrompt, string model = "openai/gpt-oss-20b")
{
    // Server-sent-event framing: payload lines start with "data:", optionally followed by a space.
    const string DataPrefix = "data:";
    const string DoneMarker = "[DONE]";

    var requestBody = new LlamaRequest
    {
        Model = string.IsNullOrWhiteSpace(model) ? "openai/gpt-oss-20b" : model,
        Temperature = 0.0,
        Stream = true,
        Messages = new[]
        {
            new LlamaRequestMessage { Role = "system", Content = systemPrompt },
            new LlamaRequestMessage { Role = "user", Content = $"<transcript>{rawTranscript}</transcript>" }
        }
    };

    var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json");

    using var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions") { Content = jsonContent };
    request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/event-stream"));

    Logger.LogDebug($"Sending Llama Stream API request (model: {requestBody.Model})...");

    // ResponseHeadersRead returns as soon as the headers arrive so the body can be consumed incrementally.
    using var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
    Logger.LogDebug($"Llama Stream API response status: {response.StatusCode}");

    if (!response.IsSuccessStatusCode)
    {
        var error = await response.Content.ReadAsStringAsync();
        throw new Exception($"Llama API Error: {response.StatusCode} - {error}");
    }

    using var stream = await response.Content.ReadAsStreamAsync();
    using var reader = new StreamReader(stream);

    string? line;
    while ((line = await reader.ReadLineAsync()) != null)
    {
        // Blank separator lines and any non-data fields carry no payload for us.
        // Ordinal comparison: this is a protocol token, not linguistic text.
        if (!line.StartsWith(DataPrefix, StringComparison.Ordinal)) continue;

        var data = line.Substring(DataPrefix.Length).Trim();

        // An empty payload cannot be deserialized; skip it instead of throwing.
        if (data.Length == 0) continue;

        // The sentinel marks the end of the stream.
        if (data == DoneMarker) break;

        var chunk = JsonSerializer.Deserialize(data, AppJsonSerializerContext.Default.LlamaStreamResponse);
        var content = chunk?.Choices?.FirstOrDefault()?.Delta?.Content;

        if (!string.IsNullOrEmpty(content))
        {
            yield return content;
        }
    }
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,8 @@ public class LlamaRequest
|
||||
public LlamaRequestMessage[] Messages { get; set; } = Array.Empty<LlamaRequestMessage>();
|
||||
[JsonPropertyName("temperature")]
|
||||
public double Temperature { get; set; } = 0.0;
|
||||
[JsonPropertyName("stream")]
|
||||
public bool? Stream { get; set; }
|
||||
}
|
||||
|
||||
public class LlamaResponse
|
||||
@@ -31,3 +33,22 @@ public class LlamaChoice
|
||||
[JsonPropertyName("message")]
|
||||
public LlamaRequestMessage Message { get; set; } = new();
|
||||
}
|
||||
|
||||
/// <summary>
/// A single chunk of a streamed chat-completion response.
/// </summary>
public class LlamaStreamResponse
{
    /// <summary>
    /// Bound to the JSON property "choices"; starts out as an empty array.
    /// </summary>
    [JsonPropertyName("choices")]
    public LlamaStreamChoice[] Choices { get; set; }
        = Array.Empty<LlamaStreamChoice>();
}
|
||||
|
||||
/// <summary>
/// One entry of the "choices" array in a streamed chunk.
/// </summary>
public class LlamaStreamChoice
{
    /// <summary>
    /// Bound to the JSON property "delta"; defaults to a fresh, empty delta.
    /// </summary>
    [JsonPropertyName("delta")]
    public LlamaStreamDelta Delta { get; set; } = new LlamaStreamDelta();
}
|
||||
|
||||
/// <summary>
/// The incremental payload of a streamed choice.
/// </summary>
public class LlamaStreamDelta
{
    /// <summary>
    /// Bound to the JSON property "content"; null until a value is assigned.
    /// </summary>
    [JsonPropertyName("content")]
    public string? Content { get; set; } = null;
}
|
||||
|
||||
|
||||
BIN
Assets/Audio/beep.wav
Normal file
BIN
Assets/Audio/beep.wav
Normal file
Binary file not shown.
@@ -15,9 +15,12 @@ public static class AudioRecorder
|
||||
{
|
||||
if (File.Exists(WavPath))
|
||||
{
|
||||
Logger.LogDebug($"Deleting old audio file: {WavPath}");
|
||||
File.Delete(WavPath);
|
||||
}
|
||||
|
||||
Logger.LogDebug("Starting ffmpeg to record audio...");
|
||||
|
||||
var pInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "ffmpeg",
|
||||
@@ -41,6 +44,7 @@ public static class AudioRecorder
|
||||
var pid = StateTracker.GetRecordingPid();
|
||||
if (pid.HasValue)
|
||||
{
|
||||
Logger.LogDebug($"Found active recording process with PID {pid.Value}. Attempting to stop...");
|
||||
try
|
||||
{
|
||||
var process = Process.GetProcessById(pid.Value);
|
||||
|
||||
@@ -13,4 +13,7 @@ public class ToakConfig
|
||||
public string WhisperLanguage { get; set; } = string.Empty;
|
||||
public string LlmModel { get; set; } = "openai/gpt-oss-20b";
|
||||
public string WhisperModel { get; set; } = "whisper-large-v3-turbo";
|
||||
public string StartSoundPath { get; set; } = "Assets/Audio/beep.wav";
|
||||
public string StopSoundPath { get; set; } = "Assets/Audio/beep.wav";
|
||||
public List<string> ActiveSkills { get; set; } = new List<string> { "Terminal", "Translate" };
|
||||
}
|
||||
|
||||
15
Core/Logger.cs
Normal file
15
Core/Logger.cs
Normal file
@@ -0,0 +1,15 @@
|
||||
namespace Toak.Core;
|
||||
|
||||
/// <summary>
/// Minimal opt-in debug logger controlled by a single static switch.
/// </summary>
public static class Logger
{
    /// <summary>
    /// Enables debug output. Defaults to false, in which case <see cref="LogDebug"/> writes nothing.
    /// </summary>
    public static bool Verbose { get; set; } = false;

    /// <summary>
    /// Writes a "[DEBUG] HH:mm:ss.fff - message" line when <see cref="Verbose"/> is enabled.
    /// </summary>
    /// <param name="message">Text appended after the timestamp.</param>
    public static void LogDebug(string message)
    {
        if (!Verbose) return;

        // Diagnostics go to stderr so that stdout stays clean for program output
        // that may be piped into another process.
        var logLine = $"[DEBUG] {DateTime.Now:HH:mm:ss.fff} - {message}";
        Console.Error.WriteLine(logLine);
    }
}
|
||||
@@ -23,6 +23,7 @@ public static class PromptBuilder
|
||||
sb.AppendLine();
|
||||
sb.AppendLine("FORMATTING RULES:");
|
||||
sb.AppendLine("- CRITICAL: If the <transcript> contains nothing, or very short gibberish, output NOTHING AT ALL (an empty string).");
|
||||
sb.AppendLine("- LANGUAGE DETECT: The transcript may be in English or a different language (e.g., Hungarian, Spanish). Detect the language and ensure your output and grammar corrections are STRICTLY in that same language.");
|
||||
|
||||
|
||||
|
||||
|
||||
13
Core/Skills/ISkill.cs
Normal file
13
Core/Skills/ISkill.cs
Normal file
@@ -0,0 +1,13 @@
|
||||
namespace Toak.Core.Skills;
|
||||
|
||||
/// <summary>
/// Contract for a voice-triggered skill: a named behaviour selected by a spoken
/// hotword that supplies its own LLM system prompt.
/// </summary>
public interface ISkill
{
    /// <summary>Identifier of the skill.</summary>
    string Name { get; }

    /// <summary>Human-readable summary of what the skill does.</summary>
    string Description { get; }

    /// <summary>Phrases that select this skill.</summary>
    string[] Hotwords { get; }

    /// <summary>
    /// Indicates whether the skill consumes the LLM result itself through
    /// <see cref="Execute"/>.
    /// </summary>
    bool HandlesExecution { get; }

    /// <summary>Returns the system prompt to send to the LLM for this skill.</summary>
    /// <param name="rawTranscript">The unrefined transcript.</param>
    string GetSystemPrompt(string rawTranscript);

    /// <summary>Acts on the text produced by the LLM.</summary>
    /// <param name="llmResult">The LLM output for the transcript.</param>
    void Execute(string llmResult);
}
|
||||
29
Core/Skills/SkillRegistry.cs
Normal file
29
Core/Skills/SkillRegistry.cs
Normal file
@@ -0,0 +1,29 @@
|
||||
namespace Toak.Core.Skills;
|
||||
|
||||
/// <summary>
/// Holds the built-in skills and picks the one addressed by a transcript.
/// </summary>
public static class SkillRegistry
{
    /// <summary>Every skill known to the application, in detection priority order.</summary>
    public static readonly ISkill[] AllSkills = new ISkill[]
    {
        new TerminalSkill(),
        new TranslateSkill()
    };

    /// <summary>
    /// Returns the first active skill one of whose hotwords starts the transcript.
    /// </summary>
    /// <param name="transcript">Raw transcript; surrounding whitespace is ignored.</param>
    /// <param name="activeSkillNames">Names of the enabled skills, compared case-insensitively.</param>
    /// <returns>The matching skill, or null when no active skill matches or the input is blank/null.</returns>
    public static ISkill? DetectSkill(string transcript, IEnumerable<string> activeSkillNames)
    {
        // Nothing can match a blank transcript, and a missing name list means no skill is enabled.
        if (string.IsNullOrWhiteSpace(transcript) || activeSkillNames is null) return null;

        // Materialize once: avoids re-enumerating the caller's sequence for every skill.
        var enabledNames = new HashSet<string>(activeSkillNames, StringComparer.OrdinalIgnoreCase);
        if (enabledNames.Count == 0) return null;

        string normalizedTranscript = transcript.Trim();

        foreach (var skill in AllSkills)
        {
            if (!enabledNames.Contains(skill.Name)) continue;

            foreach (var hotword in skill.Hotwords)
            {
                // An empty hotword would be a prefix of every transcript; never treat it as a match.
                if (string.IsNullOrEmpty(hotword)) continue;

                if (normalizedTranscript.StartsWith(hotword, StringComparison.OrdinalIgnoreCase))
                {
                    return skill;
                }
            }
        }

        return null;
    }
}
|
||||
41
Core/Skills/TerminalSkill.cs
Normal file
41
Core/Skills/TerminalSkill.cs
Normal file
@@ -0,0 +1,41 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Toak.Core.Skills;
|
||||
|
||||
/// <summary>
/// Skill that asks the LLM for a single bash command and launches it.
/// </summary>
public class TerminalSkill : ISkill
{
    /// <inheritdoc />
    public string Name => "Terminal";

    /// <inheritdoc />
    public string Description => "Translates an intent into a bash command and runs it in the background.";

    /// <inheritdoc />
    public string[] Hotwords => new[] { "System terminal", "System command" };

    /// <inheritdoc />
    public bool HandlesExecution => true;

    /// <summary>Returns the fixed prompt instructing the LLM to emit one raw bash command.</summary>
    /// <param name="rawTranscript">Not used by this skill.</param>
    public string GetSystemPrompt(string rawTranscript)
    {
        return @"You are a command-line assistant. The user will ask you to perform a task.
Translate the request into a single bash command.
Output ONLY the raw bash command to achieve this task. Do not include markdown formatting, backticks, or explanations.";
    }

    /// <summary>
    /// Starts <c>bash -c</c> with the given command and sends a notification.
    /// Failures are written to the console and not rethrown.
    /// </summary>
    /// <param name="llmResult">The command text produced by the LLM.</param>
    public void Execute(string llmResult)
    {
        try
        {
            Console.WriteLine($"[TerminalSkill] Executing: {llmResult}");

            // NOTE(review): this runs model-generated text as a shell command without
            // confirmation or sandboxing — consider a confirmation step or an allow-list.
            var pInfo = new ProcessStartInfo
            {
                FileName = "bash",
                UseShellExecute = false,
                CreateNoWindow = true
            };

            // ArgumentList passes each entry verbatim as one argument, so quotes and
            // backslashes inside the command need no manual escaping.
            pInfo.ArgumentList.Add("-c");
            pInfo.ArgumentList.Add(llmResult);

            // Disposing only releases our handle; the started command is not waited for.
            using var process = Process.Start(pInfo);
            IO.Notifications.Notify("Toak Terminal Executed", llmResult);
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[TerminalSkill Error] {ex.Message}");
        }
    }
}
|
||||
23
Core/Skills/TranslateSkill.cs
Normal file
23
Core/Skills/TranslateSkill.cs
Normal file
@@ -0,0 +1,23 @@
|
||||
namespace Toak.Core.Skills;
|
||||
|
||||
/// <summary>
/// Skill whose prompt asks the LLM to translate the transcript into the language
/// named at its start. It performs no action of its own.
/// </summary>
public class TranslateSkill : ISkill
{
    // Fixed system prompt returned by GetSystemPrompt.
    private const string TranslationPrompt = @"You are an expert translator. The user wants to translate the following text.
The first few words identify the target language (e.g. 'Translate to Spanish:', 'Translate into Hungarian:').
Translate the REST of the transcript into that target language.
Output ONLY the final translated text. Do not include markdown, explanations, or quotes.";

    /// <inheritdoc />
    public string Name
    {
        get { return "Translate"; }
    }

    /// <inheritdoc />
    public string Description
    {
        get { return "Translates the spoken text into another language on the fly."; }
    }

    /// <inheritdoc />
    public string[] Hotwords
    {
        get { return new string[] { "System translate to", "System translate into" }; }
    }

    /// <inheritdoc />
    public bool HandlesExecution
    {
        get { return false; }
    }

    /// <summary>Returns the fixed translation prompt.</summary>
    /// <param name="rawTranscript">Not used by this skill.</param>
    public string GetSystemPrompt(string rawTranscript) => TranslationPrompt;

    /// <summary>Intentionally empty: this skill reports HandlesExecution as false.</summary>
    /// <param name="llmResult">Ignored.</param>
    public void Execute(string llmResult)
    {
    }
}
|
||||
@@ -11,6 +11,7 @@ public static class StateTracker
|
||||
|
||||
public static void SetRecording(int ffmpegPid)
|
||||
{
|
||||
Logger.LogDebug($"Setting recording state with PID {ffmpegPid}");
|
||||
File.WriteAllText(StateFilePath, ffmpegPid.ToString());
|
||||
}
|
||||
|
||||
@@ -21,6 +22,7 @@ public static class StateTracker
|
||||
var content = File.ReadAllText(StateFilePath).Trim();
|
||||
if (int.TryParse(content, out var pid))
|
||||
{
|
||||
Logger.LogDebug($"Read recording PID {pid} from state file");
|
||||
return pid;
|
||||
}
|
||||
}
|
||||
@@ -31,6 +33,7 @@ public static class StateTracker
|
||||
{
|
||||
if (File.Exists(StateFilePath))
|
||||
{
|
||||
Logger.LogDebug("Clearing recording state file");
|
||||
File.Delete(StateFilePath);
|
||||
}
|
||||
}
|
||||
|
||||
55
IDEAS.md
55
IDEAS.md
@@ -197,6 +197,61 @@ toak status # Check if daemon is running
|
||||
|
||||
---
|
||||
|
||||
## Future Innovations
|
||||
|
||||
### Hotword Commands (LLM Routing)
|
||||
Instruct the LLM in `PromptBuilder` to output a specific JSON structure when it is given a command phrase. If a specific hotword like "System command" or "Computer dictate" is detected at the start of the audio, Toak parses the JSON, skips typing the text out via `xdotool`/`wtype`, and instead executes a pre-defined background action.
|
||||
|
||||
If it doesn't hear a command phrase, it simply returns the text normally and types it.
|
||||
|
||||
**How it works (Under the Hood):**
|
||||
The LLM is prompted to always return JSON in the background when a command is directed at the assistant.
|
||||
```json
|
||||
{
|
||||
"is_command": true,
|
||||
"action": "append_to_notes",
|
||||
"content": "Buy milk and eggs",
|
||||
"meta": {}
|
||||
}
|
||||
```
|
||||
|
||||
**Alternative Hotword Ideas:**
|
||||
Since "Toak" is not a real English word, Whisper might transcribe it as "talk", "toke", or "oak." It is highly recommended to use distinct, phonetically clear hotwords such as:
|
||||
- **"System..."** (e.g. "System note:")
|
||||
- **"Computer..."** (e.g. "Computer search:")
|
||||
- **"Action..."** (e.g. "Action commit:")
|
||||
- **"Dictate..."** (e.g. "Dictate terminal:")
|
||||
- **"Listen up..."** (e.g. "Listen up translate...")
|
||||
|
||||
**Prompt Ideas & Use Cases:**
|
||||
|
||||
1. **Quick Notes / Brainstorming:**
|
||||
- *Hotword:* `"System note:"` or `"Drop this in my notes:"`
|
||||
- *Action:* Appends the spoken text to a configured `~/notes.md` file in the background without interrupting your current window.
|
||||
- *Example:* "System note: I need to remember to check the database migrations later today."
|
||||
|
||||
2. **Terminal / CLI Execution:**
|
||||
- *Hotword:* `"Computer terminal:"` or `"Command:"`
|
||||
- *Action:* Takes the natural language command, asks the LLM to translate it into a bash command, and types it into a new tmux window or background process.
|
||||
- *Example:* "Computer terminal: find all python files modified in the last 2 days."
|
||||
|
||||
3. **Git Commit Messages:**
|
||||
- *Hotword:* `"Action commit:"`
|
||||
- *Action:* Automatically formats the dictated text into a standard conventional commit message, stages all files, and commits them.
|
||||
- *Example:* "Action commit: I refactored the audio recorder to use native processes instead of the old library." -> LLM outputs `refactor(audio): migrate to native processes` and runs `git commit -am "..."`.
|
||||
|
||||
4. **Web Search / Lookup:**
|
||||
- *Hotword:* `"System search:"` or `"Look up:"`
|
||||
- *Action:* Opens your default browser and performs a search for the spoken phrase.
|
||||
- *Example:* "System search: MDN documentation for grid layout."
|
||||
|
||||
5. **Translating on the fly:**
|
||||
- *Hotword:* `"Translate to Spanish:"`
|
||||
- *Action:* Instead of typing English, it types the translated version of the rest of the sentence.
|
||||
- *Example:* "Translate to Spanish: Hello, how are you today?" -> Types out `Hola, ¿cómo estás hoy?`.
|
||||
|
||||
---
|
||||
|
||||
## Implementation Priority
|
||||
|
||||
### Tier 1: High Impact, Low Effort
|
||||
|
||||
@@ -22,4 +22,47 @@ public static class Notifications
|
||||
Console.WriteLine($"[Notifications] Failed to send notification: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Plays a sound file through <c>paplay</c>. Failures are written to the console and not rethrown.
/// </summary>
/// <param name="soundPath">
/// Absolute path, or a path relative to the application base directory. When no file
/// exists there, an embedded resource named "Toak." + the path with separators replaced
/// by dots is extracted to the temp directory and played instead. Blank values are ignored.
/// </param>
public static void PlaySound(string soundPath)
{
    if (string.IsNullOrWhiteSpace(soundPath)) return;

    try
    {
        var absolutePath = soundPath;
        if (!Path.IsPathRooted(absolutePath))
            absolutePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, absolutePath);

        if (!File.Exists(absolutePath))
        {
            // Fall back to the copy embedded in the assembly.
            var resourceName = "Toak." + soundPath.Replace("/", ".").Replace("\\", ".");
            using var stream = System.Reflection.Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName);

            // Neither on disk nor embedded: nothing to play.
            if (stream == null) return;

            absolutePath = Path.Combine(Path.GetTempPath(), "toak_" + Path.GetFileName(soundPath));

            // Extract only once; later calls reuse the temp file.
            if (!File.Exists(absolutePath))
            {
                using var fileStream = File.Create(absolutePath);
                stream.CopyTo(fileStream);
            }
        }

        var pInfo = new ProcessStartInfo
        {
            FileName = "paplay",
            UseShellExecute = false,
            CreateNoWindow = true
        };

        // ArgumentList passes the path verbatim as a single argument, so paths containing
        // spaces, quotes or backslashes need no manual quoting.
        pInfo.ArgumentList.Add(absolutePath);

        // Disposing only releases our handle; playback is not waited for.
        using var process = Process.Start(pInfo);
    }
    catch (Exception ex)
    {
        Console.WriteLine($"[Notifications] Failed to play sound: {ex.Message}");
    }
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
using Toak.Core;
|
||||
|
||||
namespace Toak.IO;
|
||||
|
||||
public static class TextInjector
|
||||
{
|
||||
public static void Inject(string text, string backend)
|
||||
{
|
||||
Logger.LogDebug($"Injecting text: '{text}' with {backend}");
|
||||
if (string.IsNullOrWhiteSpace(text)) return;
|
||||
|
||||
try
|
||||
@@ -13,6 +16,7 @@ public static class TextInjector
|
||||
ProcessStartInfo pInfo;
|
||||
if (backend.ToLowerInvariant() == "wtype")
|
||||
{
|
||||
Logger.LogDebug($"Injecting text using wtype...");
|
||||
pInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "wtype",
|
||||
@@ -23,6 +27,7 @@ public static class TextInjector
|
||||
}
|
||||
else // xdotool
|
||||
{
|
||||
Logger.LogDebug($"Injecting text using xdotool...");
|
||||
pInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "xdotool",
|
||||
@@ -40,4 +45,57 @@ public static class TextInjector
|
||||
Notifications.Notify("Injection Error", "Could not type text into window.");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Starts the typing tool with its standard input redirected and writes each token
/// from the stream to it as it arrives, then closes the input and waits for the tool to exit.
/// Failures are written to the console and reported through a notification; they are not rethrown.
/// </summary>
/// <param name="tokenStream">Text fragments to type, in order.</param>
/// <param name="backend">"wtype" (case-insensitive) selects wtype; any other value selects xdotool.</param>
public static async Task InjectStreamAsync(IAsyncEnumerable<string> tokenStream, string backend)
{
    try
    {
        ProcessStartInfo pInfo;

        // Ordinal, case-insensitive and null-safe comparison instead of lower-casing the input.
        if (string.Equals(backend, "wtype", StringComparison.OrdinalIgnoreCase))
        {
            Logger.LogDebug($"Setting up stream injection using wtype...");
            pInfo = new ProcessStartInfo
            {
                FileName = "wtype",
                Arguments = "-",
                UseShellExecute = false,
                CreateNoWindow = true,
                RedirectStandardInput = true
            };
        }
        else // xdotool
        {
            Logger.LogDebug($"Setting up stream injection using xdotool...");
            pInfo = new ProcessStartInfo
            {
                FileName = "xdotool",
                Arguments = "type --clearmodifiers --delay 0 --file -",
                UseShellExecute = false,
                CreateNoWindow = true,
                RedirectStandardInput = true
            };
        }

        using var process = Process.Start(pInfo);
        if (process == null) return;

        Logger.LogDebug("Started stream injection process, waiting for tokens...");

        await foreach (var token in tokenStream)
        {
            Logger.LogDebug($"Injecting token: '{token}'");
            await process.StandardInput.WriteAsync(token);

            // Flush per token so the tool receives text as soon as it is produced.
            await process.StandardInput.FlushAsync();
        }

        Logger.LogDebug("Stream injection complete. Closing standard input.");

        // Closing stdin signals end-of-input to the tool.
        process.StandardInput.Close();
        await process.WaitForExitAsync();

        // A non-zero exit code means the tool reported a failure; surface it instead of
        // silently treating the injection as successful.
        if (process.ExitCode != 0)
        {
            Console.WriteLine($"[TextInjector] Error injecting text stream: {pInfo.FileName} exited with code {process.ExitCode}");
            Notifications.Notify("Injection Error", "Could not type text stream into window.");
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"[TextInjector] Error injecting text stream: {ex.Message}");
        Notifications.Notify("Injection Error", "Could not type text stream into window.");
    }
}
|
||||
}
|
||||
|
||||
73
Program.cs
73
Program.cs
@@ -5,8 +5,11 @@ using Toak.Api;
|
||||
using Toak.Core;
|
||||
using Toak.IO;
|
||||
|
||||
bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p") || Console.IsOutputRedirected;
|
||||
bool pipeToStdout = args.Contains("--pipe") || args.Contains("-p");
|
||||
bool copyToClipboard = args.Contains("--copy");
|
||||
bool verbose = args.Contains("-v") || args.Contains("--verbose");
|
||||
|
||||
Logger.Verbose = verbose;
|
||||
|
||||
|
||||
string command = "";
|
||||
@@ -29,6 +32,7 @@ if (args.Contains("-h") || args.Contains("--help") || (string.IsNullOrEmpty(comm
|
||||
Console.WriteLine(" -h, --help - Show this help message");
|
||||
Console.WriteLine(" -p, --pipe - Output transcription to stdout instead of typing");
|
||||
Console.WriteLine(" --copy - Copy to clipboard instead of typing");
|
||||
Console.WriteLine(" -v, --verbose - Enable detailed debug logging");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -71,6 +75,17 @@ if (command == "onboard")
|
||||
var backend = Console.ReadLine();
|
||||
if (!string.IsNullOrWhiteSpace(backend)) config.TypingBackend = backend.ToLowerInvariant();
|
||||
|
||||
Console.WriteLine();
|
||||
var availableSkills = Toak.Core.Skills.SkillRegistry.AllSkills.Select(s => s.Name);
|
||||
Console.WriteLine($"Active Skills (comma separated) [{string.Join(", ", config.ActiveSkills)}]:");
|
||||
Console.WriteLine($" Available: {string.Join(", ", availableSkills)}");
|
||||
Console.Write("Selection: ");
|
||||
var skillsInput = Console.ReadLine();
|
||||
if (!string.IsNullOrWhiteSpace(skillsInput))
|
||||
{
|
||||
config.ActiveSkills = skillsInput.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList();
|
||||
}
|
||||
|
||||
ConfigManager.SaveConfig(config);
|
||||
Console.WriteLine("Configuration saved.");
|
||||
return;
|
||||
@@ -85,6 +100,7 @@ if (command == "show")
|
||||
Console.WriteLine($" Whisper Model: {config.WhisperModel}");
|
||||
Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage)}");
|
||||
Console.WriteLine($" Typing Backend: {config.TypingBackend}");
|
||||
Console.WriteLine($" Active Skills: {string.Join(", ", config.ActiveSkills)}");
|
||||
Console.WriteLine($" Style Mode: {config.StyleMode}");
|
||||
Console.WriteLine($" Punctuation Module: {config.ModulePunctuation}");
|
||||
Console.WriteLine($" Technical Sanitization: {config.ModuleTechnicalSanitization}");
|
||||
@@ -244,12 +260,15 @@ if (command == "toggle")
|
||||
{
|
||||
if (StateTracker.IsRecording())
|
||||
{
|
||||
var config = ConfigManager.LoadConfig();
|
||||
Notifications.PlaySound(config.StopSoundPath);
|
||||
|
||||
if (!pipeToStdout) Console.WriteLine("Stopping recording and transcribing...");
|
||||
if (!pipeToStdout) Notifications.Notify("Toak", "Transcribing...");
|
||||
|
||||
AudioRecorder.StopRecording();
|
||||
|
||||
var config = ConfigManager.LoadConfig();
|
||||
Logger.LogDebug($"Loaded configuration: LLM={config.LlmModel}, Whisper={config.WhisperModel}, Typing={config.TypingBackend}");
|
||||
|
||||
if (string.IsNullOrWhiteSpace(config.GroqApiKey))
|
||||
{
|
||||
@@ -271,7 +290,9 @@ if (command == "toggle")
|
||||
var stopWatch = Stopwatch.StartNew();
|
||||
|
||||
// 1. STT
|
||||
Logger.LogDebug($"Starting STT transcription via Whisper for {wavPath}...");
|
||||
var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
|
||||
Logger.LogDebug($"Raw transcript received: '{transcript}'");
|
||||
|
||||
if (string.IsNullOrWhiteSpace(transcript))
|
||||
{
|
||||
@@ -279,32 +300,64 @@ if (command == "toggle")
|
||||
return;
|
||||
}
|
||||
|
||||
string finalText = transcript;
|
||||
|
||||
// 2. LLM Refinement
|
||||
var systemPrompt = PromptBuilder.BuildPrompt(config);
|
||||
finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
|
||||
var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
|
||||
string systemPrompt;
|
||||
if (detectedSkill != null)
|
||||
{
|
||||
Logger.LogDebug($"Skill detected: {detectedSkill.Name}");
|
||||
if (!pipeToStdout) Notifications.Notify("Toak Skill Detected", detectedSkill.Name);
|
||||
systemPrompt = detectedSkill.GetSystemPrompt(transcript);
|
||||
}
|
||||
else
|
||||
{
|
||||
systemPrompt = PromptBuilder.BuildPrompt(config);
|
||||
}
|
||||
|
||||
// 3. Output
|
||||
if (detectedSkill != null && detectedSkill.HandlesExecution)
|
||||
{
|
||||
Logger.LogDebug($"Executing skill synchronously: {detectedSkill.Name}");
|
||||
string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
|
||||
Logger.LogDebug($"Skill refined text: '{finalText}'");
|
||||
if (string.IsNullOrWhiteSpace(finalText))
|
||||
{
|
||||
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
|
||||
return;
|
||||
}
|
||||
|
||||
detectedSkill.Execute(finalText);
|
||||
stopWatch.Stop();
|
||||
Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
|
||||
}
|
||||
else if (pipeToStdout || copyToClipboard)
|
||||
{
|
||||
Logger.LogDebug("Starting LLM text refinement (synchronous)...");
|
||||
string finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
|
||||
Logger.LogDebug($"Refined text received: '{finalText}'");
|
||||
if (string.IsNullOrWhiteSpace(finalText))
|
||||
{
|
||||
if (!pipeToStdout) Notifications.Notify("Toak", "Dropped short or empty audio.");
|
||||
return;
|
||||
}
|
||||
|
||||
// 3. Output
|
||||
if (pipeToStdout)
|
||||
{
|
||||
Console.WriteLine(finalText);
|
||||
}
|
||||
else if (copyToClipboard)
|
||||
else
|
||||
{
|
||||
ClipboardManager.Copy(finalText);
|
||||
stopWatch.Stop();
|
||||
Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
TextInjector.Inject(finalText, config.TypingBackend);
|
||||
Logger.LogDebug("Starting LLM text refinement (streaming)...");
|
||||
var tokenStream = groq.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);
|
||||
Logger.LogDebug("Starting to inject text...");
|
||||
await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
|
||||
stopWatch.Stop();
|
||||
Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
|
||||
}
|
||||
@@ -323,6 +376,8 @@ if (command == "toggle")
|
||||
{
|
||||
// Start recording
|
||||
if (!pipeToStdout) Console.WriteLine("Starting recording...");
|
||||
var config = ConfigManager.LoadConfig();
|
||||
Notifications.PlaySound(config.StartSoundPath);
|
||||
AudioRecorder.StartRecording();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,10 @@ namespace Toak.Serialization;
|
||||
[JsonSerializable(typeof(LlamaChoice))]
|
||||
[JsonSerializable(typeof(LlamaRequestMessage[]))]
|
||||
[JsonSerializable(typeof(LlamaChoice[]))]
|
||||
[JsonSerializable(typeof(LlamaStreamResponse))]
|
||||
[JsonSerializable(typeof(LlamaStreamChoice))]
|
||||
[JsonSerializable(typeof(LlamaStreamDelta))]
|
||||
[JsonSerializable(typeof(LlamaStreamChoice[]))]
|
||||
internal partial class AppJsonSerializerContext : JsonSerializerContext
|
||||
{
|
||||
}
|
||||
|
||||
@@ -8,4 +8,8 @@
|
||||
<PublishAot>true</PublishAot>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<EmbeddedResource Include="Assets\Audio\**" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
Reference in New Issue
Block a user