// Commands/SkillCommand.cs
using System;
using System.CommandLine;
using System.IO;
using System.Text.Json;
using System.Threading.Tasks;
using Spectre.Console;
using Toak.Core.Skills;
using Toak.Serialization;

namespace Toak.Commands;

/// <summary>
/// Builds the <c>skill</c> CLI command with its <c>list</c>, <c>add</c> and <c>remove</c>
/// sub-commands. Skills are stored as one JSON file per skill inside
/// <see cref="SkillRegistry.SkillsDirectory"/>, named after the lower-cased skill name.
/// </summary>
public static class SkillCommand
{
    private const string ScriptAction = "script";
    private const string TypeAction = "type";

    /// <summary>Creates the <c>skill</c> command and wires up its sub-commands.</summary>
    /// <param name="verboseOption">The shared verbose option; not used by this method.</param>
    /// <returns>The configured <c>skill</c> command, ready to be added to the root command.</returns>
    public static Command CreateCommand(Option verboseOption)
    {
        var skillCmd = new Command("skill", "Manage dynamic skills (list, add, remove)");

        var listCmd = new Command("list", "List all available skills");
        listCmd.SetHandler(ExecuteListAsync);
        skillCmd.AddCommand(listCmd);

        var addCmd = new Command("add", "Add a new skill interactively");
        addCmd.SetHandler(ExecuteAddAsync);
        skillCmd.AddCommand(addCmd);

        var removeCmd = new Command("remove", "Remove a skill");
        var nameArg = new Argument<string>("name", "The name of the skill to remove");
        removeCmd.AddArgument(nameArg);
        removeCmd.SetHandler((name) => ExecuteRemoveAsync(name), nameArg);
        skillCmd.AddCommand(removeCmd);

        return skillCmd;
    }

    /// <summary>
    /// Prints a table describing every skill definition file in the skills directory.
    /// Files that cannot be read or parsed are reported and skipped instead of aborting the listing.
    /// </summary>
    private static async Task ExecuteListAsync()
    {
        // Ensures the skills directory exists before it is enumerated.
        SkillRegistry.Initialize();

        var table = new Table().Border(TableBorder.Rounded);
        table.AddColumn("Name");
        table.AddColumn("Action");
        table.AddColumn("Hotwords");
        table.AddColumn("Description");
        table.AddColumn("ScriptPath");

        // Enumerate the files themselves rather than deriving a path from each skill's name:
        // a file's name is not guaranteed to match the Name stored inside it.
        foreach (var file in Directory.GetFiles(SkillRegistry.SkillsDirectory, "*.json"))
        {
            SkillDefinition? def;
            try
            {
                var json = await File.ReadAllTextAsync(file);
                def = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.SkillDefinition);
            }
            catch (Exception ex) when (ex is IOException or UnauthorizedAccessException or JsonException or NotSupportedException)
            {
                AnsiConsole.MarkupLine(
                    $"[yellow]Warning:[/] Skipping skill file {Markup.Escape(file)}: {Markup.Escape(ex.Message)}");
                continue;
            }

            if (def is null)
            {
                continue;
            }

            var isScript = string.Equals(def.Action, ScriptAction, StringComparison.OrdinalIgnoreCase);
            var hotwords = string.Join(", ", def.Hotwords ?? Array.Empty<string>());

            // Every user-provided value is escaped: table cells are parsed as Spectre markup,
            // so a stray '[' or ']' would otherwise throw.
            table.AddRow(
                $"[green]{Markup.Escape(def.Name ?? string.Empty)}[/]",
                string.IsNullOrEmpty(def.Action) ? TypeAction : $"[yellow]{Markup.Escape(def.Action)}[/]",
                $"[blue]{Markup.Escape(hotwords)}[/]",
                Markup.Escape(def.Description ?? string.Empty),
                isScript ? $"[dim]{Markup.Escape(def.ScriptPath ?? "None")}[/]" : "-");
        }

        AnsiConsole.Write(table);
    }

    /// <summary>
    /// Interactively prompts for a new skill definition and writes it to the skills directory.
    /// Rejects names that are not usable as a file name and asks before overwriting an existing skill.
    /// </summary>
    private static async Task ExecuteAddAsync()
    {
        AnsiConsole.MarkupLine("[bold blue]Add a new Dynamic Skill[/]");

        var name = AnsiConsole.Ask<string>("Skill [green]Name[/]:").Trim();
        if (!IsValidSkillName(name))
        {
            AnsiConsole.MarkupLine(
                $"[bold red]Error:[/] Invalid skill name '{Markup.Escape(name)}'. Names must be usable as a file name.");
            return;
        }

        var description = AnsiConsole.Ask<string>("Skill [green]Description[/]:");

        var hotwordsStr = AnsiConsole.Ask<string>("Comma-separated [green]Hotwords[/] (e.g. 'System my skill, System do skill'):");
        var hotwords = hotwordsStr.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

        var action = AnsiConsole.Prompt(
            new SelectionPrompt<string>()
                .Title("What is the [green]Action[/] type?")
                .AddChoices(new[] { TypeAction, ScriptAction }));

        string? scriptPath = null;
        if (action == ScriptAction)
        {
            scriptPath = AnsiConsole.Ask<string>("Enter the absolute [green]Script Path[/] (~/ is allowed):");
        }

        var systemPrompt = AnsiConsole.Ask<string>("Enter the [green]System Prompt[/] (use {transcript} to inject the user's speech):");

        var def = new SkillDefinition
        {
            Name = name,
            Description = description,
            Hotwords = hotwords,
            Action = action,
            ScriptPath = scriptPath,
            SystemPrompt = systemPrompt
        };

        SkillRegistry.Initialize(); // ensure dir exists
        var filename = GetSkillFilePath(name);

        if (File.Exists(filename)
            && !AnsiConsole.Confirm($"Skill file {Markup.Escape(filename)} already exists. Overwrite?", false))
        {
            AnsiConsole.MarkupLine("[yellow]Aborted.[/] Existing skill left unchanged.");
            return;
        }

        var json = JsonSerializer.Serialize(def, AppJsonSerializerContext.Default.SkillDefinition);
        await File.WriteAllTextAsync(filename, json);

        AnsiConsole.MarkupLine(
            $"[bold green]Success![/] Skill '{Markup.Escape(name)}' saved to {Markup.Escape(filename)}");
    }

    /// <summary>Deletes the definition file of the named skill, if it exists.</summary>
    /// <param name="name">The skill name as given on the command line.</param>
    private static Task ExecuteRemoveAsync(string name)
    {
        // Validate first: the name becomes part of a path that is passed to File.Delete.
        if (!IsValidSkillName(name))
        {
            AnsiConsole.MarkupLine(
                $"[bold red]Error:[/] Invalid skill name '{Markup.Escape(name ?? string.Empty)}'.");
            return Task.CompletedTask;
        }

        SkillRegistry.Initialize();
        var filename = GetSkillFilePath(name);
        if (File.Exists(filename))
        {
            File.Delete(filename);
            AnsiConsole.MarkupLine(
                $"[bold green]Success![/] Deleted skill '{Markup.Escape(name)}' ({Markup.Escape(filename)})");
        }
        else
        {
            AnsiConsole.MarkupLine($"[bold red]Error:[/] Skill file not found: {Markup.Escape(filename)}");
        }

        return Task.CompletedTask;
    }

    /// <summary>Returns the path of the JSON file that stores the skill with the given name.</summary>
    private static string GetSkillFilePath(string name) =>
        Path.Combine(SkillRegistry.SkillsDirectory, $"{name.ToLowerInvariant()}.json");

    /// <summary>
    /// Returns <c>true</c> when <paramref name="name"/> is non-blank and contains neither a
    /// directory separator nor any other character that is invalid in a file name, so the
    /// resulting path cannot point outside the skills directory.
    /// </summary>
    private static bool IsValidSkillName(string? name) =>
        !string.IsNullOrWhiteSpace(name)
        && name.IndexOfAny(Path.GetInvalidFileNameChars()) < 0
        && name.IndexOfAny(new[] { '/', '\\' }) < 0;
}
// Core/Skills/DynamicSkill.cs
using System;
using System.Diagnostics;

namespace Toak.Core.Skills;

/// <summary>
/// An <see cref="ISkill"/> whose behaviour is driven entirely by a <see cref="SkillDefinition"/>.
/// When the definition's action is <c>script</c>, <see cref="Execute"/> runs the configured
/// script with the LLM result as its single argument.
/// </summary>
public class DynamicSkill : ISkill
{
    private const string ScriptAction = "script";
    private const string TranscriptPlaceholder = "{transcript}";

    private readonly SkillDefinition _def;

    // The definition's properties are settable, so any of them may have been set to null
    // despite the non-nullable declarations; fall back to empty values so callers never see null.
    public string Name => _def.Name ?? string.Empty;
    public string Description => _def.Description ?? string.Empty;
    public string[] Hotwords => _def.Hotwords ?? Array.Empty<string>();

    /// <summary>True when the definition's action is <c>script</c> (compared case-insensitively).</summary>
    public bool HandlesExecution =>
        string.Equals(_def.Action, ScriptAction, StringComparison.OrdinalIgnoreCase);

    /// <summary>Creates a skill backed by the given definition.</summary>
    /// <param name="def">The definition to wrap.</param>
    /// <exception cref="ArgumentNullException"><paramref name="def"/> is null.</exception>
    public DynamicSkill(SkillDefinition def)
    {
        _def = def ?? throw new ArgumentNullException(nameof(def));
    }

    /// <summary>
    /// Returns the definition's system prompt with every <c>{transcript}</c> placeholder
    /// replaced by <paramref name="rawTranscript"/>.
    /// </summary>
    public string GetSystemPrompt(string rawTranscript)
    {
        return (_def.SystemPrompt ?? string.Empty).Replace(TranscriptPlaceholder, rawTranscript);
    }

    /// <summary>
    /// Runs the configured script with <paramref name="llmResult"/> as its only argument and
    /// waits for it to exit. Does nothing unless the action is <c>script</c> and a script path
    /// is set. Start-up failures are written to the console rather than thrown.
    /// </summary>
    public void Execute(string llmResult)
    {
        if (!HandlesExecution || string.IsNullOrWhiteSpace(_def.ScriptPath))
        {
            return;
        }

        var scriptPath = ExpandPath(_def.ScriptPath);

        try
        {
            var startInfo = new ProcessStartInfo
            {
                FileName = scriptPath,
                UseShellExecute = false,
                CreateNoWindow = true
            };

            // ArgumentList hands the text over as exactly one argument. Building a quoted
            // Arguments string by hand breaks on backslashes and lets crafted text split
            // into several arguments.
            startInfo.ArgumentList.Add(llmResult ?? string.Empty);

            using var process = Process.Start(startInfo);
            process?.WaitForExit();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[DynamicSkill] Error executing script '{scriptPath}': {ex.Message}");
        }
    }

    /// <summary>
    /// Expands environment variables in <paramref name="path"/> and a leading <c>~</c> or
    /// <c>~/</c> to the current user's profile directory. Other <c>~name</c> forms are left as-is.
    /// </summary>
    private static string ExpandPath(string path)
    {
        var expanded = Environment.ExpandEnvironmentVariables(path);

        var isHomeRelative = expanded == "~"
            || expanded.StartsWith("~/", StringComparison.Ordinal)
            || expanded.StartsWith("~\\", StringComparison.Ordinal);

        return isHomeRelative
            ? Environment.GetFolderPath(Environment.SpecialFolder.UserProfile) + expanded.Substring(1)
            : expanded;
    }
}
- - public bool HandlesExecution => false; - - public string GetSystemPrompt(string rawTranscript) - { - return @"You are an expert formal editor and corporate communicator. -The user wants to rewrite the following text professionally. The transcript might start with a hotword like 'System professional'. -- Enhance the text from the speaker to sound highly professional and articulate. -- Maintain the exact meaning and key information of the original transcription. -- Ensure paragraph breaks are added logically to prevent walls of text, improving readability. -- Avoid filler words, hesitations (umm, uh), or conversational redundancies. -- Output ONLY the final polished text. Do not include markdown, explanations, or quotes."; - } - - public void Execute(string llmResult) - { - // Not used since HandlesExecution is false - } -} diff --git a/Core/Skills/SkillDefinition.cs b/Core/Skills/SkillDefinition.cs new file mode 100644 index 0000000..4c850c5 --- /dev/null +++ b/Core/Skills/SkillDefinition.cs @@ -0,0 +1,11 @@ +namespace Toak.Core.Skills; + +public class SkillDefinition +{ + public string Name { get; set; } = ""; + public string Description { get; set; } = ""; + public string[] Hotwords { get; set; } = System.Array.Empty(); + public string Action { get; set; } = "type"; // "type" or "script" + public string SystemPrompt { get; set; } = ""; + public string? 
ScriptPath { get; set; } +} diff --git a/Core/Skills/SkillRegistry.cs b/Core/Skills/SkillRegistry.cs index a4baf05..e959b1a 100644 --- a/Core/Skills/SkillRegistry.cs +++ b/Core/Skills/SkillRegistry.cs @@ -1,19 +1,52 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text.Json; +using Toak.Serialization; + namespace Toak.Core.Skills; public static class SkillRegistry { - public static readonly ISkill[] AllSkills = new ISkill[] + public static List AllSkills = new List(); + + public static string SkillsDirectory => Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), + ".config", "toak", "skills"); + + public static void Initialize() { - new TerminalSkill(), - new TranslateSkill(), - new ProfessionalSkill(), - new SummarySkill() - }; - + if (!Directory.Exists(SkillsDirectory)) + { + Directory.CreateDirectory(SkillsDirectory); + CreateDefaultSkills(); + } + + AllSkills.Clear(); + foreach (var file in Directory.GetFiles(SkillsDirectory, "*.json")) + { + try + { + string json = File.ReadAllText(file); + var def = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.SkillDefinition); + if (def != null) + { + AllSkills.Add(new DynamicSkill(def)); + } + } + catch (Exception ex) + { + Logger.LogDebug($"Failed to load skill from {file}: {ex.Message}"); + } + } + } + public static ISkill? 
DetectSkill(string transcript, IEnumerable activeSkillNames) { - var activeSkills = AllSkills.Where(s => activeSkillNames.Contains(s.Name, StringComparer.OrdinalIgnoreCase)).ToList(); + if (AllSkills.Count == 0) Initialize(); + var activeSkills = AllSkills.Where(s => activeSkillNames.Contains(s.Name, StringComparer.OrdinalIgnoreCase)).ToList(); string normalizedTranscript = transcript.Trim(); foreach (var skill in activeSkills) @@ -28,4 +61,71 @@ public static class SkillRegistry } return null; } + + private static void CreateDefaultSkills() + { + var defaults = new List + { + new SkillDefinition + { + Name = "Terminal", + Description = "Executes the spoken command in your shell.", + Hotwords = new[] { "System terminal", "System run", "System execute" }, + Action = "script", + ScriptPath = "~/.config/toak/skills/terminal_action.sh", + SystemPrompt = "You are a Linux terminal expert. Translate the user's request into a single, valid bash command. Output ONLY the raw command, no formatting, no markdown." + }, + new SkillDefinition + { + Name = "Translate", + Description = "Translates the spoken text into another language on the fly.", + Hotwords = new[] { "System translate to", "System translate into" }, + Action = "type", + SystemPrompt = @"You are an expert translator. The user wants to translate the following text. +The first few words identify the target language (e.g. 'Translate to Spanish:', 'Translate into Hungarian:'). +Translate the REST of the transcript into that target language. +Output ONLY the final translated text. Do not include markdown, explanations, or quotes." + }, + new SkillDefinition + { + Name = "Professional", + Description = "Rewrites text into a formal, articulate tone.", + Hotwords = new[] { "System professional", "System formalize", "System formal" }, + Action = "type", + SystemPrompt = "Rewrite the following text to be articulate and formal. Do not add any conversational filler. 
Text: {transcript}" + }, + new SkillDefinition + { + Name = "Summary", + Description = "Provides a direct, crisp summary of the dictation.", + Hotwords = new[] { "System summary", "System concise", "System summarize" }, + Action = "type", + SystemPrompt = "Summarize the following text to be as concise and direct as possible. Remove all fluff. Text: {transcript}" + } + }; + + foreach (var def in defaults) + { + string filename = Path.Combine(SkillsDirectory, $"{def.Name.ToLowerInvariant()}.json"); + string json = JsonSerializer.Serialize(def, AppJsonSerializerContext.Default.SkillDefinition); + File.WriteAllText(filename, json); + } + + // Create the default terminal wrapper script if it doesn't exist + string scriptPath = Path.Combine(SkillsDirectory, "terminal_action.sh"); + if (!File.Exists(scriptPath)) + { + File.WriteAllText(scriptPath, "#!/bin/bash\n\n# Terminal skill wrapper script.\n# The LLM output is passed as the first argument ($1).\nx-terminal-emulator -e bash -c \"$1; exec bash\"\n"); + + // Try to make it executable + try { + System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo { + FileName = "chmod", + Arguments = $"+x \"{scriptPath}\"", + CreateNoWindow = true, + UseShellExecute = false + })?.WaitForExit(); + } catch { } + } + } } diff --git a/Core/Skills/SummarySkill.cs b/Core/Skills/SummarySkill.cs deleted file mode 100644 index 9840a81..0000000 --- a/Core/Skills/SummarySkill.cs +++ /dev/null @@ -1,25 +0,0 @@ -namespace Toak.Core.Skills; - -public class SummarySkill : ISkill -{ - public string Name => "Summary"; - public string Description => "Summarizes the spoken text securely and concisely, removing fluff."; - public string[] Hotwords => new[] { "System summary", "System summarize", "System concise" }; - - public bool HandlesExecution => false; - - public string GetSystemPrompt(string rawTranscript) - { - return @"You are an expert editor who strips all fluff and makes text as concise as possible. 
-The user wants to summarize the following text. The transcript might start with a hotword like 'System summary'. -- Strip all fluff, filler, and unnecessary conversational words. -- Make the output as direct and brief as possible without losing the core information. -- Use clear, crisp phrasing. If the text lists items or instructions, format them logically. -- Output ONLY the final summarized text. Do not include markdown, explanations, or quotes."; - } - - public void Execute(string llmResult) - { - // Not used since HandlesExecution is false - } -} diff --git a/Core/Skills/TerminalSkill.cs b/Core/Skills/TerminalSkill.cs deleted file mode 100644 index 767e6af..0000000 --- a/Core/Skills/TerminalSkill.cs +++ /dev/null @@ -1,43 +0,0 @@ -using System.Diagnostics; - -namespace Toak.Core.Skills; - -public class TerminalSkill : ISkill -{ - public string Name => "Terminal"; - public string Description => "Translates an intent into a bash command and runs it in the background."; - public string[] Hotwords => new[] { "System terminal", "System command" }; - - public bool HandlesExecution => false; - - public string GetSystemPrompt(string rawTranscript) - { - return @"You are a command-line assistant. The user will ask you to perform a task. -Translate the request into a single bash command. -Output ONLY the raw bash command to achieve this task. 
Do not include markdown formatting, backticks, or explanations."; - } - - public void Execute(string llmResult) - { - // HandlesExecution is false because we are not retarded enough - // to let the LLM execute commands directly - try - { - Console.WriteLine($"[TerminalSkill] Executing: {llmResult}"); - var escapedCmd = llmResult.Replace("\"", "\\\""); - var pInfo = new ProcessStartInfo - { - FileName = "bash", - Arguments = $"-c \"{escapedCmd}\"", - UseShellExecute = false, - CreateNoWindow = true - }; - Process.Start(pInfo); - IO.Notifications.Notify("Toak Terminal Executed", llmResult); - } - catch (Exception ex) - { - Console.WriteLine($"[TerminalSkill Error] {ex.Message}"); - } - } -} diff --git a/Core/Skills/TranslateSkill.cs b/Core/Skills/TranslateSkill.cs deleted file mode 100644 index 3017b31..0000000 --- a/Core/Skills/TranslateSkill.cs +++ /dev/null @@ -1,23 +0,0 @@ -namespace Toak.Core.Skills; - -public class TranslateSkill : ISkill -{ - public string Name => "Translate"; - public string Description => "Translates the spoken text into another language on the fly."; - public string[] Hotwords => new[] { "System translate to", "System translate into" }; - - public bool HandlesExecution => false; - - public string GetSystemPrompt(string rawTranscript) - { - return @"You are an expert translator. The user wants to translate the following text. -The first few words identify the target language (e.g. 'Translate to Spanish:', 'Translate into Hungarian:'). -Translate the REST of the transcript into that target language. -Output ONLY the final translated text. 
Do not include markdown, explanations, or quotes."; - } - - public void Execute(string llmResult) - { - // Not used since HandlesExecution is false - } -} diff --git a/Program.cs b/Program.cs index 31cf216..5036cb9 100644 --- a/Program.cs +++ b/Program.cs @@ -58,6 +58,9 @@ public class Program configCmd.SetHandler(ConfigUpdaterCommand.ExecuteAsync, keyArg, valArg, verboseOption); rootCommand.AddCommand(configCmd); + // Skill Command + rootCommand.AddCommand(SkillCommand.CreateCommand(verboseOption)); + return await rootCommand.InvokeAsync(args); } } diff --git a/Serialization/AppJsonSerializerContext.cs b/Serialization/AppJsonSerializerContext.cs index f47df36..9709b18 100644 --- a/Serialization/AppJsonSerializerContext.cs +++ b/Serialization/AppJsonSerializerContext.cs @@ -18,6 +18,7 @@ namespace Toak.Serialization; [JsonSerializable(typeof(LlamaStreamChoice))] [JsonSerializable(typeof(LlamaStreamDelta))] [JsonSerializable(typeof(LlamaStreamChoice[]))] +[JsonSerializable(typeof(Toak.Core.Skills.SkillDefinition))] internal partial class AppJsonSerializerContext : JsonSerializerContext { } diff --git a/_toak b/_toak index b4963f8..862b362 100644 --- a/_toak +++ b/_toak @@ -19,6 +19,7 @@ _toak() { 'latency-test:Benchmark full pipeline without recording' 'show:Show current configuration' 'config:Update a specific configuration setting' + 'skill:Manage dynamic skills (list, add, remove)' ) _arguments -C \ @@ -46,6 +47,15 @@ _toak() { '1:key:(llm whisper language lang backend punctuation tech)' \ '2:value:' ;; + skill) + local -a skill_cmds + skill_cmds=( + 'list:List all available skills' + 'add:Add a new skill interactively' + 'remove:Remove a skill' + ) + _describe -t commands 'skill command' skill_cmds + ;; *) _message "no more arguments" ;;