add configuration profiles with per-invocation --profile flag

- Add SystemPrompt field to HushConfig (empty = built-in default)
- Refactor ConfigManager: extract ApplyTomlFields; add LoadWithProfile(), ListProfiles(), GetProfilePath(), EnsureProfilesDirExists(); remove HUSH_PROFILE env-var logic (profiles are now resolved by the CLI)
- Extend socket protocol: action commands (START/STOP/TOGGLE/ABORT) now carry a [4-byte LE length][optional HushConfig JSON] payload so the CLI can pass a per-invocation config override without restarting the daemon
- Add GENERATE_PROFILE (cmd 7) socket command: CLI sends a description, daemon calls the LLM and returns a generated system prompt
- Orchestrator: StopAndProcessAsync accepts optional HushConfig override; ProcessWithLlmAsync uses proper system/user chat roles and respects config.SystemPrompt; add GenerateProfilePromptAsync
- Split CompleteTextAsync signature to (systemPrompt, userMessage, model) across ITextStreamingProvider, GroqProvider, FireworksProvider
- Add --profile/-p flag to hush toggle and hush stop
- Add hush profiles subcommand: list, get, new (manual or AI-generated), edit
2026-03-23 00:38:29 +01:00
parent 70e784a1cc
commit eb0619dea2
14 changed files with 659 additions and 372 deletions
@@ -18,6 +18,8 @@ public class Orchestrator
    private bool _isRecording;
    private readonly Lock _lock = new();

+
+
    public Orchestrator(ConfigManager configManager)
    {
        _configManager = configManager;
@@ -61,7 +63,7 @@ public class Orchestrator
        return _recorder.StartRecording(_recordingPath);
    }

-    public async Task StopAndProcessAsync()
+    public async Task StopAndProcessAsync(HushConfig? overrideConfig = null)
    {
        string? recordingPath;
        DateTime? recordingStartTime;
@@ -86,7 +88,7 @@ public class Orchestrator
        
        try
        {
-            var config = _configManager.Load();
+            var config = overrideConfig ?? _configManager.Load();
            
            var recordingDuration = recordingStartTime.HasValue
                ? DateTime.UtcNow - recordingStartTime.Value
@@ -101,7 +103,7 @@ public class Orchestrator
            
            var transcription = await TranscribeAsync(recordingPath, config);
            var processedText = await ProcessWithLlmAsync(transcription, config);
-            
+
            await TypeAsync(processedText, config);
            
            File.Delete(recordingPath);
@@ -138,32 +140,61 @@ public class Orchestrator
    private async Task<string> TranscribeAsync(string path, HushConfig config)
    {
        var provider = GetAudioToTextProvider(config);
-        
+
        await using var stream = File.OpenRead(path);
-        return await provider.TranscribeAsync(stream, config.WhisperModel);
+        return await provider.TranscribeAsync(
+            stream,
+            config.WhisperModel,
+            language: string.IsNullOrEmpty(config.WhisperLanguage) ? null : config.WhisperLanguage);
    }

+    private const string DefaultSystemPrompt =
+        """
+        You are a transcription post-processor. Your task is to clean up raw speech-to-text output and return polished, ready-to-type text.
+
+        Rules:
+        - Detect the language of the transcription and process it entirely in that language — do not translate
+        - Fix grammar, spelling, and punctuation errors introduced by the speech recognizer, following the conventions of the detected language
+        - Capitalize sentences and proper nouns appropriately for the detected language
+        - Remove filler words and false starts appropriate to the detected language (e.g. "um", "uh", "like" in English; "euh", "bah" in French; "äh", "ähm" in German; "eh", "tipo" in Spanish/Italian)
+        - Preserve the speaker's original intent, vocabulary choices, and tone
+        - Do not add, remove, or reinterpret content beyond what was said
+        - Do not include any explanation, preamble, or metadata — output only the corrected text
+        - If the input is empty or unintelligible, return an empty string
+        """;
+
    private async Task<string> ProcessWithLlmAsync(string text, HushConfig config)
    {
        var provider = GetTextProvider(config);
-        
-        var prompt = $"""
-                      You are a transcription post-processor. Your task is to clean up raw speech-to-text output and return polished, ready-to-type text.
+        var systemPrompt = string.IsNullOrWhiteSpace(config.SystemPrompt)
+            ? DefaultSystemPrompt
+            : config.SystemPrompt;

-                      Rules:
-                      - Detect the language of the transcription and process it entirely in that language — do not translate
-                      - Fix grammar, spelling, and punctuation errors introduced by the speech recognizer, following the conventions of the detected language
-                      - Capitalize sentences and proper nouns appropriately for the detected language
-                      - Remove filler words and false starts appropriate to the detected language (e.g. "um", "uh", "like" in English; "euh", "bah" in French; "äh", "ähm" in German; "eh", "tipo" in Spanish/Italian)
-                      - Preserve the speaker's original intent, vocabulary choices, and tone
-                      - Do not add, remove, or reinterpret content beyond what was said
-                      - Do not include any explanation, preamble, or metadata — output only the corrected text
-                      - If the input is empty or unintelligible, return an empty string
+        return await provider.CompleteTextAsync(systemPrompt, text, config.LlmModel);
+    }

-                      Raw transcription: {text}
-                      """;
-        
-        return await provider.CompleteTextAsync(prompt, config.LlmModel);
+    public async Task<string> GenerateProfilePromptAsync(string description)
+    {
+        var config   = _configManager.Load();
+        var provider = GetTextProvider(config);
+
+        const string systemPrompt =
+            """
+            You are a configuration assistant for Hush, a Linux speech-to-text post-processor.
+            Hush records the user's voice, transcribes it with Whisper, then passes the transcription
+            to an LLM using a system prompt you will write.
+
+            Given the user's description of what they want the profile to do, write a precise, concise
+            system prompt that instructs the LLM how to transform the raw transcription.
+
+            Rules:
+            - Output only the system prompt text, nothing else
+            - Do not include meta-commentary, labels, or markdown formatting
+            - The prompt must be self-contained and unambiguous
+            - Always end with an instruction to output only the final result with no explanation
+            """;
+
+        return await provider.CompleteTextAsync(systemPrompt, description, config.LlmModel);
    }

    private async Task TypeAsync(string text, HushConfig config)