eb0619dea2
- Add SystemPrompt field to HushConfig (empty = built-in default) - Refactor ConfigManager: extract ApplyTomlFields, add LoadWithProfile(), ListProfiles(), GetProfilePath(), EnsureProfilesDirExists(); remove HUSH_PROFILE env-var logic (profiles are now resolved by the CLI) - Extend socket protocol: action commands (START/STOP/TOGGLE/ABORT) now carry a [4-byte LE length][optional HushConfig JSON] payload so the CLI can pass a per-invocation config override without restarting the daemon - Add GENERATE_PROFILE (cmd 7) socket command: CLI sends a description, daemon calls the LLM and returns a generated system prompt - Orchestrator: StopAndProcessAsync accepts optional HushConfig override; ProcessWithLlmAsync uses proper system/user chat roles and respects config.SystemPrompt; add GenerateProfilePromptAsync - Split CompleteTextAsync signature to (systemPrompt, userMessage, model) across ITextStreamingProvider, GroqProvider, FireworksProvider - Add --profile/-p flag to hush toggle and hush stop - Add hush profiles subcommand: list, get, new (manual or AI-generated), edit
340 lines
12 KiB
C#
340 lines
12 KiB
C#
using Hush.Audio;
|
|
using Hush.Config;
|
|
using Hush.Input;
|
|
using Hush.Providers.Interfaces;
|
|
using Hush.Providers.Providers;
|
|
|
|
namespace Hush.Daemon;
|
|
|
|
public class Orchestrator
|
|
{
|
|
// One HttpClient for the whole process; it is handed to every provider this class creates.
private static readonly HttpClient _httpClient = new HttpClient();

// Configuration source; Load() is called on it whenever a fresh config is needed.
private readonly ConfigManager _configManager;

// Recorder backend, chosen once in the constructor.
private readonly IAudioRecorder _recorder;

// State of the current recording. All writes to the three fields below happen while holding _lock.
private readonly Lock _lock = new Lock();
private bool _isRecording;
private string? _recordingPath;
private DateTime? _recordingStartTime;
|
|
|
|
|
|
|
|
/// <summary>
/// Creates the orchestrator and picks the audio recorder backend via
/// <c>CreateAudioRecorder()</c>.
/// </summary>
/// <param name="configManager">Configuration source kept for later <c>Load()</c> calls.</param>
public Orchestrator(ConfigManager configManager)
{
    // Must be assigned first: CreateAudioRecorder() reads _configManager.
    _configManager = configManager;

    var recorder = CreateAudioRecorder();
    _recorder = recorder;
}
|
|
|
|
/// <summary>
/// True only when this orchestrator has marked a recording as active and the
/// recorder backend also reports that it is recording.
/// </summary>
public bool IsRecording
{
    get
    {
        lock (_lock)
        {
            if (!_isRecording)
            {
                return false;
            }

            return _recorder.IsRecording;
        }
    }
}
|
|
|
|
/// <summary>
/// Returns how long the current recording has been running.
/// </summary>
/// <returns>
/// The time elapsed (UTC clock) since the recording started, or <c>null</c> when no
/// recording is active or no start time has been stored.
/// </returns>
public TimeSpan? GetRecordingDuration()
{
    lock (_lock)
    {
        if (_isRecording && _recordingStartTime is DateTime startedAt)
        {
            return DateTime.UtcNow - startedAt;
        }

        return null;
    }
}
|
|
|
|
/// <summary>
/// Marks a new recording as active and asks the recorder backend to start writing
/// to a freshly named WAV file in the system temp directory.
/// </summary>
/// <returns>A task that completes when the recorder's start call completes.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown synchronously when a recording is already in progress.
/// </exception>
public Task StartRecordingAsync()
{
    string recordingPath;

    lock (_lock)
    {
        if (_isRecording)
        {
            throw new InvalidOperationException("Recording is already in progress");
        }

        recordingPath = Path.Combine(Path.GetTempPath(), $"hush_recording_{Guid.NewGuid()}.wav");
        _recordingPath = recordingPath;
        _recordingStartTime = DateTime.UtcNow;
        _isRecording = true;
    }

    // Use the local copy of the path: the field must not be read outside the lock.
    return StartOrRollBackAsync(recordingPath);

    // Starts the recorder and undoes the state change above if the start fails.
    async Task StartOrRollBackAsync(string path)
    {
        try
        {
            await _recorder.StartRecording(path);
        }
        catch
        {
            lock (_lock)
            {
                // Without this reset a failed start would leave _isRecording stuck at true
                // and every later start would throw "already in progress".
                // The path comparison avoids clobbering a newer recording's state.
                if (_recordingPath == path)
                {
                    _isRecording = false;
                    _recordingPath = null;
                    _recordingStartTime = null;
                }
            }

            throw;
        }
    }
}
|
|
|
|
/// <summary>
/// Stops the active recording, then transcribes it, post-processes the text with
/// the LLM and types the result. Does nothing when no recording is active.
/// </summary>
/// <param name="overrideConfig">
/// Configuration to use for this invocation; when <c>null</c> the configuration is
/// loaded from the config manager.
/// </param>
/// <remarks>
/// Failures while processing are reported through a notification rather than
/// thrown. An exception from stopping the recorder itself is not caught here.
/// The recording file is removed once processing ends, whether it succeeded or not.
/// </remarks>
public async Task StopAndProcessAsync(HushConfig? overrideConfig = null)
{
    string? recordingPath;
    DateTime? recordingStartTime;

    lock (_lock)
    {
        if (!_isRecording)
        {
            return;
        }

        recordingPath = _recordingPath;
        recordingStartTime = _recordingStartTime;
        _isRecording = false;
    }

    await _recorder.StopRecording();

    if (string.IsNullOrEmpty(recordingPath) || !File.Exists(recordingPath))
    {
        SendNotification("Error", "Recording file not found");
        return;
    }

    try
    {
        var config = overrideConfig ?? _configManager.Load();

        var recordingDuration = recordingStartTime.HasValue
            ? DateTime.UtcNow - recordingStartTime.Value
            : TimeSpan.Zero;
        var minDuration = TimeSpan.FromMilliseconds(config.MinRecordingDuration);
        if (recordingDuration < minDuration)
        {
            SendNotification("Hush", "Recording too short, ignored");
            return;
        }

        var transcription = await TranscribeAsync(recordingPath, config);
        var processedText = await ProcessWithLlmAsync(transcription, config);

        await TypeAsync(processedText, config);
    }
    catch (Exception ex)
    {
        SendNotification("Hush Error", ex.Message);
    }
    finally
    {
        // Clean up on every exit path: previously a failed transcription, LLM call or
        // typing step left the audio file behind in the temp directory.
        try
        {
            File.Delete(recordingPath);
        }
        catch (Exception ex)
        {
            SendNotification("Hush Error", ex.Message);
        }
    }
}
|
|
|
|
/// <summary>
/// Cancels the active recording: stops the recorder and deletes the recording file
/// without processing it. Does nothing when no recording is active.
/// </summary>
/// <returns>A task that completes once the recorder has stopped and the file is removed.</returns>
public async Task AbortAsync()
{
    string? recordingPath;

    lock (_lock)
    {
        if (!_isRecording)
        {
            return;
        }

        recordingPath = _recordingPath;
        _isRecording = false;
    }

    try
    {
        // Wait for the recorder to finish before touching the file. Discarding the task
        // meant the existence check could run while the recorder was still active, and
        // any failure to stop went unobserved.
        await _recorder.StopRecording();
    }
    finally
    {
        if (!string.IsNullOrEmpty(recordingPath) && File.Exists(recordingPath))
        {
            File.Delete(recordingPath);
        }
    }
}
|
|
|
|
/// <summary>
/// Sends the audio file at <paramref name="path"/> to the configured Whisper provider.
/// </summary>
/// <param name="path">Path of the recorded audio file to read.</param>
/// <param name="config">Supplies the provider, the Whisper model and the optional language.</param>
/// <returns>The text returned by the provider.</returns>
private async Task<string> TranscribeAsync(string path, HushConfig config)
{
    var provider = GetAudioToTextProvider(config);

    await using var audio = File.OpenRead(path);

    // An empty language setting is passed on as "no language given".
    string? language = null;
    if (!string.IsNullOrEmpty(config.WhisperLanguage))
    {
        language = config.WhisperLanguage;
    }

    var transcript = await provider.TranscribeAsync(audio, config.WhisperModel, language: language);
    return transcript;
}
|
|
|
|
/// <summary>
/// Built-in system prompt for transcription clean-up. <c>ProcessWithLlmAsync</c>
/// falls back to it when the configuration's system prompt is empty or whitespace.
/// </summary>
private const string DefaultSystemPrompt = """
    You are a transcription post-processor. Your task is to clean up raw speech-to-text output and return polished, ready-to-type text.

    Rules:
    - Detect the language of the transcription and process it entirely in that language — do not translate
    - Fix grammar, spelling, and punctuation errors introduced by the speech recognizer, following the conventions of the detected language
    - Capitalize sentences and proper nouns appropriately for the detected language
    - Remove filler words and false starts appropriate to the detected language (e.g. "um", "uh", "like" in English; "euh", "bah" in French; "äh", "ähm" in German; "eh", "tipo" in Spanish/Italian)
    - Preserve the speaker's original intent, vocabulary choices, and tone
    - Do not add, remove, or reinterpret content beyond what was said
    - Do not include any explanation, preamble, or metadata — output only the corrected text
    - If the input is empty or unintelligible, return an empty string
    """;
|
|
|
|
/// <summary>
/// Runs the transcription through the configured LLM provider.
/// </summary>
/// <param name="text">Raw transcription, sent as the user message.</param>
/// <param name="config">Supplies the provider, the model and the optional system prompt.</param>
/// <returns>The text returned by the provider.</returns>
private async Task<string> ProcessWithLlmAsync(string text, HushConfig config)
{
    var provider = GetTextProvider(config);

    // A missing or whitespace-only prompt in the config means "use the built-in one".
    var systemPrompt = config.SystemPrompt;
    if (string.IsNullOrWhiteSpace(systemPrompt))
    {
        systemPrompt = DefaultSystemPrompt;
    }

    var completion = await provider.CompleteTextAsync(systemPrompt, text, config.LlmModel);
    return completion;
}
|
|
|
|
/// <summary>
/// Asks the configured LLM provider to write a profile system prompt from a
/// free-form description.
/// </summary>
/// <param name="description">What the profile should do; sent as the user message.</param>
/// <returns>The text returned by the provider.</returns>
public async Task<string> GenerateProfilePromptAsync(string description)
{
    // Fixed instructions that tell the model how to turn the description into a prompt.
    const string generatorInstructions = """
        You are a configuration assistant for Hush, a Linux speech-to-text post-processor.
        Hush records the user's voice, transcribes it with Whisper, then passes the transcription
        to an LLM using a system prompt you will write.

        Given the user's description of what they want the profile to do, write a precise, concise
        system prompt that instructs the LLM how to transform the raw transcription.

        Rules:
        - Output only the system prompt text, nothing else
        - Do not include meta-commentary, labels, or markdown formatting
        - The prompt must be self-contained and unambiguous
        - Always end with an instruction to output only the final result with no explanation
        """;

    var currentConfig = _configManager.Load();
    var llm = GetTextProvider(currentConfig);

    var generatedPrompt = await llm.CompleteTextAsync(generatorInstructions, description, currentConfig.LlmModel);
    return generatedPrompt;
}
|
|
|
|
/// <summary>
/// Types <paramref name="text"/> through the text-input backend selected by the configuration.
/// </summary>
/// <param name="text">Text to type.</param>
/// <param name="config">Supplies the typing backend choice.</param>
private async Task TypeAsync(string text, HushConfig config)
{
    await GetTextInput(config).TypeString(text);
}
|
|
|
|
/// <summary>
/// Creates the speech-to-text provider named by <c>config.WhisperProvider</c>.
/// </summary>
/// <param name="config">Supplies the provider name and the matching API key.</param>
/// <returns>A new provider instance that uses the shared HTTP client.</returns>
/// <exception cref="InvalidOperationException">
/// The provider name is not supported, or the API key for the chosen provider is empty.
/// </exception>
private IAudioToTextProvider GetAudioToTextProvider(HushConfig config)
{
    switch (config.WhisperProvider)
    {
        case "groq":
            if (string.IsNullOrEmpty(config.GroqApiKey))
            {
                throw new InvalidOperationException("Groq API key is required for Whisper transcription");
            }

            return new GroqProvider(config.GroqApiKey, _httpClient);

        case "fireworks":
            if (string.IsNullOrEmpty(config.FireworksApiKey))
            {
                throw new InvalidOperationException("Fireworks API key is required for Whisper transcription");
            }

            return new FireworksProvider(config.FireworksApiKey, _httpClient);

        default:
            throw new InvalidOperationException($"Unsupported Whisper provider: {config.WhisperProvider}");
    }
}
|
|
|
|
/// <summary>
/// Creates the LLM text provider named by <c>config.LlmProvider</c>.
/// </summary>
/// <param name="config">Supplies the provider name and the matching API key.</param>
/// <returns>A new provider instance that uses the shared HTTP client.</returns>
/// <exception cref="InvalidOperationException">
/// The provider name is not supported, or the API key for the chosen provider is empty.
/// </exception>
private ITextStreamingProvider GetTextProvider(HushConfig config)
{
    var providerName = config.LlmProvider;

    if (providerName == "groq")
    {
        if (string.IsNullOrEmpty(config.GroqApiKey))
        {
            throw new InvalidOperationException("Groq API key is required for LLM");
        }

        return new GroqProvider(config.GroqApiKey, _httpClient);
    }

    if (providerName == "fireworks")
    {
        if (string.IsNullOrEmpty(config.FireworksApiKey))
        {
            throw new InvalidOperationException("Fireworks API key is required for LLM");
        }

        return new FireworksProvider(config.FireworksApiKey, _httpClient);
    }

    throw new InvalidOperationException($"Unsupported LLM provider: {config.LlmProvider}");
}
|
|
|
|
/// <summary>
/// Creates the typing backend: <c>XdotoolInput</c> when <c>config.TypingBackend</c>
/// is exactly "xdotool", otherwise <c>WtypeInput</c>.
/// </summary>
/// <param name="config">Supplies the typing backend name.</param>
private static ITextInput GetTextInput(HushConfig config)
{
    if (config.TypingBackend == "xdotool")
    {
        return new XdotoolInput();
    }

    // Any other value, including an unset one, selects wtype.
    return new WtypeInput();
}
|
|
|
|
/// <summary>
/// Creates the recorder backend from the loaded configuration:
/// <c>FfmpegAudioRecorder</c> when <c>AudioBackend</c> is exactly "ffmpeg",
/// otherwise <c>PipewireAudioRecorder</c>.
/// </summary>
private IAudioRecorder CreateAudioRecorder()
{
    var backend = _configManager.Load().AudioBackend;

    if (backend == "ffmpeg")
    {
        return new FfmpegAudioRecorder();
    }

    // Any other value, including an unset one, selects PipeWire.
    return new PipewireAudioRecorder();
}
|
|
|
|
/// <summary>
/// Shows a desktop notification by running <c>notify-send</c> and waiting for it to
/// exit. If the process cannot be run, the notification is written to the console instead.
/// </summary>
/// <param name="title">Notification title.</param>
/// <param name="message">Notification body.</param>
private static void SendNotification(string title, string message)
{
    try
    {
        var startInfo = new System.Diagnostics.ProcessStartInfo
        {
            FileName = "notify-send",
            UseShellExecute = false,
            CreateNoWindow = true
        };

        // Pass each value as its own argument. Building one quoted command-line string
        // broke as soon as the title or message (e.g. an exception message) contained
        // a double quote.
        startInfo.ArgumentList.Add(title);
        startInfo.ArgumentList.Add(message);

        // Dispose the Process so its OS handle is released after the wait.
        using var process = new System.Diagnostics.Process { StartInfo = startInfo };
        process.Start();
        process.WaitForExit();
    }
    catch
    {
        // Best-effort fallback, e.g. when notify-send is not installed.
        Console.WriteLine($"[Notification] {title}: {message}");
    }
}
|
|
|
|
/// <summary>
/// Measures provider latency by transcribing one second of generated silence and
/// passing the resulting text through the LLM step.
/// </summary>
/// <returns>
/// A <c>LatencyResult</c> built from the speech-to-text time, the LLM time and their
/// sum, all in whole milliseconds.
/// </returns>
public async Task<LatencyResult> RunLatencyTestAsync()
{
    var config = _configManager.Load();

    // Build the fixture before starting the clock so the speech-to-text figure covers
    // only the transcription call, not the local WAV synthesis.
    var wavBytes = GenerateSilentWav(1.0);
    await using var wavStream = new MemoryStream(wavBytes);

    var sttStopwatch = System.Diagnostics.Stopwatch.StartNew();
    var transcription = await TranscribeStreamAsync(wavStream, config);
    sttStopwatch.Stop();

    var llmStopwatch = System.Diagnostics.Stopwatch.StartNew();
    // Only the elapsed time matters here; the processed text itself is discarded.
    _ = await ProcessWithLlmAsync(transcription, config);
    llmStopwatch.Stop();

    var sttMilliseconds = sttStopwatch.ElapsedMilliseconds;
    var llmMilliseconds = llmStopwatch.ElapsedMilliseconds;

    return new LatencyResult(
        (int)sttMilliseconds,
        (int)llmMilliseconds,
        (int)(sttMilliseconds + llmMilliseconds)
    );
}
|
|
|
|
/// <summary>
/// Sends an already-open audio stream to the configured Whisper provider, using the
/// configured model and no explicit language argument.
/// </summary>
/// <param name="stream">Audio data to transcribe.</param>
/// <param name="config">Supplies the provider and the Whisper model.</param>
/// <returns>The text returned by the provider.</returns>
private async Task<string> TranscribeStreamAsync(Stream stream, HushConfig config)
{
    var transcript = await GetAudioToTextProvider(config).TranscribeAsync(stream, config.WhisperModel);
    return transcript;
}
|
|
|
|
/// <summary>
/// Builds an in-memory WAV file (PCM, 16 kHz, 16-bit, mono) containing only silence.
/// </summary>
/// <param name="durationSeconds">
/// Length of the clip in seconds. Fractional values are truncated to a whole number of samples.
/// </param>
/// <returns>The complete file: a 44-byte header followed by zero-valued samples.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="durationSeconds"/> is negative, NaN, or too long for the 32-bit
/// size fields of the header.
/// </exception>
private static byte[] GenerateSilentWav(double durationSeconds)
{
    const int sampleRate = 16000;
    const int bitsPerSample = 16;
    const int channels = 1;
    const int blockAlign = channels * (bitsPerSample / 8);
    const int headerSize = 44;

    // Work out the payload in doubles first so that negative, NaN and oversized
    // durations are rejected instead of producing a corrupt header.
    double payloadBytes = Math.Floor(sampleRate * durationSeconds) * blockAlign;
    if (!(payloadBytes >= 0 && payloadBytes <= int.MaxValue - headerSize))
    {
        throw new ArgumentOutOfRangeException(
            nameof(durationSeconds),
            durationSeconds,
            "Duration must be non-negative and small enough to fit in a WAV file.");
    }

    // Derive the byte count from the whole sample count. Truncating the two values
    // independently let the header disagree with the data actually written.
    int sampleFrames = (int)(sampleRate * durationSeconds);
    int dataChunkSize = sampleFrames * blockAlign;

    // The array is zero-initialised, so the samples are already silent; only the
    // header has to be written.
    var wav = new byte[headerSize + dataChunkSize];

    using var ms = new MemoryStream(wav);
    using var writer = new BinaryWriter(ms);

    // RIFF container header; the size field excludes the first 8 bytes.
    writer.Write("RIFF"u8.ToArray());
    writer.Write(headerSize - 8 + dataChunkSize);
    writer.Write("WAVE"u8.ToArray());

    // "fmt " chunk: 16-byte body, format tag 1 (PCM).
    writer.Write("fmt "u8.ToArray());
    writer.Write(16);
    writer.Write((short)1);
    writer.Write((short)channels);
    writer.Write(sampleRate);
    writer.Write(sampleRate * blockAlign); // bytes per second
    writer.Write((short)blockAlign);
    writer.Write((short)bitsPerSample);

    // "data" chunk header; the silent samples follow in the pre-zeroed array.
    writer.Write("data"u8.ToArray());
    writer.Write(dataChunkSize);
    writer.Flush();

    return wav;
}
|
|
}
|