initial commit

This commit is contained in:
2026-03-22 02:25:16 +01:00
commit eb72820ce9
42 changed files with 2506 additions and 0 deletions
+314
View File
@@ -0,0 +1,314 @@
using Hush.Audio;
using Hush.Config;
using Hush.Input;
using Hush.Providers.Interfaces;
using Hush.Providers.Providers;
namespace Hush.Daemon;
public class Orchestrator
{
// Source of the configuration; Load() is called in the constructor (via CreateAudioRecorder)
// and again at the start of each processing or latency-test run.
private readonly ConfigManager _configManager;
// Recorder chosen once, at construction time, from the configured audio backend.
private readonly IAudioRecorder _recorder;
// Lazily created speech-to-text provider; once created it is reused for later calls.
private IAudioToTextProvider? _audioToTextProvider;
// Lazily created LLM text provider; once created it is reused for later calls.
private ITextStreamingProvider? _textProvider;
// Lazily created typing backend; once created it is reused for later calls.
private ITextInput? _textInput;
// Path of the temporary WAV file for the current (or most recent) recording.
private string? _recordingPath;
// UTC timestamp taken when the current (or most recent) recording was started.
private DateTime? _recordingStartTime;
// True between a successful StartRecordingAsync and the next stop/abort.
private bool _isRecording;
// Serializes updates to the recording-state fields above.
private readonly Lock _lock = new();
/// <summary>
/// Creates the orchestrator and selects the audio recorder from the current configuration.
/// </summary>
/// <param name="configManager">Configuration source; it is loaded here to pick the recorder.</param>
/// <exception cref="ArgumentNullException"><paramref name="configManager"/> is <c>null</c>.</exception>
public Orchestrator(ConfigManager configManager)
{
    // Fail with a precise error instead of a NullReferenceException from inside CreateAudioRecorder.
    ArgumentNullException.ThrowIfNull(configManager);

    _configManager = configManager;

    // Must run after _configManager is assigned: CreateAudioRecorder reads it.
    _recorder = CreateAudioRecorder();
}
/// <summary>
/// Gets whether a recording is active: the orchestrator's own flag must be set
/// and the recorder must also report that it is recording.
/// </summary>
public bool IsRecording
{
    get
    {
        lock (_lock)
        {
            // The recorder is only consulted when our own flag is set.
            if (!_isRecording)
            {
                return false;
            }

            return _recorder.IsRecording;
        }
    }
}
/// <summary>
/// Returns how long the current recording has been running.
/// </summary>
/// <returns>
/// The time elapsed since the recording started (based on UTC clock readings),
/// or <c>null</c> when no recording is in progress or no start time was stored.
/// </returns>
public TimeSpan? GetRecordingDuration()
{
    lock (_lock)
    {
        if (_isRecording && _recordingStartTime is { } startedAt)
        {
            return DateTime.UtcNow - startedAt;
        }

        return null;
    }
}
/// <summary>
/// Starts a new recording into a uniquely named WAV file in the system temp directory.
/// </summary>
/// <returns>A task that completes when the recorder has finished starting.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown synchronously when a recording is already in progress.
/// </exception>
public Task StartRecordingAsync()
{
    string recordingPath;
    lock (_lock)
    {
        if (_isRecording)
            throw new InvalidOperationException("Recording is already in progress");

        recordingPath = Path.Combine(Path.GetTempPath(), $"hush_recording_{Guid.NewGuid()}.wav");
        _recordingPath = recordingPath;
        _recordingStartTime = DateTime.UtcNow;
        _isRecording = true;
    }

    // Hand the recorder the local copy: the field must not be re-read outside the lock,
    // where a concurrent stop/start could have changed it.
    return StartRecorderAsync(recordingPath);
}

/// <summary>
/// Starts the recorder and rolls the recording state back if it fails to start,
/// so a failed start does not leave the orchestrator stuck in the "recording" state.
/// </summary>
private async Task StartRecorderAsync(string recordingPath)
{
    try
    {
        await _recorder.StartRecording(recordingPath);
    }
    catch
    {
        lock (_lock)
        {
            // Only undo our own attempt; a newer recording may already own the state.
            if (_recordingPath == recordingPath)
            {
                _isRecording = false;
                _recordingPath = null;
                _recordingStartTime = null;
            }
        }

        throw;
    }
}
/// <summary>
/// Stops the active recording and runs it through the pipeline:
/// transcription, LLM clean-up, then typing the result.
/// </summary>
/// <remarks>
/// Does nothing when no recording is in progress. Pipeline failures are reported through a
/// notification rather than thrown; a failure to stop the recorder still propagates to the caller.
/// The temporary recording file is removed on every path, including failures.
/// </remarks>
/// <returns>A task that completes when processing has finished or was skipped.</returns>
public async Task StopAndProcessAsync()
{
    string? recordingPath;
    TimeSpan recordingDuration;
    lock (_lock)
    {
        if (!_isRecording)
        {
            return;
        }

        recordingPath = _recordingPath;

        // Measure at the moment the stop is requested, so recorder shutdown and
        // configuration loading do not count towards the recording length.
        recordingDuration = _recordingStartTime is { } startedAt
            ? DateTime.UtcNow - startedAt
            : TimeSpan.Zero;
        _isRecording = false;
    }

    try
    {
        await _recorder.StopRecording();

        if (string.IsNullOrEmpty(recordingPath) || !File.Exists(recordingPath))
        {
            SendNotification("Error", "Recording file not found");
            return;
        }

        try
        {
            var config = _configManager.Load();
            var minDuration = TimeSpan.FromMilliseconds(config.MinRecordingDuration);
            if (recordingDuration < minDuration)
            {
                SendNotification("Hush", "Recording too short, ignored");
                return;
            }

            var transcription = await TranscribeAsync(recordingPath, config);
            var processedText = await ProcessWithLlmAsync(transcription, config);
            await TypeAsync(processedText, config);
        }
        catch (Exception ex)
        {
            SendNotification("Hush Error", ex.Message);
        }
    }
    finally
    {
        // Previously the file was only deleted on the success and "too short" paths,
        // so every failed run left a recording behind in the temp directory.
        DeleteRecordingFile(recordingPath);
    }
}

/// <summary>
/// Best-effort removal of a temporary recording; I/O and permission problems are logged, not thrown.
/// </summary>
private static void DeleteRecordingFile(string? path)
{
    if (string.IsNullOrEmpty(path))
    {
        return;
    }

    try
    {
        // File.Delete does not throw when the file is already gone.
        File.Delete(path);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Cleanup must not mask the outcome of the pipeline; leave a trace instead.
        Console.Error.WriteLine($"[Hush] Could not delete recording '{path}': {ex.Message}");
    }
}
/// <summary>
/// Cancels the active recording and discards its temporary file without processing it.
/// </summary>
/// <remarks>
/// Does nothing when no recording is in progress. A failure while stopping the recorder is
/// reported through a notification, and the file is still removed afterwards.
/// </remarks>
/// <returns>A task that completes once the recorder has stopped and the file was removed.</returns>
public async Task AbortAsync()
{
    string? recordingPath;
    lock (_lock)
    {
        if (!_isRecording)
        {
            return;
        }

        recordingPath = _recordingPath;
        _isRecording = false;
    }

    try
    {
        // Wait for the recorder to stop before deleting: a fire-and-forget stop can leave
        // the recorder still writing to the file, and its failure would go unobserved.
        await _recorder.StopRecording();
    }
    catch (Exception ex)
    {
        SendNotification("Hush Error", ex.Message);
    }

    if (!string.IsNullOrEmpty(recordingPath) && File.Exists(recordingPath))
    {
        File.Delete(recordingPath);
    }
}
/// <summary>
/// Transcribes the audio file at <paramref name="path"/> with the configured speech-to-text provider.
/// </summary>
/// <param name="path">Path of the audio file to open for reading.</param>
/// <param name="config">Configuration supplying the provider selection and the Whisper model name.</param>
/// <returns>The text returned by the provider.</returns>
private async Task<string> TranscribeAsync(string path, HushConfig config)
{
    // Resolve the provider first so a configuration error surfaces before the file is opened.
    var transcriber = GetAudioToTextProvider(config);

    await using (var audio = File.OpenRead(path))
    {
        return await transcriber.TranscribeAsync(audio, config.WhisperModel);
    }
}
/// <summary>
/// Sends the transcribed text to the configured LLM provider with a clean-up instruction.
/// </summary>
/// <param name="text">Raw transcription to be cleaned up.</param>
/// <param name="config">Configuration supplying the provider selection and the LLM model name.</param>
/// <returns>The completion returned by the provider.</returns>
private async Task<string> ProcessWithLlmAsync(string text, HushConfig config)
{
    var llm = GetTextProvider(config);

    // Two-line prompt: the instruction, then the transcription on a "Text:" line.
    var instructions = $"""
        Process this spoken text for clarity and correctness. Fix any errors, add proper punctuation, and make it read naturally. Keep the original meaning intact.
        Text: {text}
        """;

    var completion = await llm.CompleteTextAsync(instructions, config.LlmModel);
    return completion;
}
/// <summary>
/// Types <paramref name="text"/> through the configured typing backend.
/// </summary>
/// <param name="text">Text to type.</param>
/// <param name="config">Configuration used to select the typing backend.</param>
private async Task TypeAsync(string text, HushConfig config)
{
    await GetTextInput(config).TypeString(text);
}
/// <summary>
/// Returns the speech-to-text provider, creating it on first use from <paramref name="config"/>.
/// </summary>
/// <remarks>
/// Once a provider has been created it is returned for every later call;
/// <paramref name="config"/> is not consulted again.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// The configured Whisper provider is not supported, or the Groq API key is missing.
/// </exception>
private IAudioToTextProvider GetAudioToTextProvider(HushConfig config)
{
    if (_audioToTextProvider is { } cached)
    {
        return cached;
    }

    switch (config.WhisperProvider)
    {
        case "groq":
            if (string.IsNullOrEmpty(config.GroqApiKey))
            {
                throw new InvalidOperationException("Groq API key is required for Whisper transcription");
            }

            _audioToTextProvider = new GroqProvider(config.GroqApiKey);
            return _audioToTextProvider;

        default:
            throw new InvalidOperationException($"Unsupported Whisper provider: {config.WhisperProvider}");
    }
}
/// <summary>
/// Returns the LLM text provider, creating it on first use from <paramref name="config"/>.
/// </summary>
/// <remarks>
/// Once a provider has been created it is returned for every later call;
/// <paramref name="config"/> is not consulted again.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// The configured LLM provider is not supported, or the Groq API key is missing.
/// </exception>
private ITextStreamingProvider GetTextProvider(HushConfig config)
{
    // The factory only runs when nothing has been cached yet.
    return _textProvider ??= CreateProvider();

    GroqProvider CreateProvider()
    {
        if (config.LlmProvider is not "groq")
        {
            throw new InvalidOperationException($"Unsupported LLM provider: {config.LlmProvider}");
        }

        if (string.IsNullOrEmpty(config.GroqApiKey))
        {
            throw new InvalidOperationException("Groq API key is required for LLM");
        }

        return new GroqProvider(config.GroqApiKey);
    }
}
/// <summary>
/// Returns the typing backend, creating it on first use from <paramref name="config"/>.
/// </summary>
/// <remarks>
/// "xdotool" selects <c>XdotoolInput</c>; any other value selects <c>WtypeInput</c>.
/// Once created, the same instance is returned for every later call.
/// </remarks>
private ITextInput GetTextInput(HushConfig config)
{
    if (_textInput is null)
    {
        if (config.TypingBackend is "xdotool")
        {
            _textInput = new XdotoolInput();
        }
        else
        {
            _textInput = new WtypeInput();
        }
    }

    return _textInput;
}
/// <summary>
/// Loads the configuration and creates the recorder for the configured audio backend.
/// </summary>
/// <returns>
/// An <c>FfmpegAudioRecorder</c> when the backend is "ffmpeg"; otherwise a <c>PipewireAudioRecorder</c>.
/// </returns>
private IAudioRecorder CreateAudioRecorder()
{
    var backend = _configManager.Load().AudioBackend;

    if (backend is "ffmpeg")
    {
        return new FfmpegAudioRecorder();
    }

    // Every other value, including an unset one, falls back to PipeWire.
    return new PipewireAudioRecorder();
}
/// <summary>
/// Shows a notification by running <c>notify-send</c> and waiting for it to exit.
/// </summary>
/// <remarks>
/// Best effort: if the process cannot be started or fails, the notification is
/// written to standard output instead and no exception escapes.
/// </remarks>
/// <param name="title">Notification title.</param>
/// <param name="message">Notification body.</param>
private static void SendNotification(string title, string message)
{
    try
    {
        var startInfo = new System.Diagnostics.ProcessStartInfo
        {
            FileName = "notify-send",
            UseShellExecute = false,
            CreateNoWindow = true
        };

        // Pass each value as its own argument. Building a single quoted string breaks
        // (or injects extra arguments) when the text contains quotes or backslashes,
        // which is common in exception messages.
        startInfo.ArgumentList.Add(title);
        startInfo.ArgumentList.Add(message);

        using var process = System.Diagnostics.Process.Start(startInfo);
        process?.WaitForExit();
    }
    catch
    {
        // Deliberately broad: a missing or failing notify-send must never break the caller.
        Console.WriteLine($"[Notification] {title}: {message}");
    }
}
/// <summary>
/// Measures provider round-trip times by transcribing one second of generated silence
/// and passing the result through the LLM step.
/// </summary>
/// <remarks>
/// Exceptions from the providers are not caught here. If the speech-to-text or LLM provider
/// has not been created yet, its creation is included in the corresponding measurement.
/// </remarks>
/// <returns>
/// A <c>LatencyResult</c> built from the speech-to-text time, the LLM time and their sum,
/// in milliseconds and in that order.
/// </returns>
public async Task<LatencyResult> RunLatencyTestAsync()
{
    var config = _configManager.Load();

    // Build the probe before starting the clock so that synthesizing the WAV
    // is not counted as speech-to-text latency.
    var wavBytes = GenerateSilentWav(1.0);
    await using var wavStream = new MemoryStream(wavBytes);

    var sttStopwatch = System.Diagnostics.Stopwatch.StartNew();
    var transcription = await TranscribeStreamAsync(wavStream, config);
    sttStopwatch.Stop();

    var llmStopwatch = System.Diagnostics.Stopwatch.StartNew();
    // Only the elapsed time matters here; the completion itself is discarded.
    _ = await ProcessWithLlmAsync(transcription, config);
    llmStopwatch.Stop();

    var sttMilliseconds = (int)sttStopwatch.ElapsedMilliseconds;
    var llmMilliseconds = (int)llmStopwatch.ElapsedMilliseconds;

    return new LatencyResult(
        sttMilliseconds,
        llmMilliseconds,
        sttMilliseconds + llmMilliseconds
    );
}
/// <summary>
/// Transcribes audio from an already-open stream with the configured speech-to-text provider.
/// </summary>
/// <param name="stream">Audio data to transcribe; the stream is not disposed here.</param>
/// <param name="config">Configuration supplying the provider selection and the Whisper model name.</param>
/// <returns>The text returned by the provider.</returns>
private async Task<string> TranscribeStreamAsync(Stream stream, HushConfig config)
{
    var transcript = await GetAudioToTextProvider(config).TranscribeAsync(stream, config.WhisperModel);
    return transcript;
}
/// <summary>
/// Builds an in-memory WAV file (16 kHz, mono, 16-bit PCM) containing only silence.
/// </summary>
/// <param name="durationSeconds">
/// Length of the clip in seconds; truncated down to a whole number of samples.
/// </param>
/// <returns>The complete file: a 44-byte header followed by zero-valued samples.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="durationSeconds"/> is negative or NaN.
/// </exception>
/// <exception cref="OverflowException">The requested clip does not fit in a 32-bit size.</exception>
private static byte[] GenerateSilentWav(double durationSeconds)
{
    if (double.IsNaN(durationSeconds) || durationSeconds < 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(durationSeconds),
            durationSeconds,
            "Duration must be a non-negative number of seconds.");
    }

    const int sampleRate = 16000;
    const short bitsPerSample = 16;
    const short channels = 1;
    const short blockAlign = channels * (bitsPerSample / 8); // bytes per sample frame
    const int headerSize = 44;

    // Derive the byte count from the whole-sample count. Truncating the two values
    // independently made the header disagree with the payload for fractional durations.
    int samples = checked((int)(sampleRate * durationSeconds));
    int dataChunkSize = checked(samples * blockAlign);
    int fileSize = checked(36 + dataChunkSize); // RIFF chunk size: whole file minus the first 8 bytes

    // A new array is zero-filled, and zero is silence for signed 16-bit PCM,
    // so only the header has to be written.
    var wav = new byte[checked(headerSize + dataChunkSize)];

    using var ms = new MemoryStream(wav);
    using var writer = new BinaryWriter(ms);

    writer.Write("RIFF"u8.ToArray());
    writer.Write(fileSize);
    writer.Write("WAVE"u8.ToArray());

    writer.Write("fmt "u8.ToArray());
    writer.Write(16);                      // size of the fmt chunk body
    writer.Write((short)1);                // audio format 1 = PCM
    writer.Write(channels);
    writer.Write(sampleRate);
    writer.Write(sampleRate * blockAlign); // byte rate
    writer.Write(blockAlign);
    writer.Write(bitsPerSample);

    writer.Write("data"u8.ToArray());
    writer.Write(dataChunkSize);
    writer.Flush();

    return wav;
}
}