1
0

feat: Introduce ITranscriptionOrchestrator and related interfaces, refactoring DaemonService and other components to use dependency injection.

This commit is contained in:
2026-02-28 15:36:03 +01:00
parent ac0ac2397b
commit 0577640da9
18 changed files with 356 additions and 175 deletions

View File

@@ -2,19 +2,17 @@ using System;
using System.Diagnostics;
using System.IO;
using System.Net.Sockets;
using System.Threading;
using System.Threading.Tasks;
using Toak.Audio;
using Toak.Configuration;
using Toak.Api;
using Toak.Core.Interfaces;
using Toak.Audio;
using Toak.IO;
namespace Toak.Core;
public static class DaemonService
{
private static GroqApiClient? _groqClient;
public static string GetSocketPath()
{
var runtimeDir = Environment.GetEnvironmentVariable("XDG_RUNTIME_DIR");
@@ -49,14 +47,29 @@ public static class DaemonService
try { File.Delete(socketPath); } catch { }
}
var config = ConfigManager.LoadConfig();
var configManager = new ConfigManager();
var config = configManager.LoadConfig();
if (string.IsNullOrWhiteSpace(config.GroqApiKey))
{
Console.WriteLine("Groq API Key is not configured. Run 'toak onboard'.");
return;
}
_groqClient = new GroqApiClient(config.GroqApiKey);
var stateTracker = new StateTracker();
var notifications = new Notifications();
var groqClient = new GroqApiClient(config.GroqApiKey);
var orchestrator = new TranscriptionOrchestrator(
groqClient,
groqClient,
configManager,
new AudioRecorder(stateTracker, notifications),
notifications,
new TextInjector(notifications),
new HistoryManager(),
new ClipboardManager(notifications),
stateTracker
);
using var socket = new Socket(AddressFamily.Unix, SocketType.Stream, ProtocolType.Unspecified);
var endPoint = new UnixDomainSocketEndPoint(socketPath);
@@ -71,7 +84,7 @@ public static class DaemonService
while (true)
{
var client = await socket.AcceptAsync();
_ = Task.Run(() => HandleClientAsync(client));
_ = Task.Run(() => HandleClientAsync(client, orchestrator, stateTracker));
}
}
catch (Exception ex)
@@ -87,7 +100,7 @@ public static class DaemonService
}
}
private static async Task HandleClientAsync(Socket client)
private static async Task HandleClientAsync(Socket client, ITranscriptionOrchestrator orchestrator, IRecordingStateTracker stateTracker)
{
try
{
@@ -101,22 +114,22 @@ public static class DaemonService
if (cmd == 1) // START
{
await ProcessStartRecordingAsync();
await orchestrator.ProcessStartRecordingAsync();
}
else if (cmd == 2) // STOP
{
await ProcessStopRecordingAsync(client, pipeToStdout, copyToClipboard);
await orchestrator.ProcessStopRecordingAsync(client, pipeToStdout, copyToClipboard);
}
else if (cmd == 3) // ABORT
{
ProcessAbortAsync();
orchestrator.ProcessAbortAsync();
}
else if (cmd == 4) // TOGGLE
{
if (StateTracker.IsRecording())
await ProcessStopRecordingAsync(client, pipeToStdout, copyToClipboard);
if (stateTracker.IsRecording())
await orchestrator.ProcessStopRecordingAsync(client, pipeToStdout, copyToClipboard);
else
await ProcessStartRecordingAsync();
await orchestrator.ProcessStartRecordingAsync();
}
}
}
@@ -129,114 +142,5 @@ public static class DaemonService
client.Close();
}
}
private static async Task ProcessStartRecordingAsync()
{
if (StateTracker.IsRecording()) return;
Logger.LogDebug("Received START command");
var config = ConfigManager.LoadConfig();
Notifications.PlaySound(config.StartSoundPath);
AudioRecorder.StartRecording();
}
private static async Task ProcessStopRecordingAsync(Socket client, bool pipeToStdout, bool copyToClipboard)
{
if (!StateTracker.IsRecording()) return;
Logger.LogDebug("Received STOP command");
var config = ConfigManager.LoadConfig();
Notifications.PlaySound(config.StopSoundPath);
Notifications.Notify("Toak", "Transcribing...");
AudioRecorder.StopRecording();
var wavPath = AudioRecorder.GetWavPath();
if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0)
{
Notifications.Notify("Toak", "No audio recorded.");
return;
}
try
{
var stopWatch = Stopwatch.StartNew();
Logger.LogDebug($"Starting STT via Whisper for {wavPath}...");
var transcript = await _groqClient!.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
if (string.IsNullOrWhiteSpace(transcript))
{
Notifications.Notify("Toak", "No speech detected.");
return;
}
// LLM Refinement
var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
string systemPrompt = detectedSkill != null ? detectedSkill.GetSystemPrompt(transcript) : PromptBuilder.BuildPrompt(config);
bool isExecutionSkill = detectedSkill != null && detectedSkill.HandlesExecution;
if (isExecutionSkill)
{
var finalText = await _groqClient.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
if (!string.IsNullOrWhiteSpace(finalText))
{
detectedSkill!.Execute(finalText);
stopWatch.Stop();
HistoryManager.SaveEntry(transcript, finalText, detectedSkill.Name, stopWatch.ElapsedMilliseconds);
Notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
}
}
else
{
Logger.LogDebug("Starting LLM text refinement (streaming)...");
var tokenStream = _groqClient.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);
if (pipeToStdout || copyToClipboard)
{
string fullText = "";
await foreach (var token in tokenStream)
{
fullText += token;
if (pipeToStdout)
{
await client.SendAsync(System.Text.Encoding.UTF8.GetBytes(token), SocketFlags.None);
}
}
stopWatch.Stop();
if (copyToClipboard)
{
ClipboardManager.Copy(fullText);
Notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
}
HistoryManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds);
}
else
{
string fullText = await TextInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
stopWatch.Stop();
HistoryManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds);
Notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
}
}
}
catch (Exception ex)
{
Notifications.Notify("Toak Error", ex.Message);
Logger.LogDebug($"Error during processing: {ex.Message}");
}
finally
{
if (File.Exists(wavPath)) File.Delete(wavPath);
}
}
private static void ProcessAbortAsync()
{
Logger.LogDebug("Received ABORT command");
AudioRecorder.StopRecording();
var wavPath = AudioRecorder.GetWavPath();
if (File.Exists(wavPath)) File.Delete(wavPath);
Notifications.Notify("Toak", "Recording Aborted.");
}
}

View File

@@ -4,15 +4,21 @@ using System.IO;
using System.Text.Json;
using System.Threading.Tasks;
using Toak.Serialization;
using Toak.Core.Interfaces;
namespace Toak.Core;
public static class HistoryManager
public class HistoryManager : IHistoryManager
{
private static readonly string HistoryDir = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "toak");
private static readonly string HistoryFile = Path.Combine(HistoryDir, "history.jsonl");
private readonly string HistoryDir = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "toak");
private readonly string HistoryFile;
public static void SaveEntry(string rawTranscript, string refinedText, string? skillName, long durationMs)
public HistoryManager()
{
HistoryFile = Path.Combine(HistoryDir, "history.jsonl");
}
public void SaveEntry(string rawTranscript, string refinedText, string? skillName, long durationMs)
{
try
{
@@ -44,7 +50,7 @@ public static class HistoryManager
}
}
public static List<HistoryEntry> LoadEntries()
public List<HistoryEntry> LoadHistory()
{
var entries = new List<HistoryEntry>();
if (!File.Exists(HistoryFile)) return entries;
@@ -85,7 +91,7 @@ public static class HistoryManager
return entries;
}
public static void Shred()
public void ClearHistory()
{
if (File.Exists(HistoryFile))
{

View File

@@ -0,0 +1,11 @@
using System.Net.Sockets;
using System.Threading.Tasks;
namespace Toak.Core.Interfaces;
/// <summary>
/// Handles the recording commands (start, stop, abort) of a dictation session.
/// </summary>
public interface ITranscriptionOrchestrator
{
/// <summary>Handles a request to begin recording.</summary>
/// <returns>A task that completes once the request has been handled.</returns>
Task ProcessStartRecordingAsync();
/// <summary>Handles a request to stop recording and process the captured audio.</summary>
/// <param name="client">Socket of the requesting client; used as the destination for output when <paramref name="pipeToStdout"/> is set.</param>
/// <param name="pipeToStdout">Whether the resulting text should be sent back over <paramref name="client"/>.</param>
/// <param name="copyToClipboard">Whether the resulting text should be copied to the clipboard.</param>
/// <returns>A task that completes once processing has finished.</returns>
Task ProcessStopRecordingAsync(Socket client, bool pipeToStdout, bool copyToClipboard);
/// <summary>
/// Handles a request to abort the current recording.
/// Note: despite the <c>Async</c> suffix this member is synchronous and returns <c>void</c>.
/// </summary>
void ProcessAbortAsync();
}

View File

@@ -0,0 +1,62 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Toak.Configuration;
namespace Toak.Core.Interfaces;
/// <summary>
/// Source of the application's <see cref="ToakConfig"/>; supports loading and saving it.
/// </summary>
public interface IConfigProvider
{
/// <summary>Loads the configuration.</summary>
/// <returns>The loaded <see cref="ToakConfig"/>.</returns>
ToakConfig LoadConfig();
/// <summary>Saves the given configuration.</summary>
/// <param name="config">The configuration to save.</param>
void SaveConfig(ToakConfig config);
}
/// <summary>
/// Speech-to-text client that turns an audio file into a transcript.
/// </summary>
public interface ISpeechClient
{
/// <summary>Transcribes the audio file at <paramref name="filePath"/>.</summary>
/// <param name="filePath">Path of the audio file to transcribe.</param>
/// <param name="language">Language setting for the transcription; defaults to an empty string (presumably "auto-detect" — confirm against the implementation).</param>
/// <param name="model">Name of the transcription model; defaults to <c>whisper-large-v3-turbo</c>.</param>
/// <returns>A task producing the transcript text.</returns>
Task<string> TranscribeAsync(string filePath, string language = "", string model = "whisper-large-v3-turbo");
}
/// <summary>
/// Language-model client that refines a raw transcript according to a system prompt,
/// either as one complete result or as a stream of text pieces.
/// </summary>
public interface ILlmClient
{
/// <summary>Refines <paramref name="rawTranscript"/> and returns the complete result.</summary>
/// <param name="rawTranscript">The unrefined transcript text.</param>
/// <param name="systemPrompt">System prompt that steers the refinement.</param>
/// <param name="model">Name of the model to use; defaults to <c>openai/gpt-oss-20b</c>.</param>
/// <returns>A task producing the refined text.</returns>
Task<string> RefineTextAsync(string rawTranscript, string systemPrompt, string model = "openai/gpt-oss-20b");
/// <summary>Refines <paramref name="rawTranscript"/> and yields the result incrementally.</summary>
/// <param name="rawTranscript">The unrefined transcript text.</param>
/// <param name="systemPrompt">System prompt that steers the refinement.</param>
/// <param name="model">Name of the model to use; defaults to <c>openai/gpt-oss-20b</c>.</param>
/// <returns>An asynchronous sequence of text pieces that together form the refined text.</returns>
IAsyncEnumerable<string> RefineTextStreamAsync(string rawTranscript, string systemPrompt, string model = "openai/gpt-oss-20b");
}
/// <summary>
/// Controls audio capture and exposes the path of the recorded WAV file.
/// </summary>
public interface IAudioRecorder
{
/// <summary>Gets the path of the WAV file used for the recording.</summary>
/// <returns>The WAV file path.</returns>
string GetWavPath();
/// <summary>Starts recording.</summary>
void StartRecording();
/// <summary>Stops recording.</summary>
void StopRecording();
}
/// <summary>
/// User feedback channel: textual notifications and sound playback.
/// </summary>
public interface INotifications
{
/// <summary>Shows a notification to the user.</summary>
/// <param name="title">Notification title.</param>
/// <param name="message">Notification body; defaults to an empty string.</param>
void Notify(string title, string message = "");
/// <summary>Plays the sound at <paramref name="soundPath"/>.</summary>
/// <param name="soundPath">Path of the sound to play.</param>
void PlaySound(string soundPath);
}
/// <summary>
/// Injects text through a named typing backend, either from a stream or as a whole string.
/// </summary>
public interface ITextInjector
{
/// <summary>Injects text as it arrives from <paramref name="textStream"/>.</summary>
/// <param name="textStream">Asynchronous sequence of text pieces to inject.</param>
/// <param name="backend">Name of the typing backend; defaults to <c>xdotool</c>.</param>
/// <returns>A task producing a string (presumably the full concatenated text that was injected — confirm against the implementation).</returns>
Task<string> InjectStreamAsync(IAsyncEnumerable<string> textStream, string backend = "xdotool");
/// <summary>Injects <paramref name="text"/> in one call.</summary>
/// <param name="text">The text to inject.</param>
/// <param name="backend">Name of the typing backend; defaults to <c>xdotool</c>.</param>
/// <returns>A task that completes once injection has finished.</returns>
Task InjectTextAsync(string text, string backend = "xdotool");
}
/// <summary>
/// Store of past transcriptions: append an entry, load all entries, or clear them.
/// </summary>
public interface IHistoryManager
{
/// <summary>Records one processed transcription.</summary>
/// <param name="rawText">The raw transcript.</param>
/// <param name="finalText">The final (refined) text.</param>
/// <param name="skillUsed">Name of the skill that handled the transcript, or <c>null</c> if none.</param>
/// <param name="timeTakenMs">Processing time in milliseconds.</param>
void SaveEntry(string rawText, string finalText, string? skillUsed, long timeTakenMs);
/// <summary>Loads the stored history.</summary>
/// <returns>The list of stored entries.</returns>
List<HistoryEntry> LoadHistory();
/// <summary>Clears the stored history.</summary>
void ClearHistory();
}
/// <summary>
/// Clipboard access.
/// </summary>
public interface IClipboardManager
{
/// <summary>Copies <paramref name="text"/> to the clipboard.</summary>
/// <param name="text">The text to copy.</param>
void Copy(string text);
}
/// <summary>
/// Tracks whether a recording is in progress and the process id associated with it.
/// </summary>
public interface IRecordingStateTracker
{
/// <summary>Gets the process id of the current recording, if any.</summary>
/// <returns>The recorded process id, or <c>null</c> when none is available.</returns>
int? GetRecordingPid();
/// <summary>Marks a recording as in progress.</summary>
/// <param name="pid">Process id of the recording process.</param>
void SetRecording(int pid);
/// <summary>Clears the recording state.</summary>
void ClearRecording();
/// <summary>Reports whether a recording is currently marked as in progress.</summary>
/// <returns><c>true</c> if a recording is in progress; otherwise <c>false</c>.</returns>
bool IsRecording();
}

View File

@@ -1,21 +1,23 @@
using Toak.Core.Interfaces;
namespace Toak.Core;
public static class StateTracker
public class StateTracker : IRecordingStateTracker
{
private static readonly string StateFilePath = Path.Combine(Path.GetTempPath(), "toak_state.pid");
private readonly string StateFilePath = Path.Combine(Path.GetTempPath(), "toak_state.pid");
public static bool IsRecording()
public bool IsRecording()
{
return File.Exists(StateFilePath);
}
public static void SetRecording(int ffmpegPid)
public void SetRecording(int ffmpegPid)
{
Logger.LogDebug($"Setting recording state with PID {ffmpegPid}");
File.WriteAllText(StateFilePath, ffmpegPid.ToString());
}
public static int? GetRecordingPid()
public int? GetRecordingPid()
{
if (File.Exists(StateFilePath))
{
@@ -29,7 +31,7 @@ public static class StateTracker
return null;
}
public static void ClearRecording()
public void ClearRecording()
{
if (File.Exists(StateFilePath))
{

View File

@@ -0,0 +1,153 @@
using System;
using System.Diagnostics;
using System.IO;
using System.Net.Sockets;
using System.Threading.Tasks;
using Toak.Core.Interfaces;
using Toak.Configuration;
namespace Toak.Core;
/// <summary>
/// Coordinates a dictation session: starts and stops the recorder, transcribes the
/// captured audio, refines the transcript with the LLM (or hands it to a detected
/// skill), and delivers the result to the client socket, the clipboard, or the
/// text injector. Every processed transcript is written to the history.
/// </summary>
public class TranscriptionOrchestrator : ITranscriptionOrchestrator
{
    private readonly ISpeechClient _speechClient;
    private readonly ILlmClient _llmClient;
    private readonly IConfigProvider _configProvider;
    private readonly IAudioRecorder _audioRecorder;
    private readonly INotifications _notifications;
    private readonly ITextInjector _textInjector;
    private readonly IHistoryManager _historyManager;
    private readonly IClipboardManager _clipboardManager;
    private readonly IRecordingStateTracker _stateTracker;

    /// <summary>
    /// Creates an orchestrator from its collaborators.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any dependency is <c>null</c>.</exception>
    public TranscriptionOrchestrator(
        ISpeechClient speechClient,
        ILlmClient llmClient,
        IConfigProvider configProvider,
        IAudioRecorder audioRecorder,
        INotifications notifications,
        ITextInjector textInjector,
        IHistoryManager historyManager,
        IClipboardManager clipboardManager,
        IRecordingStateTracker stateTracker)
    {
        // Fail at wiring time instead of with a NullReferenceException in the middle of a command.
        _speechClient = speechClient ?? throw new ArgumentNullException(nameof(speechClient));
        _llmClient = llmClient ?? throw new ArgumentNullException(nameof(llmClient));
        _configProvider = configProvider ?? throw new ArgumentNullException(nameof(configProvider));
        _audioRecorder = audioRecorder ?? throw new ArgumentNullException(nameof(audioRecorder));
        _notifications = notifications ?? throw new ArgumentNullException(nameof(notifications));
        _textInjector = textInjector ?? throw new ArgumentNullException(nameof(textInjector));
        _historyManager = historyManager ?? throw new ArgumentNullException(nameof(historyManager));
        _clipboardManager = clipboardManager ?? throw new ArgumentNullException(nameof(clipboardManager));
        _stateTracker = stateTracker ?? throw new ArgumentNullException(nameof(stateTracker));
    }

    /// <summary>
    /// Handles the START command: plays the configured start sound and starts the
    /// recorder. Does nothing when a recording is already in progress.
    /// </summary>
    /// <returns>
    /// A completed task. The work itself is synchronous; failures are reported
    /// through the returned task, exactly as an <c>async</c> method would report them.
    /// </returns>
    public Task ProcessStartRecordingAsync()
    {
        try
        {
            if (_stateTracker.IsRecording())
            {
                return Task.CompletedTask;
            }

            Logger.LogDebug("Received START command");
            var config = _configProvider.LoadConfig();
            _notifications.PlaySound(config.StartSoundPath);
            _audioRecorder.StartRecording();
            return Task.CompletedTask;
        }
        catch (Exception ex)
        {
            // Keep the exception on the task so callers observe it when awaiting.
            return Task.FromException(ex);
        }
    }

    /// <summary>
    /// Handles the STOP command: stops the recorder, transcribes the WAV file,
    /// refines the transcript and delivers the result. Does nothing when no
    /// recording is in progress. The WAV file is deleted when processing ends,
    /// whether it succeeded, failed, or produced no usable audio.
    /// </summary>
    /// <param name="client">Client socket that receives the streamed text when <paramref name="pipeToStdout"/> is set.</param>
    /// <param name="pipeToStdout">Send each streamed piece of refined text to <paramref name="client"/>.</param>
    /// <param name="copyToClipboard">Copy the complete refined text to the clipboard.</param>
    /// <returns>A task that completes once processing and cleanup have finished.</returns>
    public async Task ProcessStopRecordingAsync(Socket client, bool pipeToStdout, bool copyToClipboard)
    {
        if (!_stateTracker.IsRecording())
        {
            return;
        }

        Logger.LogDebug("Received STOP command");
        var config = _configProvider.LoadConfig();

        _notifications.PlaySound(config.StopSoundPath);
        _notifications.Notify("Toak", "Transcribing...");

        _audioRecorder.StopRecording();
        var wavPath = _audioRecorder.GetWavPath();

        try
        {
            // Checked inside the try so that a zero-length recording is also removed by the finally block.
            if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0)
            {
                _notifications.Notify("Toak", "No audio recorded.");
                return;
            }

            var stopWatch = Stopwatch.StartNew();

            Logger.LogDebug($"Starting STT via Whisper for {wavPath}...");
            var transcript = await _speechClient.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);

            if (string.IsNullOrWhiteSpace(transcript))
            {
                _notifications.Notify("Toak", "No speech detected.");
                return;
            }

            // A detected skill supplies its own system prompt; otherwise use the general prompt.
            var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
            string systemPrompt = detectedSkill != null
                ? detectedSkill.GetSystemPrompt(transcript)
                : PromptBuilder.BuildPrompt(config);

            if (detectedSkill != null && detectedSkill.HandlesExecution)
            {
                // Execution skills need the complete refined text before they can run, so no streaming here.
                var finalText = await _llmClient.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
                if (!string.IsNullOrWhiteSpace(finalText))
                {
                    detectedSkill.Execute(finalText);
                    stopWatch.Stop();
                    _historyManager.SaveEntry(transcript, finalText, detectedSkill.Name, stopWatch.ElapsedMilliseconds);
                    _notifications.Notify("Toak", $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
                }

                return;
            }

            Logger.LogDebug("Starting LLM text refinement (streaming)...");
            var tokenStream = _llmClient.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);

            if (pipeToStdout || copyToClipboard)
            {
                string fullText = await CollectStreamAsync(tokenStream, client, pipeToStdout);
                stopWatch.Stop();

                if (copyToClipboard)
                {
                    _clipboardManager.Copy(fullText);
                    _notifications.Notify("Toak", $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
                }

                _historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds);
            }
            else
            {
                string fullText = await _textInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
                stopWatch.Stop();
                _historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds);
                _notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");
            }
        }
        catch (Exception ex)
        {
            _notifications.Notify("Toak Error", ex.Message);
            Logger.LogDebug($"Error during processing: {ex.Message}");
        }
        finally
        {
            DeleteRecording(wavPath);
        }
    }

    /// <summary>
    /// Handles the ABORT command: stops the recorder, deletes the WAV file if it
    /// exists, and notifies the user. Synchronous despite the <c>Async</c> suffix.
    /// </summary>
    public void ProcessAbortAsync()
    {
        Logger.LogDebug("Received ABORT command");
        _audioRecorder.StopRecording();
        DeleteRecording(_audioRecorder.GetWavPath());
        _notifications.Notify("Toak", "Recording Aborted.");
    }

    /// <summary>
    /// Drains the token stream into a single string, optionally forwarding each
    /// token to the client socket as UTF-8 while it arrives.
    /// </summary>
    private static async Task<string> CollectStreamAsync(
        System.Collections.Generic.IAsyncEnumerable<string> tokenStream,
        Socket client,
        bool pipeToStdout)
    {
        // StringBuilder avoids re-copying the whole text on every token.
        var buffer = new System.Text.StringBuilder();

        await foreach (var token in tokenStream)
        {
            buffer.Append(token);
            if (pipeToStdout)
            {
                await client.SendAsync(System.Text.Encoding.UTF8.GetBytes(token), SocketFlags.None);
            }
        }

        return buffer.ToString();
    }

    /// <summary>Deletes the recording at <paramref name="wavPath"/> if it exists.</summary>
    private static void DeleteRecording(string wavPath)
    {
        if (File.Exists(wavPath))
        {
            File.Delete(wavPath);
        }
    }
}