feat: Introduce ITranscriptionOrchestrator and related interfaces, refactoring DaemonService and other components to use dependency injection.
This commit is contained in:
153
Core/TranscriptionOrchestrator.cs
Normal file
153
Core/TranscriptionOrchestrator.cs
Normal file
@@ -0,0 +1,153 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Net.Sockets;
|
||||
using System.Threading.Tasks;
|
||||
using Toak.Core.Interfaces;
|
||||
using Toak.Configuration;
|
||||
|
||||
namespace Toak.Core;
|
||||
|
||||
/// <summary>
/// Drives the dictation workflow: starts and stops the audio recorder, sends the
/// recorded WAV file to the speech client, refines the transcript through the LLM
/// client, and delivers the result by skill execution, socket streaming, clipboard
/// copy, or text injection.
/// </summary>
public class TranscriptionOrchestrator : ITranscriptionOrchestrator
{
    // Title used for every non-error desktop notification raised by this class.
    private const string NotificationTitle = "Toak";

    private readonly ISpeechClient _speechClient;
    private readonly ILlmClient _llmClient;
    private readonly IConfigProvider _configProvider;
    private readonly IAudioRecorder _audioRecorder;
    private readonly INotifications _notifications;
    private readonly ITextInjector _textInjector;
    private readonly IHistoryManager _historyManager;
    private readonly IClipboardManager _clipboardManager;
    private readonly IRecordingStateTracker _stateTracker;

    /// <summary>
    /// Creates an orchestrator from its collaborators. The arguments are stored
    /// as-is; no validation is performed here.
    /// </summary>
    public TranscriptionOrchestrator(
        ISpeechClient speechClient,
        ILlmClient llmClient,
        IConfigProvider configProvider,
        IAudioRecorder audioRecorder,
        INotifications notifications,
        ITextInjector textInjector,
        IHistoryManager historyManager,
        IClipboardManager clipboardManager,
        IRecordingStateTracker stateTracker)
    {
        _speechClient = speechClient;
        _llmClient = llmClient;
        _configProvider = configProvider;
        _audioRecorder = audioRecorder;
        _notifications = notifications;
        _textInjector = textInjector;
        _historyManager = historyManager;
        _clipboardManager = clipboardManager;
        _stateTracker = stateTracker;
    }

    /// <summary>
    /// Handles a START command: plays the configured start sound and starts the
    /// audio recorder. Does nothing when the state tracker reports that a
    /// recording is already in progress.
    /// </summary>
    /// <returns>A task that completes once the recorder has been started.</returns>
    // The method contains no awaits; the async modifier is kept so that exceptions
    // thrown by the collaborators surface through the returned Task rather than
    // synchronously at the call site.
    public async Task ProcessStartRecordingAsync()
    {
        if (_stateTracker.IsRecording())
        {
            return;
        }

        Logger.LogDebug("Received START command");
        var config = _configProvider.LoadConfig();
        _notifications.PlaySound(config.StartSoundPath);
        _audioRecorder.StartRecording();
    }

    /// <summary>
    /// Handles a STOP command: stops the recorder, transcribes the recorded WAV
    /// file, refines the transcript and delivers the result. Does nothing when the
    /// state tracker reports that no recording is in progress.
    /// </summary>
    /// <param name="client">
    /// Socket that receives each refined token as UTF-8 bytes. Only used when
    /// <paramref name="pipeToStdout"/> is <c>true</c>.
    /// </param>
    /// <param name="pipeToStdout">
    /// When <c>true</c>, tokens are sent to <paramref name="client"/> as they arrive.
    /// </param>
    /// <param name="copyToClipboard">
    /// When <c>true</c>, the complete refined text is copied to the clipboard.
    /// </param>
    /// <remarks>
    /// When both flags are <c>false</c> the refined text is handed to the text
    /// injector instead. Exceptions raised while processing are reported through a
    /// notification and the debug log rather than propagated. The WAV file is
    /// deleted on every path that reaches processing, including the
    /// "No audio recorded." path.
    /// </remarks>
    public async Task ProcessStopRecordingAsync(Socket client, bool pipeToStdout, bool copyToClipboard)
    {
        if (!_stateTracker.IsRecording())
        {
            return;
        }

        Logger.LogDebug("Received STOP command");
        var config = _configProvider.LoadConfig();
        _notifications.PlaySound(config.StopSoundPath);
        _notifications.Notify(NotificationTitle, "Transcribing...");

        _audioRecorder.StopRecording();

        var wavPath = _audioRecorder.GetWavPath();

        try
        {
            // This guard lives inside the try block so that an empty recording
            // file is still removed by the finally clause below.
            if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0)
            {
                _notifications.Notify(NotificationTitle, "No audio recorded.");
                return;
            }

            var stopWatch = Stopwatch.StartNew();

            Logger.LogDebug($"Starting STT via Whisper for {wavPath}...");
            var transcript = await _speechClient.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);

            if (string.IsNullOrWhiteSpace(transcript))
            {
                _notifications.Notify(NotificationTitle, "No speech detected.");
                return;
            }

            // A detected skill supplies its own system prompt; otherwise the
            // prompt is built from the configuration.
            var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
            string systemPrompt = detectedSkill != null
                ? detectedSkill.GetSystemPrompt(transcript)
                : PromptBuilder.BuildPrompt(config);

            if (detectedSkill != null && detectedSkill.HandlesExecution)
            {
                // Execution skills consume the complete refined text at once,
                // so the non-streaming LLM call is used.
                var finalText = await _llmClient.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
                if (!string.IsNullOrWhiteSpace(finalText))
                {
                    detectedSkill.Execute(finalText);
                    stopWatch.Stop();
                    _historyManager.SaveEntry(transcript, finalText, detectedSkill.Name, stopWatch.ElapsedMilliseconds);
                    _notifications.Notify(NotificationTitle, $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
                }
            }
            else
            {
                Logger.LogDebug("Starting LLM text refinement (streaming)...");
                var tokenStream = _llmClient.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);

                if (pipeToStdout || copyToClipboard)
                {
                    // Accumulate the streamed tokens in a builder to avoid
                    // repeated string reallocation on long outputs.
                    var collected = new System.Text.StringBuilder();
                    await foreach (var token in tokenStream)
                    {
                        collected.Append(token);
                        if (pipeToStdout)
                        {
                            await client.SendAsync(System.Text.Encoding.UTF8.GetBytes(token), SocketFlags.None);
                        }
                    }

                    stopWatch.Stop();
                    string fullText = collected.ToString();

                    if (copyToClipboard)
                    {
                        _clipboardManager.Copy(fullText);
                        _notifications.Notify(NotificationTitle, $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
                    }

                    _historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds);
                }
                else
                {
                    string fullText = await _textInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
                    stopWatch.Stop();
                    _historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds);
                    _notifications.Notify(NotificationTitle, $"Done in {stopWatch.ElapsedMilliseconds}ms");
                }
            }
        }
        catch (Exception ex)
        {
            _notifications.Notify("Toak Error", ex.Message);
            Logger.LogDebug($"Error during processing: {ex.Message}");
        }
        finally
        {
            if (File.Exists(wavPath))
            {
                File.Delete(wavPath);
            }
        }
    }

    /// <summary>
    /// Handles an ABORT command: stops the recorder, deletes the recorded WAV file
    /// if it exists, and raises a "Recording Aborted." notification.
    /// </summary>
    /// <remarks>
    /// The method runs synchronously despite its Async suffix. Unlike the start and
    /// stop handlers, it does not consult the recording state tracker first.
    /// </remarks>
    public void ProcessAbortAsync()
    {
        Logger.LogDebug("Received ABORT command");
        _audioRecorder.StopRecording();

        var wavPath = _audioRecorder.GetWavPath();
        if (File.Exists(wavPath))
        {
            File.Delete(wavPath);
        }

        _notifications.Notify(NotificationTitle, "Recording Aborted.");
    }
}
|
||||
Reference in New Issue
Block a user