1
0
Files
Toak/Core/TranscriptionOrchestrator.cs

154 lines
5.9 KiB
C#

using System;
using System.Diagnostics;
using System.IO;
using System.Net.Sockets;
using System.Threading.Tasks;
using Toak.Core.Interfaces;
using Toak.Configuration;
namespace Toak.Core;
/// <summary>
/// Drives the recording lifecycle: starts and stops the audio recorder, sends the
/// captured WAV file to the speech client, refines the transcript through the LLM
/// client and delivers the result (skill execution, socket stream, clipboard or
/// text injection), recording each completed run in the history manager.
/// </summary>
public class TranscriptionOrchestrator : ITranscriptionOrchestrator
{
    /// <summary>Title used for all non-error notifications raised by this class.</summary>
    private const string NotificationTitle = "Toak";

    private readonly ISpeechClient _speechClient;
    private readonly ILlmClient _llmClient;
    private readonly IConfigProvider _configProvider;
    private readonly IAudioRecorder _audioRecorder;
    private readonly INotifications _notifications;
    private readonly ITextInjector _textInjector;
    private readonly IHistoryManager _historyManager;
    private readonly IClipboardManager _clipboardManager;
    private readonly IRecordingStateTracker _stateTracker;

    /// <summary>
    /// Creates an orchestrator wired to the given collaborators. The references are
    /// stored as-is; no validation or work is performed here.
    /// </summary>
    public TranscriptionOrchestrator(
        ISpeechClient speechClient,
        ILlmClient llmClient,
        IConfigProvider configProvider,
        IAudioRecorder audioRecorder,
        INotifications notifications,
        ITextInjector textInjector,
        IHistoryManager historyManager,
        IClipboardManager clipboardManager,
        IRecordingStateTracker stateTracker)
    {
        _speechClient = speechClient;
        _llmClient = llmClient;
        _configProvider = configProvider;
        _audioRecorder = audioRecorder;
        _notifications = notifications;
        _textInjector = textInjector;
        _historyManager = historyManager;
        _clipboardManager = clipboardManager;
        _stateTracker = stateTracker;
    }

    /// <summary>
    /// Handles a START command: plays the configured start sound and starts the
    /// audio recorder. Does nothing when the state tracker reports that a
    /// recording is already in progress.
    /// </summary>
    /// <returns>
    /// A completed task on success, or a faulted task carrying the exception if
    /// any step throws.
    /// </returns>
    public Task ProcessStartRecordingAsync()
    {
        // All work here is synchronous, so the method is deliberately not `async`
        // (that would only produce CS1998). Failures are still reported through the
        // returned task so awaiting callers observe them the same way as before.
        try
        {
            if (_stateTracker.IsRecording())
            {
                return Task.CompletedTask;
            }

            Logger.LogDebug("Received START command");
            var config = _configProvider.LoadConfig();
            _notifications.PlaySound(config.StartSoundPath);
            _audioRecorder.StartRecording();
            return Task.CompletedTask;
        }
        catch (Exception ex)
        {
            return Task.FromException(ex);
        }
    }

    /// <summary>
    /// Handles a STOP command: stops the recorder, transcribes the captured WAV
    /// file, refines the transcript with the LLM and delivers the result. Does
    /// nothing when the state tracker reports that no recording is in progress.
    /// </summary>
    /// <param name="client">
    /// Socket that receives each refined token as UTF-8 bytes when
    /// <paramref name="pipeToStdout"/> is <c>true</c>; not used otherwise.
    /// </param>
    /// <param name="pipeToStdout">When <c>true</c>, tokens are sent to <paramref name="client"/> as they arrive.</param>
    /// <param name="copyToClipboard">When <c>true</c>, the complete refined text is copied to the clipboard.</param>
    /// <remarks>
    /// When both flags are <c>false</c> the token stream is handed to the text
    /// injector instead. Exceptions raised while processing are reported through a
    /// notification and the debug log rather than rethrown. The WAV file is removed
    /// on every path once processing has been attempted.
    /// </remarks>
    public async Task ProcessStopRecordingAsync(Socket client, bool pipeToStdout, bool copyToClipboard)
    {
        if (!_stateTracker.IsRecording())
        {
            return;
        }

        Logger.LogDebug("Received STOP command");
        var config = _configProvider.LoadConfig();
        _notifications.PlaySound(config.StopSoundPath);
        _notifications.Notify(NotificationTitle, "Transcribing...");
        _audioRecorder.StopRecording();
        var wavPath = _audioRecorder.GetWavPath();

        try
        {
            // Checked inside the try so that an existing but empty recording is
            // still removed by the finally block instead of being left on disk.
            if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0)
            {
                _notifications.Notify(NotificationTitle, "No audio recorded.");
                return;
            }

            // Started before transcription, so the reported time covers
            // speech-to-text, refinement and delivery.
            var stopWatch = Stopwatch.StartNew();
            Logger.LogDebug($"Starting STT via Whisper for {wavPath}...");
            var transcript = await _speechClient.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
            if (string.IsNullOrWhiteSpace(transcript))
            {
                _notifications.Notify(NotificationTitle, "No speech detected.");
                return;
            }

            var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
            string systemPrompt = detectedSkill != null
                ? detectedSkill.GetSystemPrompt(transcript)
                : PromptBuilder.BuildPrompt(config);

            if (detectedSkill != null && detectedSkill.HandlesExecution)
            {
                // Execution skills need the complete refined text, so the
                // non-streaming LLM call is used here.
                var finalText = await _llmClient.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
                if (!string.IsNullOrWhiteSpace(finalText))
                {
                    detectedSkill.Execute(finalText);
                    stopWatch.Stop();
                    _historyManager.SaveEntry(transcript, finalText, detectedSkill.Name, stopWatch.ElapsedMilliseconds);
                    _notifications.Notify(NotificationTitle, $"Skill executed in {stopWatch.ElapsedMilliseconds}ms");
                }
            }
            else
            {
                Logger.LogDebug("Starting LLM text refinement (streaming)...");
                var tokenStream = _llmClient.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);
                if (pipeToStdout || copyToClipboard)
                {
                    // StringBuilder keeps accumulation linear in the output size;
                    // repeated string concatenation would re-copy the text per token.
                    var buffer = new System.Text.StringBuilder();
                    await foreach (var token in tokenStream)
                    {
                        buffer.Append(token);
                        if (pipeToStdout)
                        {
                            await client.SendAsync(System.Text.Encoding.UTF8.GetBytes(token), SocketFlags.None);
                        }
                    }

                    stopWatch.Stop();
                    string fullText = buffer.ToString();
                    if (copyToClipboard)
                    {
                        _clipboardManager.Copy(fullText);
                        _notifications.Notify(NotificationTitle, $"Copied to clipboard in {stopWatch.ElapsedMilliseconds}ms");
                    }

                    _historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds);
                }
                else
                {
                    string fullText = await _textInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
                    stopWatch.Stop();
                    _historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds);
                    _notifications.Notify(NotificationTitle, $"Done in {stopWatch.ElapsedMilliseconds}ms");
                }
            }
        }
        catch (Exception ex)
        {
            _notifications.Notify("Toak Error", ex.Message);
            Logger.LogDebug($"Error during processing: {ex.Message}");
        }
        finally
        {
            DeleteIfExists(wavPath);
        }
    }

    /// <summary>
    /// Handles an ABORT command: stops the recorder, discards the WAV file it
    /// produced (if any) and notifies the user that the recording was aborted.
    /// </summary>
    public void ProcessAbortAsync()
    {
        Logger.LogDebug("Received ABORT command");
        _audioRecorder.StopRecording();
        DeleteIfExists(_audioRecorder.GetWavPath());
        _notifications.Notify(NotificationTitle, "Recording Aborted.");
    }

    /// <summary>
    /// Best-effort removal of the recording file. IO and permission failures are
    /// logged instead of thrown so that cleanup cannot hide the outcome of the
    /// operation that preceded it.
    /// </summary>
    /// <param name="path">Path of the file to delete; ignored when no such file exists.</param>
    private static void DeleteIfExists(string? path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            Logger.LogDebug($"Could not delete recording file '{path}': {ex.Message}");
        }
    }
}