154 lines
5.9 KiB
C#
154 lines
5.9 KiB
C#
using System;
|
|
using System.Diagnostics;
|
|
using System.IO;
|
|
using System.Net.Sockets;
|
|
using System.Threading.Tasks;
|
|
using Toak.Core.Interfaces;
|
|
using Toak.Configuration;
|
|
|
|
namespace Toak.Core;
|
|
|
|
/// <summary>
/// Coordinates the START / STOP / ABORT recording commands: plays the configured
/// sounds, drives the audio recorder, sends the recorded WAV file to the speech
/// client, and then either runs a detected skill or delivers the LLM-refined text
/// to a socket, the clipboard, or the text injector.
/// </summary>
public class TranscriptionOrchestrator : ITranscriptionOrchestrator
{
    private readonly ISpeechClient _speechClient;
    private readonly ILlmClient _llmClient;
    private readonly IConfigProvider _configProvider;
    private readonly IAudioRecorder _audioRecorder;
    private readonly INotifications _notifications;
    private readonly ITextInjector _textInjector;
    private readonly IHistoryManager _historyManager;
    private readonly IClipboardManager _clipboardManager;
    private readonly IRecordingStateTracker _stateTracker;

    /// <summary>
    /// Creates an orchestrator over the supplied collaborators. Each argument is
    /// stored as-is; no validation is performed here.
    /// </summary>
    public TranscriptionOrchestrator(
        ISpeechClient speechClient,
        ILlmClient llmClient,
        IConfigProvider configProvider,
        IAudioRecorder audioRecorder,
        INotifications notifications,
        ITextInjector textInjector,
        IHistoryManager historyManager,
        IClipboardManager clipboardManager,
        IRecordingStateTracker stateTracker)
    {
        _speechClient = speechClient;
        _llmClient = llmClient;
        _configProvider = configProvider;
        _audioRecorder = audioRecorder;
        _notifications = notifications;
        _textInjector = textInjector;
        _historyManager = historyManager;
        _clipboardManager = clipboardManager;
        _stateTracker = stateTracker;
    }

    /// <summary>
    /// Handles the START command: plays the configured start sound and starts the
    /// audio recorder. Does nothing when the state tracker reports that a
    /// recording is already in progress.
    /// </summary>
    /// <returns>A task that has already run the whole body when it is returned.</returns>
    // The body contains no await. The async modifier is kept on purpose so that an
    // exception thrown here is still delivered through the returned Task, exactly
    // as callers have observed so far.
    public async Task ProcessStartRecordingAsync()
    {
        if (_stateTracker.IsRecording())
        {
            return;
        }

        Logger.LogDebug("Received START command");
        var config = _configProvider.LoadConfig();
        _notifications.PlaySound(config.StartSoundPath);
        _audioRecorder.StartRecording();
    }

    /// <summary>
    /// Handles the STOP command: stops the recorder, transcribes the recorded WAV
    /// file and delivers the result. Does nothing when the state tracker reports
    /// that no recording is in progress.
    /// </summary>
    /// <param name="client">
    /// Socket that receives each streamed token, UTF-8 encoded, when
    /// <paramref name="pipeToStdout"/> is true. Not used otherwise.
    /// </param>
    /// <param name="pipeToStdout">When true, every streamed token is sent to <paramref name="client"/>.</param>
    /// <param name="copyToClipboard">When true, the complete text is copied to the clipboard once the stream ends.</param>
    /// <remarks>
    /// When neither flag is set, the token stream is handed to the text injector
    /// instead. Exceptions raised while transcribing, refining or delivering are
    /// caught, shown as a "Toak Error" notification and logged; the WAV file is
    /// removed afterwards in every case.
    /// </remarks>
    public async Task ProcessStopRecordingAsync(Socket client, bool pipeToStdout, bool copyToClipboard)
    {
        if (!_stateTracker.IsRecording())
        {
            return;
        }

        Logger.LogDebug("Received STOP command");
        var config = _configProvider.LoadConfig();
        _notifications.PlaySound(config.StopSoundPath);
        _notifications.Notify("Toak", "Transcribing...");

        _audioRecorder.StopRecording();

        var wavPath = _audioRecorder.GetWavPath();
        if (!File.Exists(wavPath) || new FileInfo(wavPath).Length == 0)
        {
            _notifications.Notify("Toak", "No audio recorded.");

            // This exit happens before the try/finally below, so an existing but
            // empty capture has to be removed here or it would stay on disk.
            if (File.Exists(wavPath))
            {
                File.Delete(wavPath);
            }

            return;
        }

        try
        {
            var stopWatch = Stopwatch.StartNew();

            Logger.LogDebug($"Starting STT via Whisper for {wavPath}...");
            var transcript = await _speechClient.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);

            if (string.IsNullOrWhiteSpace(transcript))
            {
                _notifications.Notify("Toak", "No speech detected.");
                return;
            }

            // A detected skill supplies its own system prompt; otherwise the prompt
            // is built from the configuration.
            var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
            string systemPrompt = detectedSkill != null
                ? detectedSkill.GetSystemPrompt(transcript)
                : PromptBuilder.BuildPrompt(config);

            if (detectedSkill is { HandlesExecution: true })
            {
                // Execution skills need the whole refined text at once, so the
                // non-streaming LLM call is used.
                var finalText = await _llmClient.RefineTextAsync(transcript, systemPrompt, config.LlmModel);

                // A blank refinement result is skipped silently: nothing is
                // executed, saved to history, or notified.
                if (!string.IsNullOrWhiteSpace(finalText))
                {
                    detectedSkill.Execute(finalText);
                    stopWatch.Stop();
                    long elapsedMs = stopWatch.ElapsedMilliseconds;
                    _historyManager.SaveEntry(transcript, finalText, detectedSkill.Name, elapsedMs);
                    _notifications.Notify("Toak", $"Skill executed in {elapsedMs}ms");
                }

                return;
            }

            Logger.LogDebug("Starting LLM text refinement (streaming)...");
            var tokenStream = _llmClient.RefineTextStreamAsync(transcript, systemPrompt, config.LlmModel);

            if (pipeToStdout || copyToClipboard)
            {
                // Accumulate in a StringBuilder: appending to a string on every
                // token would re-copy the whole text each time.
                var collected = new System.Text.StringBuilder();
                await foreach (var token in tokenStream)
                {
                    collected.Append(token);
                    if (pipeToStdout)
                    {
                        await client.SendAsync(System.Text.Encoding.UTF8.GetBytes(token), SocketFlags.None);
                    }
                }

                stopWatch.Stop();
                long elapsedMs = stopWatch.ElapsedMilliseconds;
                string fullText = collected.ToString();

                if (copyToClipboard)
                {
                    _clipboardManager.Copy(fullText);
                    _notifications.Notify("Toak", $"Copied to clipboard in {elapsedMs}ms");
                }

                _historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, elapsedMs);
            }
            else
            {
                // The injector consumes the stream itself and returns the text it produced.
                string fullText = await _textInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
                stopWatch.Stop();
                long elapsedMs = stopWatch.ElapsedMilliseconds;
                _historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, elapsedMs);
                _notifications.Notify("Toak", $"Done in {elapsedMs}ms");
            }
        }
        catch (Exception ex)
        {
            _notifications.Notify("Toak Error", ex.Message);
            Logger.LogDebug($"Error during processing: {ex.Message}");
        }
        finally
        {
            // Runs on success, on the early "No speech detected." return, and on failure.
            if (File.Exists(wavPath))
            {
                File.Delete(wavPath);
            }
        }
    }

    /// <summary>
    /// Handles the ABORT command: stops the recorder, deletes the recorded WAV
    /// file if one exists, and notifies the user. Runs synchronously despite the
    /// Async suffix in its name.
    /// </summary>
    public void ProcessAbortAsync()
    {
        Logger.LogDebug("Received ABORT command");
        _audioRecorder.StopRecording();

        var wavPath = _audioRecorder.GetWavPath();
        if (File.Exists(wavPath))
        {
            File.Delete(wavPath);
        }

        _notifications.Notify("Toak", "Recording Aborted.");
    }
}
|