1
0

refactor: modernize code, improve performance, and clean up various components.

This commit is contained in:
2026-03-01 21:05:35 +01:00
parent 15f9647f8a
commit a6c7df0a71
37 changed files with 240 additions and 627 deletions

View File

@@ -1,6 +1,3 @@
using System;
using System.IO;
namespace Toak.Core;
public static class Constants

View File

@@ -1,8 +1,4 @@
using System;
using System.Diagnostics;
using System.IO;
using System.Net.Sockets;
using System.Threading.Tasks;
using Toak.Configuration;
using Toak.Api;
using Toak.Core.Interfaces;
@@ -54,15 +50,9 @@ public static class DaemonService
var notifications = new Notifications();
var speechClient = new OpenAiCompatibleClient(config.GroqApiKey);
ILlmClient llmClient;
if (config.LlmProvider == "together")
{
llmClient = new OpenAiCompatibleClient(config.TogetherApiKey, "https://api.together.xyz/v1/", config.ReasoningEffort);
}
else
{
llmClient = new OpenAiCompatibleClient(config.GroqApiKey, "https://api.groq.com/openai/v1/", config.ReasoningEffort);
}
ILlmClient llmClient = config.LlmProvider == "together"
? new OpenAiCompatibleClient(config.TogetherApiKey, "https://api.together.xyz/v1/", config.ReasoningEffort)
: new OpenAiCompatibleClient(config.GroqApiKey, "https://api.groq.com/openai/v1/", config.ReasoningEffort);
IAudioRecorder recorder = config.AudioBackend == "ffmpeg"
? new FfmpegAudioRecorder(stateTracker, notifications)
@@ -114,12 +104,12 @@ public static class DaemonService
try
{
var buffer = new byte[3];
int bytesRead = await client.ReceiveAsync(buffer, SocketFlags.None);
var bytesRead = await client.ReceiveAsync(buffer, SocketFlags.None);
if (bytesRead > 0)
{
byte cmd = buffer[0];
bool pipeToStdout = bytesRead > 1 && buffer[1] == 1;
bool copyToClipboard = bytesRead > 2 && buffer[2] == 1;
var cmd = buffer[0];
var pipeToStdout = bytesRead > 1 && buffer[1] == 1;
var copyToClipboard = bytesRead > 2 && buffer[2] == 1;
if (cmd == 1) // START
{
@@ -142,9 +132,9 @@ public static class DaemonService
}
else if (cmd == 5) // STATUS
{
bool json = pipeToStdout; // buffer[1] == 1 is json
bool isRecording = stateTracker.IsRecording();
string stateStr = isRecording ? "Recording" : "Idle";
var json = pipeToStdout; // buffer[1] == 1 is json
var isRecording = stateTracker.IsRecording();
var stateStr = isRecording ? "Recording" : "Idle";
if (json)
{

View File

@@ -1,5 +1,3 @@
using System;
namespace Toak.Core;
public class HistoryEntry

View File

@@ -1,8 +1,4 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.Json;
using System.Threading.Tasks;
using Toak.Serialization;
using Toak.Core.Interfaces;
@@ -10,20 +6,16 @@ namespace Toak.Core;
public class HistoryManager : IHistoryManager
{
private readonly string HistoryDir = Constants.Paths.AppDataDir;
private readonly string HistoryFile = Constants.Paths.HistoryFile;
public HistoryManager()
{
}
private readonly string _historyDir = Constants.Paths.AppDataDir;
private readonly string _historyFile = Constants.Paths.HistoryFile;
public void SaveEntry(string rawTranscript, string refinedText, string? skillName, long durationMs)
{
try
{
if (!Directory.Exists(HistoryDir))
if (!Directory.Exists(_historyDir))
{
Directory.CreateDirectory(HistoryDir);
Directory.CreateDirectory(_historyDir);
}
var entry = new HistoryEntry
@@ -38,9 +30,9 @@ public class HistoryManager : IHistoryManager
var json = JsonSerializer.Serialize(entry, CompactJsonSerializerContext.Default.HistoryEntry);
// Thread-safe append
lock (HistoryFile)
lock (_historyFile)
{
File.AppendAllLines(HistoryFile, new[] { json });
File.AppendAllLines(_historyFile, [json]);
}
}
catch (Exception ex)
@@ -52,14 +44,14 @@ public class HistoryManager : IHistoryManager
public List<HistoryEntry> LoadHistory()
{
var entries = new List<HistoryEntry>();
if (!File.Exists(HistoryFile)) return entries;
if (!File.Exists(_historyFile)) return entries;
try
{
string[] lines;
lock (HistoryFile)
lock (_historyFile)
{
lines = File.ReadAllLines(HistoryFile);
lines = File.ReadAllLines(_historyFile);
}
foreach (var line in lines)
@@ -92,20 +84,20 @@ public class HistoryManager : IHistoryManager
public void ClearHistory()
{
if (File.Exists(HistoryFile))
if (File.Exists(_historyFile))
{
try
{
lock (HistoryFile)
lock (_historyFile)
{
// Securely delete
var len = new FileInfo(HistoryFile).Length;
using (var fs = new FileStream(HistoryFile, FileMode.Open, FileAccess.Write))
var len = new FileInfo(_historyFile).Length;
using (var fs = new FileStream(_historyFile, FileMode.Open, FileAccess.Write))
{
var blank = new byte[len];
fs.Write(blank, 0, blank.Length);
}
File.Delete(HistoryFile);
File.Delete(_historyFile);
}
}
catch (Exception ex)

View File

@@ -1,5 +1,4 @@
using System.Net.Sockets;
using System.Threading.Tasks;
namespace Toak.Core.Interfaces;

View File

@@ -1,6 +1,3 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Toak.Configuration;
namespace Toak.Core.Interfaces;
@@ -13,13 +10,13 @@ public interface IConfigProvider
public interface ISpeechClient
{
Task<string> TranscribeAsync(string filePath, string language = "", string model = Toak.Core.Constants.Defaults.WhisperModel);
Task<string> TranscribeAsync(string filePath, string language = "", string model = Constants.Defaults.WhisperModel);
}
public interface ILlmClient
{
Task<string> RefineTextAsync(string rawTranscript, string systemPrompt, string model = Toak.Core.Constants.Defaults.LlmModel);
IAsyncEnumerable<string> RefineTextStreamAsync(string rawTranscript, string systemPrompt, string model = Toak.Core.Constants.Defaults.LlmModel);
Task<string> RefineTextAsync(string rawTranscript, string systemPrompt, string model = Constants.Defaults.LlmModel);
IAsyncEnumerable<string> RefineTextStreamAsync(string rawTranscript, string systemPrompt, string model = Constants.Defaults.LlmModel);
}
public interface IAudioRecorder

View File

@@ -1,5 +1,4 @@
using System.Text;
using Toak.Configuration;
namespace Toak.Core;

View File

@@ -1,11 +1,10 @@
using System;
using System.Diagnostics;
namespace Toak.Core.Skills;
public class DynamicSkill : ISkill
public class DynamicSkill(SkillDefinition def) : ISkill
{
private readonly SkillDefinition _def;
private readonly SkillDefinition _def = def;
public string Name => _def.Name;
public string Description => _def.Description;
@@ -13,11 +12,6 @@ public class DynamicSkill : ISkill
public bool HandlesExecution => _def.Action.ToLowerInvariant() == "script";
public DynamicSkill(SkillDefinition def)
{
_def = def;
}
public string GetSystemPrompt(string rawTranscript)
{
return _def.SystemPrompt.Replace("{transcript}", rawTranscript);

View File

@@ -4,7 +4,7 @@ public class SkillDefinition
{
public string Name { get; set; } = "";
public string Description { get; set; } = "";
public string[] Hotwords { get; set; } = System.Array.Empty<string>();
public string[] Hotwords { get; set; } = [];
public string Action { get; set; } = "type"; // "type" or "script"
public string SystemPrompt { get; set; } = "";
public string? ScriptPath { get; set; }

View File

@@ -1,7 +1,3 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.Json;
using Toak.Serialization;
@@ -9,7 +5,7 @@ namespace Toak.Core.Skills;
public static class SkillRegistry
{
public static List<ISkill> AllSkills = new List<ISkill>();
public static List<ISkill> AllSkills = [];
public static string SkillsDirectory => Path.Combine(
Environment.GetFolderPath(Environment.SpecialFolder.UserProfile),
@@ -28,7 +24,7 @@ public static class SkillRegistry
{
try
{
string json = File.ReadAllText(file);
var json = File.ReadAllText(file);
var def = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.SkillDefinition);
if (def != null)
{
@@ -47,7 +43,7 @@ public static class SkillRegistry
if (AllSkills.Count == 0) Initialize();
var activeSkills = AllSkills.Where(s => activeSkillNames.Contains(s.Name, StringComparer.OrdinalIgnoreCase)).ToList();
string normalizedTranscript = transcript.Trim();
var normalizedTranscript = transcript.Trim();
foreach (var skill in activeSkills)
{
@@ -72,9 +68,11 @@ public static class SkillRegistry
Description = "Translates the spoken command into a bash command and types it.",
Hotwords = ["System terminal", "System run", "System execute"],
Action = "type",
SystemPrompt = @"You are a Linux terminal expert.
Translate the user's request into a single, valid bash command.
Output ONLY the raw command, no formatting, no markdown."
SystemPrompt = """
You are a Linux terminal expert.
Translate the user's request into a single, valid bash command.
Output ONLY the raw command, no formatting, no markdown.
"""
},
new SkillDefinition
{
@@ -82,10 +80,12 @@ Output ONLY the raw command, no formatting, no markdown."
Description = "Translates the spoken text into another language on the fly.",
Hotwords = ["System translate to", "System translate into"],
Action = "type",
SystemPrompt = @"You are an expert translator. The user wants to translate the following text.
The first few words identify the target language (e.g. 'Translate to Spanish:', 'Translate into Hungarian:').
Translate the REST of the transcript into that target language.
Output ONLY the final translated text. Do not include markdown, explanations, or quotes."
SystemPrompt = """
You are an expert translator. The user wants to translate the following text.
The first few words identify the target language (e.g. 'Translate to Spanish:', 'Translate into Hungarian:').
Translate the REST of the transcript into that target language.
Output ONLY the final translated text. Do not include markdown, explanations, or quotes.
"""
},
new SkillDefinition
{
@@ -93,13 +93,15 @@ Output ONLY the final translated text. Do not include markdown, explanations, or
Description = "Rewrites text into a formal, articulate tone.",
Hotwords = ["System professional", "System formalize", "System formal"],
Action = "type",
SystemPrompt = @"Rewrite the following text to be articulate and formal.
The text will start with 'System professional', 'System formalize', or 'System formal',
or something along the lines of that. You can ignore those words.
Do not add any conversational filler.
Make sure to preserve the meaning of the original text.
Output ONLY the final professional text.
Text: {transcript}"
SystemPrompt = """
Rewrite the following text to be articulate and formal.
The text will start with 'System professional', 'System formalize', or 'System formal',
or something along the lines of that. You can ignore those words.
Do not add any conversational filler.
Make sure to preserve the meaning of the original text.
Output ONLY the final professional text.
Text: {transcript}
"""
},
new SkillDefinition
{
@@ -107,19 +109,21 @@ Text: {transcript}"
Description = "Provides a direct, crisp summary of the dictation.",
Hotwords = ["System summary", "System concise", "System summarize"],
Action = "type",
SystemPrompt = @"Summarize the following text to be as concise
and direct as possible.
The text will start with 'System summary', 'System concise', or 'System summarize',
and you shoul ignore that part of the text.
Output ONLY the final summary text.
Text: {transcript}"
SystemPrompt = """
Summarize the following text to be as concise
and direct as possible.
The text will start with 'System summary', 'System concise', or 'System summarize',
and you shoul ignore that part of the text.
Output ONLY the final summary text.
Text: {transcript}
"""
}
};
foreach (var def in defaults)
{
string filename = Path.Combine(SkillsDirectory, $"{def.Name.ToLowerInvariant()}.json");
string json = JsonSerializer.Serialize(def, AppJsonSerializerContext.Default.SkillDefinition);
var filename = Path.Combine(SkillsDirectory, $"{def.Name.ToLowerInvariant()}.json");
var json = JsonSerializer.Serialize(def, AppJsonSerializerContext.Default.SkillDefinition);
File.WriteAllText(filename, json);
}

View File

@@ -4,24 +4,24 @@ namespace Toak.Core;
public class StateTracker : IRecordingStateTracker
{
private readonly string StateFilePath = Constants.Paths.StateFile;
private readonly string _stateFilePath = Constants.Paths.StateFile;
public bool IsRecording()
{
return File.Exists(StateFilePath);
return File.Exists(_stateFilePath);
}
public void SetRecording(int ffmpegPid)
{
Logger.LogDebug($"Setting recording state with PID {ffmpegPid}");
File.WriteAllText(StateFilePath, $"{ffmpegPid}\n{DateTime.UtcNow.Ticks}");
File.WriteAllText(_stateFilePath, $"{ffmpegPid}\n{DateTime.UtcNow.Ticks}");
}
public int? GetRecordingPid()
{
if (File.Exists(StateFilePath))
if (File.Exists(_stateFilePath))
{
var lines = File.ReadAllLines(StateFilePath);
var lines = File.ReadAllLines(_stateFilePath);
if (lines.Length > 0 && int.TryParse(lines[0], out var pid))
{
Logger.LogDebug($"Read recording PID {pid} from state file");
@@ -33,9 +33,9 @@ public class StateTracker : IRecordingStateTracker
public DateTime? GetRecordingStartTime()
{
if (File.Exists(StateFilePath))
if (File.Exists(_stateFilePath))
{
var lines = File.ReadAllLines(StateFilePath);
var lines = File.ReadAllLines(_stateFilePath);
if (lines.Length > 1 && long.TryParse(lines[1], out var ticks))
{
return new DateTime(ticks, DateTimeKind.Utc);
@@ -46,10 +46,10 @@ public class StateTracker : IRecordingStateTracker
public void ClearRecording()
{
if (File.Exists(StateFilePath))
if (File.Exists(_stateFilePath))
{
Logger.LogDebug("Clearing recording state file");
File.Delete(StateFilePath);
File.Delete(_stateFilePath);
}
}
}

View File

@@ -1,55 +1,39 @@
using System;
using System.Diagnostics;
using System.IO;
using System.Net.Sockets;
using System.Threading.Tasks;
using Toak.Core.Interfaces;
using Toak.Configuration;
namespace Toak.Core;
public class TranscriptionOrchestrator : ITranscriptionOrchestrator
public class TranscriptionOrchestrator(
ISpeechClient speechClient,
ILlmClient llmClient,
IConfigProvider configProvider,
IAudioRecorder audioRecorder,
INotifications notifications,
ITextInjector textInjector,
IHistoryManager historyManager,
IClipboardManager clipboardManager,
IRecordingStateTracker stateTracker) : ITranscriptionOrchestrator
{
private readonly ISpeechClient _speechClient;
private readonly ILlmClient _llmClient;
private readonly IConfigProvider _configProvider;
private readonly IAudioRecorder _audioRecorder;
private readonly INotifications _notifications;
private readonly ITextInjector _textInjector;
private readonly IHistoryManager _historyManager;
private readonly IClipboardManager _clipboardManager;
private readonly IRecordingStateTracker _stateTracker;
private readonly ISpeechClient _speechClient = speechClient;
private readonly ILlmClient _llmClient = llmClient;
private readonly IConfigProvider _configProvider = configProvider;
private readonly IAudioRecorder _audioRecorder = audioRecorder;
private readonly INotifications _notifications = notifications;
private readonly ITextInjector _textInjector = textInjector;
private readonly IHistoryManager _historyManager = historyManager;
private readonly IClipboardManager _clipboardManager = clipboardManager;
private readonly IRecordingStateTracker _stateTracker = stateTracker;
public TranscriptionOrchestrator(
ISpeechClient speechClient,
ILlmClient llmClient,
IConfigProvider configProvider,
IAudioRecorder audioRecorder,
INotifications notifications,
ITextInjector textInjector,
IHistoryManager historyManager,
IClipboardManager clipboardManager,
IRecordingStateTracker stateTracker)
public Task ProcessStartRecordingAsync()
{
_speechClient = speechClient;
_llmClient = llmClient;
_configProvider = configProvider;
_audioRecorder = audioRecorder;
_notifications = notifications;
_textInjector = textInjector;
_historyManager = historyManager;
_clipboardManager = clipboardManager;
_stateTracker = stateTracker;
}
public async Task ProcessStartRecordingAsync()
{
if (_stateTracker.IsRecording()) return;
if (_stateTracker.IsRecording()) return Task.CompletedTask;
Logger.LogDebug("Received START command");
var config = _configProvider.LoadConfig();
_notifications.PlaySound(config.StartSoundPath);
_audioRecorder.StartRecording();
return Task.CompletedTask;
}
public async Task ProcessStopRecordingAsync(Socket client, bool pipeToStdout, bool copyToClipboard)
@@ -96,9 +80,9 @@ public class TranscriptionOrchestrator : ITranscriptionOrchestrator
return;
}
var detectedSkill = Toak.Core.Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
string systemPrompt = detectedSkill != null ? detectedSkill.GetSystemPrompt(transcript) : PromptBuilder.BuildPrompt(config);
bool isExecutionSkill = detectedSkill != null && detectedSkill.HandlesExecution;
var detectedSkill = Skills.SkillRegistry.DetectSkill(transcript, config.ActiveSkills);
var systemPrompt = detectedSkill != null ? detectedSkill.GetSystemPrompt(transcript) : PromptBuilder.BuildPrompt(config);
var isExecutionSkill = detectedSkill != null && detectedSkill.HandlesExecution;
if (isExecutionSkill)
{
@@ -118,7 +102,7 @@ public class TranscriptionOrchestrator : ITranscriptionOrchestrator
if (pipeToStdout || copyToClipboard)
{
string fullText = "";
var fullText = "";
await foreach (var token in tokenStream)
{
fullText += token;
@@ -137,7 +121,7 @@ public class TranscriptionOrchestrator : ITranscriptionOrchestrator
}
else
{
string fullText = await _textInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
var fullText = await _textInjector.InjectStreamAsync(tokenStream, config.TypingBackend);
stopWatch.Stop();
_historyManager.SaveEntry(transcript, fullText, detectedSkill?.Name, stopWatch.ElapsedMilliseconds);
_notifications.Notify("Toak", $"Done in {stopWatch.ElapsedMilliseconds}ms");