From 4e04cc604270844d4ae12075652e682007ae8f86 Mon Sep 17 00:00:00 2001 From: TomiEckert Date: Sat, 28 Feb 2026 16:09:41 +0100 Subject: [PATCH] feat: Introduce an OpenAI-compatible client to replace the Groq-specific client and enable multiple LLM providers. --- .../{LlamaModels.cs => OpenAiModels.cs} | 26 +++++----- ...ApiClient.cs => OpenAiCompatibleClient.cs} | 47 ++++++++++--------- Commands/LatencyTestCommand.cs | 6 +-- Commands/OnboardCommand.cs | 43 +++++++++++++---- Configuration/ToakConfig.cs | 3 ++ Core/DaemonService.cs | 16 +++++-- Serialization/AppJsonSerializerContext.cs | 20 ++++---- 7 files changed, 104 insertions(+), 57 deletions(-) rename Api/Models/{LlamaModels.cs => OpenAiModels.cs} (52%) rename Api/{GroqApiClient.cs => OpenAiCompatibleClient.cs} (70%) diff --git a/Api/Models/LlamaModels.cs b/Api/Models/OpenAiModels.cs similarity index 52% rename from Api/Models/LlamaModels.cs rename to Api/Models/OpenAiModels.cs index a84cbac..5eaef58 100644 --- a/Api/Models/LlamaModels.cs +++ b/Api/Models/OpenAiModels.cs @@ -2,7 +2,7 @@ using System.Text.Json.Serialization; namespace Toak.Api.Models; -public class LlamaRequestMessage +public class OpenAiRequestMessage { [JsonPropertyName("role")] public string Role { get; set; } = string.Empty; @@ -10,43 +10,45 @@ public class LlamaRequestMessage public string Content { get; set; } = string.Empty; } -public class LlamaRequest +public class OpenAiRequest { [JsonPropertyName("model")] public string Model { get; set; } = "llama-3.1-8b-instant"; [JsonPropertyName("messages")] - public LlamaRequestMessage[] Messages { get; set; } = Array.Empty(); + public OpenAiRequestMessage[] Messages { get; set; } = Array.Empty(); [JsonPropertyName("temperature")] public double Temperature { get; set; } = 0.0; [JsonPropertyName("stream")] public bool? Stream { get; set; } + [JsonPropertyName("reasoning_effort")] + public string? 
ReasoningEffort { get; set; } } -public class LlamaResponse +public class OpenAiResponse { [JsonPropertyName("choices")] - public LlamaChoice[] Choices { get; set; } = Array.Empty(); + public OpenAiChoice[] Choices { get; set; } = Array.Empty(); } -public class LlamaChoice +public class OpenAiChoice { [JsonPropertyName("message")] - public LlamaRequestMessage Message { get; set; } = new(); + public OpenAiRequestMessage Message { get; set; } = new(); } -public class LlamaStreamResponse +public class OpenAiStreamResponse { [JsonPropertyName("choices")] - public LlamaStreamChoice[] Choices { get; set; } = Array.Empty(); + public OpenAiStreamChoice[] Choices { get; set; } = Array.Empty(); } -public class LlamaStreamChoice +public class OpenAiStreamChoice { [JsonPropertyName("delta")] - public LlamaStreamDelta Delta { get; set; } = new(); + public OpenAiStreamDelta Delta { get; set; } = new(); } -public class LlamaStreamDelta +public class OpenAiStreamDelta { [JsonPropertyName("content")] public string? Content { get; set; } diff --git a/Api/GroqApiClient.cs b/Api/OpenAiCompatibleClient.cs similarity index 70% rename from Api/GroqApiClient.cs rename to Api/OpenAiCompatibleClient.cs index 7122daf..bbdfd87 100644 --- a/Api/GroqApiClient.cs +++ b/Api/OpenAiCompatibleClient.cs @@ -9,19 +9,22 @@ using Toak.Core.Interfaces; namespace Toak.Api; -public class GroqApiClient : ISpeechClient, ILlmClient +public class OpenAiCompatibleClient : ISpeechClient, ILlmClient { private readonly HttpClient _httpClient; + private readonly string? _reasoningEffort; - public GroqApiClient(string apiKey) + public OpenAiCompatibleClient(string apiKey, string baseUrl = "https://api.groq.com/openai/v1/", string? 
reasoningEffort = null) { _httpClient = new HttpClient(); _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey); - _httpClient.BaseAddress = new Uri("https://api.groq.com/openai/v1/"); + _httpClient.BaseAddress = new Uri(baseUrl); + _reasoningEffort = reasoningEffort == "none" ? null : reasoningEffort; } public async Task TranscribeAsync(string filePath, string language = "", string model = Toak.Core.Constants.Defaults.WhisperModel) { + // ... (TranscribeAsync content remains same except maybe some internal comments or contexts) using var content = new MultipartFormDataContent(); using var fileStream = File.OpenRead(filePath); using var streamContent = new StreamContent(fileStream); @@ -31,8 +34,6 @@ public class GroqApiClient : ISpeechClient, ILlmClient string modelToUse = string.IsNullOrWhiteSpace(model) ? Toak.Core.Constants.Defaults.WhisperModel : model; - // according to docs whisper-large-v3-turbo requires the language to be provided if it is to be translated later potentially or if we need the most accurate behavior - // Actually, if we want language param, we can pass it to either model content.Add(new StringContent(modelToUse), "model"); if (!string.IsNullOrWhiteSpace(language)) @@ -58,62 +59,64 @@ public class GroqApiClient : ISpeechClient, ILlmClient public async Task RefineTextAsync(string rawTranscript, string systemPrompt, string model = Toak.Core.Constants.Defaults.LlmModel) { - var requestBody = new LlamaRequest + var requestBody = new OpenAiRequest { Model = string.IsNullOrWhiteSpace(model) ? 
Toak.Core.Constants.Defaults.LlmModel : model, Temperature = 0.0, + ReasoningEffort = _reasoningEffort, Messages = new[] { - new LlamaRequestMessage { Role = "system", Content = systemPrompt }, - new LlamaRequestMessage { Role = "user", Content = $"{rawTranscript}" } + new OpenAiRequestMessage { Role = "system", Content = systemPrompt }, + new OpenAiRequestMessage { Role = "user", Content = $"{rawTranscript}" } } }; - var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json"); + var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.OpenAiRequest), System.Text.Encoding.UTF8, "application/json"); - Logger.LogDebug($"Sending Llama API request (model: {requestBody.Model})..."); + Logger.LogDebug($"Sending OpenAi API request (model: {requestBody.Model})..."); var response = await _httpClient.PostAsync("chat/completions", jsonContent); - Logger.LogDebug($"Llama API response status: {response.StatusCode}"); + Logger.LogDebug($"OpenAi API response status: {response.StatusCode}"); if (!response.IsSuccessStatusCode) { var error = await response.Content.ReadAsStringAsync(); - throw new Exception($"Llama API Error: {response.StatusCode} - {error}"); + throw new Exception($"OpenAi API Error: {response.StatusCode} - {error}"); } var json = await response.Content.ReadAsStringAsync(); - var result = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.LlamaResponse); + var result = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.OpenAiResponse); return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty; } public async IAsyncEnumerable RefineTextStreamAsync(string rawTranscript, string systemPrompt, string model = Toak.Core.Constants.Defaults.LlmModel) { - var requestBody = new LlamaRequest + var requestBody = new OpenAiRequest { Model = string.IsNullOrWhiteSpace(model) ? 
Toak.Core.Constants.Defaults.LlmModel : model, Temperature = 0.0, Stream = true, + ReasoningEffort = _reasoningEffort, Messages = new[] { - new LlamaRequestMessage { Role = "system", Content = systemPrompt }, - new LlamaRequestMessage { Role = "user", Content = $"{rawTranscript}" } + new OpenAiRequestMessage { Role = "system", Content = systemPrompt }, + new OpenAiRequestMessage { Role = "user", Content = $"{rawTranscript}" } } }; - var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json"); + var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.OpenAiRequest), System.Text.Encoding.UTF8, "application/json"); using var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions") { Content = jsonContent }; request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/event-stream")); - Logger.LogDebug($"Sending Llama Steam API request (model: {requestBody.Model})..."); + Logger.LogDebug($"Sending OpenAi Stream API request (model: {requestBody.Model})..."); using var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead); - Logger.LogDebug($"Llama Stream API response status: {response.StatusCode}"); + Logger.LogDebug($"OpenAi Stream API response status: {response.StatusCode}"); if (!response.IsSuccessStatusCode) { var error = await response.Content.ReadAsStringAsync(); - throw new Exception($"Llama API Error: {response.StatusCode} - {error}"); + throw new Exception($"OpenAi API Error: {response.StatusCode} - {error}"); } using var stream = await response.Content.ReadAsStreamAsync(); @@ -128,7 +131,7 @@ public class GroqApiClient : ISpeechClient, ILlmClient var data = line.Substring("data: ".Length).Trim(); if (data == "[DONE]") break; - var chunk = JsonSerializer.Deserialize(data, AppJsonSerializerContext.Default.LlamaStreamResponse); + var chunk = 
JsonSerializer.Deserialize(data, AppJsonSerializerContext.Default.OpenAiStreamResponse); var content = chunk?.Choices?.FirstOrDefault()?.Delta?.Content; if (!string.IsNullOrEmpty(content)) { @@ -138,3 +141,5 @@ public class GroqApiClient : ISpeechClient, ILlmClient } } } + + diff --git a/Commands/LatencyTestCommand.cs b/Commands/LatencyTestCommand.cs index 87cd09a..96c5fc4 100644 --- a/Commands/LatencyTestCommand.cs +++ b/Commands/LatencyTestCommand.cs @@ -42,7 +42,7 @@ public static class LatencyTestCommand return; } - var groq = new GroqApiClient(config.GroqApiKey); + var client = new OpenAiCompatibleClient(config.GroqApiKey); try { @@ -51,13 +51,13 @@ public static class LatencyTestCommand { ctx.Status("Testing STT (Whisper)..."); var sttWatch = Stopwatch.StartNew(); - var transcript = await groq.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel); + var transcript = await client.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel); sttWatch.Stop(); ctx.Status("Testing LLM (Llama)..."); var systemPrompt = PromptBuilder.BuildPrompt(config); var llmWatch = Stopwatch.StartNew(); - var refinedText = await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel); + var refinedText = await client.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel); llmWatch.Stop(); var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds; diff --git a/Commands/OnboardCommand.cs b/Commands/OnboardCommand.cs index 7ba087f..81ef28b 100644 --- a/Commands/OnboardCommand.cs +++ b/Commands/OnboardCommand.cs @@ -21,17 +21,44 @@ public static class OnboardCommand AnsiConsole.WriteLine(); config.GroqApiKey = AnsiConsole.Prompt( - new TextPrompt("Groq API Key:") + new TextPrompt("Groq API Key (required for Whisper):") .DefaultValue(string.IsNullOrWhiteSpace(config.GroqApiKey) ? 
"" : config.GroqApiKey) .AllowEmpty()); - - config.LlmModel = AnsiConsole.Prompt( + + config.LlmProvider = AnsiConsole.Prompt( new SelectionPrompt() - .Title("Select [green]LLM Model[/]:") - .AddChoices(new[] { "openai/gpt-oss-20b", "llama-3.1-8b-instant", "llama-3.3-70b-versatile" }) - .UseConverter(c => c == "openai/gpt-oss-20b" ? "openai/gpt-oss-20b (Fastest)" : c == "llama-3.1-8b-instant" ? "llama-3.1-8b-instant (Cheapest)" : "llama-3.3-70b-versatile (More Accurate)")); - - if (config.LlmModel.Contains(" ")) config.LlmModel = config.LlmModel.Split(' ')[0]; + .Title("Select [green]LLM Provider[/]:") + .AddChoices(new[] { "groq", "together" }) + .UseConverter(c => c == "groq" ? "Groq (Default)" : "Together AI")); + + if (config.LlmProvider == "together") + { + config.TogetherApiKey = AnsiConsole.Prompt( + new TextPrompt("Together AI API Key:") + .DefaultValue(string.IsNullOrWhiteSpace(config.TogetherApiKey) ? "" : config.TogetherApiKey) + .AllowEmpty()); + + config.LlmModel = AnsiConsole.Prompt( + new SelectionPrompt() + .Title("Select [green]LLM Model[/]:") + .AddChoices(new[] { "meta-llama/Llama-3.3-70B-Instruct-Turbo", "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", "openai/gpt-oss-20b" })); + } + else + { + config.LlmModel = AnsiConsole.Prompt( + new SelectionPrompt() + .Title("Select [green]LLM Model[/]:") + .AddChoices(new[] { "openai/gpt-oss-20b", "llama-3.1-8b-instant", "llama-3.3-70b-versatile" }) + .UseConverter(c => c == "openai/gpt-oss-20b" ? "openai/gpt-oss-20b (Fastest)" : c == "llama-3.1-8b-instant" ? "llama-3.1-8b-instant (Cheapest)" : "llama-3.3-70b-versatile (More Accurate)")); + + if (config.LlmModel.Contains(" ")) config.LlmModel = config.LlmModel.Split(' ')[0]; + } + + config.ReasoningEffort = AnsiConsole.Prompt( + new SelectionPrompt() + .Title("Select [green]Reasoning Effort[/]:") + .AddChoices(new[] { "none", "low" }) + .UseConverter(c => c == "none" ? 
"None (Standard)" : "Low (Moderate Reasoning)")); config.WhisperModel = AnsiConsole.Prompt( new SelectionPrompt() diff --git a/Configuration/ToakConfig.cs b/Configuration/ToakConfig.cs index 2fe7f2d..653f3bd 100644 --- a/Configuration/ToakConfig.cs +++ b/Configuration/ToakConfig.cs @@ -3,6 +3,8 @@ namespace Toak.Configuration; public class ToakConfig { public string GroqApiKey { get; set; } = string.Empty; + public string TogetherApiKey { get; set; } = string.Empty; + public string LlmProvider { get; set; } = "groq"; // groq or together public string TypingBackend { get; set; } = "xdotool"; // wtype or xdotool public string AudioBackend { get; set; } = "pw-record"; // pw-record or ffmpeg public bool ModulePunctuation { get; set; } = true; @@ -10,6 +12,7 @@ public class ToakConfig public string WhisperLanguage { get; set; } = string.Empty; public string LlmModel { get; set; } = Toak.Core.Constants.Defaults.LlmModel; + public string ReasoningEffort { get; set; } = "none"; // none or low public string WhisperModel { get; set; } = Toak.Core.Constants.Defaults.WhisperModel; public string StartSoundPath { get; set; } = "Assets/Audio/beep.wav"; public string StopSoundPath { get; set; } = "Assets/Audio/beep.wav"; diff --git a/Core/DaemonService.cs b/Core/DaemonService.cs index c48ca49..835cedf 100644 --- a/Core/DaemonService.cs +++ b/Core/DaemonService.cs @@ -53,14 +53,24 @@ public static class DaemonService var stateTracker = new StateTracker(); var notifications = new Notifications(); - var groqClient = new GroqApiClient(config.GroqApiKey); + var speechClient = new OpenAiCompatibleClient(config.GroqApiKey); + ILlmClient llmClient; + if (config.LlmProvider == "together") + { + llmClient = new OpenAiCompatibleClient(config.TogetherApiKey, "https://api.together.xyz/v1/", config.ReasoningEffort); + } + else + { + llmClient = new OpenAiCompatibleClient(config.GroqApiKey, "https://api.groq.com/openai/v1/", config.ReasoningEffort); + } + IAudioRecorder recorder = 
config.AudioBackend == "ffmpeg" ? new FfmpegAudioRecorder(stateTracker, notifications) : new AudioRecorder(stateTracker, notifications); var orchestrator = new TranscriptionOrchestrator( - groqClient, - groqClient, + speechClient, + llmClient, configManager, recorder, notifications, diff --git a/Serialization/AppJsonSerializerContext.cs b/Serialization/AppJsonSerializerContext.cs index 5ffbe0e..fc425d4 100644 --- a/Serialization/AppJsonSerializerContext.cs +++ b/Serialization/AppJsonSerializerContext.cs @@ -8,16 +8,16 @@ namespace Toak.Serialization; [JsonSourceGenerationOptions(WriteIndented = true, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull)] [JsonSerializable(typeof(ToakConfig))] [JsonSerializable(typeof(WhisperResponse))] -[JsonSerializable(typeof(LlamaRequest))] -[JsonSerializable(typeof(LlamaRequestMessage))] -[JsonSerializable(typeof(LlamaResponse))] -[JsonSerializable(typeof(LlamaChoice))] -[JsonSerializable(typeof(LlamaRequestMessage[]))] -[JsonSerializable(typeof(LlamaChoice[]))] -[JsonSerializable(typeof(LlamaStreamResponse))] -[JsonSerializable(typeof(LlamaStreamChoice))] -[JsonSerializable(typeof(LlamaStreamDelta))] -[JsonSerializable(typeof(LlamaStreamChoice[]))] +[JsonSerializable(typeof(OpenAiRequest))] +[JsonSerializable(typeof(OpenAiRequestMessage))] +[JsonSerializable(typeof(OpenAiResponse))] +[JsonSerializable(typeof(OpenAiChoice))] +[JsonSerializable(typeof(OpenAiRequestMessage[]))] +[JsonSerializable(typeof(OpenAiChoice[]))] +[JsonSerializable(typeof(OpenAiStreamResponse))] +[JsonSerializable(typeof(OpenAiStreamChoice))] +[JsonSerializable(typeof(OpenAiStreamDelta))] +[JsonSerializable(typeof(OpenAiStreamChoice[]))] [JsonSerializable(typeof(Toak.Core.Skills.SkillDefinition))] [JsonSerializable(typeof(Toak.Core.HistoryEntry))] internal partial class AppJsonSerializerContext : JsonSerializerContext