From 4e04cc604270844d4ae12075652e682007ae8f86 Mon Sep 17 00:00:00 2001 From: TomiEckert Date: Sat, 28 Feb 2026 16:09:41 +0100 Subject: [PATCH] feat: Introduce an OpenAI-compatible client to replace the Groq-specific client and enable multiple LLM providers. --- .../{LlamaModels.cs => OpenAiModels.cs} | 26 +++++----- ...ApiClient.cs => OpenAiCompatibleClient.cs} | 47 ++++++++++--------- Commands/LatencyTestCommand.cs | 6 +-- Commands/OnboardCommand.cs | 43 +++++++++++++---- Configuration/ToakConfig.cs | 3 ++ Core/DaemonService.cs | 16 +++++-- Serialization/AppJsonSerializerContext.cs | 20 ++++---- 7 files changed, 104 insertions(+), 57 deletions(-) rename Api/Models/{LlamaModels.cs => OpenAiModels.cs} (52%) rename Api/{GroqApiClient.cs => OpenAiCompatibleClient.cs} (70%) diff --git a/Api/Models/LlamaModels.cs b/Api/Models/OpenAiModels.cs similarity index 52% rename from Api/Models/LlamaModels.cs rename to Api/Models/OpenAiModels.cs index a84cbac..5eaef58 100644 --- a/Api/Models/LlamaModels.cs +++ b/Api/Models/OpenAiModels.cs @@ -2,7 +2,7 @@ using System.Text.Json.Serialization; namespace Toak.Api.Models; -public class LlamaRequestMessage +public class OpenAiRequestMessage { [JsonPropertyName("role")] public string Role { get; set; } = string.Empty; @@ -10,43 +10,45 @@ public class LlamaRequestMessage public string Content { get; set; } = string.Empty; } -public class LlamaRequest +public class OpenAiRequest { [JsonPropertyName("model")] public string Model { get; set; } = "llama-3.1-8b-instant"; [JsonPropertyName("messages")] - public LlamaRequestMessage[] Messages { get; set; } = Array.Empty(); + public OpenAiRequestMessage[] Messages { get; set; } = Array.Empty(); [JsonPropertyName("temperature")] public double Temperature { get; set; } = 0.0; [JsonPropertyName("stream")] public bool? Stream { get; set; } + [JsonPropertyName("reasoning_effort")] + public string? 
ReasoningEffort { get; set; } } -public class LlamaResponse +public class OpenAiResponse { [JsonPropertyName("choices")] - public LlamaChoice[] Choices { get; set; } = Array.Empty(); + public OpenAiChoice[] Choices { get; set; } = Array.Empty(); } -public class LlamaChoice +public class OpenAiChoice { [JsonPropertyName("message")] - public LlamaRequestMessage Message { get; set; } = new(); + public OpenAiRequestMessage Message { get; set; } = new(); } -public class LlamaStreamResponse +public class OpenAiStreamResponse { [JsonPropertyName("choices")] - public LlamaStreamChoice[] Choices { get; set; } = Array.Empty(); + public OpenAiStreamChoice[] Choices { get; set; } = Array.Empty(); } -public class LlamaStreamChoice +public class OpenAiStreamChoice { [JsonPropertyName("delta")] - public LlamaStreamDelta Delta { get; set; } = new(); + public OpenAiStreamDelta Delta { get; set; } = new(); } -public class LlamaStreamDelta +public class OpenAiStreamDelta { [JsonPropertyName("content")] public string? Content { get; set; } diff --git a/Api/GroqApiClient.cs b/Api/OpenAiCompatibleClient.cs similarity index 70% rename from Api/GroqApiClient.cs rename to Api/OpenAiCompatibleClient.cs index 7122daf..bbdfd87 100644 --- a/Api/GroqApiClient.cs +++ b/Api/OpenAiCompatibleClient.cs @@ -9,19 +9,22 @@ using Toak.Core.Interfaces; namespace Toak.Api; -public class GroqApiClient : ISpeechClient, ILlmClient +public class OpenAiCompatibleClient : ISpeechClient, ILlmClient { private readonly HttpClient _httpClient; + private readonly string? _reasoningEffort; - public GroqApiClient(string apiKey) + public OpenAiCompatibleClient(string apiKey, string baseUrl = "https://api.groq.com/openai/v1/", string? 
reasoningEffort = null) { _httpClient = new HttpClient(); _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey); - _httpClient.BaseAddress = new Uri("https://api.groq.com/openai/v1/"); + _httpClient.BaseAddress = new Uri(baseUrl); + _reasoningEffort = reasoningEffort == "none" ? null : reasoningEffort; } public async Task TranscribeAsync(string filePath, string language = "", string model = Toak.Core.Constants.Defaults.WhisperModel) { + // ... (TranscribeAsync content remains same except maybe some internal comments or contexts) using var content = new MultipartFormDataContent(); using var fileStream = File.OpenRead(filePath); using var streamContent = new StreamContent(fileStream); @@ -31,8 +34,6 @@ public class GroqApiClient : ISpeechClient, ILlmClient string modelToUse = string.IsNullOrWhiteSpace(model) ? Toak.Core.Constants.Defaults.WhisperModel : model; - // according to docs whisper-large-v3-turbo requires the language to be provided if it is to be translated later potentially or if we need the most accurate behavior - // Actually, if we want language param, we can pass it to either model content.Add(new StringContent(modelToUse), "model"); if (!string.IsNullOrWhiteSpace(language)) @@ -58,62 +59,64 @@ public class GroqApiClient : ISpeechClient, ILlmClient public async Task RefineTextAsync(string rawTranscript, string systemPrompt, string model = Toak.Core.Constants.Defaults.LlmModel) { - var requestBody = new LlamaRequest + var requestBody = new OpenAiRequest { Model = string.IsNullOrWhiteSpace(model) ? 
Toak.Core.Constants.Defaults.LlmModel : model, Temperature = 0.0, + ReasoningEffort = _reasoningEffort, Messages = new[] { - new LlamaRequestMessage { Role = "system", Content = systemPrompt }, - new LlamaRequestMessage { Role = "user", Content = $"{rawTranscript}" } + new OpenAiRequestMessage { Role = "system", Content = systemPrompt }, + new OpenAiRequestMessage { Role = "user", Content = $"{rawTranscript}" } } }; - var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json"); + var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.OpenAiRequest), System.Text.Encoding.UTF8, "application/json"); - Logger.LogDebug($"Sending Llama API request (model: {requestBody.Model})..."); + Logger.LogDebug($"Sending OpenAi API request (model: {requestBody.Model})..."); var response = await _httpClient.PostAsync("chat/completions", jsonContent); - Logger.LogDebug($"Llama API response status: {response.StatusCode}"); + Logger.LogDebug($"OpenAi API response status: {response.StatusCode}"); if (!response.IsSuccessStatusCode) { var error = await response.Content.ReadAsStringAsync(); - throw new Exception($"Llama API Error: {response.StatusCode} - {error}"); + throw new Exception($"OpenAi API Error: {response.StatusCode} - {error}"); } var json = await response.Content.ReadAsStringAsync(); - var result = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.LlamaResponse); + var result = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.OpenAiResponse); return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty; } public async IAsyncEnumerable RefineTextStreamAsync(string rawTranscript, string systemPrompt, string model = Toak.Core.Constants.Defaults.LlmModel) { - var requestBody = new LlamaRequest + var requestBody = new OpenAiRequest { Model = string.IsNullOrWhiteSpace(model) ? 
Toak.Core.Constants.Defaults.LlmModel : model, Temperature = 0.0, Stream = true, + ReasoningEffort = _reasoningEffort, Messages = new[] { - new LlamaRequestMessage { Role = "system", Content = systemPrompt }, - new LlamaRequestMessage { Role = "user", Content = $"{rawTranscript}" } + new OpenAiRequestMessage { Role = "system", Content = systemPrompt }, + new OpenAiRequestMessage { Role = "user", Content = $"{rawTranscript}" } } }; - var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json"); + var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.OpenAiRequest), System.Text.Encoding.UTF8, "application/json"); using var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions") { Content = jsonContent }; request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/event-stream")); - Logger.LogDebug($"Sending Llama Steam API request (model: {requestBody.Model})..."); + Logger.LogDebug($"Sending OpenAi Stream API request (model: {requestBody.Model})..."); using var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead); - Logger.LogDebug($"Llama Stream API response status: {response.StatusCode}"); + Logger.LogDebug($"OpenAi Stream API response status: {response.StatusCode}"); if (!response.IsSuccessStatusCode) { var error = await response.Content.ReadAsStringAsync(); - throw new Exception($"Llama API Error: {response.StatusCode} - {error}"); + throw new Exception($"OpenAi API Error: {response.StatusCode} - {error}"); } using var stream = await response.Content.ReadAsStreamAsync(); @@ -128,7 +131,7 @@ public class GroqApiClient : ISpeechClient, ILlmClient var data = line.Substring("data: ".Length).Trim(); if (data == "[DONE]") break; - var chunk = JsonSerializer.Deserialize(data, AppJsonSerializerContext.Default.LlamaStreamResponse); + var chunk = 
JsonSerializer.Deserialize(data, AppJsonSerializerContext.Default.OpenAiStreamResponse); var content = chunk?.Choices?.FirstOrDefault()?.Delta?.Content; if (!string.IsNullOrEmpty(content)) { @@ -138,3 +141,5 @@ public class GroqApiClient : ISpeechClient, ILlmClient } } } + + diff --git a/Commands/LatencyTestCommand.cs b/Commands/LatencyTestCommand.cs index 87cd09a..96c5fc4 100644 --- a/Commands/LatencyTestCommand.cs +++ b/Commands/LatencyTestCommand.cs @@ -42,7 +42,7 @@ public static class LatencyTestCommand return; } - var groq = new GroqApiClient(config.GroqApiKey); + var client = new OpenAiCompatibleClient(config.GroqApiKey); try { @@ -51,13 +51,13 @@ public static class LatencyTestCommand { ctx.Status("Testing STT (Whisper)..."); var sttWatch = Stopwatch.StartNew(); - var transcript = await groq.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel); + var transcript = await client.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel); sttWatch.Stop(); ctx.Status("Testing LLM (Llama)..."); var systemPrompt = PromptBuilder.BuildPrompt(config); var llmWatch = Stopwatch.StartNew(); - var refinedText = await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel); + var refinedText = await client.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel); llmWatch.Stop(); var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds; diff --git a/Commands/OnboardCommand.cs b/Commands/OnboardCommand.cs index 7ba087f..81ef28b 100644 --- a/Commands/OnboardCommand.cs +++ b/Commands/OnboardCommand.cs @@ -21,17 +21,44 @@ public static class OnboardCommand AnsiConsole.WriteLine(); config.GroqApiKey = AnsiConsole.Prompt( - new TextPrompt("Groq API Key:") + new TextPrompt("Groq API Key (required for Whisper):") .DefaultValue(string.IsNullOrWhiteSpace(config.GroqApiKey) ? 
"" : config.GroqApiKey) .AllowEmpty()); - - config.LlmModel = AnsiConsole.Prompt( + + config.LlmProvider = AnsiConsole.Prompt( new SelectionPrompt() - .Title("Select [green]LLM Model[/]:") - .AddChoices(new[] { "openai/gpt-oss-20b", "llama-3.1-8b-instant", "llama-3.3-70b-versatile" }) - .UseConverter(c => c == "openai/gpt-oss-20b" ? "openai/gpt-oss-20b (Fastest)" : c == "llama-3.1-8b-instant" ? "llama-3.1-8b-instant (Cheapest)" : "llama-3.3-70b-versatile (More Accurate)")); - - if (config.LlmModel.Contains(" ")) config.LlmModel = config.LlmModel.Split(' ')[0]; + .Title("Select [green]LLM Provider[/]:") + .AddChoices(new[] { "groq", "together" }) + .UseConverter(c => c == "groq" ? "Groq (Default)" : "Together AI")); + + if (config.LlmProvider == "together") + { + config.TogetherApiKey = AnsiConsole.Prompt( + new TextPrompt("Together AI API Key:") + .DefaultValue(string.IsNullOrWhiteSpace(config.TogetherApiKey) ? "" : config.TogetherApiKey) + .AllowEmpty()); + + config.LlmModel = AnsiConsole.Prompt( + new SelectionPrompt() + .Title("Select [green]LLM Model[/]:") + .AddChoices(new[] { "meta-llama/Llama-3.3-70B-Instruct-Turbo", "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", "openai/gpt-oss-20b" })); + } + else + { + config.LlmModel = AnsiConsole.Prompt( + new SelectionPrompt() + .Title("Select [green]LLM Model[/]:") + .AddChoices(new[] { "openai/gpt-oss-20b", "llama-3.1-8b-instant", "llama-3.3-70b-versatile" }) + .UseConverter(c => c == "openai/gpt-oss-20b" ? "openai/gpt-oss-20b (Fastest)" : c == "llama-3.1-8b-instant" ? "llama-3.1-8b-instant (Cheapest)" : "llama-3.3-70b-versatile (More Accurate)")); + + if (config.LlmModel.Contains(" ")) config.LlmModel = config.LlmModel.Split(' ')[0]; + } + + config.ReasoningEffort = AnsiConsole.Prompt( + new SelectionPrompt() + .Title("Select [green]Reasoning Effort[/]:") + .AddChoices(new[] { "none", "low" }) + .UseConverter(c => c == "none" ? 
"None (Standard)" : "Low (Moderate Reasoning)")); config.WhisperModel = AnsiConsole.Prompt( new SelectionPrompt() diff --git a/Configuration/ToakConfig.cs b/Configuration/ToakConfig.cs index 2fe7f2d..653f3bd 100644 --- a/Configuration/ToakConfig.cs +++ b/Configuration/ToakConfig.cs @@ -3,6 +3,8 @@ namespace Toak.Configuration; public class ToakConfig { public string GroqApiKey { get; set; } = string.Empty; + public string TogetherApiKey { get; set; } = string.Empty; + public string LlmProvider { get; set; } = "groq"; // groq or together public string TypingBackend { get; set; } = "xdotool"; // wtype or xdotool public string AudioBackend { get; set; } = "pw-record"; // pw-record or ffmpeg public bool ModulePunctuation { get; set; } = true; @@ -10,6 +12,7 @@ public class ToakConfig public string WhisperLanguage { get; set; } = string.Empty; public string LlmModel { get; set; } = Toak.Core.Constants.Defaults.LlmModel; + public string ReasoningEffort { get; set; } = "none"; // none or low public string WhisperModel { get; set; } = Toak.Core.Constants.Defaults.WhisperModel; public string StartSoundPath { get; set; } = "Assets/Audio/beep.wav"; public string StopSoundPath { get; set; } = "Assets/Audio/beep.wav"; diff --git a/Core/DaemonService.cs b/Core/DaemonService.cs index c48ca49..835cedf 100644 --- a/Core/DaemonService.cs +++ b/Core/DaemonService.cs @@ -53,14 +53,24 @@ public static class DaemonService var stateTracker = new StateTracker(); var notifications = new Notifications(); - var groqClient = new GroqApiClient(config.GroqApiKey); + var speechClient = new OpenAiCompatibleClient(config.GroqApiKey); + ILlmClient llmClient; + if (config.LlmProvider == "together") + { + llmClient = new OpenAiCompatibleClient(config.TogetherApiKey, "https://api.together.xyz/v1/", config.ReasoningEffort); + } + else + { + llmClient = new OpenAiCompatibleClient(config.GroqApiKey, "https://api.groq.com/openai/v1/", config.ReasoningEffort); + } + IAudioRecorder recorder = 
config.AudioBackend == "ffmpeg" ? new FfmpegAudioRecorder(stateTracker, notifications) : new AudioRecorder(stateTracker, notifications); var orchestrator = new TranscriptionOrchestrator( - groqClient, - groqClient, + speechClient, + llmClient, configManager, recorder, notifications, diff --git a/Serialization/AppJsonSerializerContext.cs b/Serialization/AppJsonSerializerContext.cs index 5ffbe0e..fc425d4 100644 --- a/Serialization/AppJsonSerializerContext.cs +++ b/Serialization/AppJsonSerializerContext.cs @@ -8,16 +8,16 @@ namespace Toak.Serialization; [JsonSourceGenerationOptions(WriteIndented = true, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull)] [JsonSerializable(typeof(ToakConfig))] [JsonSerializable(typeof(WhisperResponse))] -[JsonSerializable(typeof(LlamaRequest))] -[JsonSerializable(typeof(LlamaRequestMessage))] -[JsonSerializable(typeof(LlamaResponse))] -[JsonSerializable(typeof(LlamaChoice))] -[JsonSerializable(typeof(LlamaRequestMessage[]))] -[JsonSerializable(typeof(LlamaChoice[]))] -[JsonSerializable(typeof(LlamaStreamResponse))] -[JsonSerializable(typeof(LlamaStreamChoice))] -[JsonSerializable(typeof(LlamaStreamDelta))] -[JsonSerializable(typeof(LlamaStreamChoice[]))] +[JsonSerializable(typeof(OpenAiRequest))] +[JsonSerializable(typeof(OpenAiRequestMessage))] +[JsonSerializable(typeof(OpenAiResponse))] +[JsonSerializable(typeof(OpenAiChoice))] +[JsonSerializable(typeof(OpenAiRequestMessage[]))] +[JsonSerializable(typeof(OpenAiChoice[]))] +[JsonSerializable(typeof(OpenAiStreamResponse))] +[JsonSerializable(typeof(OpenAiStreamChoice))] +[JsonSerializable(typeof(OpenAiStreamDelta))] +[JsonSerializable(typeof(OpenAiStreamChoice[]))] [JsonSerializable(typeof(Toak.Core.Skills.SkillDefinition))] [JsonSerializable(typeof(Toak.Core.HistoryEntry))] internal partial class AppJsonSerializerContext : JsonSerializerContext