1
0

feat: Introduce an OpenAI-compatible client to replace the Groq-specific client and enable multiple LLM providers.

This commit is contained in:
2026-02-28 16:09:41 +01:00
parent 3ceecbe5ee
commit 4e04cc6042
7 changed files with 104 additions and 57 deletions

View File

@@ -2,7 +2,7 @@ using System.Text.Json.Serialization;
namespace Toak.Api.Models; namespace Toak.Api.Models;
public class LlamaRequestMessage public class OpenAiRequestMessage
{ {
[JsonPropertyName("role")] [JsonPropertyName("role")]
public string Role { get; set; } = string.Empty; public string Role { get; set; } = string.Empty;
@@ -10,43 +10,45 @@ public class LlamaRequestMessage
public string Content { get; set; } = string.Empty; public string Content { get; set; } = string.Empty;
} }
public class LlamaRequest public class OpenAiRequest
{ {
[JsonPropertyName("model")] [JsonPropertyName("model")]
public string Model { get; set; } = "llama-3.1-8b-instant"; public string Model { get; set; } = "llama-3.1-8b-instant";
[JsonPropertyName("messages")] [JsonPropertyName("messages")]
public LlamaRequestMessage[] Messages { get; set; } = Array.Empty<LlamaRequestMessage>(); public OpenAiRequestMessage[] Messages { get; set; } = Array.Empty<OpenAiRequestMessage>();
[JsonPropertyName("temperature")] [JsonPropertyName("temperature")]
public double Temperature { get; set; } = 0.0; public double Temperature { get; set; } = 0.0;
[JsonPropertyName("stream")] [JsonPropertyName("stream")]
public bool? Stream { get; set; } public bool? Stream { get; set; }
[JsonPropertyName("reasoning_effort")]
public string? ReasoningEffort { get; set; }
} }
public class LlamaResponse public class OpenAiResponse
{ {
[JsonPropertyName("choices")] [JsonPropertyName("choices")]
public LlamaChoice[] Choices { get; set; } = Array.Empty<LlamaChoice>(); public OpenAiChoice[] Choices { get; set; } = Array.Empty<OpenAiChoice>();
} }
public class LlamaChoice public class OpenAiChoice
{ {
[JsonPropertyName("message")] [JsonPropertyName("message")]
public LlamaRequestMessage Message { get; set; } = new(); public OpenAiRequestMessage Message { get; set; } = new();
} }
public class LlamaStreamResponse public class OpenAiStreamResponse
{ {
[JsonPropertyName("choices")] [JsonPropertyName("choices")]
public LlamaStreamChoice[] Choices { get; set; } = Array.Empty<LlamaStreamChoice>(); public OpenAiStreamChoice[] Choices { get; set; } = Array.Empty<OpenAiStreamChoice>();
} }
public class LlamaStreamChoice public class OpenAiStreamChoice
{ {
[JsonPropertyName("delta")] [JsonPropertyName("delta")]
public LlamaStreamDelta Delta { get; set; } = new(); public OpenAiStreamDelta Delta { get; set; } = new();
} }
public class LlamaStreamDelta public class OpenAiStreamDelta
{ {
[JsonPropertyName("content")] [JsonPropertyName("content")]
public string? Content { get; set; } public string? Content { get; set; }

View File

@@ -9,19 +9,22 @@ using Toak.Core.Interfaces;
namespace Toak.Api; namespace Toak.Api;
public class GroqApiClient : ISpeechClient, ILlmClient public class OpenAiCompatibleClient : ISpeechClient, ILlmClient
{ {
private readonly HttpClient _httpClient; private readonly HttpClient _httpClient;
private readonly string? _reasoningEffort;
public GroqApiClient(string apiKey) public OpenAiCompatibleClient(string apiKey, string baseUrl = "https://api.groq.com/openai/v1/", string? reasoningEffort = null)
{ {
_httpClient = new HttpClient(); _httpClient = new HttpClient();
_httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey); _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
_httpClient.BaseAddress = new Uri("https://api.groq.com/openai/v1/"); _httpClient.BaseAddress = new Uri(baseUrl);
_reasoningEffort = reasoningEffort == "none" ? null : reasoningEffort;
} }
public async Task<string> TranscribeAsync(string filePath, string language = "", string model = Toak.Core.Constants.Defaults.WhisperModel) public async Task<string> TranscribeAsync(string filePath, string language = "", string model = Toak.Core.Constants.Defaults.WhisperModel)
{ {
// ... (TranscribeAsync content remains same except maybe some internal comments or contexts)
using var content = new MultipartFormDataContent(); using var content = new MultipartFormDataContent();
using var fileStream = File.OpenRead(filePath); using var fileStream = File.OpenRead(filePath);
using var streamContent = new StreamContent(fileStream); using var streamContent = new StreamContent(fileStream);
@@ -31,8 +34,6 @@ public class GroqApiClient : ISpeechClient, ILlmClient
string modelToUse = string.IsNullOrWhiteSpace(model) ? Toak.Core.Constants.Defaults.WhisperModel : model; string modelToUse = string.IsNullOrWhiteSpace(model) ? Toak.Core.Constants.Defaults.WhisperModel : model;
// according to docs whisper-large-v3-turbo requires the language to be provided if it is to be translated later potentially or if we need the most accurate behavior
// Actually, if we want language param, we can pass it to either model
content.Add(new StringContent(modelToUse), "model"); content.Add(new StringContent(modelToUse), "model");
if (!string.IsNullOrWhiteSpace(language)) if (!string.IsNullOrWhiteSpace(language))
@@ -58,62 +59,64 @@ public class GroqApiClient : ISpeechClient, ILlmClient
public async Task<string> RefineTextAsync(string rawTranscript, string systemPrompt, string model = Toak.Core.Constants.Defaults.LlmModel) public async Task<string> RefineTextAsync(string rawTranscript, string systemPrompt, string model = Toak.Core.Constants.Defaults.LlmModel)
{ {
var requestBody = new LlamaRequest var requestBody = new OpenAiRequest
{ {
Model = string.IsNullOrWhiteSpace(model) ? Toak.Core.Constants.Defaults.LlmModel : model, Model = string.IsNullOrWhiteSpace(model) ? Toak.Core.Constants.Defaults.LlmModel : model,
Temperature = 0.0, Temperature = 0.0,
ReasoningEffort = _reasoningEffort,
Messages = new[] Messages = new[]
{ {
new LlamaRequestMessage { Role = "system", Content = systemPrompt }, new OpenAiRequestMessage { Role = "system", Content = systemPrompt },
new LlamaRequestMessage { Role = "user", Content = $"<transcript>{rawTranscript}</transcript>" } new OpenAiRequestMessage { Role = "user", Content = $"<transcript>{rawTranscript}</transcript>" }
} }
}; };
var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json"); var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.OpenAiRequest), System.Text.Encoding.UTF8, "application/json");
Logger.LogDebug($"Sending Llama API request (model: {requestBody.Model})..."); Logger.LogDebug($"Sending OpenAi API request (model: {requestBody.Model})...");
var response = await _httpClient.PostAsync("chat/completions", jsonContent); var response = await _httpClient.PostAsync("chat/completions", jsonContent);
Logger.LogDebug($"Llama API response status: {response.StatusCode}"); Logger.LogDebug($"OpenAi API response status: {response.StatusCode}");
if (!response.IsSuccessStatusCode) if (!response.IsSuccessStatusCode)
{ {
var error = await response.Content.ReadAsStringAsync(); var error = await response.Content.ReadAsStringAsync();
throw new Exception($"Llama API Error: {response.StatusCode} - {error}"); throw new Exception($"OpenAi API Error: {response.StatusCode} - {error}");
} }
var json = await response.Content.ReadAsStringAsync(); var json = await response.Content.ReadAsStringAsync();
var result = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.LlamaResponse); var result = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.OpenAiResponse);
return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty; return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty;
} }
public async IAsyncEnumerable<string> RefineTextStreamAsync(string rawTranscript, string systemPrompt, string model = Toak.Core.Constants.Defaults.LlmModel) public async IAsyncEnumerable<string> RefineTextStreamAsync(string rawTranscript, string systemPrompt, string model = Toak.Core.Constants.Defaults.LlmModel)
{ {
var requestBody = new LlamaRequest var requestBody = new OpenAiRequest
{ {
Model = string.IsNullOrWhiteSpace(model) ? Toak.Core.Constants.Defaults.LlmModel : model, Model = string.IsNullOrWhiteSpace(model) ? Toak.Core.Constants.Defaults.LlmModel : model,
Temperature = 0.0, Temperature = 0.0,
Stream = true, Stream = true,
ReasoningEffort = _reasoningEffort,
Messages = new[] Messages = new[]
{ {
new LlamaRequestMessage { Role = "system", Content = systemPrompt }, new OpenAiRequestMessage { Role = "system", Content = systemPrompt },
new LlamaRequestMessage { Role = "user", Content = $"<transcript>{rawTranscript}</transcript>" } new OpenAiRequestMessage { Role = "user", Content = $"<transcript>{rawTranscript}</transcript>" }
} }
}; };
var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest), System.Text.Encoding.UTF8, "application/json"); var jsonContent = new StringContent(JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.OpenAiRequest), System.Text.Encoding.UTF8, "application/json");
using var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions") { Content = jsonContent }; using var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions") { Content = jsonContent };
request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/event-stream")); request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/event-stream"));
Logger.LogDebug($"Sending Llama Steam API request (model: {requestBody.Model})..."); Logger.LogDebug($"Sending OpenAi Stream API request (model: {requestBody.Model})...");
using var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead); using var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
Logger.LogDebug($"Llama Stream API response status: {response.StatusCode}"); Logger.LogDebug($"OpenAi Stream API response status: {response.StatusCode}");
if (!response.IsSuccessStatusCode) if (!response.IsSuccessStatusCode)
{ {
var error = await response.Content.ReadAsStringAsync(); var error = await response.Content.ReadAsStringAsync();
throw new Exception($"Llama API Error: {response.StatusCode} - {error}"); throw new Exception($"OpenAi API Error: {response.StatusCode} - {error}");
} }
using var stream = await response.Content.ReadAsStreamAsync(); using var stream = await response.Content.ReadAsStreamAsync();
@@ -128,7 +131,7 @@ public class GroqApiClient : ISpeechClient, ILlmClient
var data = line.Substring("data: ".Length).Trim(); var data = line.Substring("data: ".Length).Trim();
if (data == "[DONE]") break; if (data == "[DONE]") break;
var chunk = JsonSerializer.Deserialize(data, AppJsonSerializerContext.Default.LlamaStreamResponse); var chunk = JsonSerializer.Deserialize(data, AppJsonSerializerContext.Default.OpenAiStreamResponse);
var content = chunk?.Choices?.FirstOrDefault()?.Delta?.Content; var content = chunk?.Choices?.FirstOrDefault()?.Delta?.Content;
if (!string.IsNullOrEmpty(content)) if (!string.IsNullOrEmpty(content))
{ {
@@ -138,3 +141,5 @@ public class GroqApiClient : ISpeechClient, ILlmClient
} }
} }
} }

View File

@@ -42,7 +42,7 @@ public static class LatencyTestCommand
return; return;
} }
var groq = new GroqApiClient(config.GroqApiKey); var client = new OpenAiCompatibleClient(config.GroqApiKey);
try try
{ {
@@ -51,13 +51,13 @@ public static class LatencyTestCommand
{ {
ctx.Status("Testing STT (Whisper)..."); ctx.Status("Testing STT (Whisper)...");
var sttWatch = Stopwatch.StartNew(); var sttWatch = Stopwatch.StartNew();
var transcript = await groq.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel); var transcript = await client.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel);
sttWatch.Stop(); sttWatch.Stop();
ctx.Status("Testing LLM (Llama)..."); ctx.Status("Testing LLM (Llama)...");
var systemPrompt = PromptBuilder.BuildPrompt(config); var systemPrompt = PromptBuilder.BuildPrompt(config);
var llmWatch = Stopwatch.StartNew(); var llmWatch = Stopwatch.StartNew();
var refinedText = await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel); var refinedText = await client.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel);
llmWatch.Stop(); llmWatch.Stop();
var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds; var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds;

View File

@@ -21,17 +21,44 @@ public static class OnboardCommand
AnsiConsole.WriteLine(); AnsiConsole.WriteLine();
config.GroqApiKey = AnsiConsole.Prompt( config.GroqApiKey = AnsiConsole.Prompt(
new TextPrompt<string>("Groq API Key:") new TextPrompt<string>("Groq API Key (required for Whisper):")
.DefaultValue(string.IsNullOrWhiteSpace(config.GroqApiKey) ? "" : config.GroqApiKey) .DefaultValue(string.IsNullOrWhiteSpace(config.GroqApiKey) ? "" : config.GroqApiKey)
.AllowEmpty()); .AllowEmpty());
config.LlmModel = AnsiConsole.Prompt( config.LlmProvider = AnsiConsole.Prompt(
new SelectionPrompt<string>() new SelectionPrompt<string>()
.Title("Select [green]LLM Model[/]:") .Title("Select [green]LLM Provider[/]:")
.AddChoices(new[] { "openai/gpt-oss-20b", "llama-3.1-8b-instant", "llama-3.3-70b-versatile" }) .AddChoices(new[] { "groq", "together" })
.UseConverter(c => c == "openai/gpt-oss-20b" ? "openai/gpt-oss-20b (Fastest)" : c == "llama-3.1-8b-instant" ? "llama-3.1-8b-instant (Cheapest)" : "llama-3.3-70b-versatile (More Accurate)")); .UseConverter(c => c == "groq" ? "Groq (Default)" : "Together AI"));
if (config.LlmModel.Contains(" ")) config.LlmModel = config.LlmModel.Split(' ')[0]; if (config.LlmProvider == "together")
{
config.TogetherApiKey = AnsiConsole.Prompt(
new TextPrompt<string>("Together AI API Key:")
.DefaultValue(string.IsNullOrWhiteSpace(config.TogetherApiKey) ? "" : config.TogetherApiKey)
.AllowEmpty());
config.LlmModel = AnsiConsole.Prompt(
new SelectionPrompt<string>()
.Title("Select [green]LLM Model[/]:")
.AddChoices(new[] { "meta-llama/Llama-3.3-70B-Instruct-Turbo", "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", "openai/gpt-oss-20b" }));
}
else
{
config.LlmModel = AnsiConsole.Prompt(
new SelectionPrompt<string>()
.Title("Select [green]LLM Model[/]:")
.AddChoices(new[] { "openai/gpt-oss-20b", "llama-3.1-8b-instant", "llama-3.3-70b-versatile" })
.UseConverter(c => c == "openai/gpt-oss-20b" ? "openai/gpt-oss-20b (Fastest)" : c == "llama-3.1-8b-instant" ? "llama-3.1-8b-instant (Cheapest)" : "llama-3.3-70b-versatile (More Accurate)"));
if (config.LlmModel.Contains(" ")) config.LlmModel = config.LlmModel.Split(' ')[0];
}
config.ReasoningEffort = AnsiConsole.Prompt(
new SelectionPrompt<string>()
.Title("Select [green]Reasoning Effort[/]:")
.AddChoices(new[] { "none", "low" })
.UseConverter(c => c == "none" ? "None (Standard)" : "Low (Moderate Reasoning)"));
config.WhisperModel = AnsiConsole.Prompt( config.WhisperModel = AnsiConsole.Prompt(
new SelectionPrompt<string>() new SelectionPrompt<string>()

View File

@@ -3,6 +3,8 @@ namespace Toak.Configuration;
public class ToakConfig public class ToakConfig
{ {
public string GroqApiKey { get; set; } = string.Empty; public string GroqApiKey { get; set; } = string.Empty;
public string TogetherApiKey { get; set; } = string.Empty;
public string LlmProvider { get; set; } = "groq"; // groq or together
public string TypingBackend { get; set; } = "xdotool"; // wtype or xdotool public string TypingBackend { get; set; } = "xdotool"; // wtype or xdotool
public string AudioBackend { get; set; } = "pw-record"; // pw-record or ffmpeg public string AudioBackend { get; set; } = "pw-record"; // pw-record or ffmpeg
public bool ModulePunctuation { get; set; } = true; public bool ModulePunctuation { get; set; } = true;
@@ -10,6 +12,7 @@ public class ToakConfig
public string WhisperLanguage { get; set; } = string.Empty; public string WhisperLanguage { get; set; } = string.Empty;
public string LlmModel { get; set; } = Toak.Core.Constants.Defaults.LlmModel; public string LlmModel { get; set; } = Toak.Core.Constants.Defaults.LlmModel;
public string ReasoningEffort { get; set; } = "none"; // none or low
public string WhisperModel { get; set; } = Toak.Core.Constants.Defaults.WhisperModel; public string WhisperModel { get; set; } = Toak.Core.Constants.Defaults.WhisperModel;
public string StartSoundPath { get; set; } = "Assets/Audio/beep.wav"; public string StartSoundPath { get; set; } = "Assets/Audio/beep.wav";
public string StopSoundPath { get; set; } = "Assets/Audio/beep.wav"; public string StopSoundPath { get; set; } = "Assets/Audio/beep.wav";

View File

@@ -53,14 +53,24 @@ public static class DaemonService
var stateTracker = new StateTracker(); var stateTracker = new StateTracker();
var notifications = new Notifications(); var notifications = new Notifications();
var groqClient = new GroqApiClient(config.GroqApiKey); var speechClient = new OpenAiCompatibleClient(config.GroqApiKey);
ILlmClient llmClient;
if (config.LlmProvider == "together")
{
llmClient = new OpenAiCompatibleClient(config.TogetherApiKey, "https://api.together.xyz/v1/", config.ReasoningEffort);
}
else
{
llmClient = new OpenAiCompatibleClient(config.GroqApiKey, "https://api.groq.com/openai/v1/", config.ReasoningEffort);
}
IAudioRecorder recorder = config.AudioBackend == "ffmpeg" IAudioRecorder recorder = config.AudioBackend == "ffmpeg"
? new FfmpegAudioRecorder(stateTracker, notifications) ? new FfmpegAudioRecorder(stateTracker, notifications)
: new AudioRecorder(stateTracker, notifications); : new AudioRecorder(stateTracker, notifications);
var orchestrator = new TranscriptionOrchestrator( var orchestrator = new TranscriptionOrchestrator(
groqClient, speechClient,
groqClient, llmClient,
configManager, configManager,
recorder, recorder,
notifications, notifications,

View File

@@ -8,16 +8,16 @@ namespace Toak.Serialization;
[JsonSourceGenerationOptions(WriteIndented = true, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull)] [JsonSourceGenerationOptions(WriteIndented = true, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull)]
[JsonSerializable(typeof(ToakConfig))] [JsonSerializable(typeof(ToakConfig))]
[JsonSerializable(typeof(WhisperResponse))] [JsonSerializable(typeof(WhisperResponse))]
[JsonSerializable(typeof(LlamaRequest))] [JsonSerializable(typeof(OpenAiRequest))]
[JsonSerializable(typeof(LlamaRequestMessage))] [JsonSerializable(typeof(OpenAiRequestMessage))]
[JsonSerializable(typeof(LlamaResponse))] [JsonSerializable(typeof(OpenAiResponse))]
[JsonSerializable(typeof(LlamaChoice))] [JsonSerializable(typeof(OpenAiChoice))]
[JsonSerializable(typeof(LlamaRequestMessage[]))] [JsonSerializable(typeof(OpenAiRequestMessage[]))]
[JsonSerializable(typeof(LlamaChoice[]))] [JsonSerializable(typeof(OpenAiChoice[]))]
[JsonSerializable(typeof(LlamaStreamResponse))] [JsonSerializable(typeof(OpenAiStreamResponse))]
[JsonSerializable(typeof(LlamaStreamChoice))] [JsonSerializable(typeof(OpenAiStreamChoice))]
[JsonSerializable(typeof(LlamaStreamDelta))] [JsonSerializable(typeof(OpenAiStreamDelta))]
[JsonSerializable(typeof(LlamaStreamChoice[]))] [JsonSerializable(typeof(OpenAiStreamChoice[]))]
[JsonSerializable(typeof(Toak.Core.Skills.SkillDefinition))] [JsonSerializable(typeof(Toak.Core.Skills.SkillDefinition))]
[JsonSerializable(typeof(Toak.Core.HistoryEntry))] [JsonSerializable(typeof(Toak.Core.HistoryEntry))]
internal partial class AppJsonSerializerContext : JsonSerializerContext internal partial class AppJsonSerializerContext : JsonSerializerContext