using System.Net.Http.Headers;
using System.Text.Json;
using System.Text.Json.Serialization;
using Toak.Api.Models;
using Toak.Serialization;
using Toak.Core;
using Toak.Core.Interfaces;

namespace Toak.Api;

/// <summary>
/// HTTP client for the Groq OpenAI-compatible endpoints rooted at
/// <c>https://api.groq.com/openai/v1/</c>. Provides audio transcription
/// (<c>audio/transcriptions</c>) and chat-completion based text refinement
/// (<c>chat/completions</c>), both buffered and streamed.
/// </summary>
public class GroqApiClient : ISpeechClient, ILlmClient
{
    /// <summary>Model used for transcription when the caller passes a blank model name.</summary>
    private const string DefaultWhisperModel = "whisper-large-v3-turbo";

    /// <summary>Model used for text refinement when the caller passes a blank model name.</summary>
    private const string DefaultLlmModel = "openai/gpt-oss-20b";

    /// <summary>Field prefix of a server-sent-event data line; the space after the colon is optional.</summary>
    private const string SseDataPrefix = "data:";

    /// <summary>Payload that marks the end of a streamed chat completion.</summary>
    private const string SseDoneSentinel = "[DONE]";

    private readonly HttpClient _httpClient;

    /// <summary>
    /// Creates a client that sends <paramref name="apiKey"/> as a Bearer token on every request.
    /// </summary>
    /// <param name="apiKey">API key placed in the <c>Authorization</c> header.</param>
    public GroqApiClient(string apiKey)
    {
        _httpClient = new HttpClient();
        _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
        _httpClient.BaseAddress = new Uri("https://api.groq.com/openai/v1/");
    }

    /// <summary>
    /// Uploads an audio file to the transcription endpoint and returns the recognized text.
    /// </summary>
    /// <param name="filePath">Path of the audio file to upload. It is always sent as <c>audio/wav</c>.</param>
    /// <param name="language">
    /// Optional language hint. May be a comma-separated list, in which case only the first
    /// non-empty entry is sent. Blank means no hint.
    /// </param>
    /// <param name="model">Transcription model; blank falls back to <c>whisper-large-v3-turbo</c>.</param>
    /// <returns>The transcribed text, or an empty string when the response carries none.</returns>
    /// <exception cref="Exception">The API answered with a non-success status code.</exception>
    public async Task<string> TranscribeAsync(string filePath, string language = "", string model = DefaultWhisperModel)
    {
        using var content = new MultipartFormDataContent();
        using var fileStream = File.OpenRead(filePath);
        using var streamContent = new StreamContent(fileStream);
        streamContent.Headers.ContentType = new MediaTypeHeaderValue("audio/wav"); // or mpeg
        content.Add(streamContent, "file", Path.GetFileName(filePath));

        string modelToUse = ResolveModel(model, DefaultWhisperModel);
        content.Add(new StringContent(modelToUse), "model");

        // The language hint is optional and can be passed with either Whisper model.
        if (!string.IsNullOrWhiteSpace(language))
        {
            // Skip blank entries so input such as ",en" does not produce an empty form field.
            var firstLang = language
                .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
                .FirstOrDefault();

            if (!string.IsNullOrEmpty(firstLang))
            {
                content.Add(new StringContent(firstLang), "language");
            }
        }

        Logger.LogDebug($"Sending Whisper API request ({modelToUse})...");
        using var response = await _httpClient.PostAsync("audio/transcriptions", content);
        Logger.LogDebug($"Whisper API response status: {response.StatusCode}");

        await EnsureSuccessAsync(response, "Whisper");

        var json = await response.Content.ReadAsStringAsync();
        var result = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.WhisperResponse);
        return result?.Text ?? string.Empty;
    }

    /// <summary>
    /// Sends the transcript to the chat-completions endpoint and returns the full reply at once.
    /// </summary>
    /// <param name="rawTranscript">Text sent as the user message.</param>
    /// <param name="systemPrompt">Text sent as the system message.</param>
    /// <param name="model">Chat model; blank falls back to <c>openai/gpt-oss-20b</c>.</param>
    /// <returns>Content of the first choice's message, or an empty string when absent.</returns>
    /// <exception cref="Exception">The API answered with a non-success status code.</exception>
    public async Task<string> RefineTextAsync(string rawTranscript, string systemPrompt, string model = DefaultLlmModel)
    {
        var requestBody = new LlamaRequest
        {
            Model = ResolveModel(model, DefaultLlmModel),
            Temperature = 0.0,
            Messages = BuildMessages(rawTranscript, systemPrompt)
        };

        using var jsonContent = new StringContent(
            JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest),
            System.Text.Encoding.UTF8,
            "application/json");

        Logger.LogDebug($"Sending Llama API request (model: {requestBody.Model})...");
        using var response = await _httpClient.PostAsync("chat/completions", jsonContent);
        Logger.LogDebug($"Llama API response status: {response.StatusCode}");

        await EnsureSuccessAsync(response, "Llama");

        var json = await response.Content.ReadAsStringAsync();
        var result = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.LlamaResponse);
        return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty;
    }

    /// <summary>
    /// Sends the transcript to the chat-completions endpoint with streaming enabled and yields
    /// each non-empty content fragment as it arrives.
    /// </summary>
    /// <param name="rawTranscript">Text sent as the user message.</param>
    /// <param name="systemPrompt">Text sent as the system message.</param>
    /// <param name="model">Chat model; blank falls back to <c>openai/gpt-oss-20b</c>.</param>
    /// <returns>The content fragments of the reply, in arrival order.</returns>
    /// <exception cref="Exception">The API answered with a non-success status code.</exception>
    public async IAsyncEnumerable<string> RefineTextStreamAsync(string rawTranscript, string systemPrompt, string model = DefaultLlmModel)
    {
        var requestBody = new LlamaRequest
        {
            Model = ResolveModel(model, DefaultLlmModel),
            Temperature = 0.0,
            Stream = true,
            Messages = BuildMessages(rawTranscript, systemPrompt)
        };

        var jsonContent = new StringContent(
            JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest),
            System.Text.Encoding.UTF8,
            "application/json");

        // Disposing the request also disposes its content.
        using var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions")
        {
            Content = jsonContent
        };
        request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/event-stream"));

        Logger.LogDebug($"Sending Llama Stream API request (model: {requestBody.Model})...");

        // ResponseHeadersRead returns as soon as headers arrive so the body can be consumed incrementally.
        using var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
        Logger.LogDebug($"Llama Stream API response status: {response.StatusCode}");

        await EnsureSuccessAsync(response, "Llama");

        using var stream = await response.Content.ReadAsStreamAsync();
        using var reader = new StreamReader(stream);

        string? line;
        while ((line = await reader.ReadLineAsync()) != null)
        {
            // Only "data" lines carry payload; blank separators and other fields are ignored.
            if (!line.StartsWith(SseDataPrefix, StringComparison.Ordinal))
            {
                continue;
            }

            var data = line[SseDataPrefix.Length..].Trim();
            if (data.Length == 0)
            {
                continue;
            }

            if (data == SseDoneSentinel)
            {
                break;
            }

            var chunk = JsonSerializer.Deserialize(data, AppJsonSerializerContext.Default.LlamaStreamResponse);
            var content = chunk?.Choices?.FirstOrDefault()?.Delta?.Content;
            if (!string.IsNullOrEmpty(content))
            {
                yield return content;
            }
        }
    }

    /// <summary>Returns <paramref name="model"/>, or <paramref name="fallback"/> when it is null or blank.</summary>
    private static string ResolveModel(string model, string fallback)
    {
        return string.IsNullOrWhiteSpace(model) ? fallback : model;
    }

    /// <summary>Builds the system + user message pair shared by both refinement calls.</summary>
    private static LlamaRequestMessage[] BuildMessages(string rawTranscript, string systemPrompt)
    {
        return new[]
        {
            new LlamaRequestMessage { Role = "system", Content = systemPrompt },
            new LlamaRequestMessage { Role = "user", Content = $"{rawTranscript}" }
        };
    }

    /// <summary>
    /// Throws when <paramref name="response"/> has a non-success status, including the response
    /// body in the message. The exception type stays <see cref="Exception"/> so existing
    /// catch blocks keep their behavior.
    /// </summary>
    private static async Task EnsureSuccessAsync(HttpResponseMessage response, string apiName)
    {
        if (response.IsSuccessStatusCode)
        {
            return;
        }

        var error = await response.Content.ReadAsStringAsync();
        throw new Exception($"{apiName} API Error: {response.StatusCode} - {error}");
    }
}