using System.Net.Http.Headers;
using System.Text.Json;
using System.Text.Json.Serialization;

namespace Toak;

/// <summary>
/// Body returned by the <c>audio/transcriptions</c> endpoint; only the
/// <c>text</c> field is mapped.
/// </summary>
public class WhisperResponse
{
    [JsonPropertyName("text")]
    public string Text { get; set; } = string.Empty;
}

/// <summary>
/// A single chat message (<c>role</c> + <c>content</c>). Used both in the
/// request payload and inside each returned choice.
/// </summary>
public class LlamaRequestMessage
{
    [JsonPropertyName("role")]
    public string Role { get; set; } = string.Empty;

    [JsonPropertyName("content")]
    public string Content { get; set; } = string.Empty;
}

/// <summary>
/// Request payload posted to the <c>chat/completions</c> endpoint.
/// </summary>
public class LlamaRequest
{
    [JsonPropertyName("model")]
    public string Model { get; set; } = "llama-3.1-8b-instant";

    [JsonPropertyName("messages")]
    public LlamaRequestMessage[] Messages { get; set; } = Array.Empty<LlamaRequestMessage>();

    [JsonPropertyName("temperature")]
    public double Temperature { get; set; } = 0.0;
}

/// <summary>
/// Body returned by the <c>chat/completions</c> endpoint; only the
/// <c>choices</c> array is mapped.
/// </summary>
public class LlamaResponse
{
    [JsonPropertyName("choices")]
    public LlamaChoice[] Choices { get; set; } = Array.Empty<LlamaChoice>();
}

/// <summary>
/// One entry of <see cref="LlamaResponse.Choices"/>, wrapping the returned message.
/// </summary>
public class LlamaChoice
{
    [JsonPropertyName("message")]
    public LlamaRequestMessage Message { get; set; } = new();
}

/// <summary>
/// Small client for the Groq OpenAI-compatible HTTP API. It offers audio
/// transcription and a single-turn chat completion used to refine a transcript.
/// The instance owns its <see cref="HttpClient"/>; dispose it when no longer needed.
/// </summary>
public class GroqApiClient : IDisposable
{
    private const string DefaultWhisperModel = "whisper-large-v3-turbo";
    private const string DefaultChatModel = "openai/gpt-oss-20b";

    // Built once and reused: the options never change between calls.
    private static readonly JsonSerializerOptions s_jsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    private readonly HttpClient _httpClient;

    /// <summary>
    /// Creates a client that sends <paramref name="apiKey"/> as a Bearer token
    /// on every request to <c>https://api.groq.com/openai/v1/</c>.
    /// </summary>
    /// <param name="apiKey">API key placed in the <c>Authorization</c> header.</param>
    public GroqApiClient(string apiKey)
    {
        _httpClient = new HttpClient();
        _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
        _httpClient.BaseAddress = new Uri("https://api.groq.com/openai/v1/");
    }

    /// <summary>
    /// Uploads the file at <paramref name="filePath"/> to <c>audio/transcriptions</c>
    /// and returns the transcribed text.
    /// </summary>
    /// <param name="filePath">Path of the audio file to upload.</param>
    /// <param name="language">
    /// Optional language hint. When it is a comma-separated list, only the first
    /// non-empty entry is sent; when blank, no <c>language</c> field is sent.
    /// </param>
    /// <param name="model">Model name; a blank value falls back to <c>whisper-large-v3-turbo</c>.</param>
    /// <returns>The <c>text</c> field of the response, or an empty string when it is absent.</returns>
    /// <exception cref="HttpRequestException">The API answered with a non-success status code.</exception>
    public async Task<string> TranscribeAsync(string filePath, string language = "", string model = DefaultWhisperModel)
    {
        using var content = new MultipartFormDataContent();
        using var fileStream = File.OpenRead(filePath);
        using var streamContent = new StreamContent(fileStream);

        // The upload is always labelled as WAV audio, whatever the file's extension is.
        streamContent.Headers.ContentType = new MediaTypeHeaderValue("audio/wav");
        content.Add(streamContent, "file", Path.GetFileName(filePath));

        string modelToUse = string.IsNullOrWhiteSpace(model) ? DefaultWhisperModel : model;
        content.Add(new StringContent(modelToUse), "model");

        if (!string.IsNullOrWhiteSpace(language))
        {
            // Skip empty entries so input such as ",en" does not send an empty language field.
            var firstLang = language
                .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
                .FirstOrDefault();

            if (firstLang is not null)
            {
                content.Add(new StringContent(firstLang), "language");
            }
        }

        using var response = await _httpClient.PostAsync("audio/transcriptions", content);
        await EnsureSuccessAsync(response, "Whisper");

        var json = await response.Content.ReadAsStringAsync();
        var result = JsonSerializer.Deserialize<WhisperResponse>(json);
        return result?.Text ?? string.Empty;
    }

    /// <summary>
    /// Sends <paramref name="systemPrompt"/> as the system message and
    /// <paramref name="rawTranscript"/> as the user message to <c>chat/completions</c>
    /// (temperature 0) and returns the content of the first choice.
    /// </summary>
    /// <param name="rawTranscript">Text sent as the user message.</param>
    /// <param name="systemPrompt">Text sent as the system message.</param>
    /// <param name="model">Model name; a blank value falls back to <c>openai/gpt-oss-20b</c>.</param>
    /// <returns>The first choice's message content, or an empty string when there is none.</returns>
    /// <exception cref="HttpRequestException">The API answered with a non-success status code.</exception>
    public async Task<string> RefineTextAsync(string rawTranscript, string systemPrompt, string model = DefaultChatModel)
    {
        var requestBody = new LlamaRequest
        {
            Model = string.IsNullOrWhiteSpace(model) ? DefaultChatModel : model,
            Temperature = 0.0,
            Messages = new[]
            {
                new LlamaRequestMessage { Role = "system", Content = systemPrompt },
                new LlamaRequestMessage { Role = "user", Content = $"{rawTranscript}" }
            }
        };

        using var jsonContent = new StringContent(
            JsonSerializer.Serialize(requestBody, s_jsonOptions),
            System.Text.Encoding.UTF8,
            "application/json");

        using var response = await _httpClient.PostAsync("chat/completions", jsonContent);
        await EnsureSuccessAsync(response, "Llama");

        var json = await response.Content.ReadAsStringAsync();
        var result = JsonSerializer.Deserialize<LlamaResponse>(json);
        return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty;
    }

    /// <summary>
    /// Disposes the underlying <see cref="HttpClient"/>.
    /// </summary>
    public void Dispose()
    {
        _httpClient.Dispose();
        GC.SuppressFinalize(this);
    }

    // Throws an HttpRequestException containing the status code and response body
    // when the response does not report success; otherwise does nothing.
    private static async Task EnsureSuccessAsync(HttpResponseMessage response, string apiName)
    {
        if (response.IsSuccessStatusCode)
        {
            return;
        }

        var error = await response.Content.ReadAsStringAsync();
        throw new HttpRequestException(
            $"{apiName} API Error: {response.StatusCode} - {error}",
            null,
            response.StatusCode);
    }
}