// Toak/GroqApiClient.cs

using System.Net.Http.Headers;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;

namespace Toak;

/// <summary>
/// Deserialization target for the JSON body that <c>GroqApiClient.TranscribeAsync</c>
/// reads back from the <c>audio/transcriptions</c> endpoint.
/// </summary>
public class WhisperResponse
{
    /// <summary>
    /// Value of the JSON <c>text</c> property. Starts out as an empty string.
    /// </summary>
    [JsonPropertyName("text")]
    public string Text { get; set; } = "";
}

/// <summary>
/// A single chat message made of a role and its content. Used both for the
/// messages placed in a <c>LlamaRequest</c> and for the message carried by a
/// <c>LlamaChoice</c>.
/// </summary>
public class LlamaRequestMessage
{
    /// <summary>
    /// Value of the JSON <c>role</c> property. Starts out as an empty string.
    /// </summary>
    [JsonPropertyName("role")]
    public string Role { get; set; } = "";

    /// <summary>
    /// Value of the JSON <c>content</c> property. Starts out as an empty string.
    /// </summary>
    [JsonPropertyName("content")]
    public string Content { get; set; } = "";
}

/// <summary>
/// Request body that <c>GroqApiClient.RefineTextAsync</c> serializes and posts
/// to the <c>chat/completions</c> endpoint.
/// </summary>
public class LlamaRequest
{
    /// <summary>
    /// Value of the JSON <c>model</c> property. Starts out as
    /// <c>llama-3.1-8b-instant</c> unless the caller assigns another value.
    /// </summary>
    [JsonPropertyName("model")]
    public string Model { get; set; } = "llama-3.1-8b-instant";

    /// <summary>
    /// Value of the JSON <c>messages</c> property. Starts out as an empty array.
    /// </summary>
    [JsonPropertyName("messages")]
    public LlamaRequestMessage[] Messages { get; set; } = Array.Empty<LlamaRequestMessage>();

    /// <summary>
    /// Value of the JSON <c>temperature</c> property. Starts out as zero.
    /// </summary>
    [JsonPropertyName("temperature")]
    public double Temperature { get; set; } = 0d;
}

/// <summary>
/// Deserialization target for the JSON body that <c>GroqApiClient.RefineTextAsync</c>
/// reads back from the <c>chat/completions</c> endpoint.
/// </summary>
public class LlamaResponse
{
    /// <summary>
    /// Value of the JSON <c>choices</c> property. Starts out as an empty array;
    /// <c>RefineTextAsync</c> only reads the first element.
    /// </summary>
    [JsonPropertyName("choices")]
    public LlamaChoice[] Choices { get; set; } = Array.Empty<LlamaChoice>();
}

/// <summary>
/// One element of the <c>choices</c> array held by <c>LlamaResponse</c>.
/// </summary>
public class LlamaChoice
{
    /// <summary>
    /// Value of the JSON <c>message</c> property. Starts out as a message
    /// whose role and content are both empty.
    /// </summary>
    [JsonPropertyName("message")]
    public LlamaRequestMessage Message { get; set; } = new LlamaRequestMessage();
}

/// <summary>
/// HTTP client for the Groq OpenAI-compatible API rooted at
/// <c>https://api.groq.com/openai/v1/</c>. It posts audio files to
/// <c>audio/transcriptions</c> and chat requests to <c>chat/completions</c>,
/// authenticating every request with a bearer token.
/// </summary>
/// <remarks>
/// Each instance creates and owns its own <see cref="HttpClient"/>; call
/// <see cref="Dispose"/> when the instance is no longer needed.
/// </remarks>
public class GroqApiClient : IDisposable
{
    private const string TranscriptionModel = "whisper-large-v3-turbo";
    private const string RefinementModel = "openai/gpt-oss-20b";

    // Built once and shared: the options are identical for every request, and
    // re-creating them per call discards the serializer's cached metadata.
    private static readonly JsonSerializerOptions s_requestJsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    private readonly HttpClient _httpClient;

    /// <summary>
    /// Creates a client whose requests carry <paramref name="apiKey"/> as a
    /// <c>Bearer</c> authorization header.
    /// </summary>
    /// <param name="apiKey">The API key sent with every request.</param>
    public GroqApiClient(string apiKey)
    {
        _httpClient = new HttpClient
        {
            BaseAddress = new Uri("https://api.groq.com/openai/v1/")
        };
        _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
    }

    /// <summary>
    /// Uploads the file at <paramref name="filePath"/> to the
    /// <c>audio/transcriptions</c> endpoint and returns the transcribed text.
    /// </summary>
    /// <param name="filePath">Path of the audio file to upload.</param>
    /// <param name="language">
    /// Optional language value, or a comma-separated list of them. Only the first
    /// non-empty entry is sent; when there is none, no <c>language</c> field is sent.
    /// </param>
    /// <param name="cancellationToken">Token used to cancel the HTTP request.</param>
    /// <returns>
    /// The <c>text</c> value of the response, or an empty string when the
    /// response body deserializes to <c>null</c>.
    /// </returns>
    /// <exception cref="HttpRequestException">
    /// The endpoint answered with a non-success status code. The message contains
    /// the status code and the response body.
    /// </exception>
    public async Task<string> TranscribeAsync(string filePath, string language = "", CancellationToken cancellationToken = default)
    {
        using var content = new MultipartFormDataContent();
        using var fileStream = File.OpenRead(filePath);
        using var streamContent = new StreamContent(fileStream);

        // The file part is always labelled audio/wav, whatever the file's extension is.
        // NOTE(review): confirm whether non-WAV uploads (e.g. mpeg) need a different content type.
        streamContent.Headers.ContentType = new MediaTypeHeaderValue("audio/wav");
        content.Add(streamContent, "file", Path.GetFileName(filePath));
        content.Add(new StringContent(TranscriptionModel), "model");

        // Skip blank entries so that input such as " ,en" sends "en" instead of
        // an empty language field.
        var firstLanguage = language?
            .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .FirstOrDefault();
        if (!string.IsNullOrEmpty(firstLanguage))
        {
            content.Add(new StringContent(firstLanguage), "language");
        }

        using var response = await _httpClient.PostAsync("audio/transcriptions", content, cancellationToken);
        await EnsureSuccessAsync(response, "Whisper", cancellationToken);

        var json = await response.Content.ReadAsStringAsync(cancellationToken);
        var result = JsonSerializer.Deserialize<WhisperResponse>(json);
        return result?.Text ?? string.Empty;
    }

    /// <summary>
    /// Sends <paramref name="rawTranscript"/>, wrapped in <c>&lt;transcript&gt;</c> tags,
    /// as the user message of a chat completion request with
    /// <paramref name="systemPrompt"/> as the system message, and returns the
    /// content of the first returned choice.
    /// </summary>
    /// <param name="rawTranscript">Text placed between the transcript tags.</param>
    /// <param name="systemPrompt">Content of the system message.</param>
    /// <param name="cancellationToken">Token used to cancel the HTTP request.</param>
    /// <returns>
    /// The message content of the first choice, or an empty string when the
    /// response has no choices or no content.
    /// </returns>
    /// <exception cref="HttpRequestException">
    /// The endpoint answered with a non-success status code. The message contains
    /// the status code and the response body.
    /// </exception>
    public async Task<string> RefineTextAsync(string rawTranscript, string systemPrompt, CancellationToken cancellationToken = default)
    {
        var requestBody = new LlamaRequest
        {
            Model = RefinementModel,
            Temperature = 0.0,
            Messages = new[]
            {
                new LlamaRequestMessage { Role = "system", Content = systemPrompt },
                new LlamaRequestMessage { Role = "user", Content = $"<transcript>{rawTranscript}</transcript>" }
            }
        };

        using var jsonContent = new StringContent(
            JsonSerializer.Serialize(requestBody, s_requestJsonOptions),
            System.Text.Encoding.UTF8,
            "application/json");

        using var response = await _httpClient.PostAsync("chat/completions", jsonContent, cancellationToken);
        await EnsureSuccessAsync(response, "Llama", cancellationToken);

        var json = await response.Content.ReadAsStringAsync(cancellationToken);
        var result = JsonSerializer.Deserialize<LlamaResponse>(json);

        return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty;
    }

    /// <summary>
    /// Disposes the <see cref="HttpClient"/> owned by this instance.
    /// </summary>
    public void Dispose()
    {
        _httpClient.Dispose();
        GC.SuppressFinalize(this);
    }

    // Throws an HttpRequestException carrying the status code and response body
    // when the response does not have a success status code; otherwise does nothing.
    private static async Task EnsureSuccessAsync(HttpResponseMessage response, string apiName, CancellationToken cancellationToken)
    {
        if (response.IsSuccessStatusCode)
        {
            return;
        }

        var error = await response.Content.ReadAsStringAsync(cancellationToken);
        throw new HttpRequestException($"{apiName} API Error: {response.StatusCode} - {error}", null, response.StatusCode);
    }
}