1
0
Files
Toak/Api/GroqApiClient.cs

84 lines
3.4 KiB
C#

using System.Net.Http.Headers;
using System.Text.Json;
using System.Text.Json.Serialization;
using Toak.Api.Models;
using Toak.Serialization;
namespace Toak.Api;
/// <summary>
/// Minimal client for the Groq OpenAI-compatible REST API, covering audio
/// transcription (Whisper models) and chat-completion based text refinement.
/// Owns its <see cref="HttpClient"/>; dispose this instance when finished.
/// </summary>
public class GroqApiClient : IDisposable
{
    private readonly HttpClient _httpClient;

    /// <summary>
    /// Creates a client that authenticates every request with the given API key.
    /// </summary>
    /// <param name="apiKey">Groq API key, sent as a Bearer token.</param>
    /// <exception cref="ArgumentException">Thrown when <paramref name="apiKey"/> is null, empty, or whitespace.</exception>
    public GroqApiClient(string apiKey)
    {
        if (string.IsNullOrWhiteSpace(apiKey))
        {
            throw new ArgumentException("API key must be provided.", nameof(apiKey));
        }

        _httpClient = new HttpClient
        {
            BaseAddress = new Uri("https://api.groq.com/openai/v1/"),
        };
        _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
    }

    /// <summary>
    /// Uploads an audio file to the Groq transcription endpoint and returns the transcript.
    /// </summary>
    /// <param name="filePath">Path to the audio file to upload.</param>
    /// <param name="language">Optional language hint; if a comma-separated list is given, only the first entry is sent.</param>
    /// <param name="model">Whisper model id; falls back to "whisper-large-v3-turbo" when blank.</param>
    /// <param name="cancellationToken">Optional token to cancel the request.</param>
    /// <returns>The transcript text, or an empty string when the response carries no text.</returns>
    /// <exception cref="HttpRequestException">Thrown when the API returns a non-success status code.</exception>
    public async Task<string> TranscribeAsync(string filePath, string language = "", string model = "whisper-large-v3-turbo", CancellationToken cancellationToken = default)
    {
        using var content = new MultipartFormDataContent();
        using var fileStream = File.OpenRead(filePath);
        using var streamContent = new StreamContent(fileStream);
        // Derive the media type from the file extension instead of always claiming WAV.
        streamContent.Headers.ContentType = new MediaTypeHeaderValue(GetAudioMediaType(filePath));
        content.Add(streamContent, "file", Path.GetFileName(filePath));

        string modelToUse = string.IsNullOrWhiteSpace(model) ? "whisper-large-v3-turbo" : model;
        content.Add(new StringContent(modelToUse), "model");

        // The endpoint accepts a single language code; if the caller passed a
        // comma-separated list, send only the first entry.
        if (!string.IsNullOrWhiteSpace(language))
        {
            var firstLang = language.Split(',')[0].Trim();
            content.Add(new StringContent(firstLang), "language");
        }

        var response = await _httpClient.PostAsync("audio/transcriptions", content, cancellationToken).ConfigureAwait(false);
        // Read the body once; it is either the error detail or the JSON payload.
        var json = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
        if (!response.IsSuccessStatusCode)
        {
            throw new HttpRequestException($"Whisper API Error: {response.StatusCode} - {json}", null, response.StatusCode);
        }

        var result = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.WhisperResponse);
        return result?.Text ?? string.Empty;
    }

    /// <summary>
    /// Sends a raw transcript through a chat-completion model to clean it up.
    /// </summary>
    /// <param name="rawTranscript">The transcript text to refine; wrapped in a &lt;transcript&gt; tag for the model.</param>
    /// <param name="systemPrompt">System prompt that instructs the model how to refine the text.</param>
    /// <param name="model">Chat model id; falls back to "openai/gpt-oss-20b" when blank.</param>
    /// <param name="cancellationToken">Optional token to cancel the request.</param>
    /// <returns>The first choice's message content, or an empty string when absent.</returns>
    /// <exception cref="HttpRequestException">Thrown when the API returns a non-success status code.</exception>
    public async Task<string> RefineTextAsync(string rawTranscript, string systemPrompt, string model = "openai/gpt-oss-20b", CancellationToken cancellationToken = default)
    {
        var requestBody = new LlamaRequest
        {
            Model = string.IsNullOrWhiteSpace(model) ? "openai/gpt-oss-20b" : model,
            // Temperature 0 keeps the refinement deterministic rather than creative.
            Temperature = 0.0,
            Messages = new[]
            {
                new LlamaRequestMessage { Role = "system", Content = systemPrompt },
                new LlamaRequestMessage { Role = "user", Content = $"<transcript>{rawTranscript}</transcript>" }
            }
        };
        var jsonContent = new StringContent(
            JsonSerializer.Serialize(requestBody, AppJsonSerializerContext.Default.LlamaRequest),
            System.Text.Encoding.UTF8,
            "application/json");

        var response = await _httpClient.PostAsync("chat/completions", jsonContent, cancellationToken).ConfigureAwait(false);
        // Read the body once; it is either the error detail or the JSON payload.
        var json = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
        if (!response.IsSuccessStatusCode)
        {
            throw new HttpRequestException($"Llama API Error: {response.StatusCode} - {json}", null, response.StatusCode);
        }

        var result = JsonSerializer.Deserialize(json, AppJsonSerializerContext.Default.LlamaResponse);
        return result?.Choices?.FirstOrDefault()?.Message?.Content ?? string.Empty;
    }

    /// <summary>Maps an audio file extension to its MIME type; defaults to WAV for unknown extensions.</summary>
    private static string GetAudioMediaType(string filePath) =>
        Path.GetExtension(filePath).ToLowerInvariant() switch
        {
            ".mp3" => "audio/mpeg",
            ".mpeg" => "audio/mpeg",
            ".m4a" => "audio/mp4",
            ".mp4" => "audio/mp4",
            ".ogg" => "audio/ogg",
            ".opus" => "audio/ogg",
            ".flac" => "audio/flac",
            ".webm" => "audio/webm",
            _ => "audio/wav",
        };

    /// <summary>Releases the underlying <see cref="HttpClient"/>.</summary>
    public void Dispose() => _httpClient.Dispose();
}