feat: Add configurable LLM and Whisper models with user selection and API integration.
This commit is contained in:
@@ -14,6 +14,8 @@ public class ToakConfig
|
|||||||
public bool StructureSmartParagraphing { get; set; } = true;
|
public bool StructureSmartParagraphing { get; set; } = true;
|
||||||
public string TargetLanguage { get; set; } = string.Empty;
|
public string TargetLanguage { get; set; } = string.Empty;
|
||||||
public string WhisperLanguage { get; set; } = string.Empty;
|
public string WhisperLanguage { get; set; } = string.Empty;
|
||||||
|
public string LlmModel { get; set; } = "openai/gpt-oss-20b";
|
||||||
|
public string WhisperModel { get; set; } = "whisper-large-v3-turbo";
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class ConfigManager
|
public static class ConfigManager
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ public class GroqApiClient
|
|||||||
_httpClient.BaseAddress = new Uri("https://api.groq.com/openai/v1/");
|
_httpClient.BaseAddress = new Uri("https://api.groq.com/openai/v1/");
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task<string> TranscribeAsync(string filePath, string language = "")
|
public async Task<string> TranscribeAsync(string filePath, string language = "", string model = "whisper-large-v3-turbo")
|
||||||
{
|
{
|
||||||
using var content = new MultipartFormDataContent();
|
using var content = new MultipartFormDataContent();
|
||||||
using var fileStream = File.OpenRead(filePath);
|
using var fileStream = File.OpenRead(filePath);
|
||||||
@@ -60,7 +60,7 @@ public class GroqApiClient
|
|||||||
streamContent.Headers.ContentType = new MediaTypeHeaderValue("audio/wav"); // or mpeg
|
streamContent.Headers.ContentType = new MediaTypeHeaderValue("audio/wav"); // or mpeg
|
||||||
content.Add(streamContent, "file", Path.GetFileName(filePath));
|
content.Add(streamContent, "file", Path.GetFileName(filePath));
|
||||||
|
|
||||||
string modelToUse = "whisper-large-v3-turbo";
|
string modelToUse = string.IsNullOrWhiteSpace(model) ? "whisper-large-v3-turbo" : model;
|
||||||
|
|
||||||
// According to the docs, whisper-large-v3-turbo requires the language to be provided if the output may be translated later or if we need the most accurate behavior.
|
// According to the docs, whisper-large-v3-turbo requires the language to be provided if the output may be translated later or if we need the most accurate behavior.
|
||||||
// Actually, if we want the language parameter, we can pass it to either model.
|
// Actually, if we want the language parameter, we can pass it to either model.
|
||||||
@@ -85,11 +85,11 @@ public class GroqApiClient
|
|||||||
return result?.Text ?? string.Empty;
|
return result?.Text ?? string.Empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task<string> RefineTextAsync(string rawTranscript, string systemPrompt)
|
public async Task<string> RefineTextAsync(string rawTranscript, string systemPrompt, string model = "openai/gpt-oss-20b")
|
||||||
{
|
{
|
||||||
var requestBody = new LlamaRequest
|
var requestBody = new LlamaRequest
|
||||||
{
|
{
|
||||||
Model = "openai/gpt-oss-20b",
|
Model = string.IsNullOrWhiteSpace(model) ? "openai/gpt-oss-20b" : model,
|
||||||
Temperature = 0.0,
|
Temperature = 0.0,
|
||||||
Messages = new[]
|
Messages = new[]
|
||||||
{
|
{
|
||||||
|
|||||||
39
Program.cs
39
Program.cs
@@ -44,6 +44,25 @@ if (command == "onboard")
|
|||||||
var key = Console.ReadLine();
|
var key = Console.ReadLine();
|
||||||
if (!string.IsNullOrWhiteSpace(key)) config.GroqApiKey = key;
|
if (!string.IsNullOrWhiteSpace(key)) config.GroqApiKey = key;
|
||||||
|
|
||||||
|
Console.WriteLine();
|
||||||
|
Console.WriteLine("LLM Model:");
|
||||||
|
Console.WriteLine(" 1) openai/gpt-oss-20b -- fastest");
|
||||||
|
Console.WriteLine(" 2) llama-3.1-8b-instant -- cheapest, but dumb");
|
||||||
|
Console.Write($"Select 1 or 2 [{config.LlmModel}]: ");
|
||||||
|
var llmSelection = Console.ReadLine()?.Trim();
|
||||||
|
if (llmSelection == "1" || llmSelection == "openai/gpt-oss-20b") config.LlmModel = "openai/gpt-oss-20b";
|
||||||
|
else if (llmSelection == "2" || llmSelection == "llama-3.1-8b-instant") config.LlmModel = "llama-3.1-8b-instant";
|
||||||
|
|
||||||
|
Console.WriteLine();
|
||||||
|
Console.WriteLine("Whisper Model:");
|
||||||
|
Console.WriteLine(" 1) whisper-large-v3 -- large model, very accurate");
|
||||||
|
Console.WriteLine(" 2) whisper-large-v3-turbo -- very fast, a bit less accurate");
|
||||||
|
Console.Write($"Select 1 or 2 [{config.WhisperModel}]: ");
|
||||||
|
var whisperSelection = Console.ReadLine()?.Trim();
|
||||||
|
if (whisperSelection == "1" || whisperSelection == "whisper-large-v3") config.WhisperModel = "whisper-large-v3";
|
||||||
|
else if (whisperSelection == "2" || whisperSelection == "whisper-large-v3-turbo") config.WhisperModel = "whisper-large-v3-turbo";
|
||||||
|
|
||||||
|
Console.WriteLine();
|
||||||
Console.Write($"Microphone Spoken Language (e.g. en, es, zh) [{config.WhisperLanguage}]: ");
|
Console.Write($"Microphone Spoken Language (e.g. en, es, zh) [{config.WhisperLanguage}]: ");
|
||||||
var lang = Console.ReadLine();
|
var lang = Console.ReadLine();
|
||||||
if (!string.IsNullOrWhiteSpace(lang)) config.WhisperLanguage = lang.ToLowerInvariant();
|
if (!string.IsNullOrWhiteSpace(lang)) config.WhisperLanguage = lang.ToLowerInvariant();
|
||||||
@@ -62,6 +81,8 @@ if (command == "show")
|
|||||||
var config = ConfigManager.LoadConfig();
|
var config = ConfigManager.LoadConfig();
|
||||||
Console.WriteLine("Current Configuration:");
|
Console.WriteLine("Current Configuration:");
|
||||||
Console.WriteLine($" Groq API Key: {(string.IsNullOrEmpty(config.GroqApiKey) ? "Not Set" : "Set")}");
|
Console.WriteLine($" Groq API Key: {(string.IsNullOrEmpty(config.GroqApiKey) ? "Not Set" : "Set")}");
|
||||||
|
Console.WriteLine($" LLM Model: {config.LlmModel}");
|
||||||
|
Console.WriteLine($" Whisper Model: {config.WhisperModel}");
|
||||||
Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage)}");
|
Console.WriteLine($" Spoken Language: {(string.IsNullOrEmpty(config.WhisperLanguage) ? "Auto" : config.WhisperLanguage)}");
|
||||||
Console.WriteLine($" Typing Backend: {config.TypingBackend}");
|
Console.WriteLine($" Typing Backend: {config.TypingBackend}");
|
||||||
Console.WriteLine($" Style Mode: {config.StyleMode}");
|
Console.WriteLine($" Style Mode: {config.StyleMode}");
|
||||||
@@ -78,7 +99,7 @@ if (command == "config")
|
|||||||
if (argsNoFlags.Length < 3)
|
if (argsNoFlags.Length < 3)
|
||||||
{
|
{
|
||||||
Console.WriteLine("Usage: toak config <key> <value>");
|
Console.WriteLine("Usage: toak config <key> <value>");
|
||||||
Console.WriteLine("Keys: style, backend, punctuation, tech, bullets, paragraphs");
|
Console.WriteLine("Keys: llm, whisper, style, language, backend, punctuation, tech, bullets, paragraphs");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -88,6 +109,14 @@ if (command == "config")
|
|||||||
|
|
||||||
switch (key)
|
switch (key)
|
||||||
{
|
{
|
||||||
|
case "llm":
|
||||||
|
config.LlmModel = val;
|
||||||
|
Console.WriteLine($"LLM Model set to {val}");
|
||||||
|
break;
|
||||||
|
case "whisper":
|
||||||
|
config.WhisperModel = val;
|
||||||
|
Console.WriteLine($"Whisper Model set to {val}");
|
||||||
|
break;
|
||||||
case "style":
|
case "style":
|
||||||
if (val == "professional" || val == "concise" || val == "casual") {
|
if (val == "professional" || val == "concise" || val == "casual") {
|
||||||
config.StyleMode = val;
|
config.StyleMode = val;
|
||||||
@@ -182,13 +211,13 @@ if (command == "latency-test")
|
|||||||
{
|
{
|
||||||
Console.WriteLine("Testing STT (Whisper)...");
|
Console.WriteLine("Testing STT (Whisper)...");
|
||||||
var sttWatch = Stopwatch.StartNew();
|
var sttWatch = Stopwatch.StartNew();
|
||||||
var transcript = await groq.TranscribeAsync(testWavPath, config.WhisperLanguage);
|
var transcript = await groq.TranscribeAsync(testWavPath, config.WhisperLanguage, config.WhisperModel);
|
||||||
sttWatch.Stop();
|
sttWatch.Stop();
|
||||||
|
|
||||||
Console.WriteLine("Testing LLM (Llama)...");
|
Console.WriteLine("Testing LLM (Llama)...");
|
||||||
var systemPrompt = PromptBuilder.BuildPrompt(config);
|
var systemPrompt = PromptBuilder.BuildPrompt(config);
|
||||||
var llmWatch = Stopwatch.StartNew();
|
var llmWatch = Stopwatch.StartNew();
|
||||||
var refinedText = await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt);
|
var refinedText = await groq.RefineTextAsync("Hello world, this is a latency test.", systemPrompt, config.LlmModel);
|
||||||
llmWatch.Stop();
|
llmWatch.Stop();
|
||||||
|
|
||||||
var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds;
|
var total = sttWatch.ElapsedMilliseconds + llmWatch.ElapsedMilliseconds;
|
||||||
@@ -246,7 +275,7 @@ if (command == "toggle")
|
|||||||
var stopWatch = Stopwatch.StartNew();
|
var stopWatch = Stopwatch.StartNew();
|
||||||
|
|
||||||
// 1. STT
|
// 1. STT
|
||||||
var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage);
|
var transcript = await groq.TranscribeAsync(wavPath, config.WhisperLanguage, config.WhisperModel);
|
||||||
if (string.IsNullOrWhiteSpace(transcript))
|
if (string.IsNullOrWhiteSpace(transcript))
|
||||||
{
|
{
|
||||||
if (!pipeToStdout) Notifications.Notify("Toak", "Could not transcribe audio.");
|
if (!pipeToStdout) Notifications.Notify("Toak", "Could not transcribe audio.");
|
||||||
@@ -259,7 +288,7 @@ if (command == "toggle")
|
|||||||
if (!rawOutput)
|
if (!rawOutput)
|
||||||
{
|
{
|
||||||
var systemPrompt = PromptBuilder.BuildPrompt(config);
|
var systemPrompt = PromptBuilder.BuildPrompt(config);
|
||||||
finalText = await groq.RefineTextAsync(transcript, systemPrompt);
|
finalText = await groq.RefineTextAsync(transcript, systemPrompt, config.LlmModel);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. Output
|
// 3. Output
|
||||||
|
|||||||
Reference in New Issue
Block a user