1
0

feat: Introduce a pluggable LLM provider system with token extraction, pricing, and updated setup configuration.

This commit is contained in:
2026-03-05 22:02:22 +01:00
parent 4476cc7f15
commit c7e7976d9d
12 changed files with 499 additions and 28 deletions

View File

@@ -0,0 +1,89 @@
using System.Net.Http.Headers;
using System.Text.Json;
namespace AnchorCli.Providers;
/// <summary>
/// Generic token extractor for any OpenAI-compatible endpoint.
/// Tries common header names and falls back to JSON body parsing.
/// </summary>
internal sealed class GenericTokenExtractor : ITokenExtractor
{
    public string ProviderName => "Generic";

    /// <summary>
    /// Extracts token usage from common usage headers, falling back to the
    /// standard OpenAI <c>usage</c> object in the JSON response body.
    /// Returns (inputTokens, outputTokens), or null when no usage info is found.
    /// </summary>
    public (int inputTokens, int outputTokens)? ExtractTokens(HttpResponseHeaders headers, string? responseBody)
    {
        // Check the split prompt/completion headers first. The previous logic
        // folded these into the same loop as the total-token headers and
        // returned (0, tokens), which misreported x-prompt-tokens (an input
        // count) as output tokens.
        var prompt = ReadIntHeader(headers, "x-prompt-tokens");
        var completion = ReadIntHeader(headers, "x-completion-tokens");
        if (prompt is not null || completion is not null)
        {
            return (prompt ?? 0, completion ?? 0);
        }

        // Total-only headers: the input/output split is unknown, so report
        // everything as output.
        var totalHeaderNames = new[]
        {
            "x-total-tokens",
            "x-ai-response-tokens",
            "x-tokens"
        };
        foreach (var headerName in totalHeaderNames)
        {
            if (ReadIntHeader(headers, headerName) is { } tokens)
            {
                return (0, tokens);
            }
        }

        // Fallback: standard OpenAI body format (usage.prompt_tokens /
        // usage.completion_tokens).
        if (!string.IsNullOrEmpty(responseBody))
        {
            try
            {
                using var doc = JsonDocument.Parse(responseBody);
                if (doc.RootElement.TryGetProperty("usage", out var usage))
                {
                    var p = ReadUsageValue(usage, "prompt_tokens");
                    var c = ReadUsageValue(usage, "completion_tokens");
                    if (p > 0 || c > 0)
                    {
                        return (p, c);
                    }
                }
            }
            catch (JsonException)
            {
                // Non-JSON or truncated bodies are expected from some
                // endpoints; treat as "no usage info" rather than failing.
            }
        }
        return null;
    }

    /// <summary>
    /// Reads the latency in milliseconds from common timing headers,
    /// or null when none is present.
    /// </summary>
    public int? ExtractLatency(HttpResponseHeaders headers)
    {
        var headerNames = new[]
        {
            "x-response-time",
            "x-response-timing",
            "x-latency-ms",
            "x-duration-ms"
        };
        foreach (var headerName in headerNames)
        {
            if (ReadIntHeader(headers, headerName) is { } latency)
            {
                return latency;
            }
        }
        return null;
    }

    // Reads the first value of a header as an int; null when missing or unparseable.
    private static int? ReadIntHeader(HttpResponseHeaders headers, string name)
    {
        if (headers.TryGetValues(name, out var values) &&
            int.TryParse(values.FirstOrDefault(), out var parsed))
        {
            return parsed;
        }
        return null;
    }

    // Reads a numeric usage property without throwing on missing or non-number values.
    private static int ReadUsageValue(JsonElement usage, string name) =>
        usage.TryGetProperty(name, out var v) &&
        v.ValueKind == JsonValueKind.Number &&
        v.TryGetInt32(out var n)
            ? n
            : 0;
}

61
Providers/GroqProvider.cs Normal file
View File

@@ -0,0 +1,61 @@
using System.Net.Http.Headers;
namespace AnchorCli.Providers;
/// <summary>
/// Token extractor for Groq responses.
/// </summary>
internal sealed class GroqTokenExtractor : ITokenExtractor
{
    public string ProviderName => "Groq";

    /// <summary>
    /// Extracts token usage from the x-groq-tokens header, falling back to the
    /// OpenAI-compatible <c>usage</c> object in the JSON response body.
    /// Returns (inputTokens, outputTokens), or null when unavailable.
    /// </summary>
    public (int inputTokens, int outputTokens)? ExtractTokens(HttpResponseHeaders headers, string? responseBody)
    {
        // Groq provides x-groq-tokens header (format: "n;<prompt_tokens>,n;<completion_tokens>")
        // NOTE(review): header name/format assumed by the original code — confirm against Groq docs.
        if (headers.TryGetValues("x-groq-tokens", out var values))
        {
            var tokenStr = values.FirstOrDefault();
            if (!string.IsNullOrEmpty(tokenStr))
            {
                // Parse format: "n;123,n;45" where first is prompt, second is completion
                var parts = tokenStr.Split(',');
                if (parts.Length >= 2)
                {
                    var inputPart = parts[0].Trim();
                    var outputPart = parts[1].Trim();
                    // Ordinal comparison: the "n;" prefix is a protocol token, not text.
                    if (inputPart.StartsWith("n;", StringComparison.Ordinal) &&
                        outputPart.StartsWith("n;", StringComparison.Ordinal) &&
                        int.TryParse(inputPart[2..], out var input) &&
                        int.TryParse(outputPart[2..], out var output))
                    {
                        return (input, output);
                    }
                }
            }
        }

        // Fallback: Groq's chat API is OpenAI-compatible, so the body carries a
        // standard usage object. (Replaces the previous TODO that silently
        // returned null when the header was absent.)
        if (!string.IsNullOrEmpty(responseBody))
        {
            try
            {
                using var doc = System.Text.Json.JsonDocument.Parse(responseBody);
                if (doc.RootElement.TryGetProperty("usage", out var usage))
                {
                    var prompt = ReadUsageValue(usage, "prompt_tokens");
                    var completion = ReadUsageValue(usage, "completion_tokens");
                    if (prompt > 0 || completion > 0)
                    {
                        return (prompt, completion);
                    }
                }
            }
            catch (System.Text.Json.JsonException)
            {
                // Non-JSON or truncated bodies: treat as "no usage info".
            }
        }
        return null;
    }

    /// <summary>
    /// Reads the latency in milliseconds from the x-groq-response-time header,
    /// or null when the header is absent or unparseable.
    /// </summary>
    public int? ExtractLatency(HttpResponseHeaders headers)
    {
        if (headers.TryGetValues("x-groq-response-time", out var values))
        {
            if (int.TryParse(values.FirstOrDefault(), out var latency))
            {
                return latency;
            }
        }
        return null;
    }

    // Reads a numeric usage property without throwing on missing or non-number values.
    private static int ReadUsageValue(System.Text.Json.JsonElement usage, string name) =>
        usage.TryGetProperty(name, out var v) &&
        v.ValueKind == System.Text.Json.JsonValueKind.Number &&
        v.TryGetInt32(out var n)
            ? n
            : 0;
}

View File

@@ -0,0 +1,18 @@
using AnchorCli.OpenRouter;
namespace AnchorCli.Providers;
/// <summary>
/// Interface for fetching model pricing information.
/// Implementations are expected to cache results where practical, since model
/// catalogs change rarely relative to request volume.
/// </summary>
internal interface IPricingProvider
{
/// <summary>
/// Fetches pricing info for a specific model.
/// </summary>
/// <param name="modelId">Provider-specific model identifier used as the lookup key.</param>
/// <param name="ct">Token for cancelling the underlying fetch.</param>
/// <returns>The model's info, or null when the model is unknown to the provider.</returns>
Task<ModelInfo?> GetModelInfoAsync(string modelId, CancellationToken ct = default);
/// <summary>
/// Fetches all available models with pricing.
/// </summary>
/// <param name="ct">Token for cancelling the underlying fetch.</param>
/// <returns>A dictionary keyed by model id.</returns>
Task<Dictionary<string, ModelInfo>> GetAllModelsAsync(CancellationToken ct = default);
}

View File

@@ -0,0 +1,25 @@
using System.Net.Http.Headers;
namespace AnchorCli.Providers;
/// <summary>
/// Interface for extracting token usage from provider responses.
/// Implementations are provider-specific because each API reports usage via
/// different headers and/or body formats.
/// </summary>
internal interface ITokenExtractor
{
/// <summary>
/// Extracts token usage from response headers and/or body.
/// Returns (inputTokens, outputTokens) or null if unavailable.
/// A side of the tuple may be 0 when the provider reports only a combined total.
/// </summary>
/// <param name="headers">Response headers of the completed HTTP call.</param>
/// <param name="responseBody">Raw response body, when the caller has it; may be null.</param>
(int inputTokens, int outputTokens)? ExtractTokens(HttpResponseHeaders headers, string? responseBody);
/// <summary>
/// Gets the latency from response headers (in ms), or null when not reported.
/// </summary>
int? ExtractLatency(HttpResponseHeaders headers);
/// <summary>
/// Gets the provider name for display purposes.
/// </summary>
string ProviderName { get; }
}

View File

@@ -0,0 +1,39 @@
using System.Net.Http.Headers;
namespace AnchorCli.Providers;
/// <summary>
/// Token extractor for Ollama responses.
/// Ollama reports no usage or timing headers, so the extractor methods always
/// yield null; callers can use <see cref="EstimateTokens"/> as an approximation.
/// </summary>
internal sealed class OllamaTokenExtractor : ITokenExtractor
{
    public string ProviderName => "Ollama";

    /// <summary>Always null: Ollama exposes no token-usage headers.</summary>
    public (int inputTokens, int outputTokens)? ExtractTokens(HttpResponseHeaders headers, string? responseBody) => null;

    /// <summary>Always null: Ollama exposes no latency headers.</summary>
    public int? ExtractLatency(HttpResponseHeaders headers) => null;

    /// <summary>
    /// Estimates token count from text length using the rough heuristic of
    /// ~4 characters per token; null/empty text estimates to 0.
    /// </summary>
    public static int EstimateTokens(string text) =>
        string.IsNullOrEmpty(text) ? 0 : text.Length / 4;
}

View File

@@ -0,0 +1,40 @@
using System.Net.Http.Json;
using System.Text.Json;
using AnchorCli.OpenRouter;
namespace AnchorCli.Providers;
/// <summary>
/// Pricing provider for OpenRouter API.
/// The model catalog is fetched once per instance and cached for all later calls.
/// </summary>
internal sealed class OpenRouterProvider : IPricingProvider
{
    private const string ModelsUrl = "https://openrouter.ai/api/v1/models";

    // A single shared HttpClient avoids socket exhaustion from per-call clients.
    private static readonly HttpClient Http = new();

    // Cache the fetch Task (not just the result) so concurrent first calls
    // share one HTTP request instead of each issuing their own and racing on
    // the cached field, as the previous null-check/assign pattern allowed.
    private Task<Dictionary<string, ModelInfo>>? _modelsTask;
    private readonly object _gate = new();

    static OpenRouterProvider()
    {
        OpenRouterHeaders.ApplyTo(Http);
    }

    /// <summary>
    /// Fetches all available models with pricing, caching the result.
    /// A faulted or canceled fetch is not cached, so subsequent calls retry.
    /// Note: the token of the call that starts the fetch governs cancellation;
    /// later callers share that in-flight request.
    /// </summary>
    public Task<Dictionary<string, ModelInfo>> GetAllModelsAsync(CancellationToken ct = default)
    {
        lock (_gate)
        {
            if (_modelsTask is null || _modelsTask.IsFaulted || _modelsTask.IsCanceled)
            {
                _modelsTask = FetchModelsAsync(ct);
            }
            return _modelsTask;
        }
    }

    // Performs the actual HTTP fetch and deserialization of the model catalog.
    private static async Task<Dictionary<string, ModelInfo>> FetchModelsAsync(CancellationToken ct)
    {
        var response = await Http.GetAsync(ModelsUrl, ct).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();
        var json = await response.Content.ReadAsStringAsync(ct).ConfigureAwait(false);
        var result = JsonSerializer.Deserialize(json, AppJsonContext.Default.ModelsResponse);
        return result?.Data?.ToDictionary(m => m.Id) ?? [];
    }

    /// <summary>
    /// Fetches pricing info for a specific model; null when the id is unknown.
    /// </summary>
    public async Task<ModelInfo?> GetModelInfoAsync(string modelId, CancellationToken ct = default)
    {
        var models = await GetAllModelsAsync(ct).ConfigureAwait(false);
        return models.GetValueOrDefault(modelId);
    }
}

View File

@@ -0,0 +1,42 @@
using System.Net.Http.Headers;
namespace AnchorCli.Providers;
/// <summary>
/// Token extractor for OpenRouter responses.
/// </summary>
internal sealed class OpenRouterTokenExtractor : ITokenExtractor
{
    public string ProviderName => "OpenRouter";

    /// <summary>
    /// Reads the combined token count from the x-total-tokens header.
    /// OpenRouter reports only a total, so without the request-side count the
    /// split is unknown and the whole amount is surfaced as output tokens.
    /// </summary>
    public (int inputTokens, int outputTokens)? ExtractTokens(HttpResponseHeaders headers, string? responseBody)
    {
        if (!headers.TryGetValues("x-total-tokens", out var values))
        {
            return null;
        }
        if (!long.TryParse(values.FirstOrDefault(), out var total))
        {
            return null;
        }
        // Placeholder split until input tokens are tracked from the request side.
        return (0, (int)total);
    }

    /// <summary>
    /// Reads the latency in milliseconds from x-response-timing, or null.
    /// </summary>
    public int? ExtractLatency(HttpResponseHeaders headers)
    {
        if (!headers.TryGetValues("x-response-timing", out var values))
        {
            return null;
        }
        return int.TryParse(values.FirstOrDefault(), out var latency)
            ? latency
            : null;
    }
}

View File

@@ -0,0 +1,70 @@
namespace AnchorCli.Providers;
/// <summary>
/// Factory for creating provider instances based on endpoint or provider name.
/// </summary>
internal static class ProviderFactory
{
    /// <summary>
    /// Creates a token extractor based on the provider name.
    /// Unknown, null, or empty names fall back to the generic extractor
    /// (the previous version threw NullReferenceException on null).
    /// </summary>
    public static ITokenExtractor CreateTokenExtractor(string providerName)
    {
        if (string.IsNullOrEmpty(providerName))
        {
            return new GenericTokenExtractor();
        }
        return providerName.ToLowerInvariant() switch
        {
            "openrouter" => new OpenRouterTokenExtractor(),
            "groq" => new GroqTokenExtractor(),
            "ollama" => new OllamaTokenExtractor(),
            _ => new GenericTokenExtractor()
        };
    }

    /// <summary>
    /// Creates a token extractor by auto-detecting from the endpoint URL.
    /// </summary>
    public static ITokenExtractor CreateTokenExtractorForEndpoint(string endpoint)
    {
        if (string.IsNullOrEmpty(endpoint))
        {
            return new GenericTokenExtractor();
        }
        // Ordinal case-insensitive Contains matches the comparison style used
        // by IsOpenRouter and avoids the lowercased copy the old code allocated.
        if (Has(endpoint, "openrouter"))
        {
            return new OpenRouterTokenExtractor();
        }
        if (Has(endpoint, "groq"))
        {
            return new GroqTokenExtractor();
        }
        // Heuristic: local endpoints are assumed to be Ollama.
        if (Has(endpoint, "ollama") || Has(endpoint, "localhost") || Has(endpoint, "127.0.0.1"))
        {
            return new OllamaTokenExtractor();
        }
        return new GenericTokenExtractor();
    }

    /// <summary>
    /// Creates a pricing provider based on the provider name.
    /// Only OpenRouter has a pricing API currently; all other names yield null.
    /// </summary>
    public static IPricingProvider? CreatePricingProvider(string providerName)
    {
        if (string.IsNullOrEmpty(providerName))
        {
            return null;
        }
        return providerName.ToLowerInvariant() switch
        {
            "openrouter" => new OpenRouterProvider(),
            _ => null // Other providers don't have pricing APIs yet
        };
    }

    /// <summary>
    /// Determines if an endpoint is OpenRouter.
    /// </summary>
    public static bool IsOpenRouter(string endpoint) =>
        !string.IsNullOrEmpty(endpoint) && endpoint.Contains("openrouter", StringComparison.OrdinalIgnoreCase);

    // Shared ordinal case-insensitive substring check for the endpoint heuristics.
    private static bool Has(string endpoint, string token) =>
        endpoint.Contains(token, StringComparison.OrdinalIgnoreCase);
}