38 lines
1.2 KiB
C#
38 lines
1.2 KiB
C#
using System.Numerics.Tensors;
|
|
|
|
namespace OpenQuery.Services;
|
|
|
|
/// <summary>
/// Requests text embeddings through an <see cref="OpenRouterClient"/> and exposes a
/// cosine-similarity helper for comparing embedding vectors.
/// </summary>
public class EmbeddingService
{
    /// <summary>Maximum number of texts sent to the client in a single embedding request.</summary>
    private const int BatchSize = 300;

    private readonly OpenRouterClient _client;
    private readonly string _embeddingModel;

    /// <summary>
    /// Creates a service that sends embedding requests through <paramref name="client"/>.
    /// </summary>
    /// <param name="client">Client used to issue the embedding requests.</param>
    /// <param name="embeddingModel">
    /// Model identifier passed to the client on every request.
    /// Defaults to <c>openai/text-embedding-3-small</c>.
    /// </param>
    public EmbeddingService(OpenRouterClient client, string embeddingModel = "openai/text-embedding-3-small")
    {
        _client = client;
        _embeddingModel = embeddingModel;
    }

    /// <summary>
    /// Embeds <paramref name="texts"/> in consecutive batches of at most 300 items and
    /// concatenates the per-batch results in batch order.
    /// </summary>
    /// <param name="texts">Texts to embed. An empty list yields an empty result without calling the client.</param>
    /// <returns>
    /// The vectors returned by the client for each batch, appended in request order.
    /// NOTE(review): presumably one vector per input text, in input order; confirm against
    /// <c>OpenRouterClient.EmbedAsync</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="texts"/> is <c>null</c>.</exception>
    public async Task<float[][]> GetEmbeddingsAsync(List<string> texts)
    {
        ArgumentNullException.ThrowIfNull(texts);

        var results = new List<float[]>(texts.Count);

        // Ceiling division written so it cannot overflow for very large lists.
        var totalBatches = (texts.Count - 1) / BatchSize + 1;

        for (var offset = 0; offset < texts.Count; offset += BatchSize)
        {
            // Progress is only reported when the input needs more than one request.
            if (texts.Count > BatchSize)
            {
                // 1-based batch index so the first line reads "1/N" and the last "N/N".
                var currentBatch = offset / BatchSize + 1;
                Console.WriteLine($"[Generating {currentBatch}/{totalBatches} batch of embeddings]");
            }

            // The final batch may be shorter than BatchSize.
            var batch = texts.GetRange(offset, Math.Min(BatchSize, texts.Count - offset));
            var batchResults = await _client.EmbedAsync(_embeddingModel, batch);
            results.AddRange(batchResults);
        }

        return results.ToArray();
    }

    /// <summary>
    /// Computes the cosine similarity of two vectors by delegating to
    /// <see cref="TensorPrimitives.CosineSimilarity(ReadOnlySpan{float}, ReadOnlySpan{float})"/>.
    /// </summary>
    /// <param name="vector1">First vector.</param>
    /// <param name="vector2">Second vector.</param>
    /// <returns>The value returned by <c>TensorPrimitives.CosineSimilarity</c> for the two inputs.</returns>
    public static float CosineSimilarity(float[] vector1, float[] vector2)
    {
        return TensorPrimitives.CosineSimilarity(vector1, vector2);
    }
}