feat: parallel async processing and compact output mode

Major performance improvements:
- Parallel search execution across all queries
- Parallel article fetching with 10 concurrent limit
- Parallel embeddings with rate limiting (4 concurrent)
- Polly integration for retry resilience

New features:
- Add -v/--verbose flag for detailed output
- Compact single-line status mode with braille spinner
- StatusReporter service for unified output handling
- Query generation and errors hidden in compact mode
- ANSI escape codes for clean line updates

New files:
- Services/RateLimiter.cs - Semaphore-based concurrency control
- Services/StatusReporter.cs - Verbose/compact output handler
- Models/ParallelOptions.cs - Parallel processing configuration

All changes maintain Native AOT compatibility.
This commit is contained in:
2026-03-18 22:16:28 +01:00
parent 9d4bec7a17
commit b28d8998f7
9 changed files with 579 additions and 109 deletions

View File

@@ -1,4 +1,7 @@
using System.Numerics.Tensors;
using OpenQuery.Models;
using Polly;
using Polly.Retry;
namespace OpenQuery.Services;
@@ -6,33 +9,156 @@ public class EmbeddingService
{
// Client used for all embedding API calls.
private readonly OpenRouterClient _client;
// Model identifier sent with every embedding request.
private readonly string _embeddingModel;
// Batch size and concurrency settings for parallel embedding generation.
private readonly ParallelProcessingOptions _options;
// Caps concurrent embedding requests at _options.MaxConcurrentEmbeddingRequests.
private readonly RateLimiter _rateLimiter;
// Polly pipeline retrying HttpRequestException with exponential backoff.
private readonly ResiliencePipeline _retryPipeline;
/// <summary>
/// Creates an embedding service backed by the given client.
/// </summary>
/// <param name="client">Client used for embedding API calls.</param>
/// <param name="embeddingModel">Embedding model identifier.</param>
/// <param name="options">
/// Optional parallelism/batching configuration; defaults are used when null,
/// so existing callers are unaffected.
/// </param>
public EmbeddingService(
    OpenRouterClient client,
    string embeddingModel = "openai/text-embedding-3-small",
    ParallelProcessingOptions? options = null)
{
    _client = client;
    _embeddingModel = embeddingModel;
    _options = options ?? new ParallelProcessingOptions();
    _rateLimiter = new RateLimiter(_options.MaxConcurrentEmbeddingRequests);
    // Retry transient HTTP failures up to 3 times with exponential backoff
    // starting at 1 second.
    _retryPipeline = new ResiliencePipelineBuilder()
        .AddRetry(new RetryStrategyOptions
        {
            MaxRetryAttempts = 3,
            Delay = TimeSpan.FromSeconds(1),
            BackoffType = DelayBackoffType.Exponential,
            ShouldHandle = new PredicateBuilder()
                .Handle<HttpRequestException>()
        })
        .Build();
}
/// <summary>
/// Embeds <paramref name="texts"/> in parallel batches and reassembles the
/// results in input order. A batch that fails after retries yields empty
/// embeddings rather than failing the whole call.
/// </summary>
/// <param name="texts">Texts to embed; split into batches of the configured size.</param>
/// <param name="onProgress">Optional status callback invoked as each batch starts.</param>
/// <param name="cancellationToken">Cancels outstanding batch requests.</param>
/// <returns>One embedding vector per input text, in input order.</returns>
public async Task<float[][]> GetEmbeddingsAsync(
    List<string> texts,
    Action<string>? onProgress = null,
    CancellationToken cancellationToken = default)
{
    var batchSize = _options.EmbeddingBatchSize;
    var totalBatches = (int)Math.Ceiling(texts.Count / (double)batchSize);
    var results = new List<(int BatchIndex, float[][] Embeddings)>();
    // Dedicated gate: never lock the collection being mutated.
    var gate = new object();

    await Parallel.ForEachAsync(
        Enumerable.Range(0, totalBatches),
        new ParallelOptions
        {
            MaxDegreeOfParallelism = _options.MaxConcurrentEmbeddingRequests,
            CancellationToken = cancellationToken
        },
        async (batchIndex, ct) =>
        {
            var batch = texts.Skip(batchIndex * batchSize).Take(batchSize).ToList();
            onProgress?.Invoke($"[Generating embeddings: batch {batchIndex + 1}/{totalBatches}]");
            try
            {
                var batchResults = await _rateLimiter.ExecuteAsync(async () =>
                    await _retryPipeline.ExecuteAsync(async token =>
                        await _client.EmbedAsync(_embeddingModel, batch),
                    ct),
                ct);
                lock (gate)
                {
                    results.Add((batchIndex, batchResults));
                }
            }
            // Let cancellation propagate; only genuine failures fall back to
            // empty embeddings for this batch (best-effort semantics).
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                var emptyBatch = new float[batch.Count][];
                for (var i = 0; i < batch.Count; i++)
                {
                    emptyBatch[i] = [];
                }
                lock (gate)
                {
                    results.Add((batchIndex, emptyBatch));
                }
            }
        });

    // Batches complete in arbitrary order; restore input order by batch index.
    return results
        .OrderBy(r => r.BatchIndex)
        .SelectMany(r => r.Embeddings)
        .ToArray();
}
/// <summary>
/// Embeds a single text, subject to the shared rate limiter and retry pipeline.
/// </summary>
/// <param name="text">Text to embed.</param>
/// <param name="cancellationToken">Cancels the request.</param>
/// <returns>The embedding vector for <paramref name="text"/>.</returns>
public async Task<float[]> GetEmbeddingAsync(
    string text,
    CancellationToken cancellationToken = default)
{
    var embeddings = await _rateLimiter.ExecuteAsync(async () =>
        await _retryPipeline.ExecuteAsync(async token =>
            await _client.EmbedAsync(_embeddingModel, [text]),
        cancellationToken),
        cancellationToken);

    // Exactly one input text was sent, so the first vector is the result.
    return embeddings[0];
}
/// <summary>
/// Embeds <paramref name="texts"/> in parallel batches, writing each batch's
/// result into a pre-sized per-batch slot so the output preserves input order
/// without locking. A batch that fails after retries yields empty embeddings.
/// </summary>
/// <param name="texts">Texts to embed; split into batches of the configured size.</param>
/// <param name="onProgress">Optional status callback; reports batches *started*, not finished.</param>
/// <param name="cancellationToken">Cancels outstanding batch requests.</param>
/// <returns>One embedding vector per input text, in input order.</returns>
public async Task<float[][]> GetEmbeddingsWithRateLimitAsync(
    List<string> texts,
    Action<string>? onProgress = null,
    CancellationToken cancellationToken = default)
{
    var batchSize = _options.EmbeddingBatchSize;
    var totalBatches = (int)Math.Ceiling(texts.Count / (double)batchSize);
    // One slot per batch: indexed writes need no lock and keep input order.
    var results = new float[totalBatches][][];
    var startedBatches = 0;

    await Parallel.ForEachAsync(
        Enumerable.Range(0, totalBatches),
        new ParallelOptions
        {
            MaxDegreeOfParallelism = _options.MaxConcurrentEmbeddingRequests,
            CancellationToken = cancellationToken
        },
        async (batchIndex, ct) =>
        {
            var batch = texts.Skip(batchIndex * batchSize).Take(batchSize).ToList();
            var currentBatch = Interlocked.Increment(ref startedBatches);
            onProgress?.Invoke($"[Generating embeddings: batch {currentBatch}/{totalBatches}]");
            try
            {
                results[batchIndex] = await _rateLimiter.ExecuteAsync(async () =>
                    await _retryPipeline.ExecuteAsync(async token =>
                        await _client.EmbedAsync(_embeddingModel, batch),
                    ct),
                ct);
            }
            // Let cancellation propagate so the caller's token actually stops
            // the operation; only genuine failures get empty placeholders.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                var placeholder = new float[batch.Count][];
                for (var i = 0; i < batch.Count; i++)
                {
                    placeholder[i] = [];
                }
                results[batchIndex] = placeholder;
            }
        });

    return results.SelectMany(r => r).ToArray();
}
/// <summary>
/// Cosine similarity of two vectors; delegates to
/// <see cref="TensorPrimitives.CosineSimilarity(ReadOnlySpan{float}, ReadOnlySpan{float})"/>.
/// </summary>
public static float CosineSimilarity(float[] vector1, float[] vector2)
    => TensorPrimitives.CosineSimilarity(vector1, vector2);
}
}