feat: parallel async processing and compact output mode
Major performance improvements:
- Parallel search execution across all queries
- Parallel article fetching with 10 concurrent limit
- Parallel embeddings with rate limiting (4 concurrent)
- Polly integration for retry resilience

New features:
- Add -v/--verbose flag for detailed output
- Compact single-line status mode with braille spinner
- StatusReporter service for unified output handling
- Query generation and errors hidden in compact mode
- ANSI escape codes for clean line updates

New files:
- Services/RateLimiter.cs - Semaphore-based concurrency control
- Services/StatusReporter.cs - Verbose/compact output handler
- Models/ParallelOptions.cs - Parallel processing configuration

All changes maintain Native AOT compatibility.
This commit is contained in:
@@ -1,4 +1,7 @@
|
||||
using System.Numerics.Tensors;
|
||||
using OpenQuery.Models;
|
||||
using Polly;
|
||||
using Polly.Retry;
|
||||
|
||||
namespace OpenQuery.Services;
|
||||
|
||||
/// <summary>
/// Generates text embeddings through an <see cref="OpenRouterClient"/>, batching requests,
/// running batches in parallel under a shared rate limit, and retrying transient HTTP failures.
/// </summary>
public class EmbeddingService
{
    private readonly OpenRouterClient _client;
    private readonly string _embeddingModel;
    private readonly ParallelProcessingOptions _options;
    private readonly RateLimiter _rateLimiter;
    private readonly ResiliencePipeline _retryPipeline;

    public EmbeddingService(OpenRouterClient client, string embeddingModel = "openai/text-embedding-3-small")
    {
        _client = client;
        _embeddingModel = embeddingModel;
        _options = new ParallelProcessingOptions();
        _rateLimiter = new RateLimiter(_options.MaxConcurrentEmbeddingRequests);

        // Retry transient HTTP failures up to 3 times with exponential backoff (1s, 2s, 4s).
        _retryPipeline = new ResiliencePipelineBuilder()
            .AddRetry(new RetryStrategyOptions
            {
                MaxRetryAttempts = 3,
                Delay = TimeSpan.FromSeconds(1),
                BackoffType = DelayBackoffType.Exponential,
                ShouldHandle = new PredicateBuilder()
                    .Handle<HttpRequestException>()
            })
            .Build();
    }

    /// <summary>
    /// Embeds <paramref name="texts"/> in parallel batches, preserving input order.
    /// A batch that still fails after all retries contributes empty vectors instead of
    /// failing the whole call; cancellation, however, always propagates.
    /// </summary>
    /// <param name="texts">Texts to embed; order of the returned vectors matches this list.</param>
    /// <param name="onProgress">Optional per-batch progress callback (may fire from worker threads).</param>
    /// <param name="cancellationToken">Cancels outstanding batches.</param>
    public async Task<float[][]> GetEmbeddingsAsync(
        List<string> texts,
        Action<string>? onProgress = null,
        CancellationToken cancellationToken = default)
    {
        var batchSize = _options.EmbeddingBatchSize;
        var totalBatches = (int)Math.Ceiling(texts.Count / (double)batchSize);
        var results = new List<(int batchIndex, float[][] embeddings)>();

        var batchIndices = Enumerable.Range(0, totalBatches).ToList();

        await Parallel.ForEachAsync(
            batchIndices,
            new ParallelOptions
            {
                MaxDegreeOfParallelism = _options.MaxConcurrentEmbeddingRequests,
                CancellationToken = cancellationToken
            },
            async (batchIndex, ct) =>
            {
                var startIndex = batchIndex * batchSize;
                var batch = texts.Skip(startIndex).Take(batchSize).ToList();

                onProgress?.Invoke($"[Generating embeddings: batch {batchIndex + 1}/{totalBatches}]");

                try
                {
                    // NOTE(review): EmbedAsync exposes no CancellationToken parameter, so the
                    // retry pipeline's token cannot be forwarded into the HTTP call itself —
                    // confirm against OpenRouterClient whether an overload exists.
                    var batchResults = await _rateLimiter.ExecuteAsync(async () =>
                            await _retryPipeline.ExecuteAsync(async token =>
                                    await _client.EmbedAsync(_embeddingModel, batch),
                                ct),
                        ct);

                    lock (results)
                    {
                        results.Add((batchIndex, batchResults));
                    }
                }
                catch (OperationCanceledException)
                {
                    // Cancellation is not a batch failure — let Parallel.ForEachAsync observe it.
                    throw;
                }
                catch
                {
                    // Best-effort: a failed batch yields empty embeddings so the run can continue.
                    var emptyBatch = new float[batch.Count][];
                    for (var i = 0; i < batch.Count; i++)
                    {
                        emptyBatch[i] = [];
                    }
                    lock (results)
                    {
                        results.Add((batchIndex, emptyBatch));
                    }
                }
            });

        // Batches complete out of order; reassemble by batch index before flattening.
        var orderedResults = results
            .OrderBy(r => r.batchIndex)
            .SelectMany(r => r.embeddings)
            .ToArray();

        return orderedResults;
    }

    /// <summary>Embeds a single text, honoring the shared rate limit and retry policy.</summary>
    public async Task<float[]> GetEmbeddingAsync(
        string text,
        CancellationToken cancellationToken = default)
    {
        var results = await _rateLimiter.ExecuteAsync(async () =>
                await _retryPipeline.ExecuteAsync(async token =>
                        await _client.EmbedAsync(_embeddingModel, [text]),
                    cancellationToken),
            cancellationToken);

        return results[0];
    }

    /// <summary>
    /// Same contract as <see cref="GetEmbeddingsAsync"/>, but progress reports a running count
    /// of started batches and results land in a pre-sized slot-per-batch array (no lock needed,
    /// since each slot is written by exactly one worker).
    /// </summary>
    public async Task<float[][]> GetEmbeddingsWithRateLimitAsync(
        List<string> texts,
        Action<string>? onProgress = null,
        CancellationToken cancellationToken = default)
    {
        var batchSize = _options.EmbeddingBatchSize;
        var totalBatches = (int)Math.Ceiling(texts.Count / (double)batchSize);
        var results = new float[totalBatches][][];

        var completedBatches = 0;

        await Parallel.ForEachAsync(
            Enumerable.Range(0, totalBatches),
            new ParallelOptions
            {
                MaxDegreeOfParallelism = _options.MaxConcurrentEmbeddingRequests,
                CancellationToken = cancellationToken
            },
            async (batchIndex, ct) =>
            {
                var startIndex = batchIndex * batchSize;
                var batch = texts.Skip(startIndex).Take(batchSize).ToList();

                // Progress counts *started* batches, not finished ones.
                var currentBatch = Interlocked.Increment(ref completedBatches);
                onProgress?.Invoke($"[Generating embeddings: batch {currentBatch}/{totalBatches}]");

                try
                {
                    var batchResults = await _rateLimiter.ExecuteAsync(async () =>
                            await _retryPipeline.ExecuteAsync(async token =>
                                    await _client.EmbedAsync(_embeddingModel, batch),
                                ct),
                        ct);

                    results[batchIndex] = batchResults;
                }
                catch (OperationCanceledException)
                {
                    // Propagate cancellation instead of recording it as a failed batch.
                    throw;
                }
                catch
                {
                    // Skip failed batches: fill the slot with empty vectors.
                    results[batchIndex] = new float[batch.Count][];
                    for (var i = 0; i < batch.Count; i++)
                    {
                        results[batchIndex][i] = [];
                    }
                }
            });

        // Every slot was written exactly once; flatten in batch order.
        return results.SelectMany(r => r).ToArray();
    }

    /// <summary>Cosine similarity via SIMD-accelerated <see cref="TensorPrimitives"/>.</summary>
    public static float CosineSimilarity(float[] vector1, float[] vector2)
    {
        return TensorPrimitives.CosineSimilarity(vector1, vector2);
    }
}
|
||||
|
||||
Services/RateLimiter.cs — new file, 42 lines
@@ -0,0 +1,42 @@
|
||||
namespace OpenQuery.Services;
|
||||
|
||||
/// <summary>
/// Caps the number of concurrently executing asynchronous operations using a
/// <see cref="SemaphoreSlim"/>. All callers share the same permit pool.
/// </summary>
public sealed class RateLimiter : IAsyncDisposable
{
    private readonly SemaphoreSlim _semaphore;

    /// <param name="maxConcurrentRequests">Maximum operations allowed in flight; must be positive.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="maxConcurrentRequests"/> is zero or negative — a zero-permit
    /// semaphore would block every caller forever.
    /// </exception>
    public RateLimiter(int maxConcurrentRequests)
    {
        if (maxConcurrentRequests <= 0)
            throw new ArgumentOutOfRangeException(nameof(maxConcurrentRequests),
                maxConcurrentRequests, "Concurrency limit must be positive.");

        _semaphore = new SemaphoreSlim(maxConcurrentRequests, maxConcurrentRequests);
    }

    /// <summary>
    /// Runs <paramref name="action"/> once a permit is available and returns its result.
    /// The permit is always released, even when the action throws.
    /// </summary>
    public async Task<T> ExecuteAsync<T>(Func<Task<T>> action, CancellationToken cancellationToken = default)
    {
        await _semaphore.WaitAsync(cancellationToken);
        try
        {
            return await action();
        }
        finally
        {
            _semaphore.Release();
        }
    }

    /// <summary>Runs <paramref name="action"/> once a permit is available.</summary>
    public async Task ExecuteAsync(Func<Task> action, CancellationToken cancellationToken = default)
    {
        await _semaphore.WaitAsync(cancellationToken);
        try
        {
            await action();
        }
        finally
        {
            _semaphore.Release();
        }
    }

    /// <summary>Disposes the underlying semaphore. Synchronous; no await needed (avoids CS1998).</summary>
    public ValueTask DisposeAsync()
    {
        _semaphore.Dispose();
        return ValueTask.CompletedTask;
    }
}
|
||||
Services/StatusReporter.cs — new file, 128 lines
@@ -0,0 +1,128 @@
|
||||
using System.Threading.Channels;
|
||||
|
||||
namespace OpenQuery.Services;
|
||||
|
||||
/// <summary>
/// Unified console output: verbose mode prints every message on its own line; compact mode
/// keeps a single status line updated in place (ANSI clear + braille spinner).
/// Status updates flow through a channel so callers never block on console I/O.
/// </summary>
public class StatusReporter : IDisposable
{
    private readonly bool _verbose;
    private readonly char[] _spinnerChars = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
    // Written by the channel processor, read by the spinner task — volatile for cross-thread visibility.
    private volatile string? _currentMessage;
    private CancellationTokenSource? _spinnerCts;
    private Task? _spinnerTask;
    private readonly Channel<string> _statusChannel;
    private readonly Task _statusProcessor;

    public StatusReporter(bool verbose)
    {
        _verbose = verbose;
        _statusChannel = Channel.CreateUnbounded<string>();
        _statusProcessor = ProcessStatusUpdatesAsync();
    }

    /// <summary>Drains queued status messages until the channel is completed in Dispose.</summary>
    private async Task ProcessStatusUpdatesAsync()
    {
        await foreach (var message in _statusChannel.Reader.ReadAllAsync())
        {
            if (_verbose)
            {
                Console.WriteLine(message);
                continue;
            }

            // Clear current line using ANSI escape code, then rewrite the status.
            Console.Write("\r\x1b[K");

            // Use the first spinner char for static updates; the spinner task animates it.
            Console.Write($"{_spinnerChars[0]} {message}");

            _currentMessage = message;
        }
    }

    /// <summary>Queues a status message; never blocks the caller.</summary>
    public void UpdateStatus(string message)
    {
        _statusChannel.Writer.TryWrite(message);
    }

    /// <summary>Erases the compact status line (no-op in verbose mode).</summary>
    public void ClearStatus()
    {
        if (_verbose) return;

        Console.Write("\r\x1b[K");
        _currentMessage = null;
    }

    /// <summary>Stops the spinner, replaces the status line with <paramref name="text"/>, and ends the line.</summary>
    public void WriteFinal(string text)
    {
        if (_verbose)
        {
            Console.WriteLine(text);
            return;
        }

        StopSpinner();
        Console.Write("\r\x1b[K");
        Console.Write(text);
        Console.WriteLine();
    }

    /// <summary>Starts the background spinner animation (compact mode only; idempotent).</summary>
    public void StartSpinner()
    {
        if (_verbose || _spinnerCts != null) return;

        // Capture the CTS locally so the loop never dereferences a field that StopSpinner
        // nulls out. Deliberately NOT passed to Task.Run: a token-cancelled-before-start
        // task ends Canceled, and StopSpinner's blocking wait would then throw.
        var cts = new CancellationTokenSource();
        _spinnerCts = cts;
        _spinnerTask = Task.Run(async () =>
        {
            var index = 0;
            while (!cts.IsCancellationRequested)
            {
                if (_currentMessage != null)
                {
                    Console.Write("\r\x1b[K");
                    var charIndex = index++ % _spinnerChars.Length;
                    Console.Write($"{_spinnerChars[charIndex]} {_currentMessage}");
                }

                try
                {
                    await Task.Delay(100, cts.Token);
                }
                catch (TaskCanceledException)
                {
                    break;
                }
            }
        });
    }

    /// <summary>Cancels the spinner and waits for its last frame to finish (idempotent).</summary>
    public void StopSpinner()
    {
        if (_spinnerCts == null) return;

        _spinnerCts.Cancel();
        // Safe blocking join: the loop swallows its own cancellation, so the task always
        // completes successfully.
        _spinnerTask?.GetAwaiter().GetResult();
        _spinnerCts.Dispose();
        _spinnerCts = null;
        _spinnerTask = null;
    }

    /// <summary>Writes a permanent line, pausing the compact status display around it.</summary>
    public void WriteLine(string text)
    {
        if (_verbose)
        {
            Console.WriteLine(text);
            return;
        }

        StopSpinner();
        ClearStatus();
        Console.WriteLine(text);
    }

    public void Dispose()
    {
        // Complete the channel, then block until every queued status has been flushed.
        _statusChannel.Writer.Complete();
        _statusProcessor.GetAwaiter().GetResult();
        StopSpinner();
    }
}
|
||||
Reference in New Issue
Block a user