// File listing metadata: 89 lines, 3.2 KiB, C#
using OpenQuery.Models;
|
|
using OpenQuery.Services;
|
|
|
|
namespace OpenQuery.Tools;
|
|
|
|
/// <summary>
/// Web search tool. Runs a set of queries through <see cref="SearxngClient"/>,
/// fetches and chunks the resulting articles, scores every chunk against the
/// original query using embeddings, and returns the best-scoring chunks as a
/// single formatted context string.
/// </summary>
public class SearchTool
{
    private readonly SearxngClient _searxngClient;
    private readonly EmbeddingService _embeddingService;

    /// <summary>Name of the tool: <c>"search"</c>.</summary>
    public static string Name => "search";

    /// <summary>Human-readable description of the tool.</summary>
    public static string Description => "Search the web for information on a topic";

    /// <summary>Creates a search tool backed by the given search client and embedding service.</summary>
    /// <param name="searxngClient">Client used to run each search query.</param>
    /// <param name="embeddingService">Service used to embed the chunks and the original query.</param>
    public SearchTool(
        SearxngClient searxngClient,
        EmbeddingService embeddingService)
    {
        _searxngClient = searxngClient;
        _embeddingService = embeddingService;
    }

    /// <summary>
    /// Searches for every entry in <paramref name="generatedQueries"/>, extracts text chunks
    /// from the de-duplicated result pages, and returns the chunks most similar to
    /// <paramref name="originalQuery"/>.
    /// </summary>
    /// <param name="originalQuery">Query whose embedding the chunks are ranked against.</param>
    /// <param name="generatedQueries">Queries actually sent to the search client, one request each.</param>
    /// <param name="maxResults">Result limit passed to the search client for each query.</param>
    /// <param name="topChunksLimit">Maximum number of ranked chunks included in the returned context.</param>
    /// <param name="onProgress">Optional callback that receives human-readable progress messages.</param>
    /// <returns>
    /// The selected chunks joined by blank lines, each prefixed with a
    /// <c>[Source N: title](url)</c> header; or a fixed explanatory message when no
    /// search results or no readable content could be obtained.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="generatedQueries"/> is null.</exception>
    public async Task<string> ExecuteAsync(string originalQuery, List<string> generatedQueries, int maxResults, int topChunksLimit, Action<string>? onProgress = null)
    {
        // Fail with a descriptive error instead of a NullReferenceException from the loop below.
        ArgumentNullException.ThrowIfNull(generatedQueries);

        var allResults = new List<SearxngResult>();

        foreach (var query in generatedQueries)
        {
            onProgress?.Invoke($"[Searching web for '{query}'...]");
            var results = await _searxngClient.SearchAsync(query, maxResults);
            allResults.AddRange(results);
        }

        // Different queries frequently return the same page; fetch each URL only once.
        var uniqueResults = allResults.DistinctBy(r => r.Url).ToList();

        if (uniqueResults.Count == 0)
        {
            return "No search results found.";
        }

        onProgress?.Invoke($"[Found {uniqueResults.Count} unique results across all queries. Fetching and reading articles...]");
        var chunks = await FetchChunksAsync(uniqueResults);

        if (chunks.Count == 0)
        {
            return "Found search results but could not extract readable content.";
        }

        onProgress?.Invoke($"[Extracted {chunks.Count} text chunks. Generating embeddings for semantic search...]");
        var chunkTexts = chunks.Select(c => c.Content).ToList();
        var embeddings = await _embeddingService.GetEmbeddingsAsync(chunkTexts);

        // NOTE(review): assumes the service returns exactly one embedding per input text,
        // in input order - confirm against EmbeddingService.
        for (var i = 0; i < chunks.Count; i++)
        {
            chunks[i] = chunks[i] with { Embedding = embeddings[i] };
        }

        var queryEmbedding = (await _embeddingService.GetEmbeddingsAsync([originalQuery]))[0];

        foreach (var chunk in chunks)
        {
            chunk.Score = EmbeddingService.CosineSimilarity(queryEmbedding, chunk.Embedding!);
        }

        var topChunks = chunks.OrderByDescending(c => c.Score).Take(topChunksLimit).ToList();

        onProgress?.Invoke($"[Found top {topChunks.Count} most relevant chunks overall. Generating answer...]");
        var context = string.Join("\n\n", topChunks.Select((c, i) =>
            $"[Source {i + 1}: {c.Title ?? "Unknown"}]({c.SourceUrl})\n{c.Content}"));

        return context;
    }

    /// <summary>
    /// Synchronous entry point that is intentionally unsupported; it always throws.
    /// </summary>
    /// <param name="argumentsJson">Unused.</param>
    /// <exception cref="InvalidOperationException">Always thrown; call <see cref="ExecuteAsync"/> instead.</exception>
    public static string Execute(string argumentsJson)
    {
        throw new InvalidOperationException("Use ExecuteAsync instead");
    }

    /// <summary>
    /// Fetches each result's article and splits its text into chunks, skipping results
    /// that are unreadable, empty, or fail to load.
    /// </summary>
    private static async Task<List<Chunk>> FetchChunksAsync(IEnumerable<SearxngResult> results)
    {
        var chunks = new List<Chunk>();

        foreach (var result in results)
        {
            try
            {
                var article = await ArticleService.FetchArticleAsync(result.Url);
                if (!article.IsReadable || string.IsNullOrEmpty(article.TextContent))
                {
                    continue;
                }

                var textChunks = ChunkingService.ChunkText(article.TextContent);
                chunks.AddRange(textChunks.Select(chunkText => new Chunk(chunkText, result.Url, article.Title)));
            }
            catch (Exception ex)
            {
                // Best effort: a single unreachable or unparsable page must not abort the
                // whole search, but leave a diagnostic instead of discarding the failure.
                System.Diagnostics.Debug.WriteLine(
                    $"SearchTool: skipping '{result.Url}': {ex.GetType().Name}: {ex.Message}");
            }
        }

        return chunks;
    }
}