initial release
This commit is contained in:
89
Tools/SearchTool.cs
Normal file
89
Tools/SearchTool.cs
Normal file
@@ -0,0 +1,89 @@
|
||||
using OpenQuery.Models;
|
||||
using OpenQuery.Services;
|
||||
|
||||
namespace OpenQuery.Tools;
|
||||
|
||||
/// <summary>
/// Web search tool: runs a set of search queries through a <see cref="SearxngClient"/>,
/// fetches and chunks the resulting articles, ranks the chunks against the original
/// query using embeddings, and returns the best chunks as a single context string.
/// </summary>
public class SearchTool
{
    private readonly SearxngClient _searxngClient;
    private readonly EmbeddingService _embeddingService;

    /// <summary>The name of this tool (<c>search</c>).</summary>
    public static string Name => "search";

    /// <summary>A short human-readable description of what this tool does.</summary>
    public static string Description => "Search the web for information on a topic";

    /// <summary>
    /// Creates the tool with the services it depends on.
    /// </summary>
    /// <param name="searxngClient">Client used to run the web searches.</param>
    /// <param name="embeddingService">Service used to embed the chunks and the original query.</param>
    public SearchTool(
        SearxngClient searxngClient,
        EmbeddingService embeddingService)
    {
        _searxngClient = searxngClient;
        _embeddingService = embeddingService;
    }

    /// <summary>
    /// Searches for every query in <paramref name="generatedQueries"/>, extracts text chunks from
    /// the unique result URLs, scores each chunk by cosine similarity to
    /// <paramref name="originalQuery"/>, and formats the highest-scoring chunks.
    /// </summary>
    /// <param name="originalQuery">The query the chunks are ranked against.</param>
    /// <param name="generatedQueries">The queries actually sent to the search client, in order.</param>
    /// <param name="maxResults">Passed to the search client for each query.</param>
    /// <param name="topChunksLimit">Maximum number of chunks included in the returned context.</param>
    /// <param name="onProgress">Optional callback that receives bracketed status messages.</param>
    /// <returns>
    /// The selected chunks joined by blank lines, each prefixed with a
    /// <c>[Source N: title](url)</c> header; or a plain message when no search results were
    /// found or no readable content could be extracted.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="generatedQueries"/> is <c>null</c>.</exception>
    public async Task<string> ExecuteAsync(string originalQuery, List<string> generatedQueries, int maxResults, int topChunksLimit, Action<string>? onProgress = null)
    {
        ArgumentNullException.ThrowIfNull(generatedQueries);

        var uniqueResults = await SearchAllAsync(generatedQueries, maxResults, onProgress);

        if (uniqueResults.Count == 0)
        {
            return "No search results found.";
        }

        onProgress?.Invoke($"[Found {uniqueResults.Count} unique results across all queries. Fetching and reading articles...]");
        var chunks = await ExtractChunksAsync(uniqueResults, onProgress);

        if (chunks.Count == 0)
        {
            return "Found search results but could not extract readable content.";
        }

        onProgress?.Invoke($"[Extracted {chunks.Count} text chunks. Generating embeddings for semantic search...]");
        var chunkTexts = chunks.Select(c => c.Content).ToList();

        // NOTE(review): assumes the embedding service returns exactly one embedding per input
        // text, in input order; fewer would make the indexer below throw - confirm.
        var embeddings = await _embeddingService.GetEmbeddingsAsync(chunkTexts);
        var queryEmbedding = (await _embeddingService.GetEmbeddingsAsync([originalQuery]))[0];

        // Attach each chunk's embedding and its similarity to the original query in one pass.
        for (var i = 0; i < chunks.Count; i++)
        {
            var embedded = chunks[i] with { Embedding = embeddings[i] };
            embedded.Score = EmbeddingService.CosineSimilarity(queryEmbedding, embedded.Embedding!);
            chunks[i] = embedded;
        }

        var topChunks = chunks.OrderByDescending(c => c.Score).Take(topChunksLimit).ToList();

        onProgress?.Invoke($"[Found top {topChunks.Count} most relevant chunks overall. Generating answer...]");

        return string.Join("\n\n", topChunks.Select((c, i) =>
            $"[Source {i + 1}: {c.Title ?? "Unknown"}]({c.SourceUrl})\n{c.Content}"));
    }

    /// <summary>
    /// Synchronous entry point that is intentionally unsupported.
    /// </summary>
    /// <param name="argumentsJson">Unused.</param>
    /// <returns>Never returns.</returns>
    /// <exception cref="InvalidOperationException">Always thrown; use <see cref="ExecuteAsync"/> instead.</exception>
    public static string Execute(string argumentsJson)
    {
        throw new InvalidOperationException("Use ExecuteAsync instead");
    }

    // Runs every query sequentially and returns the combined results de-duplicated by URL.
    private async Task<List<SearxngResult>> SearchAllAsync(List<string> queries, int maxResults, Action<string>? onProgress)
    {
        var allResults = new List<SearxngResult>();

        foreach (var query in queries)
        {
            onProgress?.Invoke($"[Searching web for '{query}'...]");
            var results = await _searxngClient.SearchAsync(query, maxResults);
            allResults.AddRange(results);
        }

        return allResults.DistinctBy(r => r.Url).ToList();
    }

    // Fetches each result's article and splits readable text into chunks tagged with URL and title.
    private static async Task<List<Chunk>> ExtractChunksAsync(List<SearxngResult> results, Action<string>? onProgress)
    {
        var chunks = new List<Chunk>();

        foreach (var result in results)
        {
            try
            {
                var article = await ArticleService.FetchArticleAsync(result.Url);
                if (!article.IsReadable || string.IsNullOrEmpty(article.TextContent))
                {
                    continue;
                }

                var textChunks = ChunkingService.ChunkText(article.TextContent);
                chunks.AddRange(textChunks.Select(chunkText => new Chunk(chunkText, result.Url, article.Title)));
            }
            catch (Exception ex)
            {
                // Best effort: one unreachable or unparsable page must not abort the whole
                // search, but the failure is reported instead of being silently dropped.
                onProgress?.Invoke($"[Skipped '{result.Url}': {ex.Message}]");
            }
        }

        return chunks;
    }
}
|
||||
Reference in New Issue
Block a user