initial release
This commit is contained in:
32
Services/ChunkingService.cs
Normal file
32
Services/ChunkingService.cs
Normal file
@@ -0,0 +1,32 @@
|
||||
namespace OpenQuery.Services;
|
||||
|
||||
/// <summary>
/// Splits text into trimmed chunks of bounded size, preferring to end a chunk
/// right after whitespace or sentence punctuation instead of in the middle of a word.
/// </summary>
public static class ChunkingService
{
    /// <summary>Default upper bound, in characters, for a single chunk.</summary>
    private const int MaxChunkSize = 500;

    /// <summary>Characters after which a chunk is allowed to end.</summary>
    private static readonly char[] BreakChars = [' ', '\n', '\r', '.', '!'];

    /// <summary>
    /// Splits <paramref name="text"/> into chunks of at most 500 characters each.
    /// </summary>
    /// <param name="text">The text to split.</param>
    /// <returns>
    /// The trimmed, non-empty chunks in their original order; an empty list when
    /// <paramref name="text"/> is empty or contains only whitespace.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
    public static List<string> ChunkText(string text) => ChunkText(text, MaxChunkSize);

    /// <summary>
    /// Splits <paramref name="text"/> into chunks of at most
    /// <paramref name="maxChunkSize"/> characters each.
    /// </summary>
    /// <param name="text">The text to split.</param>
    /// <param name="maxChunkSize">Maximum length of a returned chunk; must be positive.</param>
    /// <returns>
    /// The trimmed, non-empty chunks in their original order; an empty list when
    /// <paramref name="text"/> is empty or contains only whitespace.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="maxChunkSize"/> is zero or negative.
    /// </exception>
    public static List<string> ChunkText(string text, int maxChunkSize)
    {
        if (text is null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        // A non-positive size would never advance the cursor and loop forever.
        if (maxChunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(maxChunkSize), maxChunkSize, "Chunk size must be greater than zero.");
        }

        var chunks = new List<string>();
        var start = 0;

        while (start < text.Length)
        {
            var length = Math.Min(maxChunkSize, text.Length - start);
            var end = start + length; // index of the first character after the window

            if (end < text.Length)
            {
                if (text[end] is ' ' or '\n' or '\r')
                {
                    // The window ends exactly on a word boundary: keep the full window
                    // and consume the separator, which Trim() removes again below.
                    length++;
                }
                else
                {
                    // Look for the last break character inside the window only, so a
                    // '.' or '!' sitting just past the window cannot push the chunk
                    // over the size limit.
                    var lastBreak = text.LastIndexOfAny(BreakChars, end - 1, length);

                    // A break at the very first character would yield a one-character
                    // chunk, so it is ignored and the window is cut at full size.
                    if (lastBreak > start)
                    {
                        length = lastBreak - start + 1;
                    }
                }
            }

            var chunk = text.Substring(start, length).Trim();
            if (chunk.Length > 0)
            {
                chunks.Add(chunk);
            }

            start += length;
        }

        return chunks;
    }
}
|
||||
Reference in New Issue
Block a user