18 Star 133 Fork 63

编程语言算法集 / C-Sharp

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
InvertedIndex.cs 2.42 KB
一键复制 编辑 原始数据 按行查看 历史
Gerson Jr 提交于 2024-01-08 14:18 . Switch to file-scoped namespaces (#434)
using System.Collections.Generic;
using System.Linq;
namespace DataStructures;
/// <summary>
/// Inverted index is the simplest form of document indexing,
/// allowing boolean queries to be performed on text data.
///
/// This implementation is deliberately simplified to make the indexing process
/// easier to understand, and it works on straightforward string inputs.
/// </summary>
public class InvertedIndex
{
    // Maps every indexed word to the names of the sources that contain it.
    private readonly Dictionary<string, List<string>> invertedIndex = new();

    /// <summary>
    /// Adds a source to the index. The content is split on single space characters and
    /// the source name is recorded once under every distinct word of the content.
    /// </summary>
    /// <param name="sourceName">Name of the source.</param>
    /// <param name="sourceContent">Content of the source.</param>
    public void AddToIndex(string sourceName, string sourceContent)
    {
        foreach (var word in sourceContent.Split(' ').Distinct())
        {
            // A single TryGetValue avoids the ContainsKey + indexer double lookup.
            if (invertedIndex.TryGetValue(word, out var sources))
            {
                sources.Add(sourceName);
            }
            else
            {
                invertedIndex.Add(word, new List<string> { sourceName });
            }
        }
    }

    /// <summary>
    /// Returns the names of the sources that contain ALL of the given terms.
    /// </summary>
    /// <param name="terms">List of terms.</param>
    /// <returns>
    /// Source names containing every term. The result is empty when <paramref name="terms"/>
    /// is empty or when any term is not present in the index.
    /// </returns>
    public IEnumerable<string> And(IEnumerable<string> terms)
    {
        using var enumerator = terms.GetEnumerator();

        // No terms at all: there is nothing to intersect, so report no matches
        // instead of failing on an empty sequence.
        if (!enumerator.MoveNext())
        {
            return new HashSet<string>();
        }

        // Seed the result with the first term's sources, then narrow it term by term.
        var intersection = new HashSet<string>(Lookup(enumerator.Current));
        while (enumerator.MoveNext())
        {
            intersection.IntersectWith(Lookup(enumerator.Current));
        }

        return intersection;
    }

    /// <summary>
    /// Returns the names of the sources that contain AT LEAST ONE of the given terms.
    /// </summary>
    /// <param name="terms">List of terms.</param>
    /// <returns>Distinct source names, in order of first appearance.</returns>
    public IEnumerable<string> Or(IEnumerable<string> terms)
    {
        var sources = new List<string>();
        foreach (var term in terms)
        {
            sources.AddRange(Lookup(term));
        }

        return sources.Distinct();
    }

    /// <summary>
    /// Looks up the sources recorded for a single term using a direct dictionary lookup.
    /// </summary>
    /// <param name="term">Term to look up.</param>
    /// <returns>The sources containing the term, or an empty sequence if the term is null or unknown.</returns>
    private IEnumerable<string> Lookup(string term) =>
        term is not null && invertedIndex.TryGetValue(term, out var sources)
            ? sources
            : Enumerable.Empty<string>();
}
C#
1
https://gitee.com/TheAlgorithms/C-Sharp.git
git@gitee.com:TheAlgorithms/C-Sharp.git
TheAlgorithms
C-Sharp
C-Sharp
master

搜索帮助