Skip to content

Commit

Permalink
feat: added Highlighting to opensearch
Browse files Browse the repository at this point in the history
Refs: SITKO-CORE-T-24
  • Loading branch information
IgorAlymov committed Aug 7, 2024
1 parent 005ec9d commit 077aa30
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 42 deletions.
15 changes: 13 additions & 2 deletions src/Sitko.Core.Repository.Search/BaseRepositorySearchProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,22 @@ await repository.GetAllAsync(q => q.Paginate(page, batchSize).OrderBy(e => e.Id)
}
}

protected override Task<TEntity[]> GetEntitiesAsync(TSearchModel[] searchModels,
protected override async Task<(TEntity entity, TSearchModel searchResult)[]> GetEntitiesAsync(TSearchModel[] searchModels,
CancellationToken cancellationToken = default)
{
var ids = searchModels.Select(s => ParseId(s.Id)).Distinct().ToArray();
return repository.GetByIdsAsync(ids, cancellationToken);
var entities = await repository.GetByIdsAsync(ids, cancellationToken);

// Index the search models by id once instead of copying and scanning the array for every entity.
// TryAdd keeps the first model for a given id, which is what FirstOrDefault selected before.
var modelsById = new Dictionary<string, TSearchModel>(searchModels.Length);
foreach (var model in searchModels)
{
    modelsById.TryAdd(model.Id, model);
}

// Pair every loaded entity with the search model that produced it; entities without a
// matching search model are left out of the result.
List<(TEntity, TSearchModel)> result = [];
foreach (var entity in entities)
{
    var entityId = entity.Id.ToString();
    if (entityId is not null && modelsById.TryGetValue(entityId, out var searchModel))
    {
        result.Add((entity, searchModel));
    }
}

return result.ToArray();
}

protected override string GetId(TEntity entity) =>
Expand Down
2 changes: 2 additions & 0 deletions src/Sitko.Core.Search.OpenSearch/OpenSearchModuleOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ public class OpenSearchModuleOptions : SearchModuleOptions
public bool EnableClientLogging { get; set; }
public bool DisableCertificatesValidation { get; set; }
public string CustomStemmer { get; set; } = "";
public string PreTags { get; set; } = "";
public string PostTags { get; set; } = "";
}

public class OpenSearchModuleOptionsValidator : AbstractValidator<OpenSearchModuleOptions>
Expand Down
22 changes: 16 additions & 6 deletions src/Sitko.Core.Search.OpenSearch/OpenSearchSearcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,17 @@ public async Task<TSearchModel[]> SearchAsync(string indexName, string term, int
SearchType searchType, CancellationToken cancellationToken = default)
{
indexName = $"{Options.Prefix}_{indexName}";
var results = await GetClient()
var searchResponse = await GetClient()
.SearchAsync<TSearchModel>(x => GetSearchRequest(x, indexName, term, searchType, limit), cancellationToken);
if (results.ServerError != null)
if (searchResponse.ServerError != null)
{
logger.LogError("Error while searching in {IndexName}: {ErrorText}", indexName, results.ServerError);
logger.LogError("Error while searching in {IndexName}: {ErrorText}", indexName, searchResponse.ServerError);
}

return results.Documents.ToArray();
// Return the deserialized documents themselves and attach the highlight fragments of each hit.
// Rebuilding a BaseSearchModel by hand passed Content/Url in the wrong constructor order
// (the constructor expects url before content) and the cast to TSearchModel fails at runtime
// for any search model type derived from BaseSearchModel.
var result = searchResponse.Hits.Select(h =>
{
    var model = h.Source;
    model.Highlight = h.Highlight;
    return model;
}).ToArray();
return result;
}

public async Task<TSearchModel[]> GetSimilarAsync(string indexName, string id, int limit,
Expand Down Expand Up @@ -241,7 +244,7 @@ private static string GetSearchText(string? term)
return names;
}

private static SearchDescriptor<TSearchModel> GetSearchRequest(SearchDescriptor<TSearchModel> descriptor,
private SearchDescriptor<TSearchModel> GetSearchRequest(SearchDescriptor<TSearchModel> descriptor,
string indexName, string term, SearchType searchType, int limit = 0)
{
var names = GetSearchText(term);
Expand All @@ -260,7 +263,14 @@ private static SearchDescriptor<TSearchModel> GetSearchRequest(SearchDescriptor<
break;
}

return descriptor.Sort(s => s.Descending(SortSpecialField.Score).Descending(model => model.Date))
return descriptor
.Highlight(h =>
h.Fields(fs => fs
.Field(p => p.Title)
.PreTags(Options.PreTags)
.PostTags(Options.PostTags))
)
.Sort(s => s.Descending(SortSpecialField.Score).Descending(model => model.Date))
.Size(limit > 0 ? limit : 20)
.Index(indexName.ToLowerInvariant());
}
Expand Down
12 changes: 6 additions & 6 deletions src/Sitko.Core.Search/BaseSearchProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
namespace Sitko.Core.Search;

[PublicAPI]
public abstract class BaseSearchProvider<T, TEntityPk, TSearchModel> : ISearchProvider<T, TEntityPk>
public abstract class BaseSearchProvider<T, TEntityPk, TSearchModel> : ISearchProvider<T, TEntityPk, TSearchModel>
where T : class where TSearchModel : BaseSearchModel
{
private readonly ISearcher<TSearchModel> searcher;
Expand All @@ -31,7 +31,7 @@ public Task<long> CountAsync(string term, CancellationToken cancellationToken =
public Task InitAsync(CancellationToken cancellationToken = default) =>
searcher.InitAsync(IndexName, cancellationToken);

public async Task<T[]> SearchAsync(string term, int limit, SearchType searchType, CancellationToken cancellationToken = default)
public async Task<(T entity, TSearchModel searchResult)[]> SearchAsync(string term, int limit, SearchType searchType, CancellationToken cancellationToken = default)
{
var result = await searcher.SearchAsync(IndexName, term, limit, searchType, cancellationToken);
return await LoadEntities(result, cancellationToken);
Expand All @@ -44,7 +44,7 @@ public async Task<TEntityPk[]> GetIdsAsync(string term, int limit, SearchType se
return result.Select(m => ParseId(m.Id)).ToArray();
}

public async Task<T[]> GetSimilarAsync(string id, int limit, CancellationToken cancellationToken = default)
public async Task<(T entity, TSearchModel searchResult)[]> GetSimilarAsync(string id, int limit, CancellationToken cancellationToken = default)
{
var result = await searcher.GetSimilarAsync(IndexName, id, limit, cancellationToken);
return await LoadEntities(result, cancellationToken);
Expand Down Expand Up @@ -74,17 +74,17 @@ await searcher.DeleteAsync(IndexName, await GetSearchModelsAsync(entities, cance

protected abstract TEntityPk ParseId(string id);

protected virtual async Task<T[]> LoadEntities(TSearchModel[] searchModels,
protected virtual async Task<(T entity, TSearchModel searchResult)[]> LoadEntities(TSearchModel[] searchModels,
CancellationToken cancellationToken = default)
{
var entities = await GetEntitiesAsync(searchModels, cancellationToken);
return entities.OrderBy(e => Array.FindIndex(searchModels, model => model.Id == GetId(e))).ToArray();
return entities.OrderBy(e => Array.FindIndex(searchModels, model => model.Id == GetId(e.entity))).ToArray();
}

protected abstract Task<TSearchModel[]> GetSearchModelsAsync(T[] entities,
CancellationToken cancellationToken = default);

protected abstract Task<T[]> GetEntitiesAsync(TSearchModel[] searchModels,
protected abstract Task<(T entity, TSearchModel searchResult)[]> GetEntitiesAsync(TSearchModel[] searchModels,
CancellationToken cancellationToken = default);

protected abstract string GetId(T entity);
Expand Down
38 changes: 30 additions & 8 deletions src/Sitko.Core.Search/ISearchProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,40 @@ public interface ISearchProvider
Task InitAsync(CancellationToken cancellationToken = default);
}

public interface ISearchProvider<TEntity, TEntityPk> : ISearchProvider where TEntity : class
public interface ISearchProvider<TEntity> : ISearchProvider where TEntity : class
{
Task<TEntity[]> SearchAsync(string term, int limit, SearchType searchType, CancellationToken cancellationToken = default);
Task<TEntityPk[]> GetIdsAsync(string term, int limit, SearchType searchType, CancellationToken cancellationToken = default);
Task<TEntity[]> GetSimilarAsync(string id, int limit, CancellationToken cancellationToken = default);

Task<TEntityPk[]> GetSimilarIdsAsync(string id, int limit,
CancellationToken cancellationToken = default);

Task AddOrUpdateEntityAsync(TEntity entity, CancellationToken cancellationToken = default);
Task<bool> AddOrUpdateEntitiesAsync(TEntity[] entities, CancellationToken cancellationToken = default);
Task<bool> DeleteEntityAsync(TEntity entity, CancellationToken cancellationToken = default);
Task<bool> DeleteEntitiesAsync(TEntity[] entities, CancellationToken cancellationToken = default);
}

/// <summary>
/// Search provider contract whose search methods return plain entity arrays and
/// primary-key arrays, extending the entity-level operations of <c>ISearchProvider&lt;TEntity&gt;</c>.
/// </summary>
/// <typeparam name="TEntity">Type of the searched entity.</typeparam>
/// <typeparam name="TEntityPk">Type of the identifiers returned by the id lookups.</typeparam>
public interface ISearchProvider<TEntity, TEntityPk> : ISearchProvider<TEntity> where TEntity : class
{
/// <summary>Searches by <paramref name="term"/> and returns the matching entities.</summary>
/// <param name="term">Text to search for.</param>
/// <param name="limit">Requested result count; how it is applied is up to the implementation.</param>
/// <param name="searchType">Search mode to use.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
Task<TEntity[]> SearchAsync(string term, int limit, SearchType searchType,
CancellationToken cancellationToken = default);

/// <summary>Searches by <paramref name="term"/> and returns identifiers instead of entities.</summary>
Task<TEntityPk[]> GetIdsAsync(string term, int limit, SearchType searchType,
CancellationToken cancellationToken = default);

/// <summary>Returns entities similar to the one referenced by <paramref name="id"/>.</summary>
/// <remarks>NOTE(review): <paramref name="id"/> is presumably the search-index id of the reference item — confirm.</remarks>
Task<TEntity[]> GetSimilarAsync(string id, int limit, CancellationToken cancellationToken = default);

/// <summary>Same lookup as <see cref="GetSimilarAsync"/> but returns identifiers instead of entities.</summary>
Task<TEntityPk[]> GetSimilarIdsAsync(string id, int limit,
CancellationToken cancellationToken = default);
}

/// <summary>
/// Search provider contract whose search methods return each entity together with the
/// search model it was found by, so callers can read search-side data such as
/// <c>BaseSearchModel.Highlight</c>.
/// </summary>
/// <typeparam name="TEntity">Type of the searched entity.</typeparam>
/// <typeparam name="TEntityPk">Type of the identifiers returned by the id lookups.</typeparam>
/// <typeparam name="TSearchModel">Search model type paired with each entity.</typeparam>
public interface ISearchProvider<TEntity, TEntityPk, TSearchModel> : ISearchProvider<TEntity>
where TEntity : class where TSearchModel : BaseSearchModel
{
/// <summary>Searches by <paramref name="term"/> and returns (entity, search model) pairs.</summary>
/// <param name="term">Text to search for.</param>
/// <param name="limit">Requested result count; how it is applied is up to the implementation.</param>
/// <param name="searchType">Search mode to use.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
Task<(TEntity entity, TSearchModel searchResult)[]> SearchAsync(string term, int limit, SearchType searchType,
CancellationToken cancellationToken = default);

/// <summary>Searches by <paramref name="term"/> and returns identifiers instead of entities.</summary>
Task<TEntityPk[]> GetIdsAsync(string term, int limit, SearchType searchType,
CancellationToken cancellationToken = default);

/// <summary>Returns (entity, search model) pairs similar to the item referenced by <paramref name="id"/>.</summary>
/// <remarks>NOTE(review): <paramref name="id"/> is presumably the search-index id of the reference item — confirm.</remarks>
Task<(TEntity entity, TSearchModel searchResult)[]> GetSimilarAsync(string id, int limit,
CancellationToken cancellationToken = default);

/// <summary>Same lookup as <see cref="GetSimilarAsync"/> but returns identifiers instead of pairs.</summary>
Task<TEntityPk[]> GetSimilarIdsAsync(string id, int limit,
CancellationToken cancellationToken = default);
}
6 changes: 4 additions & 2 deletions src/Sitko.Core.Search/SearchModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ namespace Sitko.Core.Search;

public class BaseSearchModel
{
public BaseSearchModel(string id, string title, string url, string content, DateTimeOffset date)
public BaseSearchModel(string id, string title, string url, string content, DateTimeOffset date,
IReadOnlyDictionary<string, IReadOnlyCollection<string>>? highlight = null)
{
Highlight = highlight;
Id = id;
Title = title;
Url = url;
Expand All @@ -16,5 +18,5 @@ public BaseSearchModel(string id, string title, string url, string content, Date
public string Url { get; set; }
public DateTimeOffset Date { get; set; }
public string Content { get; set; }
public IReadOnlyDictionary<string, IReadOnlyCollection<string>>? Highlight { get; set; }
}

7 changes: 4 additions & 3 deletions src/Sitko.Core.Search/SearchModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,11 @@ public override async Task InitAsync(IApplicationContext applicationContext, ISe

public static class SearchModuleExtensions
{
public static IServiceCollection RegisterSearchProvider<TSearchProvider, TEntity, TEntityPk>(
public static IServiceCollection RegisterSearchProvider<TSearchProvider, TEntity, TEntityPk, TSearchModel>(
this IServiceCollection serviceCollection)
where TSearchProvider : class, ISearchProvider<TEntity, TEntityPk>
where TEntity : class =>
where TSearchProvider : class, ISearchProvider<TEntity, TEntityPk, TSearchModel>
where TEntity : class
where TSearchModel : BaseSearchModel =>
serviceCollection.Scan(a => a.FromType<TSearchProvider>().AsSelfWithInterfaces());
}

Expand Down
17 changes: 14 additions & 3 deletions tests/Sitko.Core.Search.ElasticSearch.Tests/ElasticSearchTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ protected override IHostApplicationBuilder ConfigureApplication(IHostApplication
});

hostBuilder.Services.AddSingleton<TestModelProvider>();
hostBuilder.Services.RegisterSearchProvider<TestSearchProvider, TestModel, Guid>();
hostBuilder.Services.RegisterSearchProvider<TestSearchProvider, TestModel, Guid, BaseSearchModel>();
return hostBuilder;
}
}
Expand Down Expand Up @@ -91,11 +91,22 @@ protected override Task<BaseSearchModel[]> GetSearchModelsAsync(TestModel[] enti
Task.FromResult(entities
.Select(e => new BaseSearchModel(e.Id.ToString(), e.Title, e.Url, e.Description, e.Date)).ToArray());

protected override Task<TestModel[]> GetEntitiesAsync(BaseSearchModel[] searchModels,
protected override Task<(TestModel entity, BaseSearchModel searchResult)[]> GetEntitiesAsync(BaseSearchModel[] searchModels,
CancellationToken cancellationToken = default)
{
var ids = searchModels.Select(m => Guid.Parse(m.Id));
return Task.FromResult(testModelProvider.Models.Where(m => ids.Contains(m.Id)).ToArray());
var entities = testModelProvider.Models.Where(m => ids.Contains(m.Id));

// Build the id -> search model lookup once; TryAdd keeps the first model per id,
// the same one FirstOrDefault would have returned.
var modelsById = new Dictionary<string, BaseSearchModel>(searchModels.Length);
foreach (var model in searchModels)
{
    modelsById.TryAdd(model.Id, model);
}

// Pair each stored test model with the search model that matched it.
List<(TestModel, BaseSearchModel)> result = [];
foreach (var entity in entities)
{
    if (modelsById.TryGetValue(entity.Id.ToString(), out var searchModel))
    {
        result.Add((entity, searchModel));
    }
}

return Task.FromResult(result.ToArray());
}

protected override string GetId(TestModel entity) => entity.Id.ToString();
Expand Down
62 changes: 50 additions & 12 deletions tests/Sitko.Core.Search.OpenSearch.Tests/OpenSearchTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public async Task SearchAsync()
{
var scope = await GetScopeAsync();
var provider = scope.GetService<TestModelProvider>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid>>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid, BaseSearchModel>>();
await searchProvider.DeleteIndexAsync();
await searchProvider.InitAsync();

Expand All @@ -40,7 +40,6 @@ public async Task SearchAsync()
await Task.Delay(TimeSpan.FromSeconds(5));
var result = await searchProvider.SearchAsync("samsung", 10, SearchType.Morphology);
result.Length.Should().Be(provider.Models.Count);
result.First().Id.Should().Be(barModel.Id);
}

[Theory(DisplayName = "MorphologyRusTest")]
Expand All @@ -53,7 +52,7 @@ public async Task MorphologyRusTestAsync(int foundDocs, string searchText)
{
var scope = await GetScopeAsync();
var provider = scope.GetService<TestModelProvider>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid>>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid, BaseSearchModel>>();
await searchProvider.DeleteIndexAsync();
await searchProvider.InitAsync();

Expand All @@ -78,7 +77,7 @@ public async Task MorphologyEngTestAsync()
{
var scope = await GetScopeAsync();
var provider = scope.GetService<TestModelProvider>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid>>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid, BaseSearchModel>>();
await searchProvider.DeleteIndexAsync();
await searchProvider.InitAsync();

Expand All @@ -105,7 +104,7 @@ public async Task PartialSearchEngTestAsync(int foundDocs, string searchText)
{
var scope = await GetScopeAsync();
var provider = scope.GetService<TestModelProvider>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid>>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid, BaseSearchModel>>();
await searchProvider.DeleteIndexAsync();
await searchProvider.InitAsync();

Expand All @@ -132,7 +131,7 @@ public async Task PartialSearchRusTestAsync(int foundDocs, string searchText)
{
var scope = await GetScopeAsync();
var provider = scope.GetService<TestModelProvider>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid>>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid, BaseSearchModel>>();
await searchProvider.DeleteIndexAsync();
await searchProvider.InitAsync();

Expand All @@ -157,7 +156,7 @@ public async Task SearchByNumbersTestAsync(int foundDocs, string searchText, Sea
{
var scope = await GetScopeAsync();
var provider = scope.GetService<TestModelProvider>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid>>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid, BaseSearchModel>>();
await searchProvider.DeleteIndexAsync();
await searchProvider.InitAsync();
var firstGuid = Guid.Parse("dd134352-da92-4cd2-9c" + searchText + "-440be713aba5");
Expand All @@ -181,7 +180,7 @@ public async Task IncorrectLayoutKeyboardTestAsync()
{
var scope = await GetScopeAsync();
var provider = scope.GetService<TestModelProvider>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid>>();
var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid, BaseSearchModel>>();
await searchProvider.DeleteIndexAsync();
await searchProvider.InitAsync();

Expand All @@ -195,6 +194,31 @@ public async Task IncorrectLayoutKeyboardTestAsync()
var result = await searchProvider.SearchAsync("лщдуыф", 10, SearchType.Wildcard);
result.Length.Should().Be(1);
}

// Verifies that a search hit carries highlight fragments wrapped in the configured pre-tag.
[Fact]
public async Task HighlightingTestAsync()
{
    var scope = await GetScopeAsync();
    var provider = scope.GetService<TestModelProvider>();
    var searchProvider = scope.GetService<ISearchProvider<TestModel, Guid, BaseSearchModel>>();
    await searchProvider.DeleteIndexAsync();
    await searchProvider.InitAsync();

    var firstModel = new TestModel
    {
        Title = "Геймеры играют в компьютерные игры.", Description = "MMI", Url = "mmicentre"
    };
    var secondModel = new TestModel { Title = "MMI", Description = "mmicentre", Url = "mmicentre" };
    provider.AddModel(firstModel).AddModel(secondModel);

    await searchProvider.AddOrUpdateEntitiesAsync(provider.Models.ToArray());
    await Task.Delay(TimeSpan.FromSeconds(5));

    var result = await searchProvider.SearchAsync("играют", 10, SearchType.Morphology);
    result.Length.Should().Be(1);

    // Highlight is nullable, so assert its presence before inspecting it.
    var highlight = result.First().searchResult.Highlight;
    highlight.Should().NotBeNull();
    highlight!.Count.Should().Be(1);

    // Each dictionary value is a collection of fragments; at least one fragment must contain
    // the opening highlight tag. (A bare Contains(...) call asserts nothing.)
    highlight.First().Value.Should().Contain(fragment =>
        fragment.Contains("<span class='highlight'>", StringComparison.Ordinal));
}
}

public class OpenSearchTestScope : BaseTestScope
Expand All @@ -212,10 +236,12 @@ protected override IHostApplicationBuilder ConfigureApplication(IHostApplication
moduleOptions.InitProviders = false;
moduleOptions.DisableCertificatesValidation = true;
moduleOptions.CustomStemmer = "russian";
// Opening and closing markup placed around every highlighted match.
moduleOptions.PreTags = "<span class='highlight'>";
moduleOptions.PostTags = "</span>";
});

hostBuilder.Services.AddSingleton<TestModelProvider>();
hostBuilder.Services.RegisterSearchProvider<TestSearchProvider, TestModel, Guid>();
hostBuilder.Services.RegisterSearchProvider<TestSearchProvider, TestModel, Guid, BaseSearchModel>();
return hostBuilder;
}
}
Expand All @@ -240,13 +266,25 @@ public class TestSearchProvider(
protected override Task<BaseSearchModel[]> GetSearchModelsAsync(TestModel[] entities,
CancellationToken cancellationToken = default) =>
Task.FromResult(entities
.Select(e => new BaseSearchModel(e.Id.ToString(), e.Title, e.Url, e.Description, e.Date)).ToArray());
.Select(e => new BaseSearchModel(e.Id.ToString(), e.Title, e.Url, e.Description, e.Date))
.ToArray());

protected override Task<TestModel[]> GetEntitiesAsync(BaseSearchModel[] searchModels,
protected override Task<(TestModel entity, BaseSearchModel searchResult)[]> GetEntitiesAsync(BaseSearchModel[] searchModels,
CancellationToken cancellationToken = default)
{
var ids = searchModels.Select(m => Guid.Parse(m.Id));
return Task.FromResult(testModelProvider.Models.Where(m => ids.Contains(m.Id)).ToArray());
var entities = testModelProvider.Models.Where(m => ids.Contains(m.Id));

// Build the id -> search model lookup once; TryAdd keeps the first model per id,
// the same one FirstOrDefault would have returned.
var modelsById = new Dictionary<string, BaseSearchModel>(searchModels.Length);
foreach (var model in searchModels)
{
    modelsById.TryAdd(model.Id, model);
}

// Pair each stored test model with the search model that matched it.
List<(TestModel, BaseSearchModel)> result = [];
foreach (var entity in entities)
{
    if (modelsById.TryGetValue(entity.Id.ToString(), out var searchModel))
    {
        result.Add((entity, searchModel));
    }
}

return Task.FromResult(result.ToArray());
}

protected override string GetId(TestModel entity) => entity.Id.ToString();
Expand Down

0 comments on commit 077aa30

Please sign in to comment.