From 92eb6fcd5ce841a2b5a4e83d186b7e4ac496dea2 Mon Sep 17 00:00:00 2001
From: IgorAlymov
Date: Tue, 6 Aug 2024 11:03:13 +0500
Subject: [PATCH] feat: added CharFilters for open search

Refs: SITKO-CORE-T-23
---
 .../OpenSearchHelper.cs                       | 72 +++++++++++++++++++
 .../OpenSearchSearcher.cs                     | 24 +++++-
 .../OpenSearchTests.cs                        | 20 +++++
 3 files changed, 111 insertions(+), 5 deletions(-)
 create mode 100644 src/Sitko.Core.Search.OpenSearch/OpenSearchHelper.cs

diff --git a/src/Sitko.Core.Search.OpenSearch/OpenSearchHelper.cs b/src/Sitko.Core.Search.OpenSearch/OpenSearchHelper.cs
new file mode 100644
index 00000000..7b86a5a5
--- /dev/null
+++ b/src/Sitko.Core.Search.OpenSearch/OpenSearchHelper.cs
@@ -0,0 +1,72 @@
+namespace Sitko.Core.Search.OpenSearch;
+
+/// <summary>
+/// Static data used when building OpenSearch index analysis settings.
+/// </summary>
+public static class OpenSearchHelper
+{
+    /// <summary>
+    /// Rules in "source => target" form for a mapping char filter: each Latin keyboard character is
+    /// replaced with the Cyrillic character found on the same key of the Russian layout.
+    /// </summary>
+    public static readonly string[] RusEnKeys =
+    [
+        "a => ф",
+        "b => и",
+        "c => с",
+        "d => в",
+        "e => у",
+        "f => а",
+        "g => п",
+        "h => р",
+        "i => ш",
+        "j => о",
+        "k => л",
+        "l => д",
+        "m => ь",
+        "n => т",
+        "o => щ",
+        "p => з",
+        "q => й",
+        "r => к",
+        "s => ы",
+        "t => е",
+        "u => г",
+        "v => м",
+        "w => ц",
+        "x => ч",
+        "y => н",
+        "z => я",
+        "A => Ф",
+        "B => И",
+        "C => С",
+        "D => В",
+        "E => У",
+        "F => А",
+        "G => П",
+        "H => Р",
+        "I => Ш",
+        "J => О",
+        "K => Л",
+        "L => Д",
+        "M => Ь",
+        "N => Т",
+        "O => Щ",
+        "P => З",
+        "Q => Й",
+        "R => К",
+        "S => Ы",
+        "T => Е",
+        "U => Г",
+        "V => М",
+        "W => Ц",
+        "X => Ч",
+        "Y => Н",
+        "Z => Я",
+        "[ => х",
+        "] => ъ",
+        "; => ж",
+        "< => б",
+        "> => ю"
+    ];
+}
diff --git a/src/Sitko.Core.Search.OpenSearch/OpenSearchSearcher.cs b/src/Sitko.Core.Search.OpenSearch/OpenSearchSearcher.cs
index b6958594..2908a23c 100644
--- a/src/Sitko.Core.Search.OpenSearch/OpenSearchSearcher.cs
+++ b/src/Sitko.Core.Search.OpenSearch/OpenSearchSearcher.cs
@@ -15,7 +15,9 @@ public class OpenSearchSearcher(
     private OpenSearchModuleOptions Options => optionsMonitor.CurrentValue;
     private OpenSearchClient? client;
     private const string CustomAnalyze = "custom_analyze";
+    private const string CustomCharFilterAnalyze = "char_filter_analyze";
     private const string StemmerName = "custom_stemmer";
+    private const string CustomCharFilter = "rus_en_key";
 
     public async Task AddOrUpdateAsync(string indexName, IEnumerable searchModels,
         CancellationToken cancellationToken = default)
@@ -264,12 +266,20 @@ private static SearchDescriptor GetSearchRequest(SearchDescriptor<
     }
 
     private AnalysisDescriptor CreateAnalysisDescriptor(AnalysisDescriptor a) =>
-        a.Analyzers(aa => aa.Custom(CustomAnalyze, ca => ca
-                .Tokenizer("standard")
-                .Filters("lowercase", "stop", "snowball", StemmerName)
+        a.Analyzers(aa =>
+                aa.Custom(CustomAnalyze, ca => ca
+                        .Tokenizer("standard")
+                        .Filters("lowercase", "stop", "snowball", StemmerName))
+                    .Custom(CustomCharFilterAnalyze, ca => ca
+                        .Tokenizer("standard")
+                        .Filters("lowercase", "stop")
+                        .CharFilters(CustomCharFilter))
             )
-        ).TokenFilters(descriptor =>
-            descriptor.Stemmer(StemmerName, filterDescriptor => filterDescriptor.Language(Options.CustomStemmer)));
+            .CharFilters(descriptor =>
+                descriptor.Mapping(CustomCharFilter,
+                    filterDescriptor => filterDescriptor.Mappings(OpenSearchHelper.RusEnKeys)))
+            .TokenFilters(descriptor =>
+                descriptor.Stemmer(StemmerName, filterDescriptor => filterDescriptor.Language(Options.CustomStemmer)));
 
     private CreateIndexDescriptor CreateIndexDescriptor(CreateIndexDescriptor createIndexDescriptor) =>
         createIndexDescriptor.Settings(s => s.Analysis(CreateAnalysisDescriptor))
@@ -279,6 +289,10 @@ private CreateIndexDescriptor CreateIndexDescriptor(CreateIndexDescriptor create
                         .Name(n => n.Content)
                         .Analyzer(CustomAnalyze)
                     )
+                    .Text(t => t
+                        .Name(n => n.Title)
+                        .Analyzer(CustomCharFilterAnalyze)
+                    )
                 )
             );
 }
diff --git a/tests/Sitko.Core.Search.OpenSearch.Tests/OpenSearchTests.cs b/tests/Sitko.Core.Search.OpenSearch.Tests/OpenSearchTests.cs
index 0401da72..1a217a29 100644
--- a/tests/Sitko.Core.Search.OpenSearch.Tests/OpenSearchTests.cs
+++ b/tests/Sitko.Core.Search.OpenSearch.Tests/OpenSearchTests.cs
@@ -175,6 +175,26 @@ public async Task SearchByNumbersTestAsync(int foundDocs, string searchText, Sea
         var result2 = await searchProvider.SearchAsync(searchText, 10, searchType);
         result2.Length.Should().Be(foundDocs);
     }
+
+    [Fact]
+    public async Task IncorrectLayoutKeyboardTestAsync()
+    {
+        var scope = await GetScopeAsync();
+        var provider = scope.GetService();
+        var searchProvider = scope.GetService>();
+        await searchProvider.DeleteIndexAsync();
+        await searchProvider.InitAsync();
+
+        var firstModel = new TestModel { Title = "kolesa", Description = "MMI", Url = "/page/" };
+        var secondModel = new TestModel { Title = "MMI", Description = "mmicentre", Url = "mmicentre" };
+        provider.AddModel(firstModel).AddModel(secondModel);
+
+        await searchProvider.AddOrUpdateEntitiesAsync(provider.Models.ToArray());
+        await Task.Delay(TimeSpan.FromSeconds(5));
+
+        var result = await searchProvider.SearchAsync("лщдуыф", 10, SearchType.Wildcard);
+        result.Length.Should().Be(1);
+    }
 }
 
 public class OpenSearchTestScope : BaseTestScope