From 9e4992594e204c055122aeeb5dfd76bd85d82750 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 15:50:30 +0100
Subject: [PATCH 01/18] Removed stopword remover from lexicon

---
 src/senaite/core/api/catalog.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/senaite/core/api/catalog.py b/src/senaite/core/api/catalog.py
index 4d94a88736..8d534e2f37 100644
--- a/src/senaite/core/api/catalog.py
+++ b/src/senaite/core/api/catalog.py
@@ -149,8 +149,7 @@ def add_zc_text_index(catalog, index, lex_id="Lexicon", indexed_attrs=None):
         # create the lexicon first
         splitter = Splitter()
         casenormalizer = CaseNormalizer()
-        stopwordremover = StopWordAndSingleCharRemover()
-        pipeline = [splitter, casenormalizer, stopwordremover]
+        pipeline = [splitter, casenormalizer]
         lexicon = PLexicon(lex_id, "Lexicon", *pipeline)
         catalog._setObject(lex_id, lexicon)
 

From 81c9b2f4f2ca6328cb52b1393aa21260b190d0bf Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 15:50:51 +0100
Subject: [PATCH 02/18] Explicit values for listing_searchable_text index

---
 src/senaite/core/catalog/indexer/sample.py | 30 ++++++++++++----------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/senaite/core/catalog/indexer/sample.py b/src/senaite/core/catalog/indexer/sample.py
index 299d95bf35..db9f31b358 100644
--- a/src/senaite/core/catalog/indexer/sample.py
+++ b/src/senaite/core/catalog/indexer/sample.py
@@ -21,8 +21,6 @@
 from bika.lims import api
 from bika.lims.interfaces import IAnalysisRequest
 from plone.indexer import indexer
-from senaite.core.catalog import SAMPLE_CATALOG
-from senaite.core.catalog.utils import get_searchable_text_tokens
 from senaite.core.interfaces import ISampleCatalog
 
 
@@ -91,20 +89,26 @@ def is_received(instance):
 
 @indexer(IAnalysisRequest, ISampleCatalog)
 def listing_searchable_text(instance):
-    """Retrieves all the values of metadata columns in the catalog for
-    wildcard searches
-    :return: all metadata values joined in a string
+    """Retrieves most commonly searched values for samples
+
+    :returns: string with search terms
     """
     entries = set()
-    catalog = SAMPLE_CATALOG
 
-    # add searchable text tokens for the root sample
-    tokens = get_searchable_text_tokens(instance, catalog)
-    entries.update(tokens)
+    for obj in [instance] + instance.getDescendants():
+        entries.add(obj.getId())
+        entries.add(obj.getClientOrderNumber())
+        entries.add(obj.getClientReference())
+        entries.add(obj.getClientSampleID())
+
+        # we use this approach to bypass the computed fields
+        sampletype = obj.getSampleType()
+        entries.add(sampletype.Title() if sampletype else '')
+
+        samplepoint = obj.getSamplePoint()
+        entries.add(samplepoint.Title() if samplepoint else '')
 
-    # add searchable text tokens for descendant samples
-    for descendant in instance.getDescendants():
-        tokens = get_searchable_text_tokens(descendant, catalog)
-        entries.update(tokens)
+        batch = obj.getBatch()
+        entries.add(batch.getBatchID() if batch else '')
 
     return u" ".join(list(entries))

From 0f142d54b064ef3ea215a3dc9d1d67d650b33244 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 15:51:22 +0100
Subject: [PATCH 03/18] Upgrade step added

---
 .../core/profiles/default/metadata.xml        |  2 +-
 src/senaite/core/upgrade/v02_05_000.py        | 21 +++++++++++++++++++
 src/senaite/core/upgrade/v02_05_000.zcml      |  8 +++++++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/senaite/core/profiles/default/metadata.xml b/src/senaite/core/profiles/default/metadata.xml
index 3da64c6b1f..9975121cb2 100644
--- a/src/senaite/core/profiles/default/metadata.xml
+++ b/src/senaite/core/profiles/default/metadata.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <metadata>
-  <version>2500</version>
+  <version>2501</version>
   <dependencies>
     <dependency>profile-Products.ATContentTypes:base</dependency>
     <dependency>profile-Products.CMFEditions:CMFEditions</dependency>
diff --git a/src/senaite/core/upgrade/v02_05_000.py b/src/senaite/core/upgrade/v02_05_000.py
index 275ff97da7..30290a7241 100644
--- a/src/senaite/core/upgrade/v02_05_000.py
+++ b/src/senaite/core/upgrade/v02_05_000.py
@@ -18,7 +18,12 @@
 # Copyright 2018-2023 by it's authors.
 # Some rights reserved, see README and LICENSE.
 
+from bika.lims import api
 from senaite.core import logger
+from senaite.core.api.catalog import add_index
+from senaite.core.api.catalog import del_index
+from senaite.core.api.catalog import reindex_index
+from senaite.core.catalog import SAMPLE_CATALOG
 from senaite.core.config import PROJECTNAME as product
 from senaite.core.upgrade import upgradestep
 from senaite.core.upgrade.utils import UpgradeUtils
@@ -44,3 +49,19 @@ def upgrade(tool):
 
     logger.info("{0} upgraded to version {1}".format(product, version))
     return True
+
+
+def rebuild_sample_zctext_index_and_lexicon(tool):
+    """Recreate sample listing_searchable_text ZCText index and Lexicon
+    """
+    # drop the existing index first so that it can be recreated below
+    index = "listing_searchable_text"
+    del_index(SAMPLE_CATALOG, index)
+    # delete the catalog's "Lexicon" object as well, but only if it exists
+    catalog = api.get_tool(SAMPLE_CATALOG)
+    if "Lexicon" in catalog.objectIds():
+        catalog.manage_delObjects("Lexicon")
+    # recreate the index (presumably add_index recreates the Lexicon too)
+    add_index(SAMPLE_CATALOG, index, "ZCTextIndex")
+    # fill the recreated index again by reindexing it
+    reindex_index(SAMPLE_CATALOG, index)
diff --git a/src/senaite/core/upgrade/v02_05_000.zcml b/src/senaite/core/upgrade/v02_05_000.zcml
index 03f96c3355..e5067eac7c 100644
--- a/src/senaite/core/upgrade/v02_05_000.zcml
+++ b/src/senaite/core/upgrade/v02_05_000.zcml
@@ -3,6 +3,14 @@
     xmlns:genericsetup="http://namespaces.zope.org/genericsetup"
     i18n_domain="senaite.core">
 
+  <genericsetup:upgradeStep
+      title="SENAITE.CORE 2.5.0: Recreate listing_searchable_text ZCText index and Lexicon in Sample Catalog"
+      description="Rebuild listing_searchable_text and Lexicon for better performance"
+      source="2500"
+      destination="2501"
+      handler=".v02_05_000.rebuild_sample_zctext_index_and_lexicon"
+      profile="senaite.core:default"/>
+
   <genericsetup:upgradeStep
       title="Upgrade to SENAITE.CORE 2.5.0"
       source="2423"

From a8e32f9db5f6ad50e6ca1d18b12d1b26c6bd8ca0 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 15:54:09 +0100
Subject: [PATCH 04/18] Removed unused import

---
 src/senaite/core/api/catalog.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/senaite/core/api/catalog.py b/src/senaite/core/api/catalog.py
index 8d534e2f37..af963cf316 100644
--- a/src/senaite/core/api/catalog.py
+++ b/src/senaite/core/api/catalog.py
@@ -28,7 +28,6 @@
 from Products.CMFPlone.UnicodeSplitter import CaseNormalizer
 from Products.CMFPlone.UnicodeSplitter import Splitter
 from Products.ZCatalog.interfaces import IZCatalog
-from Products.ZCTextIndex.Lexicon import StopWordAndSingleCharRemover
 from Products.ZCTextIndex.ZCTextIndex import PLexicon
 
 

From 7d2772306f44a8bebf3a73aa2019c6bf1a99e92c Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 16:10:47 +0100
Subject: [PATCH 05/18] Index also client ID and Name

---
 src/senaite/core/catalog/indexer/sample.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/senaite/core/catalog/indexer/sample.py b/src/senaite/core/catalog/indexer/sample.py
index db9f31b358..a3bd80da43 100644
--- a/src/senaite/core/catalog/indexer/sample.py
+++ b/src/senaite/core/catalog/indexer/sample.py
@@ -102,6 +102,10 @@ def listing_searchable_text(instance):
         entries.add(obj.getClientSampleID())
 
         # we use this approach to bypass the computed fields
+        client = obj.getClient()
+        entries.add(client.getName())
+        entries.add(client.getClientID())
+
         sampletype = obj.getSampleType()
         entries.add(sampletype.Title() if sampletype else '')
 

From 9354a674acae52eb145ee1de255309c5d746c19a Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 16:18:49 +0100
Subject: [PATCH 06/18] Changelog updated

---
 CHANGES.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index e6d6b20a6f..6b486dd92d 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,6 +4,7 @@ Changelog
 2.5.0 (unreleased)
 ------------------
 
+- #2273 Improve performance for sample listing index
 - #2272 Allow to configure the position of additional value columns
 
 

From f900ebc25f4b07a1e53b657b1f5ac897a62c1de5 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 16:48:34 +0100
Subject: [PATCH 07/18] Ensure unicodes

---
 src/senaite/core/catalog/indexer/sample.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/senaite/core/catalog/indexer/sample.py b/src/senaite/core/catalog/indexer/sample.py
index a3bd80da43..ab7a05c05a 100644
--- a/src/senaite/core/catalog/indexer/sample.py
+++ b/src/senaite/core/catalog/indexer/sample.py
@@ -115,4 +115,4 @@ def listing_searchable_text(instance):
         batch = obj.getBatch()
         entries.add(batch.getBatchID() if batch else '')
 
-    return u" ".join(list(entries))
+    return u" ".join(map(api.safe_unicode, entries))

From f816f94f35f7d7df38711f1de59ad59b35890ecb Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 16:52:28 +0100
Subject: [PATCH 08/18] Use getId instead

---
 src/senaite/core/catalog/indexer/sample.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/senaite/core/catalog/indexer/sample.py b/src/senaite/core/catalog/indexer/sample.py
index ab7a05c05a..d7ea9e7d84 100644
--- a/src/senaite/core/catalog/indexer/sample.py
+++ b/src/senaite/core/catalog/indexer/sample.py
@@ -113,6 +113,6 @@ def listing_searchable_text(instance):
         entries.add(samplepoint.Title() if samplepoint else '')
 
         batch = obj.getBatch()
-        entries.add(batch.getBatchID() if batch else '')
+        entries.add(batch.getId() if batch else '')
 
     return u" ".join(map(api.safe_unicode, entries))

From d41d9b6deb5ee66debee476e1ceb404e46908ae9 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 17:26:21 +0100
Subject: [PATCH 09/18] Do not split on `-`

---
 src/senaite/core/api/catalog.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/senaite/core/api/catalog.py b/src/senaite/core/api/catalog.py
index af963cf316..25b5eb8616 100644
--- a/src/senaite/core/api/catalog.py
+++ b/src/senaite/core/api/catalog.py
@@ -249,8 +249,8 @@ def append_op_after(index, token, tokens):
     # convert to unicode
     term = safe_unicode(qs)
 
-    # splits the string on all non alphanumeric characters
-    tokens = re.split(r"[^\w]", term, flags=re.U | re.I)
+    # splits the string on all non alphanumeric characters except -
+    tokens = re.split(r"[^\w-]", term, flags=re.U | re.I)
 
     # filter out all empty tokens
     tokens = filter(None, tokens)

From 2d6098be6ccd2d4e5cec7c452f245da39f2d7287 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 17:38:20 +0100
Subject: [PATCH 10/18] Allow certain characters

---
 src/senaite/core/api/catalog.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/senaite/core/api/catalog.py b/src/senaite/core/api/catalog.py
index 25b5eb8616..2258975d26 100644
--- a/src/senaite/core/api/catalog.py
+++ b/src/senaite/core/api/catalog.py
@@ -249,8 +249,8 @@ def append_op_after(index, token, tokens):
     # convert to unicode
     term = safe_unicode(qs)
 
-    # splits the string on all non alphanumeric characters except -
-    tokens = re.split(r"[^\w-]", term, flags=re.U | re.I)
+    # splits the string on unsupported characters
+    tokens = re.split(r"[^\w\-\_\.\%\<\>]", term, flags=re.U | re.I)
 
     # filter out all empty tokens
     tokens = filter(None, tokens)

From ec4dfc967543640e3b3ec736604198400b86a2d6 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 17:43:06 +0100
Subject: [PATCH 11/18] Fixed test

---
 .../core/tests/doctests/API_catalog.rst       | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/senaite/core/tests/doctests/API_catalog.rst b/src/senaite/core/tests/doctests/API_catalog.rst
index 316a262f8f..cab9730ca2 100644
--- a/src/senaite/core/tests/doctests/API_catalog.rst
+++ b/src/senaite/core/tests/doctests/API_catalog.rst
@@ -176,7 +176,7 @@ Searching for multiple unicode words:
 Searching for a concatenated word:
 
     >>> capi.to_searchable_text_qs("H2O-0001")
-    u'H2O* AND 0001*'
+    u'H2O-0001*'
 
 Searching for two words:
 
@@ -195,17 +195,29 @@ All wildcards are removed and replaced with `*` to avoid parse errors:
 
 Search with special characters:
 
+    >>> capi.to_searchable_text_qs("H2O_0001")
+    u'H2O_0001*'
+
+    >>> capi.to_searchable_text_qs("H2O.0001")
+    u'H2O.0001*'
+
+    >>> capi.to_searchable_text_qs("H2O<>0001")
+    u'H2O<>0001*'
+
+    >>> capi.to_searchable_text_qs("H2O%0001")
+    u'H2O%0001*'
+
     >>> capi.to_searchable_text_qs("'H2O-0001'")
-    u'H2O* AND 0001*'
+    u'H2O-0001*'
 
     >>> capi.to_searchable_text_qs("\'H2O-0001\'")
-    u'H2O* AND 0001*'
+    u'H2O-0001*'
 
     >>> capi.to_searchable_text_qs("(H2O-0001)*")
-    u'H2O* AND 0001*'
+    u'H2O-0001*'
 
     >>> capi.to_searchable_text_qs("****([H2O-0001])****")
-    u'H2O* AND 0001*'
+    u'H2O-0001*'
 
     >>> capi.to_searchable_text_qs("********************")
     u''

From 2a44590643c44fd0030494d83803ae8f3b530e61 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Sun, 12 Mar 2023 18:04:28 +0100
Subject: [PATCH 12/18] Allow more special characters

---
 src/senaite/core/api/catalog.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/senaite/core/api/catalog.py b/src/senaite/core/api/catalog.py
index 2258975d26..beecfb5376 100644
--- a/src/senaite/core/api/catalog.py
+++ b/src/senaite/core/api/catalog.py
@@ -250,7 +250,7 @@ def append_op_after(index, token, tokens):
     term = safe_unicode(qs)
 
     # splits the string on unsupported characters
-    tokens = re.split(r"[^\w\-\_\.\%\<\>]", term, flags=re.U | re.I)
+    tokens = re.split(r"[^\w\-\_\.\%\<\>\+\{\}]", term, flags=re.U | re.I)
 
     # filter out all empty tokens
     tokens = filter(None, tokens)

From 760e9d72e21c965124e5a93cdd5a40d5740b2e8d Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Mon, 13 Mar 2023 10:05:20 +0100
Subject: [PATCH 13/18] Do not split on : and /

---
 src/senaite/core/api/catalog.py                 | 2 +-
 src/senaite/core/tests/doctests/API_catalog.rst | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/senaite/core/api/catalog.py b/src/senaite/core/api/catalog.py
index beecfb5376..de1ce79685 100644
--- a/src/senaite/core/api/catalog.py
+++ b/src/senaite/core/api/catalog.py
@@ -250,7 +250,7 @@ def append_op_after(index, token, tokens):
     term = safe_unicode(qs)
 
     # splits the string on unsupported characters
-    tokens = re.split(r"[^\w\-\_\.\%\<\>\+\{\}]", term, flags=re.U | re.I)
+    tokens = re.split(r"[^\w\-\_\.\%\<\>\+\{\}\:\/]", term, flags=re.U | re.I)
 
     # filter out all empty tokens
     tokens = filter(None, tokens)
diff --git a/src/senaite/core/tests/doctests/API_catalog.rst b/src/senaite/core/tests/doctests/API_catalog.rst
index cab9730ca2..bbb517b306 100644
--- a/src/senaite/core/tests/doctests/API_catalog.rst
+++ b/src/senaite/core/tests/doctests/API_catalog.rst
@@ -207,6 +207,12 @@ Search with special characters:
     >>> capi.to_searchable_text_qs("H2O%0001")
     u'H2O%0001*'
 
+    >>> capi.to_searchable_text_qs("H2O:0001")
+    u'H2O:0001*'
+
+    >>> capi.to_searchable_text_qs("H2O/0001")
+    u'H2O/0001*'
+
     >>> capi.to_searchable_text_qs("'H2O-0001'")
     u'H2O-0001*'
 
@@ -231,7 +237,7 @@ Search with special characters:
     >>> capi.to_searchable_text_qs("*H2O*")
     u'H2O*'
 
-    >>> capi.to_searchable_text_qs("And the question is: AND OR maybe NOT AND")
+    >>> capi.to_searchable_text_qs("And the question is AND OR maybe NOT AND")
     u'the* AND question* AND is* AND OR maybe* AND NOT*'
 
     >>> capi.to_searchable_text_qs("AND OR")

From 0d15ca4177f7cd8f3067a834f584c60089fd6741 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Mon, 13 Mar 2023 10:53:21 +0100
Subject: [PATCH 14/18] Support for wildcards

---
 src/senaite/core/api/catalog.py               | 22 ++++++++++--
 .../core/tests/doctests/API_catalog.rst       | 35 +++++++++++++------
 2 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/src/senaite/core/api/catalog.py b/src/senaite/core/api/catalog.py
index de1ce79685..1f8f8f7541 100644
--- a/src/senaite/core/api/catalog.py
+++ b/src/senaite/core/api/catalog.py
@@ -226,6 +226,7 @@ def to_searchable_text_qs(qs, op="AND", wildcard=True):
     :returns: sarchable text string
     """
     OPERATORS = ["AND", "OR"]
+    WILDCARDS = ["*", "?"]
 
     if op not in OPERATORS:
         op = "AND"
@@ -235,6 +236,9 @@ def to_searchable_text_qs(qs, op="AND", wildcard=True):
     def is_op(token):
         return token.upper() in OPERATORS
 
+    def is_wc(char):
+        return char in WILDCARDS
+
     def append_op_after(index, token, tokens):
         # do not append an operator after the last token
         if index == len(tokens) - 1:
@@ -249,8 +253,19 @@ def append_op_after(index, token, tokens):
     # convert to unicode
     term = safe_unicode(qs)
 
+    # Wildcards at the beginning are not allowed and therefore removed!
+    first_char = term[0] if len(term) > 0 else ""
+    if is_wc(first_char):
+        term = term.replace(first_char, "", 1)
+
     # splits the string on unsupported characters
-    tokens = re.split(r"[^\w\-\_\.\%\<\>\+\{\}\:\/]", term, flags=re.U | re.I)
+    regex = r"[^\w\-\_\.\%\<\>\+\{\}\:\/\?\$]"
+
+    # allow only words when searching just a single character
+    if len(term) == 1:
+        regex = r"[^\w]"
+
+    tokens = re.split(regex, term, flags=re.U | re.I)
 
     # filter out all empty tokens
     tokens = filter(None, tokens)
@@ -267,13 +282,16 @@ def append_op_after(index, token, tokens):
 
     for num, token in enumerate(tokens):
 
+        # retain wildcards at the end of a token
+        last_token_char = token[-1] if len(token) > 0 else ""
+
         # append operators without changes and continue
         if is_op(token):
             parts.append(token.upper())
             continue
 
         # append wildcard to token
-        if wildcard and not is_op(token):
+        if wildcard and not is_op(token) and not is_wc(last_token_char):
             token = token + "*"
 
         # append the token
diff --git a/src/senaite/core/tests/doctests/API_catalog.rst b/src/senaite/core/tests/doctests/API_catalog.rst
index bbb517b306..6321c6e1c4 100644
--- a/src/senaite/core/tests/doctests/API_catalog.rst
+++ b/src/senaite/core/tests/doctests/API_catalog.rst
@@ -163,6 +163,30 @@ Without wildcard:
     >>> capi.to_searchable_text_qs("sample", wildcard=False)
     u'sample'
 
+Wildcards at the beginning of the search term are not supported and get removed:
+
+    >>> capi.to_searchable_text_qs("?H2O")
+    u'H2O*'
+
+    >>> capi.to_searchable_text_qs("*H2O")
+    u'H2O*'
+
+Wildcards at the end of the search term are retained:
+
+    >>> capi.to_searchable_text_qs("H2O?")
+    u'H2O?'
+
+    >>> capi.to_searchable_text_qs("H2O*")
+    u'H2O*'
+
+If the search consists of a single character only, it must be a word character:
+
+    >>> capi.to_searchable_text_qs("W")
+    u'W*'
+
+    >>> capi.to_searchable_text_qs("$")
+    u''
+
 Searching for a unicode word:
 
     >>> capi.to_searchable_text_qs("AäOöUüZ")
@@ -188,11 +212,6 @@ Tricky query strings (with and/or in words or in between):
     >>> capi.to_searchable_text_qs("Fresh and Funky Oranges from Andorra")
     u'Fresh* AND Funky* AND Oranges* AND from* AND Andorra*'
 
-All wildcards are removed and replaced with `*` to avoid parse errors:
-
-    >>> capi.to_searchable_text_qs("Ca? OR Mg?")
-    u'Ca* OR Mg*'
-
 Search with special characters:
 
     >>> capi.to_searchable_text_qs("H2O_0001")
@@ -228,12 +247,6 @@ Search with special characters:
     >>> capi.to_searchable_text_qs("********************")
     u''
 
-    >>> capi.to_searchable_text_qs("????????????????????")
-    u''
-
-    >>> capi.to_searchable_text_qs("?H2O?")
-    u'H2O*'
-
     >>> capi.to_searchable_text_qs("*H2O*")
     u'H2O*'
 

From d01075c6f5791f634a3211d8240fc8031eaaaf7e Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Mon, 13 Mar 2023 12:07:09 +0100
Subject: [PATCH 15/18] URL unquote the searchterm first

---
 src/senaite/core/api/catalog.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/senaite/core/api/catalog.py b/src/senaite/core/api/catalog.py
index 1f8f8f7541..0098fe34c2 100644
--- a/src/senaite/core/api/catalog.py
+++ b/src/senaite/core/api/catalog.py
@@ -21,6 +21,7 @@
 import re
 
 import six
+from six.moves.urllib.parse import unquote_plus
 
 from bika.lims.api import APIError
 from bika.lims.api import get_tool
@@ -251,7 +252,7 @@ def append_op_after(index, token, tokens):
         return True
 
     # convert to unicode
-    term = safe_unicode(qs)
+    term = unquote_plus(safe_unicode(qs))
 
     # Wildcards at the beginning are not allowed and therefore removed!
     first_char = term[0] if len(term) > 0 else ""
@@ -260,6 +261,8 @@ def append_op_after(index, token, tokens):
 
     # splits the string on unsupported characters
     regex = r"[^\w\-\_\.\%\<\>\+\{\}\:\/\?\$]"
+    # splits the string on all characters that do not match the regex
+    regex = r"[^\w\-\_\.\%\<\>\+\{\}\:\/\?\$\"]"
 
     # allow only words when searching just a single character
     if len(term) == 1:

From f78b9dd1db07a37c763d71ed33736a9ed1bb4157 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Mon, 13 Mar 2023 12:08:34 +0100
Subject: [PATCH 16/18] Fix regex

---
 src/senaite/core/api/catalog.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/senaite/core/api/catalog.py b/src/senaite/core/api/catalog.py
index 0098fe34c2..0f9cc0fadb 100644
--- a/src/senaite/core/api/catalog.py
+++ b/src/senaite/core/api/catalog.py
@@ -259,10 +259,8 @@ def append_op_after(index, token, tokens):
     if is_wc(first_char):
         term = term.replace(first_char, "", 1)
 
-    # splits the string on unsupported characters
-    regex = r"[^\w\-\_\.\%\<\>\+\{\}\:\/\?\$]"
     # splits the string on all characters that do not match the regex
-    regex = r"[^\w\-\_\.\%\<\>\+\{\}\:\/\?\$\"]"
+    regex = r"[^\w\-\_\.\<\>\+\{\}\:\/\?\$\"]"
 
     # allow only words when searching just a single character
     if len(term) == 1:

From 23f7aace94c6efc68fd2cd8f198204d540e1c94b Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Mon, 13 Mar 2023 12:09:10 +0100
Subject: [PATCH 17/18] Percentages are filtered out

---
 src/senaite/core/tests/doctests/API_catalog.rst | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/senaite/core/tests/doctests/API_catalog.rst b/src/senaite/core/tests/doctests/API_catalog.rst
index 6321c6e1c4..5c791893fa 100644
--- a/src/senaite/core/tests/doctests/API_catalog.rst
+++ b/src/senaite/core/tests/doctests/API_catalog.rst
@@ -223,9 +223,6 @@ Search with special characters:
     >>> capi.to_searchable_text_qs("H2O<>0001")
     u'H2O<>0001*'
 
-    >>> capi.to_searchable_text_qs("H2O%0001")
-    u'H2O%0001*'
-
     >>> capi.to_searchable_text_qs("H2O:0001")
     u'H2O:0001*'
 

From d7e68d0b79d16fb79f377bc4f41fe47e6f746235 Mon Sep 17 00:00:00 2001
From: Ramon Bartl <rb@ridingbytes.com>
Date: Mon, 13 Mar 2023 12:12:35 +0100
Subject: [PATCH 18/18] No literals

---
 src/senaite/core/api/catalog.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/senaite/core/api/catalog.py b/src/senaite/core/api/catalog.py
index 0f9cc0fadb..4e1156c78c 100644
--- a/src/senaite/core/api/catalog.py
+++ b/src/senaite/core/api/catalog.py
@@ -260,7 +260,7 @@ def append_op_after(index, token, tokens):
         term = term.replace(first_char, "", 1)
 
     # splits the string on all characters that do not match the regex
-    regex = r"[^\w\-\_\.\<\>\+\{\}\:\/\?\$\"]"
+    regex = r"[^\w\-\_\.\<\>\+\{\}\:\/\?\$]"
 
     # allow only words when searching just a single character
     if len(term) == 1: