From b91e62e25248cd91448865103cc4665ed24f3415 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Mon, 8 Jul 2024 13:17:55 -0400 Subject: [PATCH 1/7] add clear method --- redisvl/index/index.py | 43 ++++++++++++++++++++++++++++++++++++++- redisvl/query/__init__.py | 10 +++++++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/redisvl/index/index.py b/redisvl/index/index.py index 1dfe2fe9..e0f9d2c4 100644 --- a/redisvl/index/index.py +++ b/redisvl/index/index.py @@ -23,7 +23,8 @@ from redis.commands.search.indexDefinition import IndexDefinition from redisvl.index.storage import HashStorage, JsonStorage -from redisvl.query.query import BaseQuery, CountQuery, FilterQuery +from redisvl.query import BaseQuery, CountQuery, FilterQuery +from redisvl.query.filter import FilterExpression from redisvl.redis.connection import ( RedisConnectionFactory, convert_index_info_to_schema, @@ -476,6 +477,26 @@ def delete(self, drop: bool = True): except: logger.exception("Error while deleting index") + def clear(self) -> int: + """Clear all keys in Redis associated with the index, leaving the index + available and in-place for future insertions or updates. + + Returns: + int: Count of records deleted from Redis. + """ + # Track deleted records + total_records_deleted: int = 0 + + # Paginate using queries and delete in batches + for batch in self.paginate( + FilterQuery(FilterExpression("*"), return_fields=["id"]), page_size=500 + ): + batch_keys = [record["id"] for record in batch] + self._redis_client.delete(*batch_keys) # type: ignore + total_records_deleted += len(batch_keys) + + return total_records_deleted + def load( self, data: Iterable[Any], @@ -894,6 +915,26 @@ async def delete(self, drop: bool = True): logger.exception("Error while deleting index") raise + async def clear(self) -> int: + """Clear all keys in Redis associated with the index, leaving the index + available and in-place for future insertions or updates. 
+ + Returns: + int: Count of records deleted from Redis. + """ + # Track deleted records + total_records_deleted: int = 0 + + # Paginate using queries and delete in batches + async for batch in self.paginate( + FilterQuery(FilterExpression("*"), return_fields=["id"]), page_size=500 + ): + batch_keys = [record["id"] for record in batch] + await self._redis_client.delete(*batch_keys) # type: ignore + total_records_deleted += len(batch_keys) + + return total_records_deleted + async def load( self, data: Iterable[Any], diff --git a/redisvl/query/__init__.py b/redisvl/query/__init__.py index 16227f22..68182e0f 100644 --- a/redisvl/query/__init__.py +++ b/redisvl/query/__init__.py @@ -1,3 +1,9 @@ -from redisvl.query.query import CountQuery, FilterQuery, RangeQuery, VectorQuery +from redisvl.query.query import ( + BaseQuery, + CountQuery, + FilterQuery, + RangeQuery, + VectorQuery, +) -__all__ = ["VectorQuery", "FilterQuery", "RangeQuery", "CountQuery"] +__all__ = ["BaseQuery", "VectorQuery", "FilterQuery", "RangeQuery", "CountQuery"] From ad9c20e14fbd89f000cfbe93c54a7bb6b0a6e754 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Mon, 8 Jul 2024 13:18:12 -0400 Subject: [PATCH 2/7] update extension classes to use the new clear --- redisvl/extensions/llmcache/semantic.py | 5 +---- redisvl/extensions/session_manager/semantic_session.py | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/redisvl/extensions/llmcache/semantic.py b/redisvl/extensions/llmcache/semantic.py index 3956e7d6..023c1916 100644 --- a/redisvl/extensions/llmcache/semantic.py +++ b/redisvl/extensions/llmcache/semantic.py @@ -176,10 +176,7 @@ def set_vectorizer(self, vectorizer: BaseVectorizer) -> None: def clear(self) -> None: """Clear the cache of all keys while preserving the index.""" - with self._index.client.pipeline(transaction=False) as pipe: # type: ignore - for key in self._index.client.scan_iter(match=f"{self._index.prefix}:*"): # type: ignore - pipe.delete(key) - 
pipe.execute() + self._index.clear() def delete(self) -> None: """Clear the semantic cache of all keys and remove the underlying search diff --git a/redisvl/extensions/session_manager/semantic_session.py b/redisvl/extensions/session_manager/semantic_session.py index 6cfa9dc6..6ce253d5 100644 --- a/redisvl/extensions/session_manager/semantic_session.py +++ b/redisvl/extensions/session_manager/semantic_session.py @@ -130,10 +130,7 @@ def set_scope( def clear(self) -> None: """Clears the chat session history.""" - with self._index.client.pipeline(transaction=False) as pipe: # type: ignore - for key in self._index.client.scan_iter(match=f"{self._index.prefix}:*"): # type: ignore - pipe.delete(key) - pipe.execute() + self._index.clear() def delete(self) -> None: """Clear all conversation keys and remove the search index.""" From 1002b85010274cf4b0f7a22e42e1a8ceee7b4b78 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Mon, 8 Jul 2024 13:18:21 -0400 Subject: [PATCH 3/7] update tests and docs --- docs/user_guide/getting_started_01.ipynb | 35 ++++++++++++++++++++++-- schemas/schema.yaml | 2 +- tests/integration/test_flow.py | 7 +++++ tests/integration/test_flow_async.py | 7 +++++ 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/docs/user_guide/getting_started_01.ipynb b/docs/user_guide/getting_started_01.ipynb index 8fc40a0e..3daa609d 100644 --- a/docs/user_guide/getting_started_01.ipynb +++ b/docs/user_guide/getting_started_01.ipynb @@ -653,13 +653,44 @@ "## Cleanup" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below we will clean up after our work. First, you can optionally flush all data from Redis associated with the index by\n", + "using the `.clear()` method. This will leave the secondary index in place for future insertions or updates.\n", + "\n", + "But if you want to clean up everything, including the index, just use `.delete()`\n", + "which will by default remove the index AND the underlying data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# (optionally) clear all data from Redis associated with the index\n", + "await index.clear()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# but the index is still in place\n", + "await index.exists()" + ] + }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "# clean up the index\n", + "# remove / delete the index in its entirety\n", "await index.delete()" ] } @@ -680,7 +711,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.10.14" }, "orig_nbformat": 4, "vscode": { diff --git a/schemas/schema.yaml b/schemas/schema.yaml index cc1ad287..3c852da7 100644 --- a/schemas/schema.yaml +++ b/schemas/schema.yaml @@ -9,7 +9,7 @@ index: fields: - name: user type: tag - path: '.user' + path: '$.user' - name: credit_score type: tag path: '$.credit_score' diff --git a/tests/integration/test_flow.py b/tests/integration/test_flow.py index 5bbda187..538b02d3 100644 --- a/tests/integration/test_flow.py +++ b/tests/integration/test_flow.py @@ -90,4 +90,11 @@ def hash_preprocess(item: dict) -> dict: for field in return_fields: assert getattr(doc1, field) == doc2[field] + count_deleted_keys = index.clear() + assert count_deleted_keys == len(sample_data) + + assert index.exists() == True + index.delete() + + assert index.exists() == False diff --git a/tests/integration/test_flow_async.py b/tests/integration/test_flow_async.py index 11762068..9f0c77cc 100644 --- a/tests/integration/test_flow_async.py +++ b/tests/integration/test_flow_async.py @@ -93,4 +93,11 @@ async def hash_preprocess(item: dict) -> dict: for field in return_fields: assert getattr(doc1, field) == doc2[field] + count_deleted_keys = await index.clear() + assert count_deleted_keys == len(sample_data) + + assert await 
index.exists() == True + await index.delete() + + assert await index.exists() == False From c4d35279717bdf6b68686e698226c67d0c6e335f Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Mon, 8 Jul 2024 21:43:39 -0400 Subject: [PATCH 4/7] temp comment out mistral --- tests/integration/test_vectorizers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index 69d7ec62..24f5224a 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -27,7 +27,7 @@ def skip_vectorizer() -> bool: VertexAITextVectorizer, CohereTextVectorizer, AzureOpenAITextVectorizer, - MistralAITextVectorizer, + # MistralAITextVectorizer, CustomTextVectorizer, ] ) From 835f5f95daa0a80ab1db6f511d9f0b3d4ca14e2d Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Mon, 8 Jul 2024 22:44:33 -0400 Subject: [PATCH 5/7] updates --- tests/integration/test_vectorizers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index 24f5224a..baa8a62b 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -218,7 +218,11 @@ def bad_return_type(text: str) -> str: @pytest.fixture( - params=[OpenAITextVectorizer, MistralAITextVectorizer, CustomTextVectorizer] + params=[ + OpenAITextVectorizer, + # MistralAITextVectorizer, + CustomTextVectorizer, + ] ) def avectorizer(request, skip_vectorizer): if skip_vectorizer: From cfccb37c7982ca5734d1834f46766483f853ad0b Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Mon, 8 Jul 2024 23:28:23 -0400 Subject: [PATCH 6/7] one more updte --- docs/user_guide/vectorizers_04.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/user_guide/vectorizers_04.ipynb b/docs/user_guide/vectorizers_04.ipynb index 24768b0d..f424e032 100644 --- a/docs/user_guide/vectorizers_04.ipynb +++ 
b/docs/user_guide/vectorizers_04.ipynb @@ -531,14 +531,14 @@ } ], "source": [ - "from redisvl.utils.vectorize import MistralAITextVectorizer\n", + "# from redisvl.utils.vectorize import MistralAITextVectorizer\n", "\n", - "mistral = MistralAITextVectorizer()\n", + "# mistral = MistralAITextVectorizer()\n", "\n", - "# mebed a sentence using their asyncronous method\n", - "test = await mistral.aembed(\"This is a test sentence.\")\n", - "print(\"Vector dimensions: \", len(test))\n", - "print(test[:10])" + "# # embed a sentence using their asynchronous method\n", + "# test = await mistral.aembed(\"This is a test sentence.\")\n", + "# print(\"Vector dimensions: \", len(test))\n", + "# print(test[:10])" ] }, { From d8ab61a0c4aeb77ea291819c05b1992981254c14 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Tue, 9 Jul 2024 10:13:15 -0400 Subject: [PATCH 7/7] updates --- redisvl/index/index.py | 8 ++++---- tests/integration/test_async_search_index.py | 12 ++++++++++++ tests/integration/test_search_index.py | 11 +++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/redisvl/index/index.py b/redisvl/index/index.py index e0f9d2c4..25b907f8 100644 --- a/redisvl/index/index.py +++ b/redisvl/index/index.py @@ -492,8 +492,8 @@ def clear(self) -> int: FilterQuery(FilterExpression("*"), return_fields=["id"]), page_size=500 ): batch_keys = [record["id"] for record in batch] - self._redis_client.delete(*batch_keys) # type: ignore - total_records_deleted += len(batch_keys) + record_deleted = self._redis_client.delete(*batch_keys) # type: ignore + total_records_deleted += record_deleted # type: ignore return total_records_deleted @@ -930,8 +930,8 @@ async def clear(self) -> int: FilterQuery(FilterExpression("*"), return_fields=["id"]), page_size=500 ): batch_keys = [record["id"] for record in batch] - await self._redis_client.delete(*batch_keys) # type: ignore - total_records_deleted += len(batch_keys) + records_deleted = await
self._redis_client.delete(*batch_keys) # type: ignore + total_records_deleted += records_deleted # type: ignore return total_records_deleted diff --git a/tests/integration/test_async_search_index.py b/tests/integration/test_async_search_index.py index b4f57cf8..41e5a03a 100644 --- a/tests/integration/test_async_search_index.py +++ b/tests/integration/test_async_search_index.py @@ -172,6 +172,18 @@ async def test_search_index_delete(async_client, async_index): ) +@pytest.mark.asyncio +async def test_search_index_clear(async_client, async_index): + async_index.set_client(async_client) + await async_index.create(overwrite=True, drop=True) + data = [{"id": "1", "test": "foo"}] + await async_index.load(data, id_field="id") + + count = await async_index.clear() + assert count == len(data) + assert await async_index.exists() + + @pytest.mark.asyncio async def test_search_index_load_and_fetch(async_client, async_index): async_index.set_client(async_client) diff --git a/tests/integration/test_search_index.py b/tests/integration/test_search_index.py index 5243e6ba..1c215dc9 100644 --- a/tests/integration/test_search_index.py +++ b/tests/integration/test_search_index.py @@ -159,6 +159,17 @@ def test_search_index_delete(client, index): assert index.name not in convert_bytes(index.client.execute_command("FT._LIST")) +def test_search_index_clear(client, index): + index.set_client(client) + index.create(overwrite=True, drop=True) + data = [{"id": "1", "test": "foo"}] + index.load(data, id_field="id") + + count = index.clear() + assert count == len(data) + assert index.exists() + + def test_search_index_load_and_fetch(client, index): index.set_client(client) index.create(overwrite=True, drop=True)