Fix cache schema overwrite #208

Merged: 3 commits, Aug 26, 2024
19 changes: 18 additions & 1 deletion redisvl/extensions/llmcache/semantic.py
@@ -37,6 +37,7 @@ def __init__(
         redis_client: Optional[Redis] = None,
         redis_url: str = "redis://localhost:6379",
         connection_kwargs: Dict[str, Any] = {},
+        overwrite: bool = False,
         **kwargs,
     ):
         """Semantic Cache for Large Language Models.
@@ -57,11 +58,14 @@ def __init__(
             redis_url (str, optional): The redis url. Defaults to redis://localhost:6379.
             connection_kwargs (Dict[str, Any]): The connection arguments
                 for the redis client. Defaults to empty {}.
+            overwrite (bool): Whether or not to force overwrite the schema for
+                the semantic cache index. Defaults to false.
 
         Raises:
             TypeError: If an invalid vectorizer is provided.
             TypeError: If the TTL value is not an int.
             ValueError: If the threshold is not between 0 and 1.
+            ValueError: If existing schema does not match new schema and overwrite is False.
         """
         super().__init__(ttl)
 
@@ -99,10 +103,23 @@ def __init__(
         elif redis_url:
             self._index.connect(redis_url=redis_url, **connection_kwargs)
 
+        # Check for existing cache index
+        if not overwrite and self._index.exists():
+            existing_index = SearchIndex.from_existing(
+                name, redis_client=self._index.client
+            )
+            if existing_index.schema != self._index.schema:
+                raise ValueError(
+                    f"Existing index {name} schema does not match the user provided schema for the semantic cache. "
+                    "If you wish to overwrite the index schema, set overwrite=True during initialization."
+                )
+
         # Initialize other components
         self._set_vectorizer(vectorizer)
         self.set_threshold(distance_threshold)
-        self._index.create(overwrite=False)
+
+        # Create the index
+        self._index.create(overwrite=overwrite, drop=False)
 
     def _modify_schema(
         self,
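
For context, a minimal usage sketch of the behavior this diff introduces (not part of the PR; the cache name "llmcache" and the "topic" field are illustrative, and a local Redis at redis://localhost:6379 is assumed):

    from redisvl.extensions.llmcache import SemanticCache

    # First initialization creates the index with the default cache schema.
    cache = SemanticCache(name="llmcache", redis_url="redis://localhost:6379")

    # Re-initializing with a different schema now fails fast instead of
    # silently overwriting the existing index definition.
    try:
        SemanticCache(
            name="llmcache",
            redis_url="redis://localhost:6379",
            filterable_fields=[{"name": "topic", "type": "tag"}],
        )
    except ValueError as e:
        print(e)  # schemas differ and overwrite defaults to False

    # Passing overwrite=True replaces the index definition; since create()
    # is called with drop=False, previously cached entries are preserved.
    cache = SemanticCache(
        name="llmcache",
        redis_url="redis://localhost:6379",
        filterable_fields=[{"name": "topic", "type": "tag"}],
        overwrite=True,
    )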
42 changes: 42 additions & 0 deletions tests/integration/test_llmcache.py
@@ -513,3 +513,45 @@ def test_complex_filters(cache_with_filters):
         "prompt 1", filter_expression=combined_filter, num_results=5
     )
     assert len(results) == 1
+
+
+def test_index_updating(redis_url):
+    cache_no_tags = SemanticCache(
+        name="test_cache",
+        redis_url=redis_url,
+    )
+
+    cache_no_tags.store(
+        prompt="this prompt has tags",
+        response="this response has tags",
+        filters={"some_tag": "abc"},
+    )
+
+    # filterable_fields not defined in schema, so no tags will match
+    tag_filter = Tag("some_tag") == "abc"
+
+    response = cache_no_tags.check(
+        prompt="this prompt has a tag",
+        filter_expression=tag_filter,
+    )
+    assert response == []
+
+    with pytest.raises(ValueError):
+        cache_with_tags = SemanticCache(
+            name="test_cache",
+            redis_url=redis_url,
+            filterable_fields=[{"name": "some_tag", "type": "tag"}],
+        )
+
+    cache_overwrite = SemanticCache(
+        name="test_cache",
+        redis_url=redis_url,
+        filterable_fields=[{"name": "some_tag", "type": "tag"}],
+        overwrite=True,
+    )
+
+    response = cache_overwrite.check(
+        prompt="this prompt has a tag",
+        filter_expression=tag_filter,
+    )
+    assert len(response) == 1
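
A note on why the last assertion holds: cache_overwrite recreates the index with overwrite=True but drop=False, so the entry stored through cache_no_tags survives the schema change and becomes matchable once some_tag is a filterable field. The guard itself can be sketched directly against SearchIndex; this is a hand-rolled illustration, not repo code, and it assumes SearchIndex.from_dict accepts a schema dict as in current redisvl (the dict below is simplified; the real llmcache schema also carries prompt, response, and vector fields):

    from redisvl.index import SearchIndex

    # Illustrative schema dict for the sketch.
    new_schema = {
        "index": {"name": "test_cache", "prefix": "llmcache"},
        "fields": [
            {"name": "prompt", "type": "text"},
            {"name": "some_tag", "type": "tag"},
        ],
    }

    local_index = SearchIndex.from_dict(new_schema)
    local_index.connect("redis://localhost:6379")

    overwrite = False  # mirrors the new constructor flag

    # The check added in this PR, performed by hand: compare the live
    # index definition against the locally constructed one.
    if not overwrite and local_index.exists():
        existing = SearchIndex.from_existing(
            "test_cache", redis_client=local_index.client
        )
        if existing.schema != local_index.schema:
            raise ValueError(
                "Existing index schema does not match; set overwrite=True to replace it."
            )

    # overwrite=True swaps the index definition; drop=False keeps the
    # documents already stored under the index prefix.
    local_index.create(overwrite=overwrite, drop=False)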