Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adds custom vectorizer support #156

Merged
merged 9 commits into from
Jun 25, 2024
203 changes: 129 additions & 74 deletions docs/user_guide/vectorizers_04.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"2. HuggingFace\n",
"3. Vertex AI\n",
"4. Cohere\n",
"5. Bringing your own vectorizer\n",
"\n",
"Before running this notebook, be sure to\n",
"1. Have installed ``redisvl`` and have that environment active for this notebook.\n",
Expand Down Expand Up @@ -64,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -89,16 +90,16 @@
{
"data": {
"text/plain": [
"[-0.001025049015879631,\n",
" -0.0030993607360869646,\n",
" 0.0024536605924367905,\n",
" -0.004484387580305338,\n",
" -0.010331203229725361,\n",
" 0.012700922787189484,\n",
" -0.005368996877223253,\n",
" -0.0029411641880869865,\n",
" -0.0070833307690918446,\n",
" -0.03386051580309868]"
"[-0.0010508307022973895,\n",
" -0.0031670420430600643,\n",
" 0.0023781107738614082,\n",
" -0.004539588466286659,\n",
" -0.010320774279534817,\n",
" 0.012868634425103664,\n",
" -0.0054513863287866116,\n",
" -0.002984359161928296,\n",
" -0.0072814482264220715,\n",
" -0.033704183995723724]"
]
},
"execution_count": 3,
Expand Down Expand Up @@ -128,16 +129,16 @@
{
"data": {
"text/plain": [
"[-0.01747742109000683,\n",
" -5.228330701356754e-05,\n",
" 0.0013870716793462634,\n",
" -0.025637786835432053,\n",
" -0.01985435001552105,\n",
" 0.016117358580231667,\n",
" -0.0037306349258869886,\n",
" 0.0008945261361077428,\n",
" 0.006577865686267614,\n",
" -0.025091219693422318]"
"[-0.01749197021126747,\n",
" -5.238811718299985e-05,\n",
" 0.0013331907102838159,\n",
" -0.025576923042535782,\n",
" -0.019907286390662193,\n",
" 0.016106342896819115,\n",
" -0.003756451653316617,\n",
" 0.0009971122490242124,\n",
" 0.006661186460405588,\n",
" -0.024954024702310562]"
]
},
"execution_count": 4,
Expand Down Expand Up @@ -189,7 +190,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -202,34 +203,21 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Vector dimensions: 1536\n"
"ename": "ValueError",
"evalue": "AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT environment variable.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mredisvl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvectorize\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AzureOpenAITextVectorizer\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# create a vectorizer\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m az_oai \u001b[38;5;241m=\u001b[39m \u001b[43mAzureOpenAITextVectorizer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdeployment_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Must be your CUSTOM deployment name\u001b[39;49;00m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mapi_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_key\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_version\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mazure_endpoint\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mazure_endpoint\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m test \u001b[38;5;241m=\u001b[39m az_oai\u001b[38;5;241m.\u001b[39membed(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis is a test 
sentence.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVector dimensions: \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mlen\u001b[39m(test))\n",
tylerhutcherson marked this conversation as resolved.
Show resolved Hide resolved
"File \u001b[0;32m~/Documents/redisvl/redisvl/utils/vectorize/text/azureopenai.py:70\u001b[0m, in \u001b[0;36mAzureOpenAITextVectorizer.__init__\u001b[0;34m(self, model, api_config)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28mself\u001b[39m, model: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext-embedding-ada-002\u001b[39m\u001b[38;5;124m\"\u001b[39m, api_config: Optional[Dict] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 56\u001b[0m ):\n\u001b[1;32m 57\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Initialize the AzureOpenAI vectorizer.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \n\u001b[1;32m 59\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[38;5;124;03m ValueError: If the AzureOpenAI API key, version, or endpoint are not provided.\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialize_clients\u001b[49m\u001b[43m(\u001b[49m\u001b[43mapi_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(model\u001b[38;5;241m=\u001b[39mmodel, dims\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_model_dims(model))\n",
"File \u001b[0;32m~/Documents/redisvl/redisvl/utils/vectorize/text/azureopenai.py:95\u001b[0m, in \u001b[0;36mAzureOpenAITextVectorizer._initialize_clients\u001b[0;34m(self, api_config)\u001b[0m\n\u001b[1;32m 88\u001b[0m azure_endpoint \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 89\u001b[0m api_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mazure_endpoint\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m api_config\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAZURE_OPENAI_ENDPOINT\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 92\u001b[0m )\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m azure_endpoint:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 96\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAzureOpenAI API endpoint is required. 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mProvide it in api_config or set the AZURE_OPENAI_ENDPOINT\u001b[39m\u001b[38;5;130;01m\\\u001b[39;00m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;124m environment variable.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 99\u001b[0m )\n\u001b[1;32m 101\u001b[0m api_version \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 102\u001b[0m api_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapi_version\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m api_config\n\u001b[1;32m 104\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_API_VERSION\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 105\u001b[0m )\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m api_version:\n",
"\u001b[0;31mValueError\u001b[0m: AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT environment variable."
]
},
{
"data": {
"text/plain": [
"[-0.0010088568087667227,\n",
" -0.003142790636047721,\n",
" 0.0024922797456383705,\n",
" -0.004522906616330147,\n",
" -0.010369433090090752,\n",
" 0.012739036232233047,\n",
" -0.005365503951907158,\n",
" -0.0029668458737432957,\n",
" -0.007141091860830784,\n",
" -0.03383301943540573]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
Expand All @@ -252,27 +240,19 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[-0.017460526898503304,\n",
" -6.895032856846228e-05,\n",
" 0.0013909287517890334,\n",
" -0.025688467547297478,\n",
" -0.019813183695077896,\n",
" 0.016087085008621216,\n",
" -0.003729278687387705,\n",
" 0.0009211922879330814,\n",
" 0.006606514099985361,\n",
" -0.025128915905952454]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
"ename": "NameError",
"evalue": "name 'az_oai' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[6], line 8\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Just like OpenAI, AzureOpenAI supports batching embeddings and asynchronous requests.\u001b[39;00m\n\u001b[1;32m 2\u001b[0m sentences \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThat is a happy dog\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThat is a happy person\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mToday is a sunny day\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 6\u001b[0m ]\n\u001b[0;32m----> 8\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[43maz_oai\u001b[49m\u001b[38;5;241m.\u001b[39maembed_many(sentences)\n\u001b[1;32m 9\u001b[0m embeddings[\u001b[38;5;241m0\u001b[39m][:\u001b[38;5;241m10\u001b[39m]\n",
tylerhutcherson marked this conversation as resolved.
Show resolved Hide resolved
"\u001b[0;31mNameError\u001b[0m: name 'az_oai' is not defined"
]
}
],
"source": [
Expand Down Expand Up @@ -302,7 +282,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -349,7 +329,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -384,7 +364,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -438,7 +418,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -459,7 +439,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -500,6 +480,81 @@
"Learn more about using RedisVL and Cohere together through [this dedicated user guide](https://docs.cohere.com/docs/redis-and-cohere)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Custom Vectorizers\n",
"\n",
"RedisVL supports the use of other vectorizers and provides a class, `CustomTextVectorizer`, to enable compatibility with any function that generates a vector or vectors from string data."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from redisvl.utils.vectorize import CustomTextVectorizer\n",
"\n",
"def generate_embeddings(text_input):\n",
" return [0.1] * 768\n",
"\n",
" \n",
"custom_vectorizer = CustomTextVectorizer(generate_embeddings)\n",
"\n",
"custom_vectorizer.embed(\"This is a test sentence.\")[:10]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10:40:28 redisvl.index.index INFO Index already exists, not overwriting.\n"
]
},
{
"data": {
"text/plain": [
"[{'id': 'llmcache:78bd2446a37a0c6ab62652af9b7e53845145c4471ea83ff9fb4280a528d36bbb',\n",
" 'vector_distance': '6.13927841187e-06',\n",
" 'prompt': 'this is a test prompt',\n",
" 'response': 'this is a test response',\n",
" 'prompt_vector': '================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================'}]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# this enables the use of custom vectorizers with other RedisVL components\n",
"from redisvl.extensions.llmcache import SemanticCache\n",
"\n",
"#cache = SemanticCache(vectorizer=custom_vectorizer)\n",
"cache = SemanticCache()\n",
"\n",
"cache.store(\"this is a test prompt\", \"this is a test response\")\n",
"cache.check(\"this is also a test prompt\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -534,7 +589,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -552,7 +607,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand All @@ -571,7 +626,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -600,7 +655,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -658,7 +713,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.12.2"
},
"orig_nbformat": 4,
"vscode": {
Expand Down
2 changes: 2 additions & 0 deletions redisvl/utils/vectorize/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from redisvl.utils.vectorize.base import BaseVectorizer
from redisvl.utils.vectorize.text.azureopenai import AzureOpenAITextVectorizer
from redisvl.utils.vectorize.text.cohere import CohereTextVectorizer
from redisvl.utils.vectorize.text.custom import CustomTextVectorizer
from redisvl.utils.vectorize.text.huggingface import HFTextVectorizer
from redisvl.utils.vectorize.text.openai import OpenAITextVectorizer
from redisvl.utils.vectorize.text.vertexai import VertexAITextVectorizer
Expand All @@ -12,4 +13,5 @@
"OpenAITextVectorizer",
"VertexAITextVectorizer",
"AzureOpenAITextVectorizer",
"CustomTextVectorizer",
]
Loading
Loading