From a7ab00ff4211a17371c4e5572b59d1fae0880993 Mon Sep 17 00:00:00 2001 From: charliehuang09 Date: Fri, 22 Dec 2023 22:02:22 -0800 Subject: [PATCH 1/7] added query.py Signed-off-by: charliehuang09 --- scripts/encode_data.py | 2 +- scripts/query.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 scripts/query.py diff --git a/scripts/encode_data.py b/scripts/encode_data.py index a65ced1..e6126f6 100644 --- a/scripts/encode_data.py +++ b/scripts/encode_data.py @@ -93,7 +93,7 @@ def _load_files(self) -> None: name, extension = os.path.splitext(full_path) match extension: - case ".pdf" | ".rst": + case ".pdf" | ".rst" | ".sh" | ".conf" | ".md": pass case _: # Links is a special case, its where we load arbitrary html diff --git a/scripts/query.py b/scripts/query.py new file mode 100644 index 0000000..05a44a0 --- /dev/null +++ b/scripts/query.py @@ -0,0 +1,31 @@ +import numpy as np + +from qdrant_client import QdrantClient + +from qdrant_client.http import models + +from openai import OpenAI + + +def main(): + client = QdrantClient("0.0.0.0", port=6333) + openai_client = OpenAI() + + model = "text-embedding-ada-002" + prompt = "" + embeddings = openai_client.embeddings.create( + input = prompt, + model=model + ) + tmp = embeddings.data[0].embedding + print(tmp, type(tmp)) + results = client.search( + collection_name="default", + query_vector=embeddings.data[0].embedding, + limit=3, + ) + print(results) + + +if __name__ == '__main__': + main() From 634c9e6f8243fa3dc6f50c3bbbf4cae553497e4d Mon Sep 17 00:00:00 2001 From: charliehuang09 Date: Sat, 23 Dec 2023 15:52:39 -0800 Subject: [PATCH 2/7] added query.ipynb Signed-off-by: charliehuang09 --- scripts/query.ipynb | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 scripts/query.ipynb diff --git a/scripts/query.ipynb b/scripts/query.ipynb new file mode 100644 index 0000000..aac33cf --- /dev/null +++ b/scripts/query.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from qdrant_client import QdrantClient\n", + "\n", + "from qdrant_client.http import models\n", + "\n", + "from openai import OpenAI\n", + "\n", + "client = QdrantClient(\"0.0.0.0\", port=6333)\n", + "openai_client = OpenAI()\n", + "\n", + "model = \"text-embedding-ada-002\"\n", + "prompt = \"How do I contribute to FRC?\"\n", + "collection_name = \"parent_document\"\n", + "embeddings = openai_client.embeddings.create(\n", + " input = prompt,\n", + " model=model\n", + ")\n", + "tmp = embeddings.data[0].embedding\n", + "print(tmp, type(tmp))\n", + "results = client.search(\n", + " collection_name=collection_name,\n", + " query_vector=embeddings.data[0].embedding,\n", + " limit=3,\n", + ") \n", + "print(results)\n", + "\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 7b2df3da1ebfabed2a2ac0647cc4557f986abac0 Mon Sep 17 00:00:00 2001 From: charliehuang09 Date: Sat, 23 Dec 2023 17:49:26 -0800 Subject: [PATCH 3/7] fix according to code review comments Signed-off-by: charliehuang09 --- scripts/encode_data.py | 2 +- scripts/query.py | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/scripts/encode_data.py b/scripts/encode_data.py index e6126f6..a65ced1 100644 --- a/scripts/encode_data.py +++ b/scripts/encode_data.py @@ -93,7 +93,7 @@ def _load_files(self) -> None: name, extension = os.path.splitext(full_path) match extension: - case ".pdf" | ".rst" | ".sh" | ".conf" | ".md": + case ".pdf" | ".rst": pass case _: # Links is a special case, its where we load arbitrary html diff --git a/scripts/query.py b/scripts/query.py index 05a44a0..14107fb 100644 --- a/scripts/query.py +++ b/scripts/query.py @@ -1,9 +1,5 @@ -import numpy as np - from qdrant_client import QdrantClient -from qdrant_client.http import models - from openai import OpenAI @@ -17,8 +13,6 @@ def main(): input = prompt, model=model ) - tmp = embeddings.data[0].embedding - print(tmp, type(tmp)) results = client.search( collection_name="default", query_vector=embeddings.data[0].embedding, From f41b18e360f567e91f94aa1859eede5a12534348 Mon Sep 17 00:00:00 2001 From: charliehuang09 Date: Sat, 23 Dec 2023 18:26:43 -0800 Subject: [PATCH 4/7] renmove query.ipynb Signed-off-by: charliehuang09 --- scripts/query.ipynb | 46 --------------------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 scripts/query.ipynb diff --git a/scripts/query.ipynb b/scripts/query.ipynb deleted file mode 100644 index aac33cf..0000000 --- a/scripts/query.ipynb +++ /dev/null @@ -1,46 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "from qdrant_client import QdrantClient\n", - "\n", - "from qdrant_client.http import models\n", - "\n", - "from openai import OpenAI\n", - "\n", - "client = QdrantClient(\"0.0.0.0\", port=6333)\n", - "openai_client = OpenAI()\n", - "\n", - "model = \"text-embedding-ada-002\"\n", - "prompt = \"How do I contribute to FRC?\"\n", - "collection_name = \"parent_document\"\n", - "embeddings = openai_client.embeddings.create(\n", - " input = prompt,\n", - " model=model\n", - ")\n", - "tmp = embeddings.data[0].embedding\n", - "print(tmp, type(tmp))\n", - "results = client.search(\n", - " collection_name=collection_name,\n", - " query_vector=embeddings.data[0].embedding,\n", - " limit=3,\n", - ") \n", - "print(results)\n", - "\n" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 7c8756f03084330e7b0bdf9e2586d2e4aabf947d Mon Sep 17 00:00:00 2001 From: charliehuang09 Date: Sat, 23 Dec 2023 18:37:37 -0800 Subject: [PATCH 5/7] reformat code Signed-off-by: charliehuang09 --- scripts/query.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/query.py b/scripts/query.py index 14107fb..a75ba81 100644 --- a/scripts/query.py +++ b/scripts/query.py @@ -12,10 +12,10 @@ def main(): embeddings = openai_client.embeddings.create( input = prompt, model=model - ) + ).data[0].embedding results = client.search( - collection_name="default", - query_vector=embeddings.data[0].embedding, + collection_name="1000", + query_vector=embeddings, limit=3, ) print(results) From 6ddc782fafd0a2287464e389d34c0b0098c52584 Mon Sep 17 00:00:00 2001 From: charliehuang09 Date: Sat, 23 Dec 2023 19:52:48 -0800 Subject: [PATCH 6/7] change 1000 to defualt (typo) Signed-off-by: charliehuang09 --- scripts/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/query.py b/scripts/query.py index a75ba81..a50e4cf 100644 --- a/scripts/query.py +++ b/scripts/query.py @@ -14,7 +14,7 @@ def main(): model=model ).data[0].embedding results = client.search( - collection_name="1000", + collection_name="default", query_vector=embeddings, limit=3, ) From f9ef290c8ec39047d7e95406d1cc53d748d0a21f Mon Sep 17 00:00:00 2001 From: charliehuang09 Date: Sun, 24 Dec 2023 18:05:01 -0800 Subject: [PATCH 7/7] add load_dotenv to query.py Signed-off-by: charliehuang09 --- scripts/query.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/query.py b/scripts/query.py index a50e4cf..0c3ff6b 100644 --- a/scripts/query.py +++ b/scripts/query.py @@ -2,8 +2,10 @@ from openai import OpenAI +from dotenv import load_dotenv def main(): + load_dotenv() client = QdrantClient("0.0.0.0", port=6333) openai_client = OpenAI()