langchain-ai · hbmartin · Jul 12, 2024 · Aug 7, 2024 · Aug 7, 2024 · Aug 7, 2024
diff --git a/docs/docs/integrations/vectorstores/objective.ipynb b/docs/docs/integrations/vectorstores/objective.ipynb
@@ -0,0 +1,321 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "5eabe1d5-39b4-4273-9fa3-e66e8bb6225f",
+   "metadata": {},
+   "source": [
+    "# Objective\n",
+    "\n",
+    "This notebook covers how to get started with the **Objective** vector store.\n",
+    "\n",
+    "Objective is an engine for building modern, AI-native search.\n",
+    "\n",
+    "Key features include:\n",
+    "\n",
+    "* Semantic, automatically understanding natural language queries, synonyms, typos, and multiple languages.\n",
+    "* Hybrid, combining exact and approximate matching in one API, so you do not have to build separate lexical / keyword matching and vector-based approximate nearest neighbor (ANN) engines.\n",
+    "* Deep, surfacing relevant Highlights from different media by going inside the content and pulling out relevant bits."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e941ba03-1da9-4c94-b938-275754030368",
+   "metadata": {},
+   "source": [
+    "## Setup\n",
+    "\n",
+    "To use Objective, be sure to install the latest `langchain-community` with `pip install -qU langchain-community`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "82b0111c-1427-4b7f-b0b9-61fbcd75a300",
+   "metadata": {},
+   "source": [
+    "### Credentials\n",
+    "\n",
+    "Next you'll need to [sign up](https://app.objective.inc/) and copy your [API key](https://app.objective.inc/dashboard).\n",
+    "\n",
+    "The easiest way to configure your key is to store it in the `OBJECTIVE_KEY` environment variable. Otherwise, you can assign it directly to `objective_key` in the cell below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23358317-74db-474a-8805-449f0db1e60c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "objective_key = os.getenv(\"OBJECTIVE_KEY\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1bec221c-3bd0-42da-838e-518fa10de6cd",
+   "metadata": {},
+   "source": [
+    "## Initialization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2d015c15-26b2-4ae5-bdf4-dd575692b00c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.vectorstores import Objective\n",
+    "\n",
+    "vector_store = Objective(objective_key)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "32aac0d0-dcf9-4210-aa82-4dd35d91c784",
+   "metadata": {},
+   "source": [
+    "## Manage vector store"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b47a0e29-b2cf-4e2b-b054-3eca88e090c9",
+   "metadata": {},
+   "source": [
+    "### Add items to vector store"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8811bcf7-d164-4c88-b127-b2e4f1547c8b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.documents import Document\n",
+    "\n",
+    "document_1 = Document(page_content=\"foo\", metadata={\"source\": \"https://example.com\"})\n",
+    "\n",
+    "document_2 = Document(page_content=\"bar\", metadata={\"source\": \"https://example.com\"})\n",
+    "\n",
+    "document_3 = Document(page_content=\"baz\", metadata={\"source\": \"https://example.com\"})\n",
+    "\n",
+    "documents = [document_1, document_2, document_3]\n",
+    "\n",
+    "vector_store.add_documents(documents=documents, ids=[\"1\", \"2\", \"3\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "09cfc02e-d976-4f2b-b47d-73bf2a313140",
+   "metadata": {},
+   "source": [
+    "### Update items in vector store"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ff48c681-1667-4815-9310-c12f0e5bcba2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "updated_document = Document(\n",
+    "    id=\"1\", page_content=\"qux\", metadata={\"source\": \"https://another-example.com\"}\n",
+    ")\n",
+    "\n",
+    "vector_store.upsert([updated_document])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e18d812a-5d27-4fcd-b734-75793b5beb75",
+   "metadata": {},
+   "source": [
+    "### Delete items from vector store"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "227d98da-4acc-4b1e-a70f-a53fdadac693",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vector_store.delete(ids=[\"3\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "09d0ddc3-1587-4f9e-8313-ce3d1e10bd7f",
+   "metadata": {},
+   "source": [
+    "## Create a searchable index"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d51e9358-ef3b-4fbe-b2cf-8e07b2439bea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "index_id = vector_store.create_index()\n",
+    "print(f\"Created index ID: {index_id}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "884c1576-87ad-4b34-9920-16153bcd20c2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "status = vector_store.index_status(index_id)\n",
+    "# status returns an object with UPLOADED, READY, ERROR, and PROCESSING document counts\n",
+    "if status[\"UPLOADED\"] != status[\"READY\"]:\n",
+    "    print(f\"Not all documents processed yet, please retry: {status}\")\n",
+    "else:\n",
+    "    print(\"Ready to proceed\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fec81b4e-56f8-4f52-8f8d-5d6c2cf737e0",
+   "metadata": {},
+   "source": [
+    "## Query vector store\n",
+    "\n",
+    "Once your vector store has been created and the relevant documents have been added, you will most likely want to query it while running your chain or agent."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a36fffe2-d5ef-444a-9950-f7218210b6f8",
+   "metadata": {},
+   "source": [
+    "### Query directly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "54a391be-7b21-4ee1-8cff-8958f53ccc46",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = vector_store.search(query=\"thud\", search_type=\"similarity\", index_id=index_id)\n",
+    "for doc in results:\n",
+    "    print(doc.page_content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b3ffe701-5dc6-459c-aeb4-d82e73f7d088",
+   "metadata": {},
+   "source": [
+    "### Query by turning into retriever\n",
+    "\n",
+    "You can also transform the vector store into a retriever for easier usage in your chains."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b40fcbe3-332a-4e3c-ae38-8a7476d29665",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "retriever = vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 1})\n",
+    "retriever.invoke(\"thud\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7c750c66-4455-4dfd-91e3-757074b6f6a0",
+   "metadata": {},
+   "source": [
+    "## Usage for retrieval-augmented generation\n",
+    "\n",
+    "For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
+    "\n",
+    "- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
+    "- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
+    "- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "decd202f-936e-4938-a022-91a6f52a6094",
+   "metadata": {},
+   "source": [
+    "## Chain usage\n",
+    "\n",
+    "The code below shows how to use the vector store as a retriever in a simple RAG chain:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8768a452-4d1d-4d42-a836-bdf4aad64e4c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain import hub\n",
+    "from langchain_core.output_parsers import StrOutputParser\n",
+    "from langchain_core.runnables import RunnablePassthrough\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "\n",
+    "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")\n",
+    "\n",
+    "prompt = hub.pull(\"rlm/rag-prompt\")\n",
+    "\n",
+    "\n",
+    "def format_docs(docs):\n",
+    "    return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
+    "\n",
+    "\n",
+    "rag_chain = (\n",
+    "    {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
+    "    | prompt\n",
+    "    | llm\n",
+    "    | StrOutputParser()\n",
+    ")\n",
+    "\n",
+    "rag_chain.invoke(\"thud\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "be0bc9fd-61de-41bf-8741-bebde269f36f",
+   "metadata": {},
+   "source": [
+    "## API reference"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/scripts/check_templates.py b/docs/scripts/check_templates.py
@@ -67,18 +67,45 @@ def check_header_order(path: Path) -> None:
     with open(path, "r") as f:
         doc = f.read()
     regex = r".*".join(headers)
-    if not re.search(regex, doc, re.DOTALL):
+
+    problems = []
+    for index, header in enumerate(headers):
+        if not re.search(header, doc, re.DOTALL):
+            problems.append(f"Missing header: {header}")
+            continue
+
+    if not problems:
+        skip_headers = []
+        for index, header in enumerate(headers):
+            if index == 0 or headers[index - 1] in skip_headers:
+                continue
+            pair_re = f"{headers[index - 1]}.*{header}"
+            pair_match = re.search(pair_re, doc, re.DOTALL)
+            if not pair_match:
+                skip_headers.append(header)
+                print(f"Out of order header: {header}")
+            else:
+                doc = doc[pair_match.end() - len(header) :]
+    if problems:
         issueline = (
             (
                 " Please see https://github.com/langchain-ai/langchain/issues/"
                 f"{issue_number} for instructions on how to correctly format a "
-                f"{doc_dir} integration page."
+                f"{doc_dir} integration page "
+                "and templates at https://github.com/langchain-ai/langchain/tree/master"
+                "/libs/cli/langchain_cli/integration_template/docs "
+                "Problems: "
             )
             if isinstance(issue_number, int)
-            else ""
+            else (
+                " Please see doc templates at https://github.com/langchain-ai/langchain/tree/master"
+                "/libs/cli/langchain_cli/integration_template/docs "
+                "Problems: "
+            )
         )
         raise ValueError(
-            f"Document {path} does not match the expected header order.{issueline}"
+            f"Document {path} is missing headers or does not match the expected header order.{issueline}"
+            + ", ".join(problems)
         )
 
 

diff --git a/libs/community/langchain_community/vectorstores/__init__.py b/libs/community/langchain_community/vectorstores/__init__.py
@@ -188,6 +188,9 @@
     from langchain_community.vectorstores.neo4j_vector import (
         Neo4jVector,
     )
+    from langchain_community.vectorstores.objective import (
+        Objective,
+    )
     from langchain_community.vectorstores.opensearch_vector_search import (
         OpenSearchVectorSearch,
     )
@@ -369,6 +372,7 @@
     "Neo4jVector",
     "NeuralDBClientVectorStore",
     "NeuralDBVectorStore",
+    "Objective",
     "OracleVS",
     "OpenSearchVectorSearch",
     "PGEmbedding",
@@ -472,6 +476,7 @@
     "Neo4jVector": "langchain_community.vectorstores.neo4j_vector",
     "NeuralDBClientVectorStore": "langchain_community.vectorstores.thirdai_neuraldb",
     "NeuralDBVectorStore": "langchain_community.vectorstores.thirdai_neuraldb",
+    "Objective": "langchain_community.vectorstores.objective",
     "OpenSearchVectorSearch": "langchain_community.vectorstores.opensearch_vector_search",  # noqa: E501
     "OracleVS": "langchain_community.vectorstores.oraclevs",
     "PathwayVectorClient": "langchain_community.vectorstores.pathway",