langchain-ai · hwchase17 · May 30, 2023 · May 30, 2023 · May 30, 2023
diff --git a/docs/modules/chains/index_examples/chat_vector_db.ipynb b/docs/modules/chains/index_examples/chat_vector_db.ipynb
@@ -113,7 +113,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 5,
    "id": "af803fee",
    "metadata": {},
    "outputs": [],
@@ -316,6 +316,64 @@
     "result['answer']"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "11a76453",
+   "metadata": {},
+   "source": [
+    "## Using a different model for condensing the question\n",
+    "\n",
+    "This chain has two steps. First, it condenses the current question and the chat history into a standalone question. This is necessary to create a standalone vector to use for retrieval. After that, it does retrieval and then answers the question using retrieval-augmented generation with a separate model. Part of the power of the declarative nature of LangChain is that you can easily use a separate language model for each call. This makes it possible to use a cheaper and faster model for the simpler task of condensing the question, and then a more expensive model for answering the question. Here is an example of doing so."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "8d4ede9e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.chat_models import ChatOpenAI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "04a23e23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "qa = ConversationalRetrievalChain.from_llm(\n",
+    "    ChatOpenAI(temperature=0, model=\"gpt-4\"),\n",
+    "    vectorstore.as_retriever(),\n",
+    "    condense_question_llm = ChatOpenAI(temperature=0, model='gpt-3.5-turbo'),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "b1223752",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_history = []\n",
+    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
+    "result = qa({\"question\": query, \"chat_history\": chat_history})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cdce4e28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_history = [(query, result[\"answer\"])]\n",
+    "query = \"Did he mention who she suceeded\"\n",
+    "result = qa({\"question\": query, \"chat_history\": chat_history})"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "0eaadf0f",

diff --git a/langchain/chains/conversational_retrieval/base.py b/langchain/chains/conversational_retrieval/base.py
@@ -195,6 +195,7 @@ def from_llm(
         condense_question_prompt: BasePromptTemplate = CONDENSE_QUESTION_PROMPT,
         chain_type: str = "stuff",
         verbose: bool = False,
+        condense_question_llm: Optional[BaseLanguageModel] = None,
         combine_docs_chain_kwargs: Optional[Dict] = None,
         **kwargs: Any,
     ) -> BaseConversationalRetrievalChain:
@@ -206,8 +207,10 @@ def from_llm(
             verbose=verbose,
             **combine_docs_chain_kwargs,
         )
+
+        _llm = condense_question_llm or llm
         condense_question_chain = LLMChain(
-            llm=llm, prompt=condense_question_prompt, verbose=verbose
+            llm=_llm, prompt=condense_question_prompt, verbose=verbose
         )
         return cls(
             retriever=retriever,