From d0c1f442cc4fe10e82cb6e582417d6ec64477346 Mon Sep 17 00:00:00 2001
From: sugarforever
Date: Mon, 20 Nov 2023 23:48:55 +0000
Subject: [PATCH] Created using Colaboratory

---
 01_semi_structured_data.ipynb | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/01_semi_structured_data.ipynb b/01_semi_structured_data.ipynb
index 1aa698b..96792a8 100644
--- a/01_semi_structured_data.ipynb
+++ b/01_semi_structured_data.ipynb
@@ -5,7 +5,7 @@
     "colab": {
       "provenance": [],
       "gpuType": "T4",
-      "authorship_tag": "ABX9TyOsgwsp69IaZ0UMnvAVkdVX",
+      "authorship_tag": "ABX9TyPnIDleZ4upjO9LLlSfEb5e",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -1975,17 +1975,13 @@
   {
     "cell_type": "code",
     "source": [
-      "# Prompt\n",
       "prompt_text = \"\"\"\n",
       "  You are responsible for concisely summarizing table or text chunk:\n",
       "\n",
       "  {element}\n",
       "\"\"\"\n",
       "prompt = ChatPromptTemplate.from_template(prompt_text)\n",
-      "\n",
-      "# Summarization chain\n",
-      "model = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
-      "summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()"
+      "summarize_chain = {\"element\": lambda x: x} | prompt | ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\") | StrOutputParser()"
     ],
     "metadata": {
       "id": "uDQYbnKDbM7C"
@@ -2005,10 +2001,9 @@
   {
     "cell_type": "code",
     "source": [
-      "# Apply to tables\n",
       "tables = [i.text for i in table_elements]\n",
       "table_summaries = summarize_chain.batch(tables, {\"max_concurrency\": 5})\n",
-      "# Apply to texts\n",
+      "\n",
       "texts = [i.text for i in text_elements]\n",
       "text_summaries = summarize_chain.batch(texts, {\"max_concurrency\": 5})"
     ],
@@ -2038,17 +2033,12 @@
       "from langchain.storage import InMemoryStore\n",
       "from langchain.vectorstores import Chroma\n",
       "\n",
-      "# The vectorstore to use to index the child chunks\n",
-      "vectorstore = Chroma(collection_name=\"summaries\", embedding_function=OpenAIEmbeddings())\n",
-      "\n",
-      "# The storage layer for the parent documents\n",
-      "store = InMemoryStore()\n",
       "id_key = \"doc_id\"\n",
       "\n",
       "# The retriever (empty to start)\n",
       "retriever = MultiVectorRetriever(\n",
-      "    vectorstore=vectorstore,\n",
-      "    docstore=store,\n",
+      "    vectorstore=Chroma(collection_name=\"summaries\", embedding_function=OpenAIEmbeddings()),\n",
+      "    docstore=InMemoryStore(),\n",
       "    id_key=id_key,\n",
       ")\n",
       "\n",
@@ -2081,21 +2071,17 @@
     "source": [
       "from langchain.schema.runnable import RunnablePassthrough\n",
       "\n",
-      "# Prompt template\n",
       "template = \"\"\"Answer the question based only on the following context, which can include text and tables:\n",
       "{context}\n",
       "Question: {question}\n",
       "\"\"\"\n",
       "prompt = ChatPromptTemplate.from_template(template)\n",
       "\n",
-      "# LLM\n",
-      "model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
-      "\n",
       "# RAG pipeline\n",
       "chain = (\n",
       "    {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
       "    | prompt\n",
-      "    | model\n",
+      "    | ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
       "    | StrOutputParser()\n",
       ")"
     ],
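Note for reviewers: the hunks above still construct the `MultiVectorRetriever` empty ("empty to start"), and the cell that links the summaries back to their parent chunks falls outside this patch. Below is a minimal sketch of that linking step, assuming the objects the diff defines are in scope (`retriever`, `id_key`, `texts`/`text_summaries`, `tables`/`table_summaries`) and using only stock `langchain` 0.0.x calls (`Document`, `vectorstore.add_documents`, `docstore.mset`); it is illustrative, not code taken from this patch:

```python
# Reviewer sketch (not part of the patch): how the "empty to start" retriever
# is typically populated. Assumes the notebook's own objects are in scope:
# retriever, id_key, texts, text_summaries, tables, table_summaries.
import uuid

from langchain.schema import Document


def add_with_parents(originals, summaries):
    # One stable UUID per parent chunk; each summary carries it in metadata,
    # so a vector hit on a summary can be resolved back to the original.
    doc_ids = [str(uuid.uuid4()) for _ in originals]
    summary_docs = [
        Document(page_content=summary, metadata={id_key: doc_ids[i]})
        for i, summary in enumerate(summaries)
    ]
    retriever.vectorstore.add_documents(summary_docs)        # summaries are embedded and searched
    retriever.docstore.mset(list(zip(doc_ids, originals)))   # parents are what gets returned


add_with_parents(texts, text_summaries)
add_with_parents(tables, table_summaries)
```

Once the stores are populated, the RAG pipeline in the last hunk runs end to end on a plain question string, e.g. `chain.invoke("your question here")`.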