From d0c1f442cc4fe10e82cb6e582417d6ec64477346 Mon Sep 17 00:00:00 2001
From: sugarforever
Date: Mon, 20 Nov 2023 23:48:55 +0000
Subject: [PATCH] Created using Colaboratory

---
 01_semi_structured_data.ipynb | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/01_semi_structured_data.ipynb b/01_semi_structured_data.ipynb
index 1aa698b..96792a8 100644
--- a/01_semi_structured_data.ipynb
+++ b/01_semi_structured_data.ipynb
@@ -5,7 +5,7 @@
     "colab": {
       "provenance": [],
       "gpuType": "T4",
-      "authorship_tag": "ABX9TyOsgwsp69IaZ0UMnvAVkdVX",
+      "authorship_tag": "ABX9TyPnIDleZ4upjO9LLlSfEb5e",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -1975,17 +1975,13 @@
   {
     "cell_type": "code",
     "source": [
-      "# Prompt\n",
       "prompt_text = \"\"\"\n",
       "  You are responsible for concisely summarizing table or text chunk:\n",
       "\n",
       "  {element}\n",
       "\"\"\"\n",
       "prompt = ChatPromptTemplate.from_template(prompt_text)\n",
-      "\n",
-      "# Summarization chain\n",
-      "model = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
-      "summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()"
+      "summarize_chain = {\"element\": lambda x: x} | prompt | ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\") | StrOutputParser()"
     ],
     "metadata": {
       "id": "uDQYbnKDbM7C"
@@ -2005,10 +2001,9 @@
   {
     "cell_type": "code",
     "source": [
-      "# Apply to tables\n",
       "tables = [i.text for i in table_elements]\n",
       "table_summaries = summarize_chain.batch(tables, {\"max_concurrency\": 5})\n",
-      "# Apply to texts\n",
+      "\n",
       "texts = [i.text for i in text_elements]\n",
       "text_summaries = summarize_chain.batch(texts, {\"max_concurrency\": 5})"
     ],
@@ -2038,17 +2033,12 @@
       "from langchain.storage import InMemoryStore\n",
       "from langchain.vectorstores import Chroma\n",
       "\n",
-      "# The vectorstore to use to index the child chunks\n",
-      "vectorstore = Chroma(collection_name=\"summaries\", embedding_function=OpenAIEmbeddings())\n",
-      "\n",
-      "# The storage layer for the parent documents\n",
-      "store = InMemoryStore()\n",
       "id_key = \"doc_id\"\n",
       "\n",
       "# The retriever (empty to start)\n",
       "retriever = MultiVectorRetriever(\n",
-      "    vectorstore=vectorstore,\n",
-      "    docstore=store,\n",
+      "    vectorstore=Chroma(collection_name=\"summaries\", embedding_function=OpenAIEmbeddings()),\n",
+      "    docstore=InMemoryStore(),\n",
       "    id_key=id_key,\n",
       ")\n",
       "\n",
@@ -2081,21 +2071,17 @@
     "source": [
       "from langchain.schema.runnable import RunnablePassthrough\n",
       "\n",
-      "# Prompt template\n",
       "template = \"\"\"Answer the question based only on the following context, which can include text and tables:\n",
       "{context}\n",
       "Question: {question}\n",
       "\"\"\"\n",
       "prompt = ChatPromptTemplate.from_template(template)\n",
       "\n",
-      "# LLM\n",
-      "model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
-      "\n",
       "# RAG pipeline\n",
       "chain = (\n",
       "    {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
       "    | prompt\n",
-      "    | model\n",
+      "    | ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
       "    | StrOutputParser()\n",
       ")"
     ],
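Note for reviewers: the hunks above still construct the `MultiVectorRetriever` empty ("empty to start"), and the cell that links the summaries back to their parent chunks falls outside this patch. Below is a minimal sketch of that linking step, assuming the objects the diff defines are in scope (`retriever`, `id_key`, `texts`/`text_summaries`, `tables`/`table_summaries`) and using only stock `langchain` 0.0.x calls (`Document`, `vectorstore.add_documents`, `docstore.mset`); it is illustrative, not code taken from this patch:

```python
# Reviewer sketch (not part of the patch): how the "empty to start" retriever
# is typically populated. Assumes the notebook's own objects are in scope:
# retriever, id_key, texts, text_summaries, tables, table_summaries.
import uuid

from langchain.schema import Document


def add_with_parents(originals, summaries):
    # One stable UUID per parent chunk; each summary carries it in metadata,
    # so a vector hit on a summary can be resolved back to the original.
    doc_ids = [str(uuid.uuid4()) for _ in originals]
    summary_docs = [
        Document(page_content=summary, metadata={id_key: doc_ids[i]})
        for i, summary in enumerate(summaries)
    ]
    retriever.vectorstore.add_documents(summary_docs)        # summaries are embedded and searched
    retriever.docstore.mset(list(zip(doc_ids, originals)))   # parents are what gets returned


add_with_parents(texts, text_summaries)
add_with_parents(tables, table_summaries)
```

Once the stores are populated, the RAG pipeline in the last hunk runs end to end on a plain question string, e.g. `chain.invoke("your question here")`.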