From 0b2c03b6e962ba542f9e32bd16bb0ae75f923cdf Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Fri, 7 Mar 2025 21:42:57 +0800
Subject: [PATCH 01/12] update

---
 .gitignore                                    |  4 ++-
 .../src/rag_cot_evaluation/python_env.yml     |  1 +
 app/streamlit/Chatbot.py                      |  7 -----
 app/streamlit/Dockerfile                      |  5 +++-
 app/streamlit/requirements.txt                | 28 +++++++++----------
 5 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/.gitignore b/.gitignore
index 655ed34..ffc3c5e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -206,4 +206,6 @@ data/*
 **/llm-template2/*
 **/llmops/outputs/*
 **/*.zip
-**/llm-examples/*
\ No newline at end of file
+**/llm-examples/*
+**/*.ipynb_checkpoints
+**/*.ipynb
\ No newline at end of file
diff --git a/app/llmops/src/rag_cot_evaluation/python_env.yml b/app/llmops/src/rag_cot_evaluation/python_env.yml
index 7d91512..35b30a6 100644
--- a/app/llmops/src/rag_cot_evaluation/python_env.yml
+++ b/app/llmops/src/rag_cot_evaluation/python_env.yml
@@ -12,6 +12,7 @@ build_dependencies:
   - langchain_google_genai
   - langchain-deepseek
   - langchain-community
+  - mlflow[genai]
 # Dependencies required to run the project.
 dependencies:
   - mlflow==2.8.1
\ No newline at end of file
diff --git a/app/streamlit/Chatbot.py b/app/streamlit/Chatbot.py
index ce971e5..00ae5ee 100644
--- a/app/streamlit/Chatbot.py
+++ b/app/streamlit/Chatbot.py
@@ -11,9 +11,6 @@ from langchain_community.llms.moonshot import Moonshot
 import torch
 torch.classes.__path__ = [os.path.join(torch.__path__[0], torch.classes.__file__)] 
 
-# # # or simply:
-# torch.classes.__path__ = []
-
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 GEMINI_API_KEY = config("GOOGLE_API_KEY", cast=str, default="123456")
@@ -31,15 +28,12 @@ if "messages" not in st.session_state:
 for msg in st.session_state.messages:
     st.chat_message(msg["role"]).write(msg["content"])
 
-print('i am here1')
 # Load data from ChromaDB
 chroma_client = chromadb.PersistentClient(path=INPUT_CHROMADB_LOCAL)
 collection = chroma_client.get_collection(name=COLLECTION_NAME)
-print('i am here2')
 
 # Initialize embedding model
 model = SentenceTransformer(EMBEDDING_MODEL) 
-print('i am here3')
 
 if CHAT_MODEL_PROVIDER == "deepseek":
     # Initialize DeepSeek model
@@ -88,7 +82,6 @@ Provide the answer with language that is similar to the question asked.
 """
 answer_prompt = PromptTemplate(template=answer_template, input_variables=["cot", "question"])
 answer_chain = answer_prompt | llm
-print('i am here4')
 
 if prompt := st.chat_input():
     
diff --git a/app/streamlit/Dockerfile b/app/streamlit/Dockerfile
index 1c55f0c..d582af3 100644
--- a/app/streamlit/Dockerfile
+++ b/app/streamlit/Dockerfile
@@ -4,7 +4,8 @@ WORKDIR /app/streamlit
 
 COPY requirements.txt ./
 
-RUN pip install --no-cache-dir -r requirements.txt
+# RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install -r requirements.txt
 
 COPY Chatbot.py .
 COPY .env .
@@ -13,6 +14,8 @@ COPY .env .
 COPY initialize_sentence_transformer.py .
 RUN python initialize_sentence_transformer.py
 
+COPY pages ./pages
+
 EXPOSE 8501
 
 ENTRYPOINT ["streamlit", "run", "Chatbot.py"]
\ No newline at end of file
diff --git a/app/streamlit/requirements.txt b/app/streamlit/requirements.txt
index a7df356..8a3f02d 100644
--- a/app/streamlit/requirements.txt
+++ b/app/streamlit/requirements.txt
@@ -1,14 +1,14 @@
-streamlit>=1.28
-langchain>=0.0.217
-openai>=1.2
-duckduckgo-search
-anthropic>=0.3.0
-trubrics>=1.4.3
-streamlit-feedback
-langchain-community
-chromadb
-python-decouple
-langchain_google_genai
-langchain-deepseek
-sentence_transformers
-watchdog
\ No newline at end of file
+streamlit==1.28.0
+langchain
+openai==1.65.4
+duckduckgo_search==7.5.0
+anthropic==0.49.0
+trubrics==1.8.3
+streamlit-feedback==0.1.4
+langchain-community==0.3.19
+chromadb==0.6.3
+python-decouple==3.8
+langchain-google-genai==2.0.10
+langchain-deepseek==0.1.2
+sentence-transformers==3.4.1
+watchdog==6.0.0
\ No newline at end of file

From 24e21b9093a3f80220fd4683b98b8207695949bb Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Tue, 11 Mar 2025 16:51:34 +0800
Subject: [PATCH 02/12] functioning agentic adaptive rag

---
 app/llmops/main.py                            |  30 +
 .../src/adaptive_rag_evaluation/MLproject     |  29 +
 .../adaptive_rag_evaluation/python_env.yml    |  28 +
 app/llmops/src/adaptive_rag_evaluation/run.py | 551 ++++++++++++++++++
 .../src/etl_chromadb_pdf/python_env.yml       |   5 +
 app/llmops/src/etl_chromadb_pdf/run.py        |  38 +-
 app/llmops/src/rag_cot_evaluation/run.py      |   2 +-
 7 files changed, 665 insertions(+), 18 deletions(-)
 create mode 100644 app/llmops/src/adaptive_rag_evaluation/MLproject
 create mode 100644 app/llmops/src/adaptive_rag_evaluation/python_env.yml
 create mode 100644 app/llmops/src/adaptive_rag_evaluation/run.py

diff --git a/app/llmops/main.py b/app/llmops/main.py
index 4bd19ce..809b2f1 100644
--- a/app/llmops/main.py
+++ b/app/llmops/main.py
@@ -9,6 +9,7 @@ _steps = [
     "etl_chromadb_pdf",
     "etl_chromadb_scanned_pdf", # the performance for scanned pdf may not be good
     "rag_cot_evaluation",
+    "adaptive_rag_evaluation",
     "test_rag_cot"
 ]
 
@@ -130,7 +131,36 @@ def go(config: DictConfig):
                     "chat_model_provider": config["prompt_engineering"]["chat_model_provider"]
                 },
             )
+        
+        if "adaptive_rag_evaluation" in active_steps:
 
+            if config["prompt_engineering"]["run_id_chromadb"] == "None":
+                # Look for a run in this experiment that logged a "chromadb" artifact
+                run_id = None
+                client = mlflow.tracking.MlflowClient()
+                for run in client.search_runs(experiment_ids=[client.get_experiment_by_name(config["main"]["experiment_name"]).experiment_id]):
+                    for artifact in client.list_artifacts(run.info.run_id):
+                        if artifact.path == "chromadb":
+                            run_id = run.info.run_id
+                            break
+                    if run_id:
+                        break
+
+                if run_id is None:
+                    raise ValueError("No run_id found with artifact logged as documents")
+            else:
+                run_id = config["prompt_engineering"]["run_id_chromadb"]
+
+            _ = mlflow.run(
+                os.path.join(hydra.utils.get_original_cwd(), "src", "adaptive_rag_evaluation"),
+                "main",
+                parameters={
+                    "query": config["prompt_engineering"]["query"],
+                    "input_chromadb_artifact": f'runs:/{run_id}/chromadb/chroma_db.zip',
+                    "embedding_model": config["etl"]["embedding_model"],
+                    "chat_model_provider": config["prompt_engineering"]["chat_model_provider"]
+                },
+            )
 
         if "test_rag_cot" in active_steps:
 
diff --git a/app/llmops/src/adaptive_rag_evaluation/MLproject b/app/llmops/src/adaptive_rag_evaluation/MLproject
new file mode 100644
index 0000000..521e783
--- /dev/null
+++ b/app/llmops/src/adaptive_rag_evaluation/MLproject
@@ -0,0 +1,29 @@
+name: adaptive_rag_evaluation
+python_env: python_env.yml
+
+entry_points:
+  main:
+    parameters:
+
+      query:
+        description: Query to run
+        type: string
+
+      input_chromadb_artifact:
+        description: Fully-qualified name for the input artifact
+        type: string
+
+      embedding_model:
+        description: Fully-qualified name for the embedding model
+        type: string
+
+      chat_model_provider:
+        description: Fully-qualified name for the chat model provider
+        type: string
+
+    
+    command: >-
+        python run.py --query {query} \
+                      --input_chromadb_artifact {input_chromadb_artifact} \
+                      --embedding_model {embedding_model} \
+                      --chat_model_provider {chat_model_provider}
\ No newline at end of file
diff --git a/app/llmops/src/adaptive_rag_evaluation/python_env.yml b/app/llmops/src/adaptive_rag_evaluation/python_env.yml
new file mode 100644
index 0000000..c968736
--- /dev/null
+++ b/app/llmops/src/adaptive_rag_evaluation/python_env.yml
@@ -0,0 +1,28 @@
+# Python version required to run the project.
+python: "3.11.11"
+# Dependencies required to build packages. This field is optional.
+build_dependencies:
+  - pip==23.3.1
+  - setuptools
+  - wheel==0.37.1
+  - chromadb
+  - langchain
+  - sentence_transformers
+  - python-decouple
+  - langchain_google_genai
+  - langchain-deepseek
+  - langchain-openai
+  - langchain-community
+  - mlflow[genai]
+  - langsmith
+  - openai
+  - tiktoken
+  - langchainhub
+  - langgraph
+  - langchain-text-splitters
+  - langchain-cohere
+  - tavily-python
+  - langchain_huggingface
+# Dependencies required to run the project.
+dependencies:
+  - mlflow==2.8.1
\ No newline at end of file
diff --git a/app/llmops/src/adaptive_rag_evaluation/run.py b/app/llmops/src/adaptive_rag_evaluation/run.py
new file mode 100644
index 0000000..83abd86
--- /dev/null
+++ b/app/llmops/src/adaptive_rag_evaluation/run.py
@@ -0,0 +1,551 @@
+import os
+import logging
+import argparse
+import mlflow
+import shutil
+from decouple import config
+from langchain.prompts import PromptTemplate
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_deepseek import ChatDeepSeek
+from langchain_community.llms.moonshot import Moonshot
+from langchain_huggingface import HuggingFaceEmbeddings
+
+from langchain_community.vectorstores.chroma import Chroma
+
+from typing import Literal, List
+from typing_extensions import TypedDict
+
+from langchain_core.prompts import ChatPromptTemplate
+from pydantic import BaseModel, Field
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain.schema import Document
+from pprint import pprint
+from langgraph.graph import END, StateGraph, START
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)-15s %(message)s")
+logger = logging.getLogger()
+
+GEMINI_API_KEY = config("GOOGLE_API_KEY", cast=str)
+DEEKSEEK_API_KEY = config("DEEKSEEK_API_KEY", cast=str)
+MOONSHOT_API_KEY = config("MOONSHOT_API_KEY", cast=str)
+TAVILY_API_KEY = config("TAVILY_API_KEY", cast=str)
+os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+
+def go(args):
+
+    # start a new MLflow run
+    with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id, run_name="etl_chromdb_pdf"):
+        existing_params = mlflow.get_run(mlflow.active_run().info.run_id).data.params
+        if 'query' not in existing_params:
+            mlflow.log_param('query', args.query)
+        
+        # Log parameters to MLflow
+        mlflow.log_params({
+            "input_chromadb_artifact": args.input_chromadb_artifact,
+            "embedding_model": args.embedding_model,
+            "chat_model_provider": args.chat_model_provider
+        })
+
+
+        logger.info("Downloading chromadb artifact")
+        artifact_chromadb_local_path = mlflow.artifacts.download_artifacts(artifact_uri=args.input_chromadb_artifact)
+
+        # unzip the artifact
+        logger.info("Unzipping the artifact")
+        shutil.unpack_archive(artifact_chromadb_local_path, "chroma_db")
+
+        # Initialize embedding model (do this ONCE)
+        embedding_model = HuggingFaceEmbeddings(model_name=args.embedding_model) 
+        if args.chat_model_provider == 'deepseek':
+            llm = ChatDeepSeek(
+                model="deepseek-chat", 
+                temperature=0,
+                max_tokens=None,
+                timeout=None,
+                max_retries=2,
+                api_key=DEEKSEEK_API_KEY
+            )
+        elif args.chat_model_provider == 'gemini':
+            llm = ChatGoogleGenerativeAI(
+                model="gemini-1.5-flash", 
+                google_api_key=GEMINI_API_KEY,
+                temperature=0,
+                max_retries=3,
+                streaming=True
+            )
+        elif args.chat_model_provider == 'moonshot':
+            llm = Moonshot(
+                model="moonshot-v1-128k", 
+                temperature=0,
+                max_tokens=None,
+                timeout=None,
+                max_retries=2,
+                api_key=MOONSHOT_API_KEY
+            )
+
+        # Load data from ChromaDB
+        db_folder = "chroma_db"
+        db_path = os.path.join(os.getcwd(), db_folder)
+        collection_name = "rag-chroma"
+        vectorstore = Chroma(persist_directory=db_path, collection_name=collection_name, embedding_function=embedding_model)
+        retriever = vectorstore.as_retriever()
+
+        # Data model
+        class RouteQuery(BaseModel):
+            """Route a user query to the most relevant datasource."""
+
+            datasource: Literal["vectorstore", "web_search"] = Field(
+                ...,
+                description="Given a user question choose to route it to web search or a vectorstore.",
+            )
+
+        structured_llm_router = llm.with_structured_output(RouteQuery)
+
+        # Prompt
+        system = """You are an expert at routing a user question to a vectorstore or web search.
+        The vectorstore contains documents related to medical treatment for cancer/tumor diseases.
+        Use the vectorstore for questions on these topics. Otherwise, use web-search."""
+        route_prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", system),
+                ("human", "{question}"),
+            ]
+        )
+
+        question_router = route_prompt | structured_llm_router
+
+
+        ### Retrieval Grader
+        # Data model
+        class GradeDocuments(BaseModel):
+            """Binary score for relevance check on retrieved documents."""
+
+            binary_score: str = Field(
+                description="Documents are relevant to the question, 'yes' or 'no'"
+            )
+
+        structured_llm_grader = llm.with_structured_output(GradeDocuments)
+
+        # Prompt
+        system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
+            If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
+            It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
+            Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
+        grade_prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", system),
+                ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
+            ]
+        )
+
+        retrieval_grader = grade_prompt | structured_llm_grader
+
+
+        ### Generate
+
+        from langchain import hub
+        from langchain_core.output_parsers import StrOutputParser
+
+        # Prompt
+        prompt = hub.pull("rlm/rag-prompt")
+
+        # Post-processing
+        def format_docs(docs):
+            return "\n\n".join(doc.page_content for doc in docs)
+
+
+        # Chain
+        rag_chain = prompt | llm | StrOutputParser()
+
+
+
+
+        ### Hallucination Grader
+
+        # Data model
+        class GradeHallucinations(BaseModel):
+            """Binary score for hallucination present in generation answer."""
+
+            binary_score: str = Field(
+                description="Answer is grounded in the facts, 'yes' or 'no'"
+            )
+
+
+        # LLM with function call
+        structured_llm_grader = llm.with_structured_output(GradeHallucinations)
+
+        # Prompt
+        system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
+            Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
+        hallucination_prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", system),
+                ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
+            ]
+        )
+
+        hallucination_grader = hallucination_prompt | structured_llm_grader
+
+
+        ### Answer Grader
+        # Data model
+        class GradeAnswer(BaseModel):
+            """Binary score to assess answer addresses question."""
+
+            binary_score: str = Field(
+                description="Answer addresses the question, 'yes' or 'no'"
+            )
+
+
+        # LLM with function call
+        structured_llm_grader = llm.with_structured_output(GradeAnswer)
+
+        # Prompt
+        system = """You are a grader assessing whether an answer addresses / resolves a question \n 
+            Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question."""
+        answer_prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", system),
+                ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
+            ]
+        )
+
+        answer_grader = answer_prompt | structured_llm_grader
+
+        ### Question Re-writer
+
+        # LLM
+
+        # Prompt
+        system = """You a question re-writer that converts an input question to a better version that is optimized \n 
+            for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
+        re_write_prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", system),
+                (
+                    "human",
+                    "Here is the initial question: \n\n {question} \n Formulate an improved question.",
+                ),
+            ]
+        )   
+
+        question_rewriter = re_write_prompt | llm | StrOutputParser()
+
+
+        ### Search
+        web_search_tool = TavilySearchResults(k=3)
+
+        class GraphState(TypedDict):
+            """
+            State dictionary passed between the nodes of the LangGraph workflow.
+
+            Attributes:
+                question: current user question (transform_query overwrites it)
+                generation: answer text written by the generate node
+                documents: retriever Document objects, or one Document from web_search
+            """
+
+            question: str
+            generation: str
+            documents: List[str]  # NOTE(review): nodes store Document objects here, not str
+
+
+
+        def retrieve(state):
+            """
+            Query the Chroma retriever with the current question.
+
+            Args:
+                state (dict): The current graph state; only "question" is read
+
+            Returns:
+                dict: "documents" (the retriever results) and the unchanged "question"
+            """
+            print("---RETRIEVE---")
+            question = state["question"]
+
+            # Look up the question in the vectorstore-backed retriever built in go()
+            documents = retriever.invoke(question)
+
+            print(documents)  # dumps the raw retriever output to stdout
+            return {"documents": documents, "question": question}
+
+
+        def generate(state):
+            """
+            Answer the question from the documents currently held in the state.
+
+            Args:
+                state (dict): The current graph state; reads "question" and "documents"
+
+            Returns:
+                dict: the same "documents" and "question" plus "generation", the rag_chain output
+            """
+            print("---GENERATE---")
+            question = state["question"]
+            documents = state["documents"]
+
+            # NOTE(review): documents go into the prompt as-is; format_docs is not applied here
+            generation = rag_chain.invoke({"context": documents, "question": question})
+            return {"documents": documents, "question": question, "generation": generation}
+
+
+        def grade_documents(state):
+            """
+            Keep only the retrieved documents that the LLM grader marks as relevant.
+
+            Args:
+                state (dict): The current graph state; reads "question" and "documents"
+
+            Returns:
+                dict: "documents" narrowed to the relevant ones (may be empty) and "question"
+            """
+
+            print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
+            question = state["question"]
+            documents = state["documents"]
+
+            # One grader call per document, made sequentially
+            filtered_docs = []
+            for d in documents:
+                score = retrieval_grader.invoke(
+                    {"question": question, "document": d.page_content}
+                )
+                grade = score.binary_score
+                if grade == "yes":  # exact match; any other string counts as not relevant
+                    print("---GRADE: DOCUMENT RELEVANT---")
+                    filtered_docs.append(d)
+                else:
+                    print("---GRADE: DOCUMENT NOT RELEVANT---")
+                    continue
+            return {"documents": filtered_docs, "question": question}
+
+
+        def transform_query(state):
+            """
+            Rewrite the question with the LLM so the next retrieval gets a better query.
+
+            Args:
+                state (dict): The current graph state; reads "question" and "documents"
+
+            Returns:
+                dict: "question" replaced by the rewritten text; "documents" passed through
+            """
+
+            print("---TRANSFORM QUERY---")
+            question = state["question"]
+            documents = state["documents"]
+
+            # question_rewriter is the re_write_prompt | llm | StrOutputParser chain
+            better_question = question_rewriter.invoke({"question": question})
+            return {"documents": documents, "question": better_question}
+
+
+        def web_search(state):
+            """
+            Run the Tavily web search tool on the current question.
+
+            Args:
+                state (dict): The current graph state; only "question" is read
+
+            Returns:
+                dict: "documents" replaced by one Document joining all hit contents; "question"
+            """
+
+            print("---WEB SEARCH---")
+            question = state["question"]
+
+            # Join the "content" field of every hit into a single newline-separated text
+            docs = web_search_tool.invoke({"query": question})
+            web_results = "\n".join([d["content"] for d in docs])
+            web_results = Document(page_content=web_results)
+
+            return {"documents": web_results, "question": question}  # NOTE(review): bare Document, not a list
+
+
+        ### Edges ###
+
+
+        def route_question(state):
+            """
+            Ask the LLM router whether the question goes to web search or the vectorstore.
+
+            Args:
+                state (dict): The current graph state; only "question" is read
+
+            Returns:
+                str: "web_search" or "vectorstore", the edge labels used at START
+            """
+
+            print("---ROUTE QUESTION---")
+            question = state["question"]
+            source = question_router.invoke({"question": question})
+            if source.datasource == "web_search":
+                print("---ROUTE QUESTION TO WEB SEARCH---")
+                return "web_search"
+            elif source.datasource == "vectorstore":  # NOTE(review): no else branch; any other value returns None
+                print("---ROUTE QUESTION TO RAG---")
+                return "vectorstore"
+
+
+        def decide_to_generate(state):
+            """
+            Pick the edge after grading: generate if any document survived, else rewrite the query.
+
+            Args:
+                state (dict): The current graph state; "documents" holds the graded list
+
+            Returns:
+                str: "transform_query" when no documents remain, otherwise "generate"
+            """
+
+            print("---ASSESS GRADED DOCUMENTS---")
+            state["question"]  # NOTE(review): no-op lookup, the value is never used
+            filtered_documents = state["documents"]
+
+            if not filtered_documents:
+                # The relevance check filtered out every retrieved document,
+                # so rewrite the question and retrieve again
+                print(
+                    "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---"
+                )
+                return "transform_query"
+            else:
+                # We have relevant documents, so generate answer
+                print("---DECISION: GENERATE---")
+                return "generate"
+
+
+        def grade_generation_v_documents_and_question(state):
+            """
+            Grade the answer for grounding in the documents, then for answering the question.
+
+            Args:
+                state (dict): The current graph state; reads "question", "documents", "generation"
+
+            Returns:
+                str: "useful", "not useful" or "not supported" (edge labels used by the graph)
+            """
+
+            print("---CHECK HALLUCINATIONS---")
+            question = state["question"]
+            documents = state["documents"]
+            generation = state["generation"]
+
+            score = hallucination_grader.invoke(
+                {"documents": documents, "generation": generation}
+            )
+            grade = score.binary_score
+
+            # Grounding check first; the answer check only runs when it passes
+            if grade == "yes":
+                print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
+                # Second LLM call: does the grounded answer address the question?
+                print("---GRADE GENERATION vs QUESTION---")
+                score = answer_grader.invoke({"question": question, "generation": generation})
+                grade = score.binary_score
+                if grade == "yes":
+                    print("---DECISION: GENERATION ADDRESSES QUESTION---")
+                    return "useful"
+                else:
+                    print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
+                    return "not useful"
+            else:
+                pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
+                return "not supported"  # NOTE(review): the graph maps this back to "generate" with no retry limit
+
+        workflow = StateGraph(GraphState)
+
+        # Define the nodes
+        workflow.add_node("web_search", web_search)  # web search
+        workflow.add_node("retrieve", retrieve)  # retrieve
+        workflow.add_node("grade_documents", grade_documents)  # grade documents
+        workflow.add_node("generate", generate)  # generate
+        workflow.add_node("transform_query", transform_query)  # transform_query
+
+        # Build graph
+        workflow.add_conditional_edges(
+            START,
+            route_question,
+            {
+                "web_search": "web_search",
+                "vectorstore": "retrieve",
+            },
+        )
+        workflow.add_edge("web_search", "generate")
+        workflow.add_edge("retrieve", "grade_documents")
+        workflow.add_conditional_edges(
+            "grade_documents",
+            decide_to_generate,
+            {
+                "transform_query": "transform_query",
+                "generate": "generate",
+            },
+        )
+        workflow.add_edge("transform_query", "retrieve")
+        workflow.add_conditional_edges(
+            "generate",
+            grade_generation_v_documents_and_question,
+            {
+                "not supported": "generate",
+                "useful": END,
+                "not useful": "transform_query",
+            },
+        )
+
+        # Compile
+        app = workflow.compile()
+
+
+
+        # Run
+        inputs = {
+            "question": args.query
+        }
+        for output in app.stream(inputs):
+            for key, value in output.items():
+                # Node
+                pprint(f"Node '{key}':")
+                # Optional: print full state at each node
+                # pprint.pprint(value["keys"], indent=2, width=80, depth=None)
+            pprint("\n---\n")
+
+        # Final generation
+        pprint(value["generation"])
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Chain of Thought RAG")
+
+    parser.add_argument(
+        "--query", 
+        type=str,
+        help="Question to ask the model",
+        required=True
+    )
+
+    parser.add_argument(
+        "--input_chromadb_artifact", 
+        type=str,
+        help="Fully-qualified name for the chromadb artifact",
+        required=True
+    )
+
+    parser.add_argument(
+        "--embedding_model",
+        type=str,
+        default="paraphrase-multilingual-mpnet-base-v2",
+        help="Sentence Transformer model name"
+    )
+
+    parser.add_argument(
+        "--chat_model_provider",
+        type=str,
+        default="gemini",
+        help="Chat model provider"
+    )
+
+    args = parser.parse_args()
+    
+    go(args)
\ No newline at end of file
diff --git a/app/llmops/src/etl_chromadb_pdf/python_env.yml b/app/llmops/src/etl_chromadb_pdf/python_env.yml
index 9e2c10b..f777f71 100644
--- a/app/llmops/src/etl_chromadb_pdf/python_env.yml
+++ b/app/llmops/src/etl_chromadb_pdf/python_env.yml
@@ -10,6 +10,11 @@ build_dependencies:
   - pdfminer.six
   - langchain
   - sentence_transformers
+  - langchain-text-splitters
+  - langchain_huggingface
+  - langchain-community
+  - tiktoken
+
 # Dependencies required to run the project.
 dependencies:
   - mlflow==2.8.1
\ No newline at end of file
diff --git a/app/llmops/src/etl_chromadb_pdf/run.py b/app/llmops/src/etl_chromadb_pdf/run.py
index 74930a2..2f268fd 100644
--- a/app/llmops/src/etl_chromadb_pdf/run.py
+++ b/app/llmops/src/etl_chromadb_pdf/run.py
@@ -8,7 +8,6 @@ import os
 import mlflow
 import shutil
 
-import chromadb
 import io
 from pdfminer.converter import TextConverter
 from pdfminer.pdfinterp import PDFPageInterpreter
@@ -16,8 +15,10 @@ from pdfminer.pdfinterp import PDFResourceManager
 from pdfminer.pdfpage import PDFPage
 from langchain.schema import Document
 from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores.chroma import Chroma
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 
-from sentence_transformers import SentenceTransformer
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)-15s %(message)s")
 logger = logging.getLogger()
@@ -80,7 +81,7 @@ def go(args):
 
 
         # Initialize embedding model (do this ONCE)
-        model_embedding = SentenceTransformer(args.embedding_model)  # Or a multilingual model
+        model_embedding = HuggingFaceEmbeddings(model_name=args.embedding_model)  # Or a multilingual model
 
 
         # Create database, delete the database directory if it exists
@@ -90,9 +91,6 @@ def go(args):
             shutil.rmtree(db_path)
         os.makedirs(db_path)
 
-        chroma_client = chromadb.PersistentClient(path=db_path)
-        collection_name = "rag_experiment"
-        db = chroma_client.create_collection(name=collection_name)
 
         logger.info("Downloading artifact")
         artifact_local_path = mlflow.artifacts.download_artifacts(artifact_uri=args.input_artifact)
@@ -107,22 +105,28 @@ def go(args):
         # show the unzipped folder
         documents_folder = os.path.splitext(os.path.basename(artifact_local_path))[0]
 
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+            chunk_size=1000, chunk_overlap=500
+        )
 
+        ls_docs = []
         for root, _dir, files in os.walk(f"./{documents_folder}"):
             for file in files:
                 if file.endswith(".pdf"):
                     read_text = extract_chinese_text_from_pdf(os.path.join(root, file))
-                    document = Document(page_content=read_text)
-                    all_splits = text_splitter.split_documents([document])
-                    
-                    for i, split in enumerate(all_splits):
-                        db.add(documents=[split.page_content], 
-                            metadatas=[{"filename": file}],
-                            ids=[f'{file[:-4]}-{str(i)}'],
-                            embeddings=[model_embedding.encode(split.page_content)]
-                        )
-        
+                    document = Document(metadata={"file": file}, page_content=read_text)
+                    ls_docs.append(document)
+                                        
+        doc_splits = text_splitter.split_documents(ls_docs)
+
+        # Add to vectorDB
+        _vectorstore = Chroma.from_documents(
+            documents=doc_splits,
+            collection_name="rag-chroma",
+            embedding=model_embedding,
+            persist_directory=db_path
+        )
+
         logger.info("Logging artifact with mlflow")
         shutil.make_archive(db_path, 'zip', db_path)
         mlflow.log_artifact(db_path + '.zip', args.output_artifact)
diff --git a/app/llmops/src/rag_cot_evaluation/run.py b/app/llmops/src/rag_cot_evaluation/run.py
index 38e67dd..054d11b 100644
--- a/app/llmops/src/rag_cot_evaluation/run.py
+++ b/app/llmops/src/rag_cot_evaluation/run.py
@@ -46,7 +46,7 @@ def go(args):
         db_folder = "chroma_db"
         db_path = os.path.join(os.getcwd(), db_folder)
         chroma_client = chromadb.PersistentClient(path=db_path)
-        collection_name = "rag_experiment"
+        collection_name = "rag-chroma"
         collection = chroma_client.get_collection(name=collection_name)
 
         # Formulate a question

From 5b611653f9c7af327451bb3af2998e014ff5def4 Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Wed, 12 Mar 2025 13:52:43 +0800
Subject: [PATCH 03/12] Get Chinese questions working for adaptive RAG, and get
 the Dockerfile working

---
 app/llmops/config.yaml                        |   2 +-
 .../adaptive_rag_evaluation/python_env.yml    |   1 +
 app/llmops/src/adaptive_rag_evaluation/run.py |  18 +-
 app/streamlit/Chatbot.py                      |   2 +-
 app/streamlit/Dockerfile                      |  11 +-
 app/streamlit/Pipfile                         |   4 +-
 app/streamlit/Pipfile.lock                    | 602 ++++++++++--------
 7 files changed, 343 insertions(+), 297 deletions(-)

diff --git a/app/llmops/config.yaml b/app/llmops/config.yaml
index 70dd14d..37aeb2b 100644
--- a/app/llmops/config.yaml
+++ b/app/llmops/config.yaml
@@ -12,5 +12,5 @@ etl:
 prompt_engineering:
   run_id_chromadb: None
   chat_model_provider: gemini
-  query: "怎么治疗有kras的肺癌?"
+  query: "如何治疗乳腺癌?"
   
\ No newline at end of file
diff --git a/app/llmops/src/adaptive_rag_evaluation/python_env.yml b/app/llmops/src/adaptive_rag_evaluation/python_env.yml
index c968736..2278969 100644
--- a/app/llmops/src/adaptive_rag_evaluation/python_env.yml
+++ b/app/llmops/src/adaptive_rag_evaluation/python_env.yml
@@ -23,6 +23,7 @@ build_dependencies:
   - langchain-cohere
   - tavily-python
   - langchain_huggingface
+  - pydantic
 # Dependencies required to run the project.
 dependencies:
   - mlflow==2.8.1
\ No newline at end of file
diff --git a/app/llmops/src/adaptive_rag_evaluation/run.py b/app/llmops/src/adaptive_rag_evaluation/run.py
index 83abd86..e0496c0 100644
--- a/app/llmops/src/adaptive_rag_evaluation/run.py
+++ b/app/llmops/src/adaptive_rag_evaluation/run.py
@@ -4,7 +4,6 @@ import argparse
 import mlflow
 import shutil
 from decouple import config
-from langchain.prompts import PromptTemplate
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_deepseek import ChatDeepSeek
 from langchain_community.llms.moonshot import Moonshot
@@ -22,6 +21,7 @@ from langchain.schema import Document
 from pprint import pprint
 from langgraph.graph import END, StateGraph, START
 
+
 logging.basicConfig(level=logging.INFO, format="%(asctime)-15s %(message)s")
 logger = logging.getLogger()
 
@@ -29,9 +29,15 @@ GEMINI_API_KEY = config("GOOGLE_API_KEY", cast=str)
 DEEKSEEK_API_KEY = config("DEEKSEEK_API_KEY", cast=str)
 MOONSHOT_API_KEY = config("MOONSHOT_API_KEY", cast=str)
 TAVILY_API_KEY = config("TAVILY_API_KEY", cast=str)
+LANGSMITH_API_KEY = config("LANGSMITH_API_KEY", cast=str)
+LANGSMITH_TRACING = config("LANGSMITH_TRACING", cast=str)
+LANGSMITH_PROJECT = config("LANGSMITH_PROJECT", cast=str)
 os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
+os.environ["LANGSMITH_API_KEY"] = LANGSMITH_API_KEY
+os.environ["LANGSMITH_TRACING"] = LANGSMITH_TRACING
+os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
+os.environ["LANGSMITH_PROJECT"] = LANGSMITH_PROJECT
 
 def go(args):
 
@@ -105,8 +111,10 @@ def go(args):
 
         # Prompt
         system = """You are an expert at routing a user question to a vectorstore or web search.
-        The vectorstore contains documents related to medical treatment for cancer/tumor diseases.
-        Use the vectorstore for questions on these topics. Otherwise, use web-search."""
+        The vectorstore contains documents related to any cancer/tumor disease. The question may be
+        asked in a variety of languages, and may be phrased in a variety of ways.
+        Use the vectorstore for questions on these topics. Otherwise, use web-search. 
+        """
         route_prompt = ChatPromptTemplate.from_messages(
             [
                 ("system", system),
@@ -155,13 +163,11 @@ def go(args):
         def format_docs(docs):
             return "\n\n".join(doc.page_content for doc in docs)
 
-
         # Chain
         rag_chain = prompt | llm | StrOutputParser()
 
 
 
-
         ### Hallucination Grader
 
         # Data model
diff --git a/app/streamlit/Chatbot.py b/app/streamlit/Chatbot.py
index 00ae5ee..f02fb31 100644
--- a/app/streamlit/Chatbot.py
+++ b/app/streamlit/Chatbot.py
@@ -19,7 +19,7 @@ MOONSHOT_API_KEY = config("MOONSHOT_API_KEY", cast=str, default="123456")
 CHAT_MODEL_PROVIDER = config("CHAT_MODEL_PROVIDER", cast=str, default="gemini")
 INPUT_CHROMADB_LOCAL = config("INPUT_CHROMADB_LOCAL", cast=str, default="../llmops/src/rag_cot_evaluation/chroma_db")
 EMBEDDING_MODEL = config("EMBEDDING_MODEL", cast=str, default="paraphrase-multilingual-mpnet-base-v2")
-COLLECTION_NAME = config("COLLECTION_NAME", cast=str, default="rag_experiment")
+COLLECTION_NAME = config("COLLECTION_NAME", cast=str, default="rag-chroma")
 
 st.title("💬 RAG AI for Medical Guideline")
 st.caption(f"🚀 A RAG AI for Medical Guideline powered by {CHAT_MODEL_PROVIDER}")
diff --git a/app/streamlit/Dockerfile b/app/streamlit/Dockerfile
index d582af3..4d95699 100644
--- a/app/streamlit/Dockerfile
+++ b/app/streamlit/Dockerfile
@@ -2,20 +2,23 @@ FROM python:3.11-slim
 
 WORKDIR /app/streamlit
 
-COPY requirements.txt ./
+COPY Pipfile ./
 
 # RUN pip install --no-cache-dir -r requirements.txt
-RUN pip install -r requirements.txt
+# RUN pip install -r requirements.txt
+RUN pip install --upgrade pip setuptools wheel -i https://pypi.tuna.tsinghua.edu.cn/simple
+RUN pip install pipenv -i https://pypi.tuna.tsinghua.edu.cn/simple 
+RUN pipenv install --deploy
 
 COPY Chatbot.py .
 COPY .env .
 
 # Run python to initialize download of SentenceTransformer model
 COPY initialize_sentence_transformer.py .
-RUN python initialize_sentence_transformer.py
+RUN pipenv run python initialize_sentence_transformer.py
 
 COPY pages ./pages
 
 EXPOSE 8501
 
-ENTRYPOINT ["streamlit", "run", "Chatbot.py"]
\ No newline at end of file
+ENTRYPOINT ["pipenv", "run", "streamlit", "run", "Chatbot.py"]
\ No newline at end of file
diff --git a/app/streamlit/Pipfile b/app/streamlit/Pipfile
index 181cf2e..473ceba 100644
--- a/app/streamlit/Pipfile
+++ b/app/streamlit/Pipfile
@@ -4,7 +4,7 @@ verify_ssl = true
 name = "pypi"
 
 [packages]
-streamlit = "==1.28"
+streamlit = "*"
 langchain = "*"
 duckduckgo-search = "*"
 anthropic = "*"
@@ -12,7 +12,7 @@ trubrics = "*"
 streamlit-feedback = "*"
 langchain-community = "*"
 watchdog = "*"
-mlflow = "==2.8.1"
+mlflow = "*"
 python-decouple = "*"
 langchain_google_genai = "*"
 langchain-deepseek = "*"
diff --git a/app/streamlit/Pipfile.lock b/app/streamlit/Pipfile.lock
index 298d70c..6834541 100644
--- a/app/streamlit/Pipfile.lock
+++ b/app/streamlit/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "68da9f2cf2dea795e4bb8d4f5b108a40e1fe4255c7d8dbe9233f9db6f993f876"
+            "sha256": "97652b705fea7df9b9012ec199d87df451e67beee8fc9368748e3886a0f5566f"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -10,7 +10,7 @@
         "sources": [
             {
                 "name": "pypi",
-                "url": "https://pypi.org/simple",
+                "url": "https://pypi.tuna.tsinghua.edu.cn/simple",
                 "verify_ssl": true
             }
         ]
@@ -18,11 +18,11 @@
     "default": {
         "aiohappyeyeballs": {
             "hashes": [
-                "sha256:19728772cb12263077982d2f55453babd8bec6a052a926cd5c0c42796da8bf62",
-                "sha256:6cac4f5dd6e34a9644e69cf9021ef679e4394f54e58a183056d12009e42ea9e3"
+                "sha256:0850b580748c7071db98bffff6d4c94028d0d3035acc20fd721a0ce7e8cac35d",
+                "sha256:18fde6204a76deeabc97c48bdd01d5801cfda5d6b9c8bbeb1aaaee9d648ca191"
             ],
             "markers": "python_version >= '3.9'",
-            "version": "==2.4.8"
+            "version": "==2.5.0"
         },
         "aiohttp": {
             "hashes": [
@@ -764,11 +764,11 @@
         },
         "fsspec": {
             "hashes": [
-                "sha256:1c24b16eaa0a1798afa0337aa0db9b256718ab2a89c425371f5628d22c3b6afd",
-                "sha256:9de2ad9ce1f85e1931858535bc882543171d197001a0a5eb2ddc04f1781ab95b"
+                "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972",
+                "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3"
             ],
             "markers": "python_version >= '3.8'",
-            "version": "==2025.2.0"
+            "version": "==2025.3.0"
         },
         "gitdb": {
             "hashes": [
@@ -788,22 +788,30 @@
         },
         "google-ai-generativelanguage": {
             "hashes": [
-                "sha256:494f73c44dede1fd6853e579efe590f139d0654481d2a5bdadfc415ec5351d3d",
-                "sha256:b53c736b8ebed75fe040d48740b0a15370d75e7dbc72249fb7acd2c9171bc072"
+                "sha256:5a03ef86377aa184ffef3662ca28f19eeee158733e45d7947982eb953c6ebb6c",
+                "sha256:8f6d9dc4c12b065fe2d0289026171acea5183ebf2d0b11cefe12f3821e159ec3"
             ],
             "markers": "python_version >= '3.7'",
-            "version": "==0.6.16"
+            "version": "==0.6.15"
         },
         "google-api-core": {
             "extras": [
                 "grpc"
             ],
             "hashes": [
-                "sha256:bc78d608f5a5bf853b80bd70a795f703294de656c096c0968320830a4bc280f1",
-                "sha256:f8b36f5456ab0dd99a1b693a40a31d1e7757beea380ad1b38faaf8941eae9d8a"
+                "sha256:810a63ac95f3c441b7c0e43d344e372887f62ce9071ba972eacf32672e072de9",
+                "sha256:81718493daf06d96d6bc76a91c23874dbf2fac0adbbf542831b805ee6e974696"
             ],
             "markers": "python_version >= '3.7'",
-            "version": "==2.24.1"
+            "version": "==2.24.2"
+        },
+        "google-api-python-client": {
+            "hashes": [
+                "sha256:080e8bc0669cb4c1fb8efb8da2f5b91a2625d8f0e7796cfad978f33f7016c6c4",
+                "sha256:88dee87553a2d82176e2224648bf89272d536c8f04dcdda37ef0a71473886dd7"
+            ],
+            "markers": "python_version >= '3.7'",
+            "version": "==2.163.0"
         },
         "google-auth": {
             "hashes": [
@@ -813,74 +821,84 @@
             "markers": "python_version >= '3.7'",
             "version": "==2.38.0"
         },
+        "google-auth-httplib2": {
+            "hashes": [
+                "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05",
+                "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d"
+            ],
+            "version": "==0.2.0"
+        },
+        "google-generativeai": {
+            "hashes": [
+                "sha256:e987b33ea6decde1e69191ddcaec6ef974458864d243de7191db50c21a7c5b82"
+            ],
+            "markers": "python_version >= '3.9'",
+            "version": "==0.8.4"
+        },
         "googleapis-common-protos": {
             "hashes": [
-                "sha256:17835fdc4fa8da1d61cfe2d4d5d57becf7c61d4112f8d81c67eaa9d7ce43042d",
-                "sha256:5a46d58af72846f59009b9c4710425b9af2139555c71837081706b213b298187"
+                "sha256:4077f27a6900d5946ee5a369fab9c8ded4c0ef1c6e880458ea2f70c14f7b70d5",
+                "sha256:e20d2d8dda87da6fe7340afbbdf4f0bcb4c8fae7e6cadf55926c31f946b0b9b1"
             ],
             "markers": "python_version >= '3.7'",
-            "version": "==1.69.0"
+            "version": "==1.69.1"
         },
         "grpcio": {
             "hashes": [
-                "sha256:0495c86a55a04a874c7627fd33e5beaee771917d92c0e6d9d797628ac40e7655",
-                "sha256:07269ff4940f6fb6710951116a04cd70284da86d0a4368fd5a3b552744511f5a",
-                "sha256:0a5c78d5198a1f0aa60006cd6eb1c912b4a1520b6a3968e677dbcba215fabb40",
-                "sha256:0ba0a173f4feacf90ee618fbc1a27956bfd21260cd31ced9bc707ef551ff7dc7",
-                "sha256:0cd430b9215a15c10b0e7d78f51e8a39d6cf2ea819fd635a7214fae600b1da27",
-                "sha256:0de706c0a5bb9d841e353f6343a9defc9fc35ec61d6eb6111802f3aa9fef29e1",
-                "sha256:17325b0be0c068f35770f944124e8839ea3185d6d54862800fc28cc2ffad205a",
-                "sha256:2394e3381071045a706ee2eeb6e08962dd87e8999b90ac15c55f56fa5a8c9597",
-                "sha256:27cc75e22c5dba1fbaf5a66c778e36ca9b8ce850bf58a9db887754593080d839",
-                "sha256:2b0d02e4b25a5c1f9b6c7745d4fa06efc9fd6a611af0fb38d3ba956786b95199",
-                "sha256:374d014f29f9dfdb40510b041792e0e2828a1389281eb590df066e1cc2b404e5",
-                "sha256:3b0f01f6ed9994d7a0b27eeddea43ceac1b7e6f3f9d86aeec0f0064b8cf50fdb",
-                "sha256:4119fed8abb7ff6c32e3d2255301e59c316c22d31ab812b3fbcbaf3d0d87cc68",
-                "sha256:412faabcc787bbc826f51be261ae5fa996b21263de5368a55dc2cf824dc5090e",
-                "sha256:4f1937f47c77392ccd555728f564a49128b6a197a05a5cd527b796d36f3387d0",
-                "sha256:5413549fdf0b14046c545e19cfc4eb1e37e9e1ebba0ca390a8d4e9963cab44d2",
-                "sha256:558c386ecb0148f4f99b1a65160f9d4b790ed3163e8610d11db47838d452512d",
-                "sha256:58ad9ba575b39edef71f4798fdb5c7b6d02ad36d47949cd381d4392a5c9cbcd3",
-                "sha256:5ea67c72101d687d44d9c56068328da39c9ccba634cabb336075fae2eab0d04b",
-                "sha256:7385b1cb064734005204bc8994eed7dcb801ed6c2eda283f613ad8c6c75cf873",
-                "sha256:7c73c42102e4a5ec76608d9b60227d917cea46dff4d11d372f64cbeb56d259d0",
-                "sha256:8058667a755f97407fca257c844018b80004ae8035565ebc2812cc550110718d",
-                "sha256:879a61bf52ff8ccacbedf534665bb5478ec8e86ad483e76fe4f729aaef867cab",
-                "sha256:880bfb43b1bb8905701b926274eafce5c70a105bc6b99e25f62e98ad59cb278e",
-                "sha256:8d1584a68d5922330025881e63a6c1b54cc8117291d382e4fa69339b6d914c56",
-                "sha256:95469d1977429f45fe7df441f586521361e235982a0b39e33841549143ae2851",
-                "sha256:9e654c4b17d07eab259d392e12b149c3a134ec52b11ecdc6a515b39aceeec898",
-                "sha256:a31d7e3b529c94e930a117b2175b2efd179d96eb3c7a21ccb0289a8ab05b645c",
-                "sha256:aa47688a65643afd8b166928a1da6247d3f46a2784d301e48ca1cc394d2ffb40",
-                "sha256:aa573896aeb7d7ce10b1fa425ba263e8dddd83d71530d1322fd3a16f31257b4a",
-                "sha256:aba19419aef9b254e15011b230a180e26e0f6864c90406fdbc255f01d83bc83c",
-                "sha256:ac073fe1c4cd856ebcf49e9ed6240f4f84d7a4e6ee95baa5d66ea05d3dd0df7f",
-                "sha256:b3c76701428d2df01964bc6479422f20e62fcbc0a37d82ebd58050b86926ef8c",
-                "sha256:b745d2c41b27650095e81dea7091668c040457483c9bdb5d0d9de8f8eb25e59f",
-                "sha256:bb491125103c800ec209d84c9b51f1c60ea456038e4734688004f377cfacc113",
-                "sha256:c1af8e15b0f0fe0eac75195992a63df17579553b0c4af9f8362cc7cc99ccddf4",
-                "sha256:c78b339869f4dbf89881e0b6fbf376313e4f845a42840a7bdf42ee6caed4b11f",
-                "sha256:cb5277db254ab7586769e490b7b22f4ddab3876c490da0a1a9d7c695ccf0bf77",
-                "sha256:cbce24409beaee911c574a3d75d12ffb8c3e3dd1b813321b1d7a96bbcac46bf4",
-                "sha256:cd24d2d9d380fbbee7a5ac86afe9787813f285e684b0271599f95a51bce33528",
-                "sha256:ce7df14b2dcd1102a2ec32f621cc9fab6695effef516efbc6b063ad749867295",
-                "sha256:d24035d49e026353eb042bf7b058fb831db3e06d52bee75c5f2f3ab453e71aca",
-                "sha256:d405b005018fd516c9ac529f4b4122342f60ec1cee181788249372524e6db429",
-                "sha256:d63764963412e22f0491d0d32833d71087288f4e24cbcddbae82476bfa1d81fd",
-                "sha256:dbe41ad140df911e796d4463168e33ef80a24f5d21ef4d1e310553fcd2c4a386",
-                "sha256:dfa089a734f24ee5f6880c83d043e4f46bf812fcea5181dcb3a572db1e79e01c",
-                "sha256:e27585831aa6b57b9250abaf147003e126cd3a6c6ca0c531a01996f31709bed1",
-                "sha256:e7831a0fc1beeeb7759f737f5acd9fdcda520e955049512d68fda03d91186eea",
-                "sha256:ed9718f17fbdb472e33b869c77a16d0b55e166b100ec57b016dc7de9c8d236bf",
-                "sha256:ef4c14508299b1406c32bdbb9fb7b47612ab979b04cf2b27686ea31882387cff",
-                "sha256:f19375f0300b96c0117aca118d400e76fede6db6e91f3c34b7b035822e06c35f",
-                "sha256:f2af68a6f5c8f78d56c145161544ad0febbd7479524a59c16b3e25053f39c87f",
-                "sha256:f32090238b720eb585248654db8e3afc87b48d26ac423c8dde8334a232ff53c9",
-                "sha256:fe9dbd916df3b60e865258a8c72ac98f3ac9e2a9542dcb72b7a34d236242a5ce",
-                "sha256:ff4a8112a79464919bb21c18e956c54add43ec9a4850e3949da54f61c241a4a6"
+                "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea",
+                "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7",
+                "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537",
+                "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b",
+                "sha256:20e8f653abd5ec606be69540f57289274c9ca503ed38388481e98fa396ed0b41",
+                "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366",
+                "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b",
+                "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c",
+                "sha256:39983a9245d37394fd59de71e88c4b295eb510a3555e0a847d9965088cdbd033",
+                "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3",
+                "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79",
+                "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29",
+                "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7",
+                "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e",
+                "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67",
+                "sha256:63e41b91032f298b3e973b3fa4093cbbc620c875e2da7b93e249d4728b54559a",
+                "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8",
+                "sha256:693bc706c031aeb848849b9d1c6b63ae6bcc64057984bb91a542332b75aa4c3d",
+                "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb",
+                "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3",
+                "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4",
+                "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a",
+                "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3",
+                "sha256:8700a2a57771cc43ea295296330daaddc0d93c088f0a35cc969292b6db959bf3",
+                "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509",
+                "sha256:9182e0063112e55e74ee7584769ec5a0b4f18252c35787f48738627e23a62b97",
+                "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6",
+                "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b",
+                "sha256:a371e6b6a5379d3692cc4ea1cb92754d2a47bdddeee755d3203d1f84ae08e03e",
+                "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637",
+                "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a",
+                "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d",
+                "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7",
+                "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd",
+                "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69",
+                "sha256:c6a0a28450c16809f94e0b5bfe52cabff63e7e4b97b44123ebf77f448534d07d",
+                "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379",
+                "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7",
+                "sha256:d35a95f05a8a2cbe8e02be137740138b3b2ea5f80bd004444e4f9a1ffc511e32",
+                "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c",
+                "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef",
+                "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444",
+                "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec",
+                "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594",
+                "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804",
+                "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7",
+                "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73",
+                "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5",
+                "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db",
+                "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db",
+                "sha256:f9c30c464cb2ddfbc2ddf9400287701270fdc0f14be5f08a1e3939f1e749b455"
             ],
-            "markers": "python_version >= '3.8'",
-            "version": "==1.70.0"
+            "markers": "python_version >= '3.9'",
+            "version": "==1.71.0"
         },
         "grpcio-status": {
             "hashes": [
@@ -914,6 +932,14 @@
             "markers": "python_version >= '3.8'",
             "version": "==1.0.7"
         },
+        "httplib2": {
+            "hashes": [
+                "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc",
+                "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+            "version": "==0.22.0"
+        },
         "httptools": {
             "hashes": [
                 "sha256:0614154d5454c21b6410fdf5262b4a3ddb0f53f1e1721cfd59d55f32138c578a",
@@ -981,11 +1007,11 @@
         },
         "huggingface-hub": {
             "hashes": [
-                "sha256:590b29c0dcbd0ee4b7b023714dc1ad8563fe4a68a91463438b74e980d28afaf3",
-                "sha256:c56f20fca09ef19da84dcde2b76379ecdaddf390b083f59f166715584953307d"
+                "sha256:0b25710932ac649c08cdbefa6c6ccb8e88eef82927cacdb048efb726429453aa",
+                "sha256:64519a25716e0ba382ba2d3fb3ca082e7c7eb4a2fc634d200e8380006e0760e5"
             ],
             "markers": "python_full_version >= '3.8.0'",
-            "version": "==0.29.2"
+            "version": "==0.29.3"
         },
         "humanfriendly": {
             "hashes": [
@@ -1029,93 +1055,93 @@
         },
         "jinja2": {
             "hashes": [
-                "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb",
-                "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"
+                "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d",
+                "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"
             ],
             "markers": "python_version >= '3.7'",
-            "version": "==3.1.5"
+            "version": "==3.1.6"
         },
         "jiter": {
             "hashes": [
-                "sha256:025337859077b41548bdcbabe38698bcd93cfe10b06ff66617a48ff92c9aec60",
-                "sha256:03c9df035d4f8d647f8c210ddc2ae0728387275340668fb30d2421e17d9a0841",
-                "sha256:08d4c92bf480e19fc3f2717c9ce2aa31dceaa9163839a311424b6862252c943e",
-                "sha256:0cf5dfa9956d96ff2efb0f8e9c7d055904012c952539a774305aaaf3abdf3d6c",
-                "sha256:14601dcac4889e0a1c75ccf6a0e4baf70dbc75041e51bcf8d0e9274519df6887",
-                "sha256:180a8aea058f7535d1c84183c0362c710f4750bef66630c05f40c93c2b152a0f",
-                "sha256:1c0dfbd1be3cbefc7510102370d86e35d1d53e5a93d48519688b1bf0f761160a",
-                "sha256:2dd61c5afc88a4fda7d8b2cf03ae5947c6ac7516d32b7a15bf4b49569a5c076b",
-                "sha256:317b25e98a35ffec5c67efe56a4e9970852632c810d35b34ecdd70cc0e47b3b6",
-                "sha256:32475a42b2ea7b344069dc1e81445cfc00b9d0e3ca837f0523072432332e9f74",
-                "sha256:37b2998606d6dadbb5ccda959a33d6a5e853252d921fec1792fc902351bb4e2c",
-                "sha256:3ac9f578c46f22405ff7f8b1f5848fb753cc4b8377fbec8470a7dc3997ca7566",
-                "sha256:3b94a33a241bee9e34b8481cdcaa3d5c2116f575e0226e421bed3f7a6ea71cff",
-                "sha256:4a9220497ca0cb1fe94e3f334f65b9b5102a0b8147646118f020d8ce1de70105",
-                "sha256:4ab9a87f3784eb0e098f84a32670cfe4a79cb6512fd8f42ae3d0709f06405d18",
-                "sha256:5127dc1abd809431172bc3fbe8168d6b90556a30bb10acd5ded41c3cfd6f43b6",
-                "sha256:5672a86d55416ccd214c778efccf3266b84f87b89063b582167d803246354be4",
-                "sha256:580ccf358539153db147e40751a0b41688a5ceb275e6f3e93d91c9467f42b2e3",
-                "sha256:58dc9bc9767a1101f4e5e22db1b652161a225874d66f0e5cb8e2c7d1c438b587",
-                "sha256:5a90a923338531b7970abb063cfc087eebae6ef8ec8139762007188f6bc69a9f",
-                "sha256:653cf462db4e8c41995e33d865965e79641ef45369d8a11f54cd30888b7e6ff1",
-                "sha256:66227a2c7b575720c1871c8800d3a0122bb8ee94edb43a5685aa9aceb2782d44",
-                "sha256:6e5337bf454abddd91bd048ce0dca5134056fc99ca0205258766db35d0a2ea43",
-                "sha256:70bf4c43652cc294040dbb62256c83c8718370c8b93dd93d934b9a7bf6c4f53c",
-                "sha256:711e408732d4e9a0208008e5892c2966b485c783cd2d9a681f3eb147cf36c7ef",
-                "sha256:76e324da7b5da060287c54f2fabd3db5f76468006c811831f051942bf68c9d44",
-                "sha256:789361ed945d8d42850f919342a8665d2dc79e7e44ca1c97cc786966a21f627a",
-                "sha256:79aec8172b9e3c6d05fd4b219d5de1ac616bd8da934107325a6c0d0e866a21b6",
-                "sha256:7efe4853ecd3d6110301665a5178b9856be7e2a9485f49d91aa4d737ad2ae49e",
-                "sha256:7f22b16b35d5c1df9dfd58843ab2cd25e6bf15191f5a236bed177afade507bfc",
-                "sha256:83c0efd80b29695058d0fd2fa8a556490dbce9804eac3e281f373bbc99045f6c",
-                "sha256:859e8eb3507894093d01929e12e267f83b1d5f6221099d3ec976f0c995cb6bd9",
-                "sha256:8b9931fd36ee513c26b5bf08c940b0ac875de175341cbdd4fa3be109f0492586",
-                "sha256:8bd2a824d08d8977bb2794ea2682f898ad3d8837932e3a74937e93d62ecbb637",
-                "sha256:8f2d5ed877f089862f4c7aacf3a542627c1496f972a34d0474ce85ee7d939c27",
-                "sha256:8ffc86ae5e3e6a93765d49d1ab47b6075a9c978a2b3b80f0f32628f39caa0c88",
-                "sha256:92249669925bc1c54fcd2ec73f70f2c1d6a817928480ee1c65af5f6b81cdf12d",
-                "sha256:99d9a1eded738299ba8e106c6779ce5c3893cffa0e32e4485d680588adae6db8",
-                "sha256:9c63eaef32b7bebac8ebebf4dabebdbc6769a09c127294db6babee38e9f405b9",
-                "sha256:9e1fa156ee9454642adb7e7234a383884452532bc9d53d5af2d18d98ada1d79c",
-                "sha256:a2ecaa3c23e7a7cf86d00eda3390c232f4d533cd9ddea4b04f5d0644faf642c5",
-                "sha256:a6c710d657c8d1d2adbbb5c0b0c6bfcec28fd35bd6b5f016395f9ac43e878a15",
-                "sha256:a9584de0cd306072635fe4b89742bf26feae858a0683b399ad0c2509011b9dc0",
-                "sha256:ab7f43235d71e03b941c1630f4b6e3055d46b6cb8728a17663eaac9d8e83a865",
-                "sha256:af102d3372e917cffce49b521e4c32c497515119dc7bd8a75665e90a718bbf08",
-                "sha256:b25bd626bde7fb51534190c7e3cb97cee89ee76b76d7585580e22f34f5e3f393",
-                "sha256:b2dd880785088ff2ad21ffee205e58a8c1ddabc63612444ae41e5e4b321b39c0",
-                "sha256:b426f72cd77da3fec300ed3bc990895e2dd6b49e3bfe6c438592a3ba660e41ca",
-                "sha256:ba5bdf56969cad2019d4e8ffd3f879b5fdc792624129741d3d83fc832fef8c7d",
-                "sha256:bf55846c7b7a680eebaf9c3c48d630e1bf51bdf76c68a5f654b8524335b0ad29",
-                "sha256:ca1f08b8e43dc3bd0594c992fb1fd2f7ce87f7bf0d44358198d6da8034afdf84",
-                "sha256:ca29b6371ebc40e496995c94b988a101b9fbbed48a51190a4461fcb0a68b4a36",
-                "sha256:ca8577f6a413abe29b079bc30f907894d7eb07a865c4df69475e868d73e71c7b",
-                "sha256:cadcc978f82397d515bb2683fc0d50103acff2a180552654bb92d6045dec2c49",
-                "sha256:cd646c827b4f85ef4a78e4e58f4f5854fae0caf3db91b59f0d73731448a970c6",
-                "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d",
-                "sha256:cde031d8413842a1e7501e9129b8e676e62a657f8ec8166e18a70d94d4682855",
-                "sha256:ce0820f4a3a59ddced7fce696d86a096d5cc48d32a4183483a17671a61edfddc",
-                "sha256:d20be8b7f606df096e08b0b1b4a3c6f0515e8dac296881fe7461dfa0fb5ec817",
-                "sha256:d21974d246ed0181558087cd9f76e84e8321091ebfb3a93d4c341479a736f099",
-                "sha256:d33f94615fcaf872f7fd8cd98ac3b429e435c77619777e8a449d9d27e01134d1",
-                "sha256:d35c864c2dff13dfd79fb070fc4fc6235d7b9b359efe340e1261deb21b9fcb66",
-                "sha256:d5c826a221851a8dc028eb6d7d6429ba03184fa3c7e83ae01cd6d3bd1d4bd17d",
-                "sha256:e41e75344acef3fc59ba4765df29f107f309ca9e8eace5baacabd9217e52a5ee",
-                "sha256:e52bf98c7e727dd44f7c4acb980cb988448faeafed8433c867888268899b298b",
-                "sha256:e6ec2be506e7d6f9527dae9ff4b7f54e68ea44a0ef6b098256ddf895218a2f8f",
-                "sha256:e725edd0929fa79f8349ab4ec7f81c714df51dc4e991539a578e5018fa4a7152",
-                "sha256:eaa58399c01db555346647a907b4ef6d4f584b123943be6ed5588c3f2359c9f4",
-                "sha256:eb21aaa9a200d0a80dacc7a81038d2e476ffe473ffdd9c91eb745d623561de05",
-                "sha256:ecff0dc14f409599bbcafa7e470c00b80f17abc14d1405d38ab02e4b42e55b57",
-                "sha256:f557c55bc2b7676e74d39d19bcb8775ca295c7a028246175d6a8b431e70835e5",
-                "sha256:f7200b8f7619d36aa51c803fd52020a2dfbea36ffec1b5e22cab11fd34d95a6d",
-                "sha256:f9d471356dc16f84ed48768b8ee79f29514295c7295cb41e1133ec0b2b8d637d",
-                "sha256:fc5adda618205bd4678b146612ce44c3cbfdee9697951f2c0ffdef1f26d72b63",
-                "sha256:fc9043259ee430ecd71d178fccabd8c332a3bf1e81e50cae43cc2b28d19e4cb7",
-                "sha256:ffd9fee7d0775ebaba131f7ca2e2d83839a62ad65e8e02fe2bd8fc975cedeb9e"
+                "sha256:04e8ffa3c353b1bc4134f96f167a2082494351e42888dfcf06e944f2729cbe1d",
+                "sha256:062b756ceb1d40b0b28f326cba26cfd575a4918415b036464a52f08632731e5a",
+                "sha256:099500d07b43f61d8bd780466d429c45a7b25411b334c60ca875fa775f68ccb0",
+                "sha256:0c058ecb51763a67f019ae423b1cbe3fa90f7ee6280c31a1baa6ccc0c0e2d06e",
+                "sha256:113f30f87fb1f412510c6d7ed13e91422cfd329436364a690c34c8b8bd880c42",
+                "sha256:11509bfecbc319459647d4ac3fd391d26fdf530dad00c13c4dadabf5b81f01a4",
+                "sha256:1339f839b91ae30b37c409bf16ccd3dc453e8b8c3ed4bd1d6a567193651a4a51",
+                "sha256:1537a890724ba00fdba21787010ac6f24dad47f763410e9e1093277913592784",
+                "sha256:161d461dcbe658cf0bd0aa375b30a968b087cdddc624fc585f3867c63c6eca95",
+                "sha256:17f5d55eb856597607562257c8e36c42bc87f16bef52ef7129b7da11afc779f3",
+                "sha256:1fd19112d1049bdd47f17bfbb44a2c0001061312dcf0e72765bfa8abd4aa30e5",
+                "sha256:203f28a72a05ae0e129b3ed1f75f56bc419d5f91dfacd057519a8bd137b00c42",
+                "sha256:2221176dfec87f3470b21e6abca056e6b04ce9bff72315cb0b243ca9e835a4b5",
+                "sha256:2228eaaaa111ec54b9e89f7481bffb3972e9059301a878d085b2b449fbbde635",
+                "sha256:237e5cee4d5d2659aaf91bbf8ec45052cc217d9446070699441a91b386ae27dc",
+                "sha256:2685f44bf80e95f8910553bf2d33b9c87bf25fceae6e9f0c1355f75d2922b0ee",
+                "sha256:2764891d3f3e8b18dce2cff24949153ee30c9239da7c00f032511091ba688ff7",
+                "sha256:27cd1f2e8bb377f31d3190b34e4328d280325ad7ef55c6ac9abde72f79e84d2e",
+                "sha256:2e89dc075c1fef8fa9be219e249f14040270dbc507df4215c324a1839522ea75",
+                "sha256:351f4c90a24c4fb8c87c6a73af2944c440494ed2bea2094feecacb75c50398ae",
+                "sha256:387b22fbfd7a62418d5212b4638026d01723761c75c1c8232a8b8c37c2f1003b",
+                "sha256:3c7adb66f899ffa25e3c92bfcb593391ee1947dbdd6a9a970e0d7e713237d572",
+                "sha256:3f22238da568be8bbd8e0650e12feeb2cfea15eda4f9fc271d3b362a4fa0604d",
+                "sha256:40d8da8629ccae3606c61d9184970423655fb4e33d03330bcdfe52d234d32f69",
+                "sha256:42f8a68a69f047b310319ef8e2f52fdb2e7976fb3313ef27df495cf77bcad965",
+                "sha256:4a2d16360d0642cd68236f931b85fe50288834c383492e4279d9f1792e309571",
+                "sha256:4feafe787eb8a8d98168ab15637ca2577f6ddf77ac6c8c66242c2d028aa5420e",
+                "sha256:51c4e1a4f8ea84d98b7b98912aa4290ac3d1eabfde8e3c34541fae30e9d1f08b",
+                "sha256:528b6b71745e7326eed73c53d4aa57e2a522242320b6f7d65b9c5af83cf49b6e",
+                "sha256:5cfc7d0a8e899089d11f065e289cb5b2daf3d82fbe028f49b20d7b809193958d",
+                "sha256:5f4c677c424dc76684fea3e7285a7a2a7493424bea89ac441045e6a1fb1d7b3b",
+                "sha256:609cf3c78852f1189894383cf0b0b977665f54cb38788e3e6b941fa6d982c00e",
+                "sha256:699edfde481e191d81f9cf6d2211debbfe4bd92f06410e7637dffb8dd5dfde06",
+                "sha256:6a99bed9fbb02f5bed416d137944419a69aa4c423e44189bc49718859ea83bc5",
+                "sha256:6c4d99c71508912a7e556d631768dcdef43648a93660670986916b297f1c54af",
+                "sha256:6ef5da104664e526836070e4a23b5f68dec1cc673b60bf1edb1bfbe8a55d0678",
+                "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf",
+                "sha256:7825f46e50646bee937e0f849d14ef3a417910966136f59cd1eb848b8b5bb3e4",
+                "sha256:7a9aaa5102dba4e079bb728076fadd5a2dca94c05c04ce68004cfd96f128ea34",
+                "sha256:7b46249cfd6c48da28f89eb0be3f52d6fdb40ab88e2c66804f546674e539ec11",
+                "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad",
+                "sha256:8793b6df019b988526f5a633fdc7456ea75e4a79bd8396a3373c371fc59f5c9b",
+                "sha256:8f60fb8ce7df529812bf6c625635a19d27f30806885139e367af93f6e734ef58",
+                "sha256:923b54afdd697dfd00d368b7ccad008cccfeb1efb4e621f32860c75e9f25edbd",
+                "sha256:9872aeff3f21e437651df378cb75aeb7043e5297261222b6441a620218b58708",
+                "sha256:9897115ad716c48f0120c1f0c4efae348ec47037319a6c63b2d7838bb53aaef4",
+                "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea",
+                "sha256:9ef340fae98065071ccd5805fe81c99c8f80484e820e40043689cf97fb66b3e2",
+                "sha256:9f3c848209ccd1bfa344a1240763975ca917de753c7875c77ec3034f4151d06c",
+                "sha256:9f48e86b57bc711eb5acdfd12b6cb580a59cc9a993f6e7dcb6d8b50522dcd50d",
+                "sha256:a1be73d8982bdc278b7b9377426a4b44ceb5c7952073dd7488e4ae96b88e1103",
+                "sha256:a25519efb78a42254d59326ee417d6f5161b06f5da827d94cf521fed961b1ff2",
+                "sha256:a7954a401d0a8a0b8bc669199db78af435aae1e3569187c2939c477c53cb6a0a",
+                "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893",
+                "sha256:c0194f813efdf4b8865ad5f5c5f50f8566df7d770a82c51ef593d09e0b347020",
+                "sha256:c43ca669493626d8672be3b645dbb406ef25af3f4b6384cfd306da7eb2e70322",
+                "sha256:c8ae3bf27cd1ac5e6e8b7a27487bf3ab5f82318211ec2e1346a5b058756361f7",
+                "sha256:c98d27330fdfb77913c1097a7aab07f38ff2259048949f499c9901700789ac15",
+                "sha256:cb12e6d65ebbefe5518de819f3eda53b73187b7089040b2d17f5b39001ff31c4",
+                "sha256:d1d9870561eb26b11448854dce0ff27a9a27cb616b632468cafc938de25e9e51",
+                "sha256:d45807b0f236c485e1e525e2ce3a854807dfe28ccf0d013dd4a563395e28008a",
+                "sha256:d726a3890a54561e55a9c5faea1f7655eda7f105bd165067575ace6e65f80bb2",
+                "sha256:d82a811928b26d1a6311a886b2566f68ccf2b23cf3bfed042e18686f1f22c2d7",
+                "sha256:d838650f6ebaf4ccadfb04522463e74a4c378d7e667e0eb1865cfe3990bfac49",
+                "sha256:dd5ab5ddc11418dce28343123644a100f487eaccf1de27a459ab36d6cca31043",
+                "sha256:e00a1a2bbfaaf237e13c3d1592356eab3e9015d7efd59359ac8b51eb56390a12",
+                "sha256:e057adb0cd1bd39606100be0eafe742de2de88c79df632955b9ab53a086b3c8d",
+                "sha256:e3630ec20cbeaddd4b65513fa3857e1b7c4190d4481ef07fb63d0fad59033321",
+                "sha256:e84ed1c9c9ec10bbb8c37f450077cbe3c0d4e8c2b19f0a49a60ac7ace73c7452",
+                "sha256:e8b36d8a16a61993be33e75126ad3d8aa29cf450b09576f3c427d27647fcb4aa",
+                "sha256:eda3f8cc74df66892b1d06b5d41a71670c22d95a1ca2cbab73654745ce9d0419",
+                "sha256:efb767d92c63b2cd9ec9f24feeb48f49574a713870ec87e9ba0c2c6e9329c3e2",
+                "sha256:f0b2827fb88dda2cbecbbc3e596ef08d69bda06c6f57930aec8e79505dc17001",
+                "sha256:f7e6850991f3940f62d387ccfa54d1a92bd4bb9f89690b53aea36b4364bcab53",
+                "sha256:fca1a02ad60ec30bb230f65bc01f611c8608b02d269f998bc29cca8619a919dc",
+                "sha256:ffba79584b3b670fefae66ceb3a28822365d25b7bf811e030609a3d5b876f538"
             ],
             "markers": "python_version >= '3.8'",
-            "version": "==0.8.2"
+            "version": "==0.9.0"
         },
         "joblib": {
             "hashes": [
@@ -1271,11 +1297,11 @@
         },
         "langchain-core": {
             "hashes": [
-                "sha256:1a27cca5333bae7597de4004fb634b5f3e71667a3da6493b94ce83bcf15a23bd",
-                "sha256:d3ee9f3616ebbe7943470ade23d4a04e1729b1512c0ec55a4a07bd2ac64dedb4"
+                "sha256:bec60f4f5665b536434ff747b8f23375a812e82cfa529f519b54cc1e7a94a875",
+                "sha256:caa6bc1f4c6ab71d3c2e400f8b62e1cd6dc5ac2c37e03f12f3e2c60befd5b273"
             ],
             "markers": "python_version >= '3.9' and python_version < '4.0'",
-            "version": "==0.3.41"
+            "version": "==0.3.43"
         },
         "langchain-deepseek": {
             "hashes": [
@@ -1288,20 +1314,20 @@
         },
         "langchain-google-genai": {
             "hashes": [
-                "sha256:c98b18524a78fcc7084ba5ac69ea6a1a69b0b693255de68245b98bbbc3f08e87",
-                "sha256:ff6997eee872f0732036129173f4c1740b03fbb1f13251805d51d6c08bf8b34d"
+                "sha256:964a7542fd11fdec7592052b4eaef383227f7c4fa4d754a455e4bf0634f4ad28",
+                "sha256:b51067b468853856f275bb7b1a85dbaf4467b59fe67e35fcd614fc0d744c810e"
             ],
             "index": "pypi",
             "markers": "python_version >= '3.9' and python_version < '4.0'",
-            "version": "==2.0.11"
+            "version": "==2.0.10"
         },
         "langchain-openai": {
             "hashes": [
-                "sha256:0aefc7bdf8e7398d41e09c4313cace816df6438f2aa93d34f79523487310f0da",
-                "sha256:b8b51a3aaa1cc3bda060651ea41145f7728219e8a7150b5404fb1e8446de9cef"
+                "sha256:4d73727eda8102d1d07a2ca036278fccab0bb5e0abf353cec9c3973eb72550ec",
+                "sha256:9004dc8ef853aece0d8f0feca7753dc97f710fa3e53874c8db66466520436dbb"
             ],
             "markers": "python_version >= '3.9' and python_version < '4.0'",
-            "version": "==0.3.7"
+            "version": "==0.3.8"
         },
         "langchain-text-splitters": {
             "hashes": [
@@ -1313,11 +1339,11 @@
         },
         "langsmith": {
             "hashes": [
-                "sha256:0cca22737ef07d3b038a437c141deda37e00add56022582680188b681bec095e",
-                "sha256:ddf29d24352e99de79c9618aaf95679214324e146c5d3d9475a7ddd2870018b1"
+                "sha256:14014058cff408772acb93344e03cb64174837292d5f1ae09b2c8c1d8df45e92",
+                "sha256:73aaf52bbc293b9415fff4f6dad68df40658081eb26c9cb2c7bd1ff57cedd695"
             ],
             "markers": "python_version >= '3.9' and python_version < '4.0'",
-            "version": "==0.3.11"
+            "version": "==0.3.13"
         },
         "lxml": {
             "hashes": [
@@ -1828,11 +1854,11 @@
         },
         "narwhals": {
             "hashes": [
-                "sha256:1021c345d56c66ff0cc8e6d03ca8c543d01ffc411630973a5cb69ee86824d823",
-                "sha256:653aa8e5eb435816e7b50c8def17e7e5e3324c2ffd8a3eec03fef85792e9cf5e"
+                "sha256:0c50cc67a5404da501302882838ec17dce51703d22cd8ad89162d6f60ea0bb19",
+                "sha256:443aa0a1abfae89bc65a6b888a7e310a03d1818bfb2ccd61c150199a5f954c17"
             ],
             "markers": "python_version >= '3.8'",
-            "version": "==1.29.0"
+            "version": "==1.30.0"
         },
         "networkx": {
             "hashes": [
@@ -1894,37 +1920,35 @@
         },
         "onnxruntime": {
             "hashes": [
-                "sha256:06bfbf02ca9ab5f28946e0f912a562a5f005301d0c419283dc57b3ed7969bb7b",
-                "sha256:0df6f2df83d61f46e842dbcde610ede27218947c33e994545a22333491e72a3b",
-                "sha256:19c2d843eb074f385e8bbb753a40df780511061a63f9def1b216bf53860223fb",
-                "sha256:22b0655e2bf4f2161d52706e31f517a0e54939dc393e92577df51808a7edc8c9",
-                "sha256:4c4b251a725a3b8cf2aab284f7d940c26094ecd9d442f07dd81ab5470e99b83f",
-                "sha256:5eec64c0269dcdb8d9a9a53dc4d64f87b9e0c19801d9321246a53b7eb5a7d1bc",
-                "sha256:7b2908b50101a19e99c4d4e97ebb9905561daf61829403061c1adc1b588bc0de",
-                "sha256:8508887eb1c5f9537a4071768723ec7c30c28eb2518a00d0adcd32c89dea3221",
-                "sha256:a19bc6e8c70e2485a1725b3d517a2319603acc14c1f1a017dda0afe6d4665b41",
-                "sha256:bb71a814f66517a65628c9e4a2bb530a6edd2cd5d87ffa0af0f6f773a027d99e",
-                "sha256:bd386cc9ee5f686ee8a75ba74037750aca55183085bf1941da8efcfe12d5b120",
-                "sha256:bda6aebdf7917c1d811f21d41633df00c58aff2bef2f598f69289c1f1dabc4b3",
-                "sha256:c9158465745423b2b5d97ed25aa7740c7d38d2993ee2e5c3bfacb0c4145c49d8",
-                "sha256:cc01437a32d0042b606f462245c8bbae269e5442797f6213e36ce61d5abdd8cc",
-                "sha256:d30367df7e70f1d9fc5a6a68106f5961686d39b54d3221f760085524e8d38e16",
-                "sha256:d3b616bb53a77a9463707bb313637223380fc327f5064c9a782e8ec69c22e6a2",
-                "sha256:d82daaec24045a2e87598b8ac2b417b1cce623244e80e663882e9fe1aae86410",
-                "sha256:e50ba5ff7fed4f7d9253a6baf801ca2883cc08491f9d32d78a80da57256a5439",
-                "sha256:f1f56e898815963d6dc4ee1c35fc6c36506466eff6d16f3cb9848cea4e8c8172",
-                "sha256:f6243e34d74423bdd1edf0ae9596dd61023b260f546ee17d701723915f06a9f7",
-                "sha256:fb44b08e017a648924dbe91b82d89b0c105b1adcfe31e90d1dc06b8677ad37be"
+                "sha256:19b630c6a8956ef97fb7c94948b17691167aa1aaf07b5f214fa66c3e4136c108",
+                "sha256:1d970dff1e2fa4d9c53f2787b3b7d0005596866e6a31997b41169017d1362dd0",
+                "sha256:36b18b8f39c0f84e783902112a0dd3c102466897f96d73bb83f6a6bff283a423",
+                "sha256:37b7445c920a96271a8dfa16855e258dc5599235b41c7bbde0d262d55bcc105f",
+                "sha256:3995c4a2d81719623c58697b9510f8de9fa42a1da6b4474052797b0d712324fe",
+                "sha256:635d4ab13ae0f150dd4c6ff8206fd58f1c6600636ecc796f6f0c42e4c918585b",
+                "sha256:7d06bfa0dd5512bd164f25a2bf594b2e7c9eabda6fc064b684924f3e81bdab1b",
+                "sha256:7f801318476cd7003d636a5b392f7a37c08b6c8d2f829773f3c3887029e03f32",
+                "sha256:7f9156cf6f8ee133d07a751e6518cf6f84ed37fbf8243156bd4a2c4ee6e073c8",
+                "sha256:85718cbde1c2912d3a03e3b3dc181b1480258a229c32378408cace7c450f7f23",
+                "sha256:893d67c68ca9e7a58202fa8d96061ed86a5815b0925b5a97aef27b8ba246a20b",
+                "sha256:8a5d09815a9e209fa0cb20c2985b34ab4daeba7aea94d0f96b8751eb10403201",
+                "sha256:8e16f8a79df03919810852fb46ffcc916dc87a9e9c6540a58f20c914c575678c",
+                "sha256:94dff3a61538f3b7b0ea9a06bc99e1410e90509c76e3a746f039e417802a12ae",
+                "sha256:95513c9302bc8dd013d84148dcf3168e782a80cdbf1654eddc948a23147ccd3d",
+                "sha256:9a04aafb802c1e5573ba4552f8babcb5021b041eb4cfa802c9b7644ca3510eca",
+                "sha256:b0fc22d219791e0284ee1d9c26724b8ee3fbdea28128ef25d9507ad3b9621f23",
+                "sha256:c1e704b0eda5f2bbbe84182437315eaec89a450b08854b5a7762c85d04a28a0a"
             ],
-            "version": "==1.20.1"
+            "markers": "python_version >= '3.10'",
+            "version": "==1.21.0"
         },
         "openai": {
             "hashes": [
-                "sha256:9b7cd8f79140d03d77f4ed8aeec6009be5dcd79bbc02f03b0e8cd83356004f71",
-                "sha256:a155fa5d60eccda516384d3d60d923e083909cc126f383fe4a350f79185c232a"
+                "sha256:17d39096bbcaf6c86580244b493a59e16613460147f0ba5ab6e608cdb6628149",
+                "sha256:5948a504e7b4003d921cfab81273813793a31c25b1d7b605797c01757e0141f1"
             ],
             "markers": "python_version >= '3.8'",
-            "version": "==1.65.3"
+            "version": "==1.65.5"
         },
         "opentelemetry-api": {
             "hashes": [
@@ -2243,10 +2267,10 @@
         },
         "posthog": {
             "hashes": [
-                "sha256:6865104b7cf3a5b13949e2bc2aab9b37b5fbf5f9e045fa55b9eabe21b3850200",
-                "sha256:ce115b8422f26c57cd4143499115b741f5683c93d0b5b87bab391579aaef084b"
+                "sha256:7abaa1f0fbcdde8f1c193f744fdc26de852c13a80b5f527c6eeba8516b20df76",
+                "sha256:b879bc257de287ea91a9545bab1a3d09ba22586f3c0370ef210e06631c4929bc"
             ],
-            "version": "==3.18.1"
+            "version": "==3.19.1"
         },
         "primp": {
             "hashes": [
@@ -2369,11 +2393,11 @@
         },
         "proto-plus": {
             "hashes": [
-                "sha256:6e93d5f5ca267b54300880fff156b6a3386b3fa3f43b1da62e680fc0c586ef22",
-                "sha256:bf2dfaa3da281fc3187d12d224c707cb57214fb2c22ba854eb0c105a3fb2d4d7"
+                "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66",
+                "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"
             ],
             "markers": "python_version >= '3.7'",
-            "version": "==1.26.0"
+            "version": "==1.26.1"
         },
         "protobuf": {
             "hashes": [
@@ -3092,11 +3116,11 @@
         },
         "setuptools": {
             "hashes": [
-                "sha256:4880473a969e5f23f2a2be3646b2dfd84af9028716d398e46192f84bc36900d2",
-                "sha256:558e47c15f1811c1fa7adbd0096669bf76c1d3f433f58324df69f3f5ecac4e8f"
+                "sha256:199466a166ff664970d0ee145839f5582cb9bca7a0a3a2e795b6a9cb2308e9c6",
+                "sha256:43b4ee60e10b0d0ee98ad11918e114c70701bc6051662a9a675a0496c1a158f4"
             ],
             "markers": "python_version >= '3.9'",
-            "version": "==75.8.2"
+            "version": "==76.0.0"
         },
         "shellingham": {
             "hashes": [
@@ -3203,11 +3227,11 @@
         },
         "starlette": {
             "hashes": [
-                "sha256:913f0798bd90ba90a9156383bcf1350a17d6259451d0d8ee27fc0cf2db609038",
-                "sha256:b359e4567456b28d473d0193f34c0de0ed49710d75ef183a74a5ce0499324f50"
+                "sha256:3c88d58ee4bd1bb807c0d1acb381838afc7752f9ddaec81bbe4383611d833230",
+                "sha256:77c74ed9d2720138b25875133f3a2dae6d854af2ec37dceb56aef370c1d8a227"
             ],
             "markers": "python_version >= '3.9'",
-            "version": "==0.46.0"
+            "version": "==0.46.1"
         },
         "streamlit": {
             "hashes": [
@@ -3426,11 +3450,19 @@
         },
         "tzlocal": {
             "hashes": [
-                "sha256:2fafbfc07e9d8b49ade18f898d6bcd37ae88ce3ad6486842a2e4f03af68323d2",
-                "sha256:3814135a1bb29763c6e4f08fd6e41dbb435c7a60bfbb03270211bcc537187d8c"
+                "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd",
+                "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d"
             ],
             "markers": "python_version >= '3.9'",
-            "version": "==5.3"
+            "version": "==5.3.1"
+        },
+        "uritemplate": {
+            "hashes": [
+                "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0",
+                "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"
+            ],
+            "markers": "python_version >= '3.6'",
+            "version": "==4.1.1"
         },
         "urllib3": {
             "hashes": [
@@ -3626,78 +3658,78 @@
         },
         "websockets": {
             "hashes": [
-                "sha256:0e389efe46ccb25a1f93d08c7a74e8123a2517f7b7458f043bd7529d1a63ffeb",
-                "sha256:0f2205cdb444a42a7919690238fb5979a05439b9dbb73dd47c863d39640d85ab",
-                "sha256:10552fed076757a70ba2c18edcbc601c7637b30cdfe8c24b65171e824c7d6081",
-                "sha256:110a847085246ab8d4d119632145224d6b49e406c64f1bbeed45c6f05097b680",
-                "sha256:1206432cc6c644f6fc03374b264c5ff805d980311563202ed7fef91a38906276",
-                "sha256:1657a9eecb29d7838e3b415458cc494e6d1b194f7ac73a34aa55c6fb6c72d1f3",
-                "sha256:17f2854c6bd9ee008c4b270f7010fe2da6c16eac5724a175e75010aacd905b31",
-                "sha256:190bc6ef8690cd88232a038d1b15714c258f79653abad62f7048249b09438af3",
-                "sha256:1caf951110ca757b8ad9c4974f5cac7b8413004d2f29707e4d03a65d54cedf2b",
-                "sha256:24d5333a9b2343330f0f4eb88546e2c32a7f5c280f8dd7d3cc079beb0901781b",
-                "sha256:26ba70fed190708551c19a360f9d7eca8e8c0f615d19a574292b7229e0ae324c",
-                "sha256:2bd8ef197c87afe0a9009f7a28b5dc613bfc585d329f80b7af404e766aa9e8c7",
-                "sha256:2ea4f210422b912ebe58ef0ad33088bc8e5c5ff9655a8822500690abc3b1232d",
-                "sha256:30cff3ef329682b6182c01c568f551481774c476722020b8f7d0daacbed07a17",
-                "sha256:327adab7671f3726b0ba69be9e865bba23b37a605b585e65895c428f6e47e766",
-                "sha256:32e02a2d83f4954aa8c17e03fe8ec6962432c39aca4be7e8ee346b05a3476904",
-                "sha256:37d66646f929ae7c22c79bc73ec4074d6db45e6384500ee3e0d476daf55482a9",
-                "sha256:3a302241fbe825a3e4fe07666a2ab513edfdc6d43ce24b79691b45115273b5e7",
-                "sha256:3abd670ca7ce230d5a624fd3d55e055215d8d9b723adee0a348352f5d8d12ff4",
-                "sha256:4095a1f2093002c2208becf6f9a178b336b7572512ee0a1179731acb7788e8ad",
-                "sha256:45535fead66e873f411c1d3cf0d3e175e66f4dd83c4f59d707d5b3e4c56541c4",
-                "sha256:45d464622314973d78f364689d5dbb9144e559f93dca11b11af3f2480b5034e1",
-                "sha256:4f7290295794b5dec470867c7baa4a14182b9732603fd0caf2a5bf1dc3ccabf3",
-                "sha256:4ff380aabd7a74a42a760ee76c68826a8f417ceb6ea415bd574a035a111fd133",
-                "sha256:51ffd53c53c4442415b613497a34ba0aa7b99ac07f1e4a62db5dcd640ae6c3c3",
-                "sha256:5294fcb410ed0a45d5d1cdedc4e51a60aab5b2b3193999028ea94afc2f554b05",
-                "sha256:56e3efe356416bc67a8e093607315951d76910f03d2b3ad49c4ade9207bf710d",
-                "sha256:5d3cc75ef3e17490042c47e0523aee1bcc4eacd2482796107fd59dd1100a44bc",
-                "sha256:5e6ee18a53dd5743e6155b8ff7e8e477c25b29b440f87f65be8165275c87fef0",
-                "sha256:67a04754d121ea5ca39ddedc3f77071651fb5b0bc6b973c71c515415b44ed9c5",
-                "sha256:7394c0b7d460569c9285fa089a429f58465db930012566c03046f9e3ab0ed181",
-                "sha256:789c43bf4a10cd067c24c321238e800b8b2716c863ddb2294d2fed886fa5a689",
-                "sha256:7ac67b542505186b3bbdaffbc303292e1ee9c8729e5d5df243c1f20f4bb9057e",
-                "sha256:8561c48b0090993e3b2a54db480cab1d23eb2c5735067213bb90f402806339f5",
-                "sha256:86bfb52a9cfbcc09aba2b71388b0a20ea5c52b6517c0b2e316222435a8cdab72",
-                "sha256:8711682a629bbcaf492f5e0af72d378e976ea1d127a2d47584fa1c2c080b436b",
-                "sha256:89da58e4005e153b03fe8b8794330e3f6a9774ee9e1c3bd5bc52eb098c3b0c4f",
-                "sha256:89f72524033abbfde880ad338fd3c2c16e31ae232323ebdfbc745cbb1b3dcc03",
-                "sha256:8bf1ab71f9f23b0a1d52ec1682a3907e0c208c12fef9c3e99d2b80166b17905f",
-                "sha256:8d7bbbe2cd6ed80aceef2a14e9f1c1b61683194c216472ed5ff33b700e784e37",
-                "sha256:94c4a9b01eede952442c088d415861b0cf2053cbd696b863f6d5022d4e4e2453",
-                "sha256:98dcf978d4c6048965d1762abd534c9d53bae981a035bfe486690ba11f49bbbb",
-                "sha256:a4cc73a6ae0a6751b76e69cece9d0311f054da9b22df6a12f2c53111735657c8",
-                "sha256:a9f8e33747b1332db11cf7fcf4a9512bef9748cb5eb4d3f7fbc8c30d75dc6ffc",
-                "sha256:ace960769d60037ca9625b4c578a6f28a14301bd2a1ff13bb00e824ac9f73e55",
-                "sha256:ae721bcc8e69846af00b7a77a220614d9b2ec57d25017a6bbde3a99473e41ce8",
-                "sha256:aea01f40995fa0945c020228ab919b8dfc93fc8a9f2d3d705ab5b793f32d9e99",
-                "sha256:b499caef4bca9cbd0bd23cd3386f5113ee7378094a3cb613a2fa543260fe9506",
-                "sha256:b89504227a5311610e4be16071465885a0a3d6b0e82e305ef46d9b064ce5fb72",
-                "sha256:bd66b4865c8b853b8cca7379afb692fc7f52cf898786537dfb5e5e2d64f0a47f",
-                "sha256:bfcd3acc1a81f106abac6afd42327d2cf1e77ec905ae11dc1d9142a006a496b6",
-                "sha256:c24ba103ecf45861e2e1f933d40b2d93f5d52d8228870c3e7bf1299cd1cb8ff1",
-                "sha256:c348abc5924caa02a62896300e32ea80a81521f91d6db2e853e6b1994017c9f6",
-                "sha256:c53f97032b87a406044a1c33d1e9290cc38b117a8062e8a8b285175d7e2f99c9",
-                "sha256:c7cd4b1015d2f60dfe539ee6c95bc968d5d5fad92ab01bb5501a77393da4f596",
-                "sha256:c86dc2068f1c5ca2065aca34f257bbf4f78caf566eb230f692ad347da191f0a1",
-                "sha256:c8c5c8e1bac05ef3c23722e591ef4f688f528235e2480f157a9cfe0a19081375",
-                "sha256:ca36151289a15b39d8d683fd8b7abbe26fc50be311066c5f8dcf3cb8cee107ab",
-                "sha256:cc8821a03bcfb36e4e4705316f6b66af28450357af8a575dc8f4b09bf02a3dee",
-                "sha256:cccc18077acd34c8072578394ec79563664b1c205f7a86a62e94fafc7b59001f",
-                "sha256:d2244d8ab24374bed366f9ff206e2619345f9cd7fe79aad5225f53faac28b6b1",
-                "sha256:d4c22992e24f12de340ca5f824121a5b3e1a37ad4360b4e1aaf15e9d1c42582d",
-                "sha256:dd24c4d256558429aeeb8d6c24ebad4e982ac52c50bc3670ae8646c181263965",
-                "sha256:e413352a921f5ad5d66f9e2869b977e88d5103fc528b6deb8423028a2befd842",
-                "sha256:ee06405ea2e67366a661ed313e14cf2a86e84142a3462852eb96348f7219cee3",
-                "sha256:f83eca8cbfd168e424dfa3b3b5c955d6c281e8fc09feb9d870886ff8d03683c7",
-                "sha256:fb915101dfbf318486364ce85662bb7b020840f68138014972c08331458d41f3",
-                "sha256:ffc02b159b65c05f2ed9ec176b715b66918a674bd4daed48a9a7a590dd4be1aa",
-                "sha256:ffc5ae23ada6515f31604f700009e2df90b091b67d463a8401c1d8a37f76c1d7"
+                "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2",
+                "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9",
+                "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5",
+                "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3",
+                "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8",
+                "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e",
+                "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1",
+                "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256",
+                "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85",
+                "sha256:2034693ad3097d5355bfdacfffcbd3ef5694f9718ab7f29c29689a9eae841880",
+                "sha256:21c1fa28a6a7e3cbdc171c694398b6df4744613ce9b36b1a498e816787e28123",
+                "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375",
+                "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065",
+                "sha256:363c6f671b761efcb30608d24925a382497c12c506b51661883c3e22337265ed",
+                "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41",
+                "sha256:3b1ac0d3e594bf121308112697cf4b32be538fb1444468fb0a6ae4feebc83411",
+                "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597",
+                "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f",
+                "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c",
+                "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3",
+                "sha256:47819cea040f31d670cc8d324bb6435c6f133b8c7a19ec3d61634e62f8d8f9eb",
+                "sha256:47b099e1f4fbc95b701b6e85768e1fcdaf1630f3cbe4765fa216596f12310e2e",
+                "sha256:4a9fac8e469d04ce6c25bb2610dc535235bd4aa14996b4e6dbebf5e007eba5ee",
+                "sha256:4b826973a4a2ae47ba357e4e82fa44a463b8f168e1ca775ac64521442b19e87f",
+                "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf",
+                "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf",
+                "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4",
+                "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a",
+                "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665",
+                "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22",
+                "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675",
+                "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4",
+                "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d",
+                "sha256:5f4c04ead5aed67c8a1a20491d54cdfba5884507a48dd798ecaf13c74c4489f5",
+                "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65",
+                "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792",
+                "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57",
+                "sha256:67f2b6de947f8c757db2db9c71527933ad0019737ec374a8a6be9a956786aaf9",
+                "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3",
+                "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151",
+                "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d",
+                "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475",
+                "sha256:7f493881579c90fc262d9cdbaa05a6b54b3811c2f300766748db79f098db9940",
+                "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431",
+                "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee",
+                "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413",
+                "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8",
+                "sha256:a625e06551975f4b7ea7102bc43895b90742746797e2e14b70ed61c43a90f09b",
+                "sha256:abdc0c6c8c648b4805c5eacd131910d2a7f6455dfd3becab248ef108e89ab16a",
+                "sha256:ac017dd64572e5c3bd01939121e4d16cf30e5d7e110a119399cf3133b63ad054",
+                "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb",
+                "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205",
+                "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04",
+                "sha256:b7643a03db5c95c799b89b31c036d5f27eeb4d259c798e878d6937d71832b1e4",
+                "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa",
+                "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9",
+                "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122",
+                "sha256:d08eb4c2b7d6c41da6ca0600c077e93f5adcfd979cd777d747e9ee624556da4b",
+                "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905",
+                "sha256:d591f8de75824cbb7acad4e05d2d710484f15f29d4a915092675ad3456f11770",
+                "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe",
+                "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b",
+                "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562",
+                "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561",
+                "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215",
+                "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931",
+                "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9",
+                "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f",
+                "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7"
             ],
             "markers": "python_version >= '3.9'",
-            "version": "==15.0"
+            "version": "==15.0.1"
         },
         "werkzeug": {
             "hashes": [
@@ -4092,7 +4124,11 @@
         "pytest": {
             "hashes": [
                 "sha256:249b1b0864530ba251b7438274c4d251c58d868edaaec8762893ad4a0d71c36c",
-                "sha256:50fb9cbe836c3f20f0dfa99c565201fb75dc54c8d76373cd1bde06b06657bdb6"
+                "sha256:42ed2f917ded90ceb752dbe2ecb48c436c2a70d38bc16018c2d11da6426a18b6",
+                "sha256:50fb9cbe836c3f20f0dfa99c565201fb75dc54c8d76373cd1bde06b06657bdb6",
+                "sha256:6c30d4c4409c5d227ef936678b72c56b6fbaed28a6ee4eafd2c93ed9a24c65af",
+                "sha256:b5baeee6fb27cbca444fc1bab2ee7e1934f93daa50a4d475a6d0f819c263e573",
+                "sha256:efc82dc5e6f2f41ae5acb9eabdf2ced192f336664c436b24a7db2c6aaafe4efd"
             ],
             "index": "pypi",
             "markers": "python_version >= '3.8'",

From a8acbabe838ab36bc355034fea911d3568fcabe9 Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Wed, 12 Mar 2025 13:57:42 +0800
Subject: [PATCH 04/12] ruff fixed

---
 app/llmops/src/etl_chromadb_pdf/run.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/app/llmops/src/etl_chromadb_pdf/run.py b/app/llmops/src/etl_chromadb_pdf/run.py
index 2f268fd..edaaa01 100644
--- a/app/llmops/src/etl_chromadb_pdf/run.py
+++ b/app/llmops/src/etl_chromadb_pdf/run.py
@@ -14,7 +14,6 @@ from pdfminer.pdfinterp import PDFPageInterpreter
 from pdfminer.pdfinterp import PDFResourceManager
 from pdfminer.pdfpage import PDFPage
 from langchain.schema import Document
-from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores.chroma import Chroma
 from langchain_text_splitters import RecursiveCharacterTextSplitter

From 8b68c6024992f0b42153392788226f024fba1116 Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Wed, 12 Mar 2025 15:31:04 +0800
Subject: [PATCH 05/12] rename src folder from adaptive_rag_evaluation to
 rag_adaptive_evaluation

---
 app/llmops/main.py                                        | 6 +++---
 .../MLproject                                             | 2 +-
 .../python_env.yml                                        | 0
 .../run.py                                                | 0
 app/llmops/src/rag_cot_evaluation/run.py                  | 8 ++++++++
 5 files changed, 12 insertions(+), 4 deletions(-)
 rename app/llmops/src/{adaptive_rag_evaluation => rag_adaptive_evaluation}/MLproject (96%)
 rename app/llmops/src/{adaptive_rag_evaluation => rag_adaptive_evaluation}/python_env.yml (100%)
 rename app/llmops/src/{adaptive_rag_evaluation => rag_adaptive_evaluation}/run.py (100%)

diff --git a/app/llmops/main.py b/app/llmops/main.py
index 809b2f1..4c04a63 100644
--- a/app/llmops/main.py
+++ b/app/llmops/main.py
@@ -9,7 +9,7 @@ _steps = [
     "etl_chromadb_pdf",
     "etl_chromadb_scanned_pdf", # the performance for scanned pdf may not be good
     "rag_cot_evaluation",
-    "adaptive_rag_evaluation",
+    "rag_adaptive_evaluation",
     "test_rag_cot"
 ]
 
@@ -132,7 +132,7 @@ def go(config: DictConfig):
                 },
             )
         
-        if "adaptive_rag_evaluation" in active_steps:
+        if "rag_adaptive_evaluation" in active_steps:
 
             if config["prompt_engineering"]["run_id_chromadb"] == "None":
                 # Look for run_id that has artifact logged as documents
@@ -152,7 +152,7 @@ def go(config: DictConfig):
                 run_id = config["prompt_engineering"]["run_id_chromadb"]
 
             _ = mlflow.run(
-                os.path.join(hydra.utils.get_original_cwd(), "src", "adaptive_rag_evaluation"),
+                os.path.join(hydra.utils.get_original_cwd(), "src", "rag_adaptive_evaluation"),
                 "main",
                 parameters={
                     "query": config["prompt_engineering"]["query"],
diff --git a/app/llmops/src/adaptive_rag_evaluation/MLproject b/app/llmops/src/rag_adaptive_evaluation/MLproject
similarity index 96%
rename from app/llmops/src/adaptive_rag_evaluation/MLproject
rename to app/llmops/src/rag_adaptive_evaluation/MLproject
index 521e783..48c1dad 100644
--- a/app/llmops/src/adaptive_rag_evaluation/MLproject
+++ b/app/llmops/src/rag_adaptive_evaluation/MLproject
@@ -1,4 +1,4 @@
-name: adaptive_rag_evaluation
+name: rag_adaptive_evaluation
 python_env: python_env.yml
 
 entry_points:
diff --git a/app/llmops/src/adaptive_rag_evaluation/python_env.yml b/app/llmops/src/rag_adaptive_evaluation/python_env.yml
similarity index 100%
rename from app/llmops/src/adaptive_rag_evaluation/python_env.yml
rename to app/llmops/src/rag_adaptive_evaluation/python_env.yml
diff --git a/app/llmops/src/adaptive_rag_evaluation/run.py b/app/llmops/src/rag_adaptive_evaluation/run.py
similarity index 100%
rename from app/llmops/src/adaptive_rag_evaluation/run.py
rename to app/llmops/src/rag_adaptive_evaluation/run.py
diff --git a/app/llmops/src/rag_cot_evaluation/run.py b/app/llmops/src/rag_cot_evaluation/run.py
index 054d11b..aa773b1 100644
--- a/app/llmops/src/rag_cot_evaluation/run.py
+++ b/app/llmops/src/rag_cot_evaluation/run.py
@@ -18,6 +18,14 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 GEMINI_API_KEY = config("GOOGLE_API_KEY", cast=str)
 DEEKSEEK_API_KEY = config("DEEKSEEK_API_KEY", cast=str)
 MOONSHOT_API_KEY = config("MOONSHOT_API_KEY", cast=str)
+LANGSMITH_API_KEY = config("LANGSMITH_API_KEY", cast=str)
+LANGSMITH_TRACING = config("LANGSMITH_TRACING", cast=str)
+LANGSMITH_PROJECT = config("LANGSMITH_PROJECT", cast=str)
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+os.environ["LANGSMITH_API_KEY"] = LANGSMITH_API_KEY
+os.environ["LANGSMITH_TRACING"] = LANGSMITH_TRACING
+os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
+os.environ["LANGSMITH_PROJECT"] = LANGSMITH_PROJECT
 
 def go(args):
 

From 486a79a2ccca52d20914e5d2dada48018269e4fa Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Wed, 12 Mar 2025 17:59:25 +0800
Subject: [PATCH 06/12] refactored adaptive rag

---
 app/llmops/config.yaml                        |   2 +-
 app/llmops/main.py                            |   1 +
 .../src/rag_adaptive_evaluation/MLproject     |   6 +-
 .../rag_adaptive_evaluation/data_models.py    |  32 +++++
 .../src/rag_adaptive_evaluation/evaluators.py |  99 ++++++++++++++
 .../prompts_library.py                        |  19 +++
 .../rag_adaptive_evaluation/python_env.yml    |   1 +
 app/llmops/src/rag_adaptive_evaluation/run.py | 129 +++++++-----------
 8 files changed, 207 insertions(+), 82 deletions(-)
 create mode 100644 app/llmops/src/rag_adaptive_evaluation/data_models.py
 create mode 100644 app/llmops/src/rag_adaptive_evaluation/evaluators.py
 create mode 100644 app/llmops/src/rag_adaptive_evaluation/prompts_library.py

diff --git a/app/llmops/config.yaml b/app/llmops/config.yaml
index 37aeb2b..5452f8c 100644
--- a/app/llmops/config.yaml
+++ b/app/llmops/config.yaml
@@ -13,4 +13,4 @@ prompt_engineering:
   run_id_chromadb: None
   chat_model_provider: gemini
   query: "如何治疗乳腺癌?"
-  
\ No newline at end of file
+  query_evaluation_dataset_csv_path: "../../../../data/qa_datasets.csv"   
\ No newline at end of file
diff --git a/app/llmops/main.py b/app/llmops/main.py
index 4c04a63..ac768b4 100644
--- a/app/llmops/main.py
+++ b/app/llmops/main.py
@@ -156,6 +156,7 @@ def go(config: DictConfig):
                 "main",
                 parameters={
                     "query": config["prompt_engineering"]["query"],
+                    "query_evaluation_dataset_csv_path": config["prompt_engineering"]["query_evaluation_dataset_csv_path"],
                     "input_chromadb_artifact": f'runs:/{run_id}/chromadb/chroma_db.zip',
                     "embedding_model": config["etl"]["embedding_model"],
                     "chat_model_provider": config["prompt_engineering"]["chat_model_provider"]
diff --git a/app/llmops/src/rag_adaptive_evaluation/MLproject b/app/llmops/src/rag_adaptive_evaluation/MLproject
index 48c1dad..457116d 100644
--- a/app/llmops/src/rag_adaptive_evaluation/MLproject
+++ b/app/llmops/src/rag_adaptive_evaluation/MLproject
@@ -8,6 +8,10 @@ entry_points:
       query:
         description: Query to run
         type: string
+      
+      query_evaluation_dataset_csv_path:
+        description: query evaluation dataset csv path
+        type: string
 
       input_chromadb_artifact:
         description: Fully-qualified name for the input artifact
@@ -20,10 +24,10 @@ entry_points:
       chat_model_provider:
         description: Fully-qualified name for the chat model provider
         type: string
-
     
     command: >-
         python run.py --query {query} \
+                      --query_evaluation_dataset_csv_path {query_evaluation_dataset_csv_path} \
                       --input_chromadb_artifact {input_chromadb_artifact} \
                       --embedding_model {embedding_model} \
                       --chat_model_provider {chat_model_provider}
\ No newline at end of file
diff --git a/app/llmops/src/rag_adaptive_evaluation/data_models.py b/app/llmops/src/rag_adaptive_evaluation/data_models.py
new file mode 100644
index 0000000..680cfbd
--- /dev/null
+++ b/app/llmops/src/rag_adaptive_evaluation/data_models.py
@@ -0,0 +1,32 @@
+from typing import Literal, List
+from pydantic import BaseModel, Field
+
+
+class RouteQuery(BaseModel):
+    """Route a user query to the most relevant datasource."""
+
+    datasource: Literal["vectorstore", "web_search"] = Field(
+        ...,
+        description="Given a user question choose to route it to web search or a vectorstore.",
+    )
+
+class GradeDocuments(BaseModel):
+    """Binary score for relevance check on retrieved documents."""
+
+    binary_score: str = Field(
+        description="Documents are relevant to the question, 'yes' or 'no'"
+    )
+
+class GradeHallucinations(BaseModel):
+    """Binary score for hallucination present in generation answer."""
+
+    binary_score: str = Field(
+        description="Answer is grounded in the facts, 'yes' or 'no'"
+    )
+
+class GradeAnswer(BaseModel):
+    """Binary score to assess answer addresses question."""
+
+    binary_score: str = Field(
+        description="Answer addresses the question, 'yes' or 'no'"
+    )
\ No newline at end of file
diff --git a/app/llmops/src/rag_adaptive_evaluation/evaluators.py b/app/llmops/src/rag_adaptive_evaluation/evaluators.py
new file mode 100644
index 0000000..17b1b90
--- /dev/null
+++ b/app/llmops/src/rag_adaptive_evaluation/evaluators.py
@@ -0,0 +1,99 @@
+from decouple import config
+from openevals.llm import create_llm_as_judge
+from openevals.prompts import (
+    CORRECTNESS_PROMPT, 
+    CONCISENESS_PROMPT, 
+    HALLUCINATION_PROMPT
+)
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_deepseek import ChatDeepSeek
+from langchain_community.llms.moonshot import Moonshot
+
+GEMINI_API_KEY = config("GOOGLE_API_KEY", cast=str)
+DEEKSEEK_API_KEY = config("DEEKSEEK_API_KEY", cast=str)
+MOONSHOT_API_KEY = config("MOONSHOT_API_KEY", cast=str)
+
+# correctness
+gemini_evaluator_correctness = create_llm_as_judge(
+    prompt=CORRECTNESS_PROMPT,
+    judge=ChatGoogleGenerativeAI(
+                model="gemini-1.5-flash", 
+                google_api_key=GEMINI_API_KEY,
+                temperature=0.5,
+            ),
+    )
+
+deepseek_evaluator_correctness = create_llm_as_judge(
+    prompt=CORRECTNESS_PROMPT,
+    judge=ChatDeepSeek(
+                model="deepseek-chat", 
+                temperature=0.5,
+                api_key=DEEKSEEK_API_KEY
+            ),
+    )
+
+moonshot_evaluator_correctness = create_llm_as_judge(
+    prompt=CORRECTNESS_PROMPT,
+    judge=Moonshot(
+                model="moonshot-v1-128k", 
+                temperature=0.5,
+                api_key=MOONSHOT_API_KEY
+            ),
+    )
+
+# conciseness
+gemini_evaluator_conciseness = create_llm_as_judge(
+    prompt=CONCISENESS_PROMPT,
+    judge=ChatGoogleGenerativeAI(
+                model="gemini-1.5-flash", 
+                google_api_key=GEMINI_API_KEY,
+                temperature=0.5,
+            ),
+    )
+
+deepseek_evaluator_conciseness = create_llm_as_judge(
+    prompt=CONCISENESS_PROMPT,
+    judge=ChatDeepSeek(
+                model="deepseek-chat", 
+                temperature=0.5,
+                api_key=DEEKSEEK_API_KEY
+            ),
+    )
+
+moonshot_evaluator_conciseness = create_llm_as_judge(
+    prompt=CONCISENESS_PROMPT,
+    judge=Moonshot(
+                model="moonshot-v1-128k", 
+                temperature=0.5,
+                api_key=MOONSHOT_API_KEY
+            ),
+    )
+
+# hallucination
+gemini_evaluator_hallucination = create_llm_as_judge(
+    prompt=HALLUCINATION_PROMPT,
+    judge=ChatGoogleGenerativeAI(
+                model="gemini-1.5-flash", 
+                google_api_key=GEMINI_API_KEY,
+                temperature=0.5,
+            ),
+    )
+
+deepseek_evaluator_hallucination = create_llm_as_judge(
+    prompt=HALLUCINATION_PROMPT,
+    judge=ChatDeepSeek(
+                model="deepseek-chat", 
+                temperature=0.5,
+                api_key=DEEKSEEK_API_KEY
+            ),
+    )
+
+moonshot_evaluator_hallucination = create_llm_as_judge(
+    prompt=HALLUCINATION_PROMPT,
+    judge=Moonshot(
+                model="moonshot-v1-128k", 
+                temperature=0.5,
+                api_key=MOONSHOT_API_KEY
+            ),
+    )
+
diff --git a/app/llmops/src/rag_adaptive_evaluation/prompts_library.py b/app/llmops/src/rag_adaptive_evaluation/prompts_library.py
new file mode 100644
index 0000000..fcaf564
--- /dev/null
+++ b/app/llmops/src/rag_adaptive_evaluation/prompts_library.py
@@ -0,0 +1,19 @@
+system_router = """You are an expert at routing a user question to a vectorstore or web search.
+The vectorstore contains documents related to any cancer/tumor disease. The question may be
+asked in a variety of languages, and may be phrased in a variety of ways.
+Use the vectorstore for questions on these topics. Otherwise, use web-search. 
+"""
+
+system_retriever_grader = """You are a grader assessing relevance of a retrieved document to a user question. \n 
+    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
+    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
+    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
+
+system_hallucination_grader = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
+    Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
+
+system_answer_grader = """You are a grader assessing whether an answer addresses / resolves a question \n 
+    Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question."""
+
+system_question_rewriter = """You a question re-writer that converts an input question to a better version that is optimized \n 
+    for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
\ No newline at end of file
diff --git a/app/llmops/src/rag_adaptive_evaluation/python_env.yml b/app/llmops/src/rag_adaptive_evaluation/python_env.yml
index 2278969..451cdb7 100644
--- a/app/llmops/src/rag_adaptive_evaluation/python_env.yml
+++ b/app/llmops/src/rag_adaptive_evaluation/python_env.yml
@@ -24,6 +24,7 @@ build_dependencies:
   - tavily-python
   - langchain_huggingface
   - pydantic
+  - openevals
 # Dependencies required to run the project.
 dependencies:
   - mlflow==2.8.1
\ No newline at end of file
diff --git a/app/llmops/src/rag_adaptive_evaluation/run.py b/app/llmops/src/rag_adaptive_evaluation/run.py
index e0496c0..1fe7543 100644
--- a/app/llmops/src/rag_adaptive_evaluation/run.py
+++ b/app/llmops/src/rag_adaptive_evaluation/run.py
@@ -8,19 +8,43 @@ from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_deepseek import ChatDeepSeek
 from langchain_community.llms.moonshot import Moonshot
 from langchain_huggingface import HuggingFaceEmbeddings
-
 from langchain_community.vectorstores.chroma import Chroma
 
-from typing import Literal, List
+from typing import List
 from typing_extensions import TypedDict
 
 from langchain_core.prompts import ChatPromptTemplate
-from pydantic import BaseModel, Field
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain.schema import Document
 from pprint import pprint
 from langgraph.graph import END, StateGraph, START
+from langsmith import Client
 
+from data_models import (
+    RouteQuery, 
+    GradeDocuments, 
+    GradeHallucinations, 
+    GradeAnswer
+)
+from prompts_library import (
+    system_router, 
+    system_retriever_grader,
+    system_hallucination_grader,
+    system_answer_grader,
+    system_question_rewriter
+)
+
+from evaluators import (
+    gemini_evaluator_correctness,
+    deepseek_evaluator_correctness,
+    moonshot_evaluator_correctness,
+    gemini_evaluator_conciseness,
+    deepseek_evaluator_conciseness,
+    moonshot_evaluator_conciseness,
+    gemini_evaluator_hallucination,
+    deepseek_evaluator_hallucination,
+    moonshot_evaluator_hallucination
+)
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)-15s %(message)s")
 logger = logging.getLogger()
@@ -98,61 +122,32 @@ def go(args):
         vectorstore = Chroma(persist_directory=db_path, collection_name=collection_name, embedding_function=embedding_model)
         retriever = vectorstore.as_retriever()
 
-        # Data model
-        class RouteQuery(BaseModel):
-            """Route a user query to the most relevant datasource."""
-
-            datasource: Literal["vectorstore", "web_search"] = Field(
-                ...,
-                description="Given a user question choose to route it to web search or a vectorstore.",
-            )
-
+        ##########################################
+        # Routing to vectorstore or web search
         structured_llm_router = llm.with_structured_output(RouteQuery)
-
         # Prompt
-        system = """You are an expert at routing a user question to a vectorstore or web search.
-        The vectorstore contains documents related to any cancer/tumor disease. The question may be
-        asked in a variety of languages, and may be phrased in a variety of ways.
-        Use the vectorstore for questions on these topics. Otherwise, use web-search. 
-        """
         route_prompt = ChatPromptTemplate.from_messages(
             [
-                ("system", system),
+                ("system", system_router),
                 ("human", "{question}"),
             ]
         )
-
         question_router = route_prompt | structured_llm_router
 
-
+        ##########################################
         ### Retrieval Grader
-        # Data model
-        class GradeDocuments(BaseModel):
-            """Binary score for relevance check on retrieved documents."""
-
-            binary_score: str = Field(
-                description="Documents are relevant to the question, 'yes' or 'no'"
-            )
-
         structured_llm_grader = llm.with_structured_output(GradeDocuments)
-
         # Prompt
-        system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
-            If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
-            It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
-            Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
         grade_prompt = ChatPromptTemplate.from_messages(
             [
-                ("system", system),
+                ("system", system_retriever_grader),
                 ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
             ]
         )
-
         retrieval_grader = grade_prompt | structured_llm_grader
 
-
+        ##########################################
         ### Generate
-
         from langchain import hub
         from langchain_core.output_parsers import StrOutputParser
 
@@ -167,76 +162,45 @@ def go(args):
         rag_chain = prompt | llm | StrOutputParser()
 
 
-
+        ##########################################
         ### Hallucination Grader
-
-        # Data model
-        class GradeHallucinations(BaseModel):
-            """Binary score for hallucination present in generation answer."""
-
-            binary_score: str = Field(
-                description="Answer is grounded in the facts, 'yes' or 'no'"
-            )
-
-
-        # LLM with function call
         structured_llm_grader = llm.with_structured_output(GradeHallucinations)
 
         # Prompt
-        system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
-            Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
         hallucination_prompt = ChatPromptTemplate.from_messages(
             [
-                ("system", system),
+                ("system", system_hallucination_grader),
                 ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
             ]
         )
 
         hallucination_grader = hallucination_prompt | structured_llm_grader
 
-
+        ##########################################
         ### Answer Grader
-        # Data model
-        class GradeAnswer(BaseModel):
-            """Binary score to assess answer addresses question."""
-
-            binary_score: str = Field(
-                description="Answer addresses the question, 'yes' or 'no'"
-            )
-
-
-        # LLM with function call
         structured_llm_grader = llm.with_structured_output(GradeAnswer)
 
         # Prompt
-        system = """You are a grader assessing whether an answer addresses / resolves a question \n 
-            Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question."""
         answer_prompt = ChatPromptTemplate.from_messages(
             [
-                ("system", system),
+                ("system", system_answer_grader),
                 ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
             ]
         )
-
         answer_grader = answer_prompt | structured_llm_grader
 
+        ##########################################
         ### Question Re-writer
-
-        # LLM
-
         # Prompt
-        system = """You a question re-writer that converts an input question to a better version that is optimized \n 
-            for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
         re_write_prompt = ChatPromptTemplate.from_messages(
             [
-                ("system", system),
+                ("system", system_question_rewriter),
                 (
                     "human",
                     "Here is the initial question: \n\n {question} \n Formulate an improved question.",
                 ),
             ]
         )   
-
         question_rewriter = re_write_prompt | llm | StrOutputParser()
 
 
@@ -372,8 +336,6 @@ def go(args):
 
 
         ### Edges ###
-
-
         def route_question(state):
             """
             Route question to web search or RAG.
@@ -504,8 +466,6 @@ def go(args):
         # Compile
         app = workflow.compile()
 
-
-
         # Run
         inputs = {
             "question": args.query
@@ -521,8 +481,10 @@ def go(args):
         # Final generation
         pprint(value["generation"])
 
+    
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Chain of Thought RAG")
+    parser = argparse.ArgumentParser(description="Adaptive RAG")
 
     parser.add_argument(
         "--query", 
@@ -531,6 +493,13 @@ if __name__ == "__main__":
         required=True
     )
 
+    parser.add_argument(
+        "--query_evaluation_dataset_csv_path",
+        type=str,
+        help="Path to the query evaluation dataset",
+        default=None,
+    )
+
     parser.add_argument(
         "--input_chromadb_artifact", 
         type=str,

From fcb2f9e4ea376b4a9e31e034d1f6ec442e687ddd Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Thu, 13 Mar 2025 15:04:21 +0800
Subject: [PATCH 07/12] correctness done

---
 .../rag_adaptive_evaluation/data_models.py    |   2 +-
 .../src/rag_adaptive_evaluation/evaluators.py | 144 ++++++++----------
 .../prompts_library.py                        |  37 ++++-
 .../rag_adaptive_evaluation/python_env.yml    |   1 -
 app/llmops/src/rag_adaptive_evaluation/run.py |  92 ++++++++---
 app/llmops/src/rag_cot_evaluation/run.py      |  19 +--
 6 files changed, 174 insertions(+), 121 deletions(-)

diff --git a/app/llmops/src/rag_adaptive_evaluation/data_models.py b/app/llmops/src/rag_adaptive_evaluation/data_models.py
index 680cfbd..aeb193d 100644
--- a/app/llmops/src/rag_adaptive_evaluation/data_models.py
+++ b/app/llmops/src/rag_adaptive_evaluation/data_models.py
@@ -1,4 +1,4 @@
-from typing import Literal, List
+from typing import Literal
 from pydantic import BaseModel, Field
 
 
diff --git a/app/llmops/src/rag_adaptive_evaluation/evaluators.py b/app/llmops/src/rag_adaptive_evaluation/evaluators.py
index 17b1b90..0d16dea 100644
--- a/app/llmops/src/rag_adaptive_evaluation/evaluators.py
+++ b/app/llmops/src/rag_adaptive_evaluation/evaluators.py
@@ -1,99 +1,77 @@
+import os
 from decouple import config
-from openevals.llm import create_llm_as_judge
-from openevals.prompts import (
-    CORRECTNESS_PROMPT, 
-    CONCISENESS_PROMPT, 
-    HALLUCINATION_PROMPT
-)
+
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_deepseek import ChatDeepSeek
 from langchain_community.llms.moonshot import Moonshot
 
-GEMINI_API_KEY = config("GOOGLE_API_KEY", cast=str)
-DEEKSEEK_API_KEY = config("DEEKSEEK_API_KEY", cast=str)
-MOONSHOT_API_KEY = config("MOONSHOT_API_KEY", cast=str)
+from pydantic import BaseModel, Field
 
-# correctness
-gemini_evaluator_correctness = create_llm_as_judge(
-    prompt=CORRECTNESS_PROMPT,
-    judge=ChatGoogleGenerativeAI(
+from prompts_library import CORRECTNESS_PROMPT
+
+os.environ["GOOGLE_API_KEY"] = config("GOOGLE_API_KEY", cast=str)
+os.environ["DEEPSEEK_API_KEY"] = config("DEEPSEEK_API_KEY", cast=str)
+os.environ["MOONSHOT_API_KEY"] = config("MOONSHOT_API_KEY", cast=str)
+
+
+# Define output schema for the evaluation
+class CorrectnessGrade(BaseModel):
+    score: int = Field(description="Numerical score (1-5) indicating the correctness of the response.")
+
+# Todo:
+# class RelevanceGrade(BaseModel):
+
+
+
+def gemini_evaluator_correctness(outputs: dict, reference_outputs: dict) -> CorrectnessGrade:
+    llm = ChatGoogleGenerativeAI(
                 model="gemini-1.5-flash", 
-                google_api_key=GEMINI_API_KEY,
                 temperature=0.5,
-            ),
-    )
+            )
 
-deepseek_evaluator_correctness = create_llm_as_judge(
-    prompt=CORRECTNESS_PROMPT,
-    judge=ChatDeepSeek(
+    messages = [
+        {"role": "system", "content": CORRECTNESS_PROMPT},
+        {"role": "user", "content": f"""Ground Truth answer: {reference_outputs["answer"]};
+        Student's Answer: {outputs['response']}
+        """}
+    ]
+
+    response = llm.invoke(messages)
+
+    return CorrectnessGrade(score=int(response.content)).score
+
+
+def deepseek_evaluator_correctness(outputs: dict, reference_outputs: dict) -> CorrectnessGrade:
+    llm = ChatDeepSeek(
                 model="deepseek-chat", 
                 temperature=0.5,
-                api_key=DEEKSEEK_API_KEY
-            ),
-    )
+            )
 
-moonshot_evaluator_correctness = create_llm_as_judge(
-    prompt=CORRECTNESS_PROMPT,
-    judge=Moonshot(
-                model="moonshot-v1-128k", 
+    messages = [
+        {"role": "system", "content": CORRECTNESS_PROMPT},
+        {"role": "user", "content": f"""Ground Truth answer: {reference_outputs["answer"]};
+        Student's Answer: {outputs['response']}
+        """}
+    ]
+
+    response = llm.invoke(messages)
+
+    return CorrectnessGrade(score=int(response.content)).score
+
+
+def moonshot_evaluator_correctness(outputs: dict, reference_outputs: dict) -> CorrectnessGrade:
+    llm = Moonshot(
+                model="moonshot-v1-128k",
                 temperature=0.5,
-                api_key=MOONSHOT_API_KEY
-            ),
-    )
+            )
 
-# conciseness
-gemini_evaluator_conciseness = create_llm_as_judge(
-    prompt=CONCISENESS_PROMPT,
-    judge=ChatGoogleGenerativeAI(
-                model="gemini-1.5-flash", 
-                google_api_key=GEMINI_API_KEY,
-                temperature=0.5,
-            ),
-    )
+    messages = [
+        {"role": "system", "content": CORRECTNESS_PROMPT},
+        {"role": "user", "content": f"""Ground Truth answer: {reference_outputs["answer"]};
+        Student's Answer: {outputs['response']}
+        """}
+    ]
 
-deepseek_evaluator_conciseness = create_llm_as_judge(
-    prompt=CONCISENESS_PROMPT,
-    judge=ChatDeepSeek(
-                model="deepseek-chat", 
-                temperature=0.5,
-                api_key=DEEKSEEK_API_KEY
-            ),
-    )
-
-moonshot_evaluator_conciseness = create_llm_as_judge(
-    prompt=CONCISENESS_PROMPT,
-    judge=Moonshot(
-                model="moonshot-v1-128k", 
-                temperature=0.5,
-                api_key=MOONSHOT_API_KEY
-            ),
-    )
-
-# hallucination
-gemini_evaluator_hallucination = create_llm_as_judge(
-    prompt=HALLUCINATION_PROMPT,
-    judge=ChatGoogleGenerativeAI(
-                model="gemini-1.5-flash", 
-                google_api_key=GEMINI_API_KEY,
-                temperature=0.5,
-            ),
-    )
-
-deepseek_evaluator_hallucination = create_llm_as_judge(
-    prompt=HALLUCINATION_PROMPT,
-    judge=ChatDeepSeek(
-                model="deepseek-chat", 
-                temperature=0.5,
-                api_key=DEEKSEEK_API_KEY
-            ),
-    )
-
-moonshot_evaluator_hallucination = create_llm_as_judge(
-    prompt=HALLUCINATION_PROMPT,
-    judge=Moonshot(
-                model="moonshot-v1-128k", 
-                temperature=0.5,
-                api_key=MOONSHOT_API_KEY
-            ),
-    )
+    response = llm.invoke(messages)
 
+    return CorrectnessGrade(score=int(response)).score
diff --git a/app/llmops/src/rag_adaptive_evaluation/prompts_library.py b/app/llmops/src/rag_adaptive_evaluation/prompts_library.py
index fcaf564..3bfed18 100644
--- a/app/llmops/src/rag_adaptive_evaluation/prompts_library.py
+++ b/app/llmops/src/rag_adaptive_evaluation/prompts_library.py
@@ -16,4 +16,39 @@ system_answer_grader = """You are a grader assessing whether an answer addresses
     Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question."""
 
 system_question_rewriter = """You a question re-writer that converts an input question to a better version that is optimized \n 
-    for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
\ No newline at end of file
+    for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
+
+
+# Evaluation
+CORRECTNESS_PROMPT = """Evaluate Student Answer against Ground Truth for conceptual similarity and correctness.
+
+You are an impartial judge. Evaluate Student Answer against Ground Truth for conceptual similarity and correctness. 
+You may also be given additional information that was used by the model to generate the output.
+
+Your task is to determine a numerical score called correctness based on the input and output.
+A definition of correctness and a grading rubric are provided below.
+You must use the grading rubric to determine your score.
+
+Metric definition:
+Correctness assesses the degree to which a provided output aligns with factual accuracy, completeness, logical 
+consistency, and precise terminology. It evaluates the intrinsic validity of the output, independent of any 
+external context. A higher score indicates a higher adherence to factual accuracy, completeness, logical consistency, 
+and precise terminology.
+
+Grading rubric:
+Correctness: Below are the details for different scores: 
+ - 1: Major factual errors, highly incomplete, illogical, and uses incorrect terminology.
+ - 2: Significant factual errors, incomplete, noticeable logical flaws, and frequent terminology errors.
+ - 3: Minor factual errors, somewhat incomplete, minor logical inconsistencies, and occasional terminology errors.
+ - 4: Few to no factual errors, mostly complete, strong logical consistency, and accurate terminology.
+ - 5: Accurate, complete, logically consistent, and uses precise terminology.
+ 
+ Reminder:
+  - Carefully read the input and output
+  - Check for factual accuracy and completeness
+  - Focus on correctness of information rather than style or verbosity
+  - The goal is to evaluate factual correctness and completeness of the response.
+  - Please provide your answer score only with the numerical number between 1 and 5. No score: or other text is allowed.
+
+"""
+
diff --git a/app/llmops/src/rag_adaptive_evaluation/python_env.yml b/app/llmops/src/rag_adaptive_evaluation/python_env.yml
index 451cdb7..2278969 100644
--- a/app/llmops/src/rag_adaptive_evaluation/python_env.yml
+++ b/app/llmops/src/rag_adaptive_evaluation/python_env.yml
@@ -24,7 +24,6 @@ build_dependencies:
   - tavily-python
   - langchain_huggingface
   - pydantic
-  - openevals
 # Dependencies required to run the project.
 dependencies:
   - mlflow==2.8.1
\ No newline at end of file
diff --git a/app/llmops/src/rag_adaptive_evaluation/run.py b/app/llmops/src/rag_adaptive_evaluation/run.py
index 1fe7543..c629de7 100644
--- a/app/llmops/src/rag_adaptive_evaluation/run.py
+++ b/app/llmops/src/rag_adaptive_evaluation/run.py
@@ -38,30 +38,26 @@ from evaluators import (
     gemini_evaluator_correctness,
     deepseek_evaluator_correctness,
     moonshot_evaluator_correctness,
-    gemini_evaluator_conciseness,
-    deepseek_evaluator_conciseness,
-    moonshot_evaluator_conciseness,
-    gemini_evaluator_hallucination,
-    deepseek_evaluator_hallucination,
-    moonshot_evaluator_hallucination
+    # gemini_evaluator_conciseness,
+    # deepseek_evaluator_conciseness,
+    # moonshot_evaluator_conciseness,
+    # gemini_evaluator_hallucination,
+    # deepseek_evaluator_hallucination,
+    # moonshot_evaluator_hallucination
 )
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)-15s %(message)s")
 logger = logging.getLogger()
 
-GEMINI_API_KEY = config("GOOGLE_API_KEY", cast=str)
-DEEKSEEK_API_KEY = config("DEEKSEEK_API_KEY", cast=str)
-MOONSHOT_API_KEY = config("MOONSHOT_API_KEY", cast=str)
-TAVILY_API_KEY = config("TAVILY_API_KEY", cast=str)
-LANGSMITH_API_KEY = config("LANGSMITH_API_KEY", cast=str)
-LANGSMITH_TRACING = config("LANGSMITH_TRACING", cast=str)
-LANGSMITH_PROJECT = config("LANGSMITH_PROJECT", cast=str)
-os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY
+os.environ["GOOGLE_API_KEY"] = config("GOOGLE_API_KEY", cast=str)
+os.environ["DEEPSEEK_API_KEY"] = config("DEEPSEEK_API_KEY", cast=str)
+os.environ["MOONSHOT_API_KEY"] = config("MOONSHOT_API_KEY", cast=str)
+os.environ["TAVILY_API_KEY"] = config("TAVILY_API_KEY", cast=str)
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
-os.environ["LANGSMITH_API_KEY"] = LANGSMITH_API_KEY
-os.environ["LANGSMITH_TRACING"] = LANGSMITH_TRACING
+os.environ["LANGSMITH_API_KEY"] = config("LANGSMITH_API_KEY", cast=str)
+os.environ["LANGSMITH_TRACING"] = config("LANGSMITH_TRACING", cast=str)
 os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
-os.environ["LANGSMITH_PROJECT"] = LANGSMITH_PROJECT
+os.environ["LANGSMITH_PROJECT"] = config("LANGSMITH_PROJECT", cast=str)
 
 def go(args):
 
@@ -95,12 +91,10 @@ def go(args):
                 max_tokens=None,
                 timeout=None,
                 max_retries=2,
-                api_key=DEEKSEEK_API_KEY
             )
         elif args.chat_model_provider == 'gemini':
             llm = ChatGoogleGenerativeAI(
                 model="gemini-1.5-flash", 
-                google_api_key=GEMINI_API_KEY,
                 temperature=0,
                 max_retries=3,
                 streaming=True
@@ -112,7 +106,6 @@ def go(args):
                 max_tokens=None,
                 timeout=None,
                 max_retries=2,
-                api_key=MOONSHOT_API_KEY
             )
 
         # Load data from ChromaDB
@@ -479,7 +472,61 @@ def go(args):
             pprint("\n---\n")
 
         # Final generation
-        pprint(value["generation"])
+        print(value["generation"])
+
+        return {"response": value["generation"]}
+    
+def go_evaluation(args):
+    if args.query_evaluation_dataset_csv_path:
+        # import pandas as pd
+        # from tqdm import tqdm
+
+        # df = pd.read_csv(args.query_evaluation_dataset_csv_path)
+        client = Client()
+        # # Create inputs and reference outputs
+        # examples = [
+        # (
+        #     "Which country is Mount Kilimanjaro located in?",
+        #     "Mount Kilimanjaro is located in Tanzania.",
+        # ),
+        # (
+        #     "What is Earth's lowest point?",
+        #     "Earth's lowest point is The Dead Sea.",
+        # ),
+        # ]
+
+        # inputs = [{"question": input_prompt} for input_prompt, _ in examples]
+        # outputs = [{"answer": output_answer} for _, output_answer in examples]
+
+        # # Programmatically create a dataset in LangSmith
+        # dataset = client.create_dataset(
+        #     dataset_name = "Sample dataset",
+        #     description = "A sample dataset in LangSmith."
+        # )
+
+        # # Add examples to the dataset
+        # client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
+
+        def target(inputs: dict) -> dict:
+            new_args = argparse.Namespace(**vars(args))
+            new_args.query = inputs["question"]
+            return go(new_args)
+
+        
+        # After running the evaluation, a link will be provided to view the results in langsmith
+        experiment_results = client.evaluate(
+            target,
+            data = "Sample dataset",
+            evaluators = [
+                    moonshot_evaluator_correctness,
+                    deepseek_evaluator_correctness,
+                    gemini_evaluator_correctness
+                # can add multiple evaluators here
+            ],
+            experiment_prefix = "first-eval-in-langsmith",
+            max_concurrency = 1,
+            
+        )
 
     
 
@@ -523,4 +570,5 @@ if __name__ == "__main__":
 
     args = parser.parse_args()
     
-    go(args)
\ No newline at end of file
+    # go(args)
+    go_evaluation(args)
\ No newline at end of file
diff --git a/app/llmops/src/rag_cot_evaluation/run.py b/app/llmops/src/rag_cot_evaluation/run.py
index aa773b1..06484df 100644
--- a/app/llmops/src/rag_cot_evaluation/run.py
+++ b/app/llmops/src/rag_cot_evaluation/run.py
@@ -14,18 +14,14 @@ from langchain_community.llms.moonshot import Moonshot
 logging.basicConfig(level=logging.INFO, format="%(asctime)-15s %(message)s")
 logger = logging.getLogger()
 
+os.environ["GOOGLE_API_KEY"] = config("GOOGLE_API_KEY", cast=str)
+os.environ["DEEPSEEK_API_KEY"] = config("DEEPSEEK_API_KEY", cast=str)
+os.environ["MOONSHOT_API_KEY"] = config("MOONSHOT_API_KEY", cast=str)
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
-GEMINI_API_KEY = config("GOOGLE_API_KEY", cast=str)
-DEEKSEEK_API_KEY = config("DEEKSEEK_API_KEY", cast=str)
-MOONSHOT_API_KEY = config("MOONSHOT_API_KEY", cast=str)
-LANGSMITH_API_KEY = config("LANGSMITH_API_KEY", cast=str)
-LANGSMITH_TRACING = config("LANGSMITH_TRACING", cast=str)
-LANGSMITH_PROJECT = config("LANGSMITH_PROJECT", cast=str)
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-os.environ["LANGSMITH_API_KEY"] = LANGSMITH_API_KEY
-os.environ["LANGSMITH_TRACING"] = LANGSMITH_TRACING
+os.environ["LANGSMITH_API_KEY"] = config("LANGSMITH_API_KEY", cast=str)
+os.environ["LANGSMITH_TRACING"] = config("LANGSMITH_TRACING", cast=str)
 os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
-os.environ["LANGSMITH_PROJECT"] = LANGSMITH_PROJECT
+os.environ["LANGSMITH_PROJECT"] = config("LANGSMITH_PROJECT", cast=str)
 
 def go(args):
 
@@ -68,14 +64,12 @@ def go(args):
                 max_tokens=None,
                 timeout=None,
                 max_retries=2,
-                api_key=DEEKSEEK_API_KEY
             )
             
         elif args.chat_model_provider == "gemini":
             # Initialize Gemini model
             llm = ChatGoogleGenerativeAI(
                 model="gemini-1.5-flash", 
-                google_api_key=GEMINI_API_KEY,
                 temperature=0,
                 max_retries=3
                 )
@@ -88,7 +82,6 @@ def go(args):
                 max_tokens=None,
                 timeout=None,
                 max_retries=2,
-                api_key=MOONSHOT_API_KEY
             )
             
 

From b6ca6ac677181512a3e8777691dc40c693b82029 Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Thu, 13 Mar 2025 16:19:39 +0800
Subject: [PATCH 08/12] Add faithfulness evaluators and prompt; refine correctness prompt

---
 .../src/rag_adaptive_evaluation/evaluators.py | 72 +++++++++++++++++--
 .../prompts_library.py                        | 44 +++++++++---
 app/llmops/src/rag_adaptive_evaluation/run.py | 14 ++--
 3 files changed, 109 insertions(+), 21 deletions(-)

diff --git a/app/llmops/src/rag_adaptive_evaluation/evaluators.py b/app/llmops/src/rag_adaptive_evaluation/evaluators.py
index 0d16dea..f7f1184 100644
--- a/app/llmops/src/rag_adaptive_evaluation/evaluators.py
+++ b/app/llmops/src/rag_adaptive_evaluation/evaluators.py
@@ -7,7 +7,7 @@ from langchain_community.llms.moonshot import Moonshot
 
 from pydantic import BaseModel, Field
 
-from prompts_library import CORRECTNESS_PROMPT
+from prompts_library import CORRECTNESS_PROMPT, FAITHFULNESS_PROMPT
 
 os.environ["GOOGLE_API_KEY"] = config("GOOGLE_API_KEY", cast=str)
 os.environ["DEEPSEEK_API_KEY"] = config("DEEPSEEK_API_KEY", cast=str)
@@ -18,11 +18,12 @@ os.environ["MOONSHOT_API_KEY"] = config("MOONSHOT_API_KEY", cast=str)
 class CorrectnessGrade(BaseModel):
     score: int = Field(description="Numerical score (1-5) indicating the correctness of the response.")
 
-# Todo:
-# class RelevanceGrade(BaseModel):
+class FaithfulnessGrade(BaseModel):
+    score: int = Field(description="Numerical score (1-5) indicating the faithfulness of the response.")
 
 
 
+# Evaluators
 def gemini_evaluator_correctness(outputs: dict, reference_outputs: dict) -> CorrectnessGrade:
     llm = ChatGoogleGenerativeAI(
                 model="gemini-1.5-flash", 
@@ -74,4 +75,67 @@ def moonshot_evaluator_correctness(outputs: dict, reference_outputs: dict) -> Co
 
     response = llm.invoke(messages)
 
-    return CorrectnessGrade(score=int(response)).score
+    try:
+        return CorrectnessGrade(score=int(response)).score
+    except ValueError:
+        score_str = response.split(":")[1].strip()
+        return CorrectnessGrade(score=int(score_str)).score
+    
+
+def gemini_evaluator_faithfulness(outputs: dict, reference_outputs: dict) -> FaithfulnessGrade:
+    llm = ChatGoogleGenerativeAI(
+                model="gemini-1.5-pro", 
+                temperature=0.5,
+            )
+
+    messages = [
+        {"role": "system", "content": FAITHFULNESS_PROMPT},
+        {"role": "user", "content": f"""Context: {reference_outputs["answer"]};
+        Output: {outputs['response']}
+        """}
+    ]
+
+    response = llm.invoke(messages)
+
+    return FaithfulnessGrade(score=int(response.content)).score
+
+
+def deepseek_evaluator_faithfulness(outputs: dict, reference_outputs: dict) -> FaithfulnessGrade:
+    llm = ChatDeepSeek(
+                model="deepseek-chat", 
+                temperature=0.5,
+            )
+
+    messages = [
+        {"role": "system", "content": FAITHFULNESS_PROMPT},
+        {"role": "user", "content": f"""Context: {reference_outputs["answer"]};
+        Output: {outputs['response']}
+        """}
+    ]
+
+    response = llm.invoke(messages)
+
+    return FaithfulnessGrade(score=int(response.content)).score
+
+
+def moonshot_evaluator_faithfulness(outputs: dict, reference_outputs: dict) -> FaithfulnessGrade:
+    llm = Moonshot(
+                model="moonshot-v1-128k",
+                temperature=0.5,
+            )
+
+    messages = [
+        {"role": "system", "content": FAITHFULNESS_PROMPT},
+        {"role": "user", "content": f"""Context: {reference_outputs["answer"]};
+        Output: {outputs['response']}
+        """}
+    ]
+
+    response = llm.invoke(messages)
+
+    try:
+        return FaithfulnessGrade(score=int(response)).score
+    except ValueError:
+        score_str = response.split(":")[1].strip()
+        return FaithfulnessGrade(score=int(score_str)).score
+
diff --git a/app/llmops/src/rag_adaptive_evaluation/prompts_library.py b/app/llmops/src/rag_adaptive_evaluation/prompts_library.py
index 3bfed18..33d23a7 100644
--- a/app/llmops/src/rag_adaptive_evaluation/prompts_library.py
+++ b/app/llmops/src/rag_adaptive_evaluation/prompts_library.py
@@ -20,20 +20,18 @@ system_question_rewriter = """You a question re-writer that converts an input qu
 
 
 # Evaluation
-CORRECTNESS_PROMPT = """Evaluate Student Answer against Ground Truth for conceptual similarity and correctness.
-
-You are an impartial judge. Evaluate Student Answer against Ground Truth for conceptual similarity and correctness. 
+CORRECTNESS_PROMPT = """You are an impartial judge. Evaluate Student Answer against Ground Truth for conceptual similarity and correctness. 
 You may also be given additional information that was used by the model to generate the output.
 
-Your task is to determine a numerical score called faithfulness based on the input and output.
+Your task is to determine a numerical score called correctness based on the Student Answer and Ground Truth.
 A definition of correctness and a grading rubric are provided below.
 You must use the grading rubric to determine your score.
 
 Metric definition:
-Correctness assesses the degree to which a provided output aligns with factual accuracy, completeness, logical 
-consistency, and precise terminology. It evaluates the intrinsic validity of the output, independent of any 
+Correctness assesses the degree to which a provided Student Answer aligns with factual accuracy, completeness, logical 
+consistency, and precise terminology of the Ground Truth. It evaluates the intrinsic validity of the Student Answer , independent of any 
 external context. A higher score indicates a higher adherence to factual accuracy, completeness, logical consistency, 
-and precise terminology.
+and precise terminology of the Ground Truth.
 
 Grading rubric:
 Correctness: Below are the details for different scores: 
@@ -44,11 +42,37 @@ Correctness: Below are the details for different scores:
  - 5: Accurate, complete, logically consistent, and uses precise terminology.
  
  Reminder:
-  - Carefully read the input and output
-  - Check for factual accuracy and completeness
+  - Carefully read the Student Answer and Ground Truth
+  - Check for factual accuracy and completeness of Student Answer compared to the Ground Truth
   - Focus on correctness of information rather than style or verbosity
-  - The goal is to evaluate factual correctness and completeness of the response.
+  - The goal is to evaluate factual correctness and completeness of the Student Answer.
   - Please provide your answer score only with the numerical number between 1 and 5. No score: or other text is allowed.
 
 """
 
+FAITHFULNESS_PROMPT = """You are an impartial judge. Evaluate output against context for faithfulness. 
+You may also be given additional information that was used by the model to generate the Output.
+
+Your task is to determine a numerical score called faithfulness based on the output and context.
+A definition of faithfulness and a grading rubric are provided below.
+You must use the grading rubric to determine your score.
+
+Metric definition:
+Faithfulness is only evaluated with the provided output and context. Faithfulness assesses how much of the 
+provided output is factually consistent with the provided context. A higher score indicates that a higher proportion of 
+claims present in the output can be derived from the provided context. Faithfulness does not consider how much extra 
+information from the context is not present in the output.
+
+Grading rubric:
+Faithfulness: Below are the details for different scores:
+- Score 1: None of the claims in the output can be inferred from the provided context.
+- Score 2: Some of the claims in the output can be inferred from the provided context, but the majority of the output is missing from, inconsistent with, or contradictory to the provided context.
+- Score 3: Half or more of the claims in the output can be inferred from the provided context.
+- Score 4: Most of the claims in the output can be inferred from the provided context, with very little information that is not directly supported by the provided context.
+- Score 5: All of the claims in the output are directly supported by the provided context, demonstrating high faithfulness to the provided context.
+
+Reminder:
+- Carefully read the output and context
+- Focus on the information instead of the writing style or verbosity.
+- Please provide your answer score only with the numerical number between 1 and 5, according to the grading rubric above. No score: or other text is allowed.  
+"""
\ No newline at end of file
diff --git a/app/llmops/src/rag_adaptive_evaluation/run.py b/app/llmops/src/rag_adaptive_evaluation/run.py
index c629de7..bf8de6c 100644
--- a/app/llmops/src/rag_adaptive_evaluation/run.py
+++ b/app/llmops/src/rag_adaptive_evaluation/run.py
@@ -38,12 +38,9 @@ from evaluators import (
     gemini_evaluator_correctness,
     deepseek_evaluator_correctness,
     moonshot_evaluator_correctness,
-    # gemini_evaluator_conciseness,
-    # deepseek_evaluator_conciseness,
-    # moonshot_evaluator_conciseness,
-    # gemini_evaluator_hallucination,
-    # deepseek_evaluator_hallucination,
-    # moonshot_evaluator_hallucination
+    gemini_evaluator_faithfulness,
+    deepseek_evaluator_faithfulness,
+    moonshot_evaluator_faithfulness
 )
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)-15s %(message)s")
@@ -520,7 +517,10 @@ def go_evaluation(args):
             evaluators = [
                     moonshot_evaluator_correctness,
                     deepseek_evaluator_correctness,
-                    gemini_evaluator_correctness
+                    gemini_evaluator_correctness,
+                    gemini_evaluator_faithfulness,
+                    deepseek_evaluator_faithfulness,
+                    moonshot_evaluator_faithfulness
                 # can add multiple evaluators here
             ],
             experiment_prefix = "first-eval-in-langsmith",

From 86a2c1a055d048eb2697f2575c7255e3a702049a Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Thu, 13 Mar 2025 21:23:36 +0800
Subject: [PATCH 09/12] Make evaluation dataset and evaluator providers configurable

---
 app/llmops/config.yaml                        | 11 ++-
 app/llmops/main.py                            | 25 ++---
 .../src/rag_adaptive_evaluation/MLproject     | 22 ++++-
 app/llmops/src/rag_adaptive_evaluation/run.py | 94 +++++++++++--------
 4 files changed, 99 insertions(+), 53 deletions(-)

diff --git a/app/llmops/config.yaml b/app/llmops/config.yaml
index 5452f8c..33383ea 100644
--- a/app/llmops/config.yaml
+++ b/app/llmops/config.yaml
@@ -9,8 +9,15 @@ etl:
   path_document_folder: "../../../../data"
   run_id_documents: None
   embedding_model: paraphrase-multilingual-mpnet-base-v2
-prompt_engineering:
+rag:
   run_id_chromadb: None
   chat_model_provider: gemini
+testing:
   query: "如何治疗乳腺癌?"
-  query_evaluation_dataset_csv_path: "../../../../data/qa_datasets.csv"   
\ No newline at end of file
+evaluation:
+  evaluation_dataset_csv_path: "../../../../data/qa_datasets.csv"
+  evaluation_dataset_column_question: question
+  evaluation_dataset_column_answer: answer
+  ls_chat_model_provider:
+  - gemini
+  - moonshot
\ No newline at end of file
diff --git a/app/llmops/main.py b/app/llmops/main.py
index ac768b4..32d6a8b 100644
--- a/app/llmops/main.py
+++ b/app/llmops/main.py
@@ -104,7 +104,7 @@ def go(config: DictConfig):
             )
         if "rag_cot_evaluation" in active_steps:
 
-            if config["prompt_engineering"]["run_id_chromadb"] == "None":
+            if config["rag"]["run_id_chromadb"] == "None":
                 # Look for run_id that has artifact logged as documents
                 run_id = None
                 client = mlflow.tracking.MlflowClient()
@@ -119,22 +119,22 @@ def go(config: DictConfig):
                 if run_id is None:
                     raise ValueError("No run_id found with artifact logged as documents")
             else:
-                run_id = config["prompt_engineering"]["run_id_chromadb"]
+                run_id = config["rag"]["run_id_chromadb"]
 
             _ = mlflow.run(
                 os.path.join(hydra.utils.get_original_cwd(), "src", "rag_cot_evaluation"),
                 "main",
                 parameters={
-                    "query": config["prompt_engineering"]["query"],
+                    "query": config["testing"]["query"],
                     "input_chromadb_artifact": f'runs:/{run_id}/chromadb/chroma_db.zip',
                     "embedding_model": config["etl"]["embedding_model"],
-                    "chat_model_provider": config["prompt_engineering"]["chat_model_provider"]
+                    "chat_model_provider": config["rag"]["chat_model_provider"]
                 },
             )
         
         if "rag_adaptive_evaluation" in active_steps:
 
-            if config["prompt_engineering"]["run_id_chromadb"] == "None":
+            if config["rag"]["run_id_chromadb"] == "None":
                 # Look for run_id that has artifact logged as documents
                 run_id = None
                 client = mlflow.tracking.MlflowClient()
@@ -149,17 +149,20 @@ def go(config: DictConfig):
                 if run_id is None:
                     raise ValueError("No run_id found with artifact logged as documents")
             else:
-                run_id = config["prompt_engineering"]["run_id_chromadb"]
+                run_id = config["rag"]["run_id_chromadb"]
 
             _ = mlflow.run(
                 os.path.join(hydra.utils.get_original_cwd(), "src", "rag_adaptive_evaluation"),
                 "main",
                 parameters={
-                    "query": config["prompt_engineering"]["query"],
-                    "query_evaluation_dataset_csv_path": config["prompt_engineering"]["query_evaluation_dataset_csv_path"],
+                    "query": config["testing"]["query"],
+                    "evaluation_dataset_csv_path": config["evaluation"]["evaluation_dataset_csv_path"],
+                    "evaluation_dataset_column_question": config["evaluation"]["evaluation_dataset_column_question"],
+                    "evaluation_dataset_column_answer": config["evaluation"]["evaluation_dataset_column_answer"],
                     "input_chromadb_artifact": f'runs:/{run_id}/chromadb/chroma_db.zip',
                     "embedding_model": config["etl"]["embedding_model"],
-                    "chat_model_provider": config["prompt_engineering"]["chat_model_provider"]
+                    "chat_model_provider": config["rag"]["chat_model_provider"],
+                    "ls_chat_model_evaluator": ','.join(config["evaluation"]["ls_chat_model_provider"]) if config["evaluation"]["ls_chat_model_provider"] is not None else 'None',
                 },
             )
 
@@ -169,10 +172,10 @@ def go(config: DictConfig):
                 os.path.join(hydra.utils.get_original_cwd(), "components", "test_rag_cot"),
                 "main",
                 parameters={
-                    "query": config["prompt_engineering"]["query"],
+                    "query": config["testing"]["query"],
                     "input_chromadb_local": os.path.join(hydra.utils.get_original_cwd(), "src", "rag_cot_evaluation", "chroma_db"),
                     "embedding_model": config["etl"]["embedding_model"],
-                    "chat_model_provider": config["prompt_engineering"]["chat_model_provider"]
+                    "chat_model_provider": config["rag"]["chat_model_provider"]
                 },
             )
 
diff --git a/app/llmops/src/rag_adaptive_evaluation/MLproject b/app/llmops/src/rag_adaptive_evaluation/MLproject
index 457116d..77061d4 100644
--- a/app/llmops/src/rag_adaptive_evaluation/MLproject
+++ b/app/llmops/src/rag_adaptive_evaluation/MLproject
@@ -9,10 +9,18 @@ entry_points:
         description: Query to run
         type: string
       
-      query_evaluation_dataset_csv_path:
+      evaluation_dataset_csv_path:
         description: query evaluation dataset csv path
         type: string
 
+      evaluation_dataset_column_question:
+        description: query evaluation dataset column question
+        type: string
+
+      evaluation_dataset_column_answer:
+        description: query evaluation dataset column groundtruth
+        type: string
+
       input_chromadb_artifact:
         description: Fully-qualified name for the input artifact
         type: string
@@ -24,10 +32,18 @@ entry_points:
       chat_model_provider:
         description: Fully-qualified name for the chat model provider
         type: string
+
+      ls_chat_model_evaluator:
+        description: list of chat model providers for evaluation
+        type: string
+      
     
     command: >-
         python run.py --query {query} \
-                      --query_evaluation_dataset_csv_path {query_evaluation_dataset_csv_path} \
+                      --evaluation_dataset_csv_path {evaluation_dataset_csv_path} \
+                      --evaluation_dataset_column_question {evaluation_dataset_column_question} \
+                      --evaluation_dataset_column_answer {evaluation_dataset_column_answer} \
                       --input_chromadb_artifact {input_chromadb_artifact} \
                       --embedding_model {embedding_model} \
-                      --chat_model_provider {chat_model_provider}
\ No newline at end of file
+                      --chat_model_provider {chat_model_provider} \
+                      --ls_chat_model_evaluator {ls_chat_model_evaluator}
\ No newline at end of file
diff --git a/app/llmops/src/rag_adaptive_evaluation/run.py b/app/llmops/src/rag_adaptive_evaluation/run.py
index bf8de6c..4acc4c7 100644
--- a/app/llmops/src/rag_adaptive_evaluation/run.py
+++ b/app/llmops/src/rag_adaptive_evaluation/run.py
@@ -474,61 +474,60 @@ def go(args):
         return {"response": value["generation"]}
     
 def go_evaluation(args):
-    if args.query_evaluation_dataset_csv_path:
-        # import pandas as pd
-        # from tqdm import tqdm
+    if args.evaluation_dataset_csv_path:
 
-        # df = pd.read_csv(args.query_evaluation_dataset_csv_path)
+        import pandas as pd
+
+        df = pd.read_csv(args.evaluation_dataset_csv_path)
+        dataset_name = os.path.basename(args.evaluation_dataset_csv_path).split('.')[0]
+
+        # df contains columns of question and answer
+        examples = df[[args.evaluation_dataset_column_question, args.evaluation_dataset_column_answer]].values.tolist()
+        inputs = [{"question": input_prompt} for input_prompt, _ in examples]
+        outputs = [{"answer": output_answer} for _, output_answer in examples]
+
+        # Programmatically create a dataset in LangSmith
         client = Client()
-        # # Create inputs and reference outputs
-        # examples = [
-        # (
-        #     "Which country is Mount Kilimanjaro located in?",
-        #     "Mount Kilimanjaro is located in Tanzania.",
-        # ),
-        # (
-        #     "What is Earth's lowest point?",
-        #     "Earth's lowest point is The Dead Sea.",
-        # ),
-        # ]
 
-        # inputs = [{"question": input_prompt} for input_prompt, _ in examples]
-        # outputs = [{"answer": output_answer} for _, output_answer in examples]
+        dataset = client.create_dataset(
+            dataset_name = dataset_name,
+            description = "A sample dataset in LangSmith."
+        )
 
-        # # Programmatically create a dataset in LangSmith
-        # dataset = client.create_dataset(
-        #     dataset_name = "Sample dataset",
-        #     description = "A sample dataset in LangSmith."
-        # )
+        # Add examples to the dataset
+        client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
 
-        # # Add examples to the dataset
-        # client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
+        
+        args.ls_chat_model_evaluator = None if args.ls_chat_model_evaluator == 'None' else args.ls_chat_model_evaluator.split(',')
 
         def target(inputs: dict) -> dict:
             new_args = argparse.Namespace(**vars(args))
             new_args.query = inputs["question"]
             return go(new_args)
 
+        ls_evaluators = []
+        if args.ls_chat_model_evaluator:
+            for evaluator in args.ls_chat_model_evaluator:
+                if evaluator == 'moonshot':
+                    ls_evaluators.append(moonshot_evaluator_correctness)
+                    ls_evaluators.append(moonshot_evaluator_faithfulness)
+                elif evaluator == 'deepseek':
+                    ls_evaluators.append(deepseek_evaluator_correctness)
+                    ls_evaluators.append(deepseek_evaluator_faithfulness)
+                elif evaluator == 'gemini':
+                    ls_evaluators.append(gemini_evaluator_correctness)
+                    ls_evaluators.append(gemini_evaluator_faithfulness)
         
         # After running the evaluation, a link will be provided to view the results in langsmith
         experiment_results = client.evaluate(
             target,
             data = "Sample dataset",
-            evaluators = [
-                    moonshot_evaluator_correctness,
-                    deepseek_evaluator_correctness,
-                    gemini_evaluator_correctness,
-                    gemini_evaluator_faithfulness,
-                    deepseek_evaluator_faithfulness,
-                    moonshot_evaluator_faithfulness
-                # can add multiple evaluators here
-            ],
+            evaluators = ls_evaluators,
             experiment_prefix = "first-eval-in-langsmith",
             max_concurrency = 1,
             
         )
 
-    
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Adaptive AG")
@@ -541,12 +540,26 @@ if __name__ == "__main__":
     )
 
     parser.add_argument(
-        "--query_evaluation_dataset_csv_path",
+        "--evaluation_dataset_csv_path",
         type=str,
         help="Path to the query evaluation dataset",
         default=None,
     )
 
+    parser.add_argument(
+        "--evaluation_dataset_column_question",
+        type=str,
+        help="Column name for the questions in the evaluation dataset",
+        default="question",
+    )
+
+    parser.add_argument(
+        "--evaluation_dataset_column_answer",
+        type=str,
+        help="Column name for the groundtruth answers in the evaluation dataset",
+        default="groundtruth",
+    )
+
     parser.add_argument(
         "--input_chromadb_artifact", 
         type=str,
@@ -568,7 +581,14 @@ if __name__ == "__main__":
         help="Chat model provider"
     )
 
+    parser.add_argument(
+        "--ls_chat_model_evaluator",
+        type=str,
+        help="list of Chat model providers for evaluation",
+        required=False,
+        default="None"
+    )
+
     args = parser.parse_args()
-    
-    # go(args)
+
     go_evaluation(args)
\ No newline at end of file

From afbb34079abb8548f7a5b0ac4f651f4436a9d6d8 Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Thu, 13 Mar 2025 22:19:30 +0800
Subject: [PATCH 10/12] Ready for dataset evaluation (reuse existing LangSmith dataset)

---
 app/llmops/config.yaml                        |  5 +++--
 app/llmops/src/rag_adaptive_evaluation/run.py | 22 ++++++++++++-------
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/app/llmops/config.yaml b/app/llmops/config.yaml
index 33383ea..4ea1d94 100644
--- a/app/llmops/config.yaml
+++ b/app/llmops/config.yaml
@@ -11,13 +11,14 @@ etl:
   embedding_model: paraphrase-multilingual-mpnet-base-v2
 rag:
   run_id_chromadb: None
-  chat_model_provider: gemini
+  chat_model_provider: deepseek
 testing:
   query: "如何治疗乳腺癌?"
 evaluation:
-  evaluation_dataset_csv_path: "../../../../data/qa_datasets.csv"
+  evaluation_dataset_csv_path: "../../../../data/qa_dataset_01.csv"
   evaluation_dataset_column_question: question
   evaluation_dataset_column_answer: answer
   ls_chat_model_provider:
   - gemini
+  - deepseek
   - moonshot
\ No newline at end of file
diff --git a/app/llmops/src/rag_adaptive_evaluation/run.py b/app/llmops/src/rag_adaptive_evaluation/run.py
index 4acc4c7..275b4d3 100644
--- a/app/llmops/src/rag_adaptive_evaluation/run.py
+++ b/app/llmops/src/rag_adaptive_evaluation/run.py
@@ -3,6 +3,8 @@ import logging
 import argparse
 import mlflow
 import shutil
+import langsmith
+
 from decouple import config
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_deepseek import ChatDeepSeek
@@ -10,6 +12,7 @@ from langchain_community.llms.moonshot import Moonshot
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores.chroma import Chroma
 
+
 from typing import List
 from typing_extensions import TypedDict
 
@@ -489,13 +492,16 @@ def go_evaluation(args):
         # Programmatically create a dataset in LangSmith
         client = Client()
 
-        dataset = client.create_dataset(
-            dataset_name = dataset_name,
-            description = "A sample dataset in LangSmith."
-        )
-
-        # Add examples to the dataset
-        client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
+        try:
+            # Create a dataset
+            dataset = client.create_dataset(
+                dataset_name = dataset_name,
+                description = "An evaluation dataset in LangSmith."
+            )
+            # Add examples to the dataset
+            client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
+        except langsmith.utils.LangSmithConflictError:
+            pass
 
         
         args.ls_chat_model_evaluator = None if args.ls_chat_model_evaluator == 'None' else args.ls_chat_model_evaluator.split(',')
@@ -521,7 +527,7 @@ def go_evaluation(args):
         # After running the evaluation, a link will be provided to view the results in langsmith
         experiment_results = client.evaluate(
             target,
-            data = "Sample dataset",
+            data = dataset_name,
             evaluators = ls_evaluators,
             experiment_prefix = "first-eval-in-langsmith",
             max_concurrency = 1,

From 6471626497b83943515db5c4402a5714511e10ab Mon Sep 17 00:00:00 2001
From: leehk <leehongkai@gmail.com>
Date: Mon, 24 Mar 2025 13:35:40 +0800
Subject: [PATCH 11/12] Revisions for 3-question testing

---
 .gitignore                                       | 3 ++-
 app/llmops/config.yaml                           | 8 ++++----
 app/llmops/src/etl_chromadb_pdf/run.py           | 6 +++---
 app/streamlit/initialize_sentence_transformer.py | 4 +++-
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/.gitignore b/.gitignore
index ffc3c5e..8d92113 100644
--- a/.gitignore
+++ b/.gitignore
@@ -208,4 +208,5 @@ data/*
 **/*.zip
 **/llm-examples/*
 **/*.ipynb_checkpoints
-**/*.ipynb
\ No newline at end of file
+**/*.ipynb
+**/transformer_model/*
\ No newline at end of file
diff --git a/app/llmops/config.yaml b/app/llmops/config.yaml
index 4ea1d94..834fce8 100644
--- a/app/llmops/config.yaml
+++ b/app/llmops/config.yaml
@@ -15,10 +15,10 @@ rag:
 testing:
   query: "如何治疗乳腺癌?"
 evaluation:
-  evaluation_dataset_csv_path: "../../../../data/qa_dataset_01.csv"
+  evaluation_dataset_csv_path: "../../../../data/qa_dataset_20240321a.csv"
   evaluation_dataset_column_question: question
   evaluation_dataset_column_answer: answer
   ls_chat_model_provider:
-  - gemini
-  - deepseek
-  - moonshot
\ No newline at end of file
+    - gemini
+    - deepseek
+    - moonshot
diff --git a/app/llmops/src/etl_chromadb_pdf/run.py b/app/llmops/src/etl_chromadb_pdf/run.py
index edaaa01..9b2a82b 100644
--- a/app/llmops/src/etl_chromadb_pdf/run.py
+++ b/app/llmops/src/etl_chromadb_pdf/run.py
@@ -105,7 +105,7 @@ def go(args):
         documents_folder = os.path.splitext(os.path.basename(artifact_local_path))[0]
 
         text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
-            chunk_size=1000, chunk_overlap=500
+            chunk_size=15000, chunk_overlap=7500
         )
 
         ls_docs = []
@@ -113,7 +113,7 @@ def go(args):
             for file in files:
                 if file.endswith(".pdf"):
                     read_text = extract_chinese_text_from_pdf(os.path.join(root, file))
-                    document = Document(metadata={"file": file}, page_content=read_text)
+                    document = Document(metadata={"file": f"{documents_folder}/{file}"}, page_content=read_text)
                     ls_docs.append(document)
                                         
         doc_splits = text_splitter.split_documents(ls_docs)
@@ -138,7 +138,7 @@ def go(args):
 
 if __name__ == "__main__":
 
-    parser = argparse.ArgumentParser(description="A very basic data cleaning")
+    parser = argparse.ArgumentParser(description="ETL for ChromaDB with readable PDF")
 
     parser.add_argument(
         "--input_artifact", 
diff --git a/app/streamlit/initialize_sentence_transformer.py b/app/streamlit/initialize_sentence_transformer.py
index 3026701..937c70d 100644
--- a/app/streamlit/initialize_sentence_transformer.py
+++ b/app/streamlit/initialize_sentence_transformer.py
@@ -4,4 +4,6 @@ from sentence_transformers import SentenceTransformer
 EMBEDDING_MODEL = config("EMBEDDING_MODEL", cast=str, default="paraphrase-multilingual-mpnet-base-v2")
 
 # Initialize embedding model
-model = SentenceTransformer(EMBEDDING_MODEL) 
\ No newline at end of file
+model = SentenceTransformer(EMBEDDING_MODEL) 
+
+model.save("./transformer_model/paraphrase-multilingual-mpnet-base-v2")

From 366f6850a9b59f10ece0b6f63e31e4a23102819a Mon Sep 17 00:00:00 2001
From: Hong Kai LEE <hkailee@users.noreply.github.com>
Date: Tue, 1 Apr 2025 10:55:26 +0800
Subject: [PATCH 12/12] Update run.py: discard unused client.evaluate() result

---
 app/llmops/src/rag_adaptive_evaluation/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/llmops/src/rag_adaptive_evaluation/run.py b/app/llmops/src/rag_adaptive_evaluation/run.py
index 275b4d3..033fd0d 100644
--- a/app/llmops/src/rag_adaptive_evaluation/run.py
+++ b/app/llmops/src/rag_adaptive_evaluation/run.py
@@ -525,7 +525,7 @@ def go_evaluation(args):
                     ls_evaluators.append(gemini_evaluator_faithfulness)
         
         # After running the evaluation, a link will be provided to view the results in langsmith
-        experiment_results = client.evaluate(
+        _ = client.evaluate(
             target,
             data = dataset_name,
             evaluators = ls_evaluators,
@@ -597,4 +597,4 @@ if __name__ == "__main__":
 
     args = parser.parse_args()
 
-    go_evaluation(args)
\ No newline at end of file
+    go_evaluation(args)