working front end

This commit is contained in:
leehk 2025-03-05 19:28:28 +08:00
parent 320bae36c7
commit 49082a238c
5 changed files with 2247 additions and 225 deletions

View File

@ -9,6 +9,7 @@ from sentence_transformers import SentenceTransformer
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_deepseek import ChatDeepSeek
from langchain_community.llms.moonshot import Moonshot
import sys
logging.basicConfig(level=logging.INFO, format="%(asctime)-15s %(message)s")
logger = logging.getLogger()
@ -18,10 +19,15 @@ GEMINI_API_KEY = config("GOOGLE_API_KEY", cast=str)
DEEKSEEK_API_KEY = config("DEEKSEEK_API_KEY", cast=str)
MOONSHOT_API_KEY = config("MOONSHOT_API_KEY", cast=str)
def stream_output(text):
    """Emit *text* to stdout one character at a time.

    Each character is written and the stream is flushed immediately so the
    output appears incrementally (typewriter effect) rather than after the
    whole string is buffered.

    Args:
        text: The string to stream to standard output.
    """
    write = sys.stdout.write
    flush = sys.stdout.flush
    for ch in text:
        write(ch)
        flush()
def go(args):
# start a new MLflow run
with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id, run_name="etl_chromdb_pdf"):
with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id, run_name="etl_chromadb_pdf"):
existing_params = mlflow.get_run(mlflow.active_run().info.run_id).data.params
if 'query' not in existing_params:
mlflow.log_param('query', args.query)
@ -95,7 +101,9 @@ def go(args):
# Generate chain of thought
cot_output = cot_chain.invoke({"documents_text": documents_text, "question": question})
print("Chain of Thought: ", cot_output)
print("Chain of Thought: ", end="")
stream_output(cot_output.content)
print()
# Answer Prompt
answer_template = """Given the chain of thought: {cot}
@ -107,7 +115,9 @@ def go(args):
# Generate answer
answer_output = answer_chain.invoke({"cot": cot_output, "question": question})
print("Answer: ", answer_output)
print("Answer: ", end="")
stream_output(answer_output.content)
print()
if __name__ == "__main__":

View File

@ -11,6 +11,6 @@ etl:
embedding_model: paraphrase-multilingual-mpnet-base-v2
prompt_engineering:
run_id_chromadb: None
chat_model_provider: moonshot
chat_model_provider: gemini
query: "怎么治疗有kras的肺癌?"

View File

@ -1,29 +1,109 @@
from openai import OpenAI
import os
import subprocess
import streamlit as st
import chromadb
from decouple import config
from langchain.prompts import PromptTemplate
from sentence_transformers import SentenceTransformer
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_deepseek import ChatDeepSeek
from langchain_community.llms.moonshot import Moonshot
with st.sidebar:
openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
"[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
"[View the source code](https://github.com/streamlit/llm-examples/blob/main/Chatbot.py)"
"[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/streamlit/llm-examples?quickstart=1)"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
GEMINI_API_KEY = config("GOOGLE_API_KEY", cast=str)
DEEKSEEK_API_KEY = config("DEEKSEEK_API_KEY", cast=str)
MOONSHOT_API_KEY = config("MOONSHOT_API_KEY", cast=str)
CHAT_MODEL_PROVIDER = config("CHAT_MODEL_PROVIDER", cast=str)
INPUT_CHROMADB_LOCAL = config("INPUT_CHROMADB_LOCAL", cast=str)
EMBEDDING_MODEL = config("EMBEDDING_MODEL", cast=str)
COLLECTION_NAME = config("COLLECTION_NAME", cast=str)
st.title("💬 Chatbot")
st.caption("🚀 A Streamlit chatbot powered by OpenAI")
st.title("💬 RAG AI for Medical Guideline")
st.caption(f"🚀 A RAG AI for Medical Guideline powered by {CHAT_MODEL_PROVIDER}")
if "messages" not in st.session_state:
st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
for msg in st.session_state.messages:
st.chat_message(msg["role"]).write(msg["content"])
if prompt := st.chat_input():
if not openai_api_key:
st.info("Please add your OpenAI API key to continue.")
st.stop()
# Load data from ChromaDB
chroma_client = chromadb.PersistentClient(path=INPUT_CHROMADB_LOCAL)
collection = chroma_client.get_collection(name=COLLECTION_NAME)
client = OpenAI(api_key=openai_api_key)
# Initialize embedding model
model = SentenceTransformer(EMBEDDING_MODEL)
if CHAT_MODEL_PROVIDER == "deepseek":
# Initialize DeepSeek model
llm = ChatDeepSeek(
model="deepseek-chat",
temperature=0,
max_tokens=None,
timeout=None,
max_retries=2,
api_key=DEEKSEEK_API_KEY
)
elif CHAT_MODEL_PROVIDER == "gemini":
# Initialize Gemini model
llm = ChatGoogleGenerativeAI(
model="gemini-1.5-flash",
google_api_key=GEMINI_API_KEY,
temperature=0,
max_retries=3
)
elif CHAT_MODEL_PROVIDER == "moonshot":
# Initialize Moonshot model
llm = Moonshot(
model="moonshot-v1-128k",
temperature=0,
max_tokens=None,
timeout=None,
max_retries=2,
api_key=MOONSHOT_API_KEY
)
# Chain of Thought Prompt
cot_template = """Let's think step by step.
Given the following document in text: {documents_text}
Question: {question}
Reply with language that is similar to the language used with asked question.
"""
cot_prompt = PromptTemplate(template=cot_template, input_variables=["documents_text", "question"])
cot_chain = cot_prompt | llm
# Answer Prompt
answer_template = """Given the chain of thought: {cot}
Provide a concise answer to the question: {question}
Provide the answer with language that is similar to the question asked.
"""
answer_prompt = PromptTemplate(template=answer_template, input_variables=["cot", "question"])
answer_chain = answer_prompt | llm
if prompt := st.chat_input():
st.session_state.messages.append({"role": "user", "content": prompt})
st.chat_message("user").write(prompt)
response = client.chat.completions.create(model="gpt-3.5-turbo", messages=st.session_state.messages)
msg = response.choices[0].message.content
# Query (prompt)
query_embedding = model.encode(prompt) # Embed the query using the SAME model
# Search ChromaDB
documents_text = collection.query(query_embeddings=[query_embedding], n_results=5)
# Generate chain of thought
cot_output = cot_chain.invoke({"documents_text": documents_text, "question": prompt})
# response = client.chat.completions.create(model="gpt-3.5-turbo", messages=st.session_state.messages)
msg = cot_output.content
st.session_state.messages.append({"role": "assistant", "content": msg})
st.chat_message("assistant").write(msg)
# Generate answer
answer_output = answer_chain.invoke({"cot": cot_output, "question": prompt})
msg = answer_output.content
st.session_state.messages.append({"role": "assistant", "content": msg})
st.chat_message("assistant").write(msg)

View File

@ -5,13 +5,19 @@ name = "pypi"
[packages]
streamlit = "==1.28"
langchain = "==0.0.217"
openai = "==1.2"
langchain = "*"
duckduckgo-search = "*"
anthropic = "==0.3.0"
trubrics = "==1.4.3"
anthropic = "*"
trubrics = "*"
streamlit-feedback = "*"
langchain-community = "*"
watchdog = "*"
mlflow = "==2.8.1"
python-decouple = "*"
langchain_google_genai = "*"
langchain-deepseek = "*"
sentence_transformers = "*"
chromadb = "*"
[dev-packages]

File diff suppressed because it is too large Load Diff