Merge pull request #14 from aimingmed/develop

badge
Hong Kai LEE 2025-03-06 19:28:32 +08:00 committed by GitHub
commit 5118f7dabc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 198 additions and 85 deletions

.github/workflows/app-testing.yml (new file, +36)

@@ -0,0 +1,36 @@
+name: App testing
+
+on:
+  push:
+    branches: [ "develop" ]
+  pull_request:
+    branches: [ "develop" ]
+
+permissions:
+  contents: read
+
+jobs:
+  streamlit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Install dependencies
+        run: |
+          cd app/streamlit
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      - uses: streamlit/streamlit-app-action@v0.0.3
+        with:
+          app-path: app/streamlit/Chatbot.py
+          ruff: true
+          skip-smoke: true
+          pytest-args: -v --junit-xml=test-results.xml
+      - if: always()
+        uses: pmeier/pytest-results-action@v0.6.0
+        with:
+          path: test-results.xml
+          summary: true
+          display-options: fEX
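Since skip-smoke is set, the action's built-in smoke check is disabled and test coverage comes from the repo's own pytest suite (app_test.py, added below). For orientation, a minimal AppTest smoke test of the kind being skipped could look like this sketch (the test name is hypothetical; the path mirrors app-path above):

from streamlit.testing.v1 import AppTest

def test_app_smoke():
    # Render the app once headlessly; fail on any uncaught exception.
    at = AppTest.from_file("app/streamlit/Chatbot.py").run()
    assert not at.exception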

.vscode/settings.json (new file, +7)

@@ -0,0 +1,7 @@
+{
+    "python.testing.pytestArgs": [
+        "app"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
+}

README.md

@@ -1,4 +1,7 @@
+[![App testing](https://github.com/aimingmed/aimingmed-ai/actions/workflows/app-testing.yml/badge.svg?branch=develop)](https://github.com/aimingmed/aimingmed-ai/actions/workflows/app-testing.yml)
+
+
 ## Important note:
 No data or output should be uploaded to this repo. Please make use of the .gitignore template in the root directory if you have a folder/directory containing datasets. The contents currently ignored from git push are data/ and output/, recursively.

 ## Configure Hooks
@@ -12,4 +15,3 @@ To set up the hooks for only this Repo run `git config core.hooksPath ./.hooks/`

 ## Please enter your general Project description here
 ## If you don't need all folders, feel free to delete them

app/docker-compose.yml

@@ -3,8 +3,8 @@ version: "3.9"
 services:
   streamlit:
     build: ./streamlit
+    platform: linux/amd64
     ports:
       - "8501:8501"
     volumes:
       - ./llmops/src/rag_cot/chroma_db:/app/llmops/src/rag_cot/chroma_db


@@ -16,7 +16,7 @@ def go(args):
     zip_path = os.path.join(args.path_document_folder, f"{args.document_folder}.zip")
     shutil.make_archive(zip_path.replace('.zip', ''), 'zip', args.path_document_folder, args.document_folder)

-    with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id) as run:
+    with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id):
         existing_params = mlflow.get_run(mlflow.active_run().info.run_id).data.params
         if 'artifact_description' not in existing_params:
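The dropped `as run` binding was unused; the run is recovered through mlflow.active_run() on the following line instead. A minimal sketch of that pattern, assuming an experiment named "development" already exists:

import mlflow

with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id):
    # No bound name is needed: start_run() sets the active run globally.
    run_id = mlflow.active_run().info.run_id
    existing_params = mlflow.get_run(run_id).data.params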


@@ -1,6 +1,4 @@
-import wandb
-import mlflow

 def log_artifact(artifact_name, artifact_type, artifact_description, filename, wandb_run):
     """


@@ -1,11 +1,8 @@
 import json
-import mlflow
 import tempfile
 import os
 import hydra
 from omegaconf import DictConfig
-from decouple import config

 _steps = [
     "get_documents",
@@ -27,7 +24,7 @@ def go(config: DictConfig):
     active_steps = steps_par.split(",") if steps_par != "all" else _steps

     # Move to a temporary directory
-    with tempfile.TemporaryDirectory() as tmp_dir:
+    with tempfile.TemporaryDirectory():
         if "get_documents" in active_steps:
             # Download file and load in W&B


@@ -10,8 +10,6 @@ import shutil
 import chromadb
 # from openai import OpenAI
 from typing import List
-import numpy as np
-import pytesseract as pt
 from pdf2image import convert_from_path
 from langchain.schema import Document


@@ -1,31 +0,0 @@
-import pytest
-from unittest.mock import patch, MagicMock
-
-import sys
-sys.path.append("/Users/leehongkai/projects/aimingmed/aimingmed-ai/app/llmops")
-
-from src.chain_of_thought import run
-
-def test_go():
-    # Create mock arguments
-    args = MagicMock()
-    args.query = "test_query"
-    args.input_chromadb_artifact = "test_artifact"
-    args.embedding_model = "test_embedding_model"
-    args.chat_model_provider = "gemini"
-
-    # Mock wandb.init and other external dependencies
-    with patch("wandb.init") as mock_wandb_init, \
-         patch("chromadb.PersistentClient") as mock_chromadb_client, \
-         patch("sentence_transformers.SentenceTransformer") as mock_sentence_transformer, \
-         patch("langchain_google_genai.ChatGoogleGenerativeAI") as mock_chat_google_generative_ai:
-        # Configure the mocks
-        mock_wandb_init.return_value = MagicMock()
-        mock_chromadb_client.return_value = MagicMock()
-        mock_sentence_transformer.return_value = MagicMock()
-        mock_chat_google_generative_ai.return_value = MagicMock()
-
-        # Call the go function
-        run.go(args)
-        # Add assertions to validate the behavior of the go function
-        assert mock_wandb_init.called

app/streamlit/Chatbot.py

@@ -1,5 +1,4 @@
 import os
-import subprocess
 import streamlit as st
 import chromadb
 from decouple import config

app/streamlit/Dockerfile

@@ -2,9 +2,9 @@ FROM python:3.11-slim

 WORKDIR /app/streamlit

-COPY Pipfile Pipfile.lock ./
+COPY requirements.txt ./

-RUN pip install pipenv && pipenv install --system --deploy
+RUN pip install --no-cache-dir -r requirements.txt

 COPY Chatbot.py .
 COPY .env .

app/streamlit/app_test.py (new file, +61)

@@ -0,0 +1,61 @@
+import datetime
+from unittest.mock import patch
+
+from streamlit.testing.v1 import AppTest
+from openai.types.chat import ChatCompletionMessage
+from openai.types.chat.chat_completion import ChatCompletion, Choice
+
+
+# See https://github.com/openai/openai-python/issues/715#issuecomment-1809203346
+def create_chat_completion(response: str, role: str = "assistant") -> ChatCompletion:
+    return ChatCompletion(
+        id="foo",
+        model="gpt-3.5-turbo",
+        object="chat.completion",
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(
+                    content=response,
+                    role=role,
+                ),
+            )
+        ],
+        created=int(datetime.datetime.now().timestamp()),
+    )
+
+
+# @patch("langchain_deepseek.ChatDeepSeek.__call__")
+# @patch("langchain_google_genai.ChatGoogleGenerativeAI.invoke")
+# @patch("langchain_community.llms.moonshot.Moonshot.__call__")
+# def test_Chatbot(moonshot_llm, gemini_llm, deepseek_llm):
+#     at = AppTest.from_file("Chatbot.py").run()
+#     assert not at.exception
+
+#     QUERY = "What is the best treatment for hypertension?"
+#     RESPONSE = "The best treatment for hypertension is..."
+#     deepseek_llm.return_value.content = RESPONSE
+#     gemini_llm.return_value.content = RESPONSE
+#     moonshot_llm.return_value = RESPONSE
+#     at.chat_input[0].set_value(QUERY).run()
+#     assert any(mock.called for mock in [deepseek_llm, gemini_llm, moonshot_llm])
+#     assert at.chat_message[1].markdown[0].value == QUERY
+#     assert at.chat_message[2].markdown[0].value == RESPONSE
+#     assert at.chat_message[2].avatar == "assistant"
+#     assert not at.exception
+
+
+@patch("langchain.llms.OpenAI.__call__")
+def test_Langchain_Quickstart(langchain_llm):
+    at = AppTest.from_file("pages/3_Langchain_Quickstart.py").run()
+    assert at.info[0].value == "Please add your OpenAI API key to continue."
+
+    RESPONSE = "1. The best way to learn how to code is by practicing..."
+    langchain_llm.return_value = RESPONSE
+    at.sidebar.text_input[0].set_value("sk-...")
+    at.button[0].set_value(True).run()
+    print(at)
+    assert at.info[0].value == RESPONSE
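create_chat_completion builds a fully typed ChatCompletion so OpenAI responses can be faked without any network call (see the linked issue above). A hedged usage sketch; the patch target is an assumption about the openai 1.x package layout, not something app_test.py itself does:

from unittest.mock import patch

with patch("openai.resources.chat.completions.Completions.create") as mock_create:
    mock_create.return_value = create_chat_completion("Hello!")
    # Anything that calls client.chat.completions.create(...) inside this
    # block now receives the canned ChatCompletion instead of a live response.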

app/streamlit/pages/3_Langchain_Quickstart.py (new file, +22)

@@ -0,0 +1,22 @@
+import streamlit as st
+from langchain.llms import OpenAI
+
+st.title("🦜🔗 Langchain Quickstart App")
+
+with st.sidebar:
+    openai_api_key = st.text_input("OpenAI API Key", type="password")
+    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
+
+
+def generate_response(input_text):
+    llm = OpenAI(temperature=0.7, openai_api_key=openai_api_key)
+    st.info(llm(input_text))
+
+
+with st.form("my_form"):
+    text = st.text_area("Enter text:", "What are 3 key advice for learning how to code?")
+    submitted = st.form_submit_button("Submit")
+    if not openai_api_key:
+        st.info("Please add your OpenAI API key to continue.")
+    elif submitted:
+        generate_response(text)
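Note that llm(input_text) uses the legacy __call__ interface, which is exactly what the test above patches (langchain.llms.OpenAI.__call__). On newer LangChain releases the same helper could use the non-deprecated invoke method; a sketch, assuming a LangChain version that provides it:

def generate_response(input_text):
    llm = OpenAI(temperature=0.7, openai_api_key=openai_api_key)
    # invoke() replaces direct __call__ in current LangChain APIs
    st.info(llm.invoke(input_text))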


@@ -0,0 +1,5 @@
+black==23.3.0
+mypy==1.4.1
+pre-commit==3.3.3
+watchdog
+pytest

app/streamlit/requirements.txt (new file, +14)

@@ -0,0 +1,14 @@
+streamlit>=1.28
+langchain>=0.0.217
+openai>=1.2
+duckduckgo-search
+anthropic>=0.3.0
+trubrics>=1.4.3
+streamlit-feedback
+langchain-community
+chromadb
+python-decouple
+langchain_google_genai
+langchain-deepseek
+sentence_transformers
+watchdog


@@ -0,0 +1,44 @@
+import pytest
+import chromadb
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_deepseek import ChatDeepSeek
+from langchain_community.llms.moonshot import Moonshot
+
+import sys
+sys.path.append(".")
+
+import streamlit as st
+from unittest.mock import patch
+from Chatbot import CHAT_MODEL_PROVIDER, INPUT_CHROMADB_LOCAL, COLLECTION_NAME, cot_template, answer_template
+
+
+@pytest.fixture(autouse=True)
+def mock_session_state():
+    with patch.object(st, "session_state", {"messages": []}):
+        yield
+
+
+def test_prompt_templates():
+    # Test that the prompt templates are correctly formatted
+    assert "documents_text" in cot_template
+    assert "question" in cot_template
+    assert "cot" in answer_template
+    assert "question" in answer_template
+
+
+def test_chromadb_connection():
+    # Test that the ChromaDB client is initialized correctly
+    chroma_client = chromadb.PersistentClient(path=INPUT_CHROMADB_LOCAL)
+    collection = chroma_client.get_collection(name=COLLECTION_NAME)
+    assert collection is not None
+
+
+@pytest.mark.skipif(CHAT_MODEL_PROVIDER not in ["deepseek", "gemini", "moonshot"], reason="requires a valid CHAT_MODEL_PROVIDER")
+def test_llm_initialization():
+    # Test that the correct LLM is initialized based on the CHAT_MODEL_PROVIDER environment variable
+    if CHAT_MODEL_PROVIDER == "deepseek":
+        llm = ChatDeepSeek(model="deepseek-chat")
+        assert isinstance(llm, ChatDeepSeek)
+    elif CHAT_MODEL_PROVIDER == "gemini":
+        llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
+        assert isinstance(llm, ChatGoogleGenerativeAI)
+    elif CHAT_MODEL_PROVIDER == "moonshot":
+        llm = Moonshot(model="moonshot-v1-128k")
+        assert isinstance(llm, Moonshot)
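test_chromadb_connection assumes the persisted chroma_db directory already exists (the same one the docker-compose volume above mounts), so it will error in a bare checkout. A hedged guard one could add, assuming INPUT_CHROMADB_LOCAL is a local filesystem path:

import os

requires_chromadb = pytest.mark.skipif(
    not os.path.isdir(INPUT_CHROMADB_LOCAL),
    reason="persisted ChromaDB directory not available",
)

Applying @requires_chromadb to test_chromadb_connection would then skip it cleanly wherever the database has not been built.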


@@ -1,39 +0,0 @@
-import pytest
-import streamlit as st
-from unittest.mock import patch
-
-# add app/streamlit to sys.path
-import sys
-sys.path.insert(0, "/Users/leehongkai/projects/aimingmed/aimingmed-ai/app/streamlit")
-
-from unittest.mock import patch, MagicMock
-
-def test_title():
-    with patch("streamlit.title") as mock_title, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_title.assert_called_once_with("💬 RAG AI for Medical Guideline")
-
-def test_caption():
-    with patch("streamlit.caption") as mock_caption, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_caption.assert_called()
-
-def test_chat_input():
-    with patch("streamlit.chat_input", return_value="test_prompt") as mock_chat_input, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_chat_input.assert_called_once()
-
-def test_chat_message():
-    with patch("streamlit.chat_message") as mock_chat_message, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        with patch("streamlit.chat_input", return_value="test_prompt"):
-            import Chatbot
-            st.session_state["messages"] = []
-            mock_chat_message.assert_called()