commit 5118f7dabc (mirror of https://github.com/aimingmed/aimingmed-ai.git, synced 2026-02-05 14:43:20 +08:00)
.github/workflows/app-testing.yml (vendored, new file, 36 lines)
@@ -0,0 +1,36 @@
+name: App testing
+
+on:
+  push:
+    branches: [ "develop" ]
+  pull_request:
+    branches: [ "develop" ]
+
+permissions:
+  contents: read
+
+jobs:
+  streamlit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Install dependencies
+        run: |
+          cd app/streamlit
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      - uses: streamlit/streamlit-app-action@v0.0.3
+        with:
+          app-path: app/streamlit/Chatbot.py
+          ruff: true
+          skip-smoke: true
+          pytest-args: -v --junit-xml=test-results.xml
+      - if: always()
+        uses: pmeier/pytest-results-action@v0.6.0
+        with:
+          path: test-results.xml
+          summary: true
+          display-options: fEX
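Note that `skip-smoke: true` disables the action's built-in smoke test, so CI runs only ruff plus the repo's own pytest suite. An equivalent smoke check can be run locally before pushing; a minimal sketch, assuming the requirements above are installed and the file sits next to Chatbot.py (the file name smoke_test.py is hypothetical):

```python
# Hypothetical app/streamlit/smoke_test.py: execute the app script once
# and fail if rendering it raises any exception.
from streamlit.testing.v1 import AppTest


def test_app_starts_without_exception():
    at = AppTest.from_file("Chatbot.py").run()
    assert not at.exception
```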
.vscode/settings.json (vendored, new file, 7 lines)
@@ -0,0 +1,7 @@
+{
+    "python.testing.pytestArgs": [
+        "app"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
+}
@@ -1,4 +1,7 @@
+[![App testing](https://github.com/aimingmed/aimingmed-ai/actions/workflows/app-testing.yml/badge.svg)](https://github.com/aimingmed/aimingmed-ai/actions/workflows/app-testing.yml)
+
 ## Important note:
 
 No data or output should be uploaded to this repo. Please use the .gitignore template in the root directory if you have a folder/directory containing datasets. The directories currently ignored from git push are data/ and output/, recursively.
 
 ## Configure Hooks
@@ -12,4 +15,3 @@ To set up the hooks for only this Repo run `git config core.hooksPath ./.hooks/`
 ## Please enter your general Project description here
 
 ## If you don't need all folders, feel free to delete them
-
@@ -3,8 +3,8 @@ version: "3.9"
 services:
   streamlit:
     build: ./streamlit
+    platform: linux/amd64
     ports:
       - "8501:8501"
     volumes:
       - ./llmops/src/rag_cot/chroma_db:/app/llmops/src/rag_cot/chroma_db
@@ -16,7 +16,7 @@ def go(args):
     zip_path = os.path.join(args.path_document_folder, f"{args.document_folder}.zip")
     shutil.make_archive(zip_path.replace('.zip', ''), 'zip', args.path_document_folder, args.document_folder)
 
-    with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id) as run:
+    with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id):
 
         existing_params = mlflow.get_run(mlflow.active_run().info.run_id).data.params
         if 'artifact_description' not in existing_params:
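The dropped `as run:` binding was unused; inside the context manager the same run remains reachable through `mlflow.active_run()`. A minimal sketch of the pattern the new code relies on (experiment name taken from the diff, the rest illustrative):

```python
import mlflow

# Assumes an experiment named "development" already exists.
experiment_id = mlflow.get_experiment_by_name("development").experiment_id

with mlflow.start_run(experiment_id=experiment_id):
    # No binding needed: the run started above is the active run.
    run_id = mlflow.active_run().info.run_id
    existing_params = mlflow.get_run(run_id).data.params
```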
@@ -1,6 +1,4 @@
 import wandb
-import mlflow
-
 
 def log_artifact(artifact_name, artifact_type, artifact_description, filename, wandb_run):
     """
@@ -1,11 +1,8 @@
-import json
-
 import mlflow
 import tempfile
 import os
 import hydra
 from omegaconf import DictConfig
-from decouple import config
 
 _steps = [
     "get_documents",
@@ -27,7 +24,7 @@ def go(config: DictConfig):
     active_steps = steps_par.split(",") if steps_par != "all" else _steps
 
     # Move to a temporary directory
-    with tempfile.TemporaryDirectory() as tmp_dir:
+    with tempfile.TemporaryDirectory():
 
         if "get_documents" in active_steps:
            # Download file and load in W&B
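Here too an unused binding is removed; since the code never changed into the directory, dropping the name changes nothing about behavior. A sketch of when each form applies (names illustrative):

```python
import os
import tempfile

# Form 1: cleanup only. The directory exists for the duration of the
# block and is deleted on exit; no name is needed.
with tempfile.TemporaryDirectory():
    pass

# Form 2: the path is actually used. Bind the name and build paths inside.
with tempfile.TemporaryDirectory() as tmp_dir:
    scratch = os.path.join(tmp_dir, "scratch.txt")  # hypothetical file
    with open(scratch, "w") as f:
        f.write("temporary work")
```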
@@ -10,8 +10,6 @@ import shutil
 
 import chromadb
 # from openai import OpenAI
-from typing import List
-import numpy as np
 import pytesseract as pt
 from pdf2image import convert_from_path
 from langchain.schema import Document
@@ -1,31 +0,0 @@
-import pytest
-from unittest.mock import patch, MagicMock
-import sys
-sys.path.append("/Users/leehongkai/projects/aimingmed/aimingmed-ai/app/llmops")
-from src.chain_of_thought import run
-
-def test_go():
-    # Create mock arguments
-    args = MagicMock()
-    args.query = "test_query"
-    args.input_chromadb_artifact = "test_artifact"
-    args.embedding_model = "test_embedding_model"
-    args.chat_model_provider = "gemini"
-
-    # Mock wandb.init and other external dependencies
-    with patch("wandb.init") as mock_wandb_init, \
-        patch("chromadb.PersistentClient") as mock_chromadb_client, \
-        patch("sentence_transformers.SentenceTransformer") as mock_sentence_transformer, \
-        patch("langchain_google_genai.ChatGoogleGenerativeAI") as mock_chat_google_generative_ai:
-
-        # Configure the mocks
-        mock_wandb_init.return_value = MagicMock()
-        mock_chromadb_client.return_value = MagicMock()
-        mock_sentence_transformer.return_value = MagicMock()
-        mock_chat_google_generative_ai.return_value = MagicMock()
-
-        # Call the go function
-        run.go(args)
-
-        # Add assertions to validate the behavior of the go function
-        assert mock_wandb_init.called
@@ -1,5 +1,4 @@
 import os
-import subprocess
 import streamlit as st
 import chromadb
 from decouple import config
@@ -2,9 +2,9 @@ FROM python:3.11-slim
 
 WORKDIR /app/streamlit
 
-COPY Pipfile Pipfile.lock ./
+COPY requirements.txt ./
 
-RUN pip install pipenv && pipenv install --system --deploy
+RUN pip install --no-cache-dir -r requirements.txt
 
 COPY Chatbot.py .
 COPY .env .
app/streamlit/app_test.py (new file, 61 lines)
@@ -0,0 +1,61 @@
+import datetime
+from unittest.mock import patch
+from streamlit.testing.v1 import AppTest
+from openai.types.chat import ChatCompletionMessage
+from openai.types.chat.chat_completion import ChatCompletion, Choice
+
+
+# See https://github.com/openai/openai-python/issues/715#issuecomment-1809203346
+def create_chat_completion(response: str, role: str = "assistant") -> ChatCompletion:
+    return ChatCompletion(
+        id="foo",
+        model="gpt-3.5-turbo",
+        object="chat.completion",
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(
+                    content=response,
+                    role=role,
+                ),
+            )
+        ],
+        created=int(datetime.datetime.now().timestamp()),
+    )
+
+
+# @patch("langchain_deepseek.ChatDeepSeek.__call__")
+# @patch("langchain_google_genai.ChatGoogleGenerativeAI.invoke")
+# @patch("langchain_community.llms.moonshot.Moonshot.__call__")
+# def test_Chatbot(moonshot_llm, gemini_llm, deepseek_llm):
+#     at = AppTest.from_file("Chatbot.py").run()
+#     assert not at.exception
+
+#     QUERY = "What is the best treatment for hypertension?"
+#     RESPONSE = "The best treatment for hypertension is..."
+
+#     deepseek_llm.return_value.content = RESPONSE
+#     gemini_llm.return_value.content = RESPONSE
+#     moonshot_llm.return_value = RESPONSE
+
+#     at.chat_input[0].set_value(QUERY).run()
+
+#     assert any(mock.called for mock in [deepseek_llm, gemini_llm, moonshot_llm])
+#     assert at.chat_message[1].markdown[0].value == QUERY
+#     assert at.chat_message[2].markdown[0].value == RESPONSE
+#     assert at.chat_message[2].avatar == "assistant"
+#     assert not at.exception
+
+
+@patch("langchain.llms.OpenAI.__call__")
+def test_Langchain_Quickstart(langchain_llm):
+    at = AppTest.from_file("pages/3_Langchain_Quickstart.py").run()
+    assert at.info[0].value == "Please add your OpenAI API key to continue."
+
+    RESPONSE = "1. The best way to learn how to code is by practicing..."
+    langchain_llm.return_value = RESPONSE
+    at.sidebar.text_input[0].set_value("sk-...")
+    at.button[0].set_value(True).run()
+    print(at)
+    assert at.info[0].value == RESPONSE
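The `create_chat_completion` helper is unused by the active test; it builds a fully typed ChatCompletion so the OpenAI v1 client can be mocked, per the linked issue. A hedged usage sketch (the patch target follows that issue; the app path and test name are assumptions):

```python
# Hypothetical test wiring up create_chat_completion: patch the v1
# client's chat-completions endpoint to return the fake object.
from unittest.mock import patch


def test_chat_with_mocked_openai():
    with patch(
        "openai.resources.chat.Completions.create",
        return_value=create_chat_completion("mocked assistant reply"),
    ):
        at = AppTest.from_file("Chatbot.py").run()  # path assumed
        assert not at.exception
```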
app/streamlit/pages/3_Langchain_Quickstart.py (new file, 22 lines)
@@ -0,0 +1,22 @@
+import streamlit as st
+from langchain.llms import OpenAI
+
+st.title("🦜🔗 Langchain Quickstart App")
+
+with st.sidebar:
+    openai_api_key = st.text_input("OpenAI API Key", type="password")
+    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
+
+
+def generate_response(input_text):
+    llm = OpenAI(temperature=0.7, openai_api_key=openai_api_key)
+    st.info(llm(input_text))
+
+
+with st.form("my_form"):
+    text = st.text_area("Enter text:", "What are 3 key advice for learning how to code?")
+    submitted = st.form_submit_button("Submit")
+    if not openai_api_key:
+        st.info("Please add your OpenAI API key to continue.")
+    elif submitted:
+        generate_response(text)
app/streamlit/requirements-dev.txt (new file, 5 lines)
@@ -0,0 +1,5 @@
+black==23.3.0
+mypy==1.4.1
+pre-commit==3.3.3
+watchdog
+pytest
app/streamlit/requirements.txt (new file, 14 lines)
@@ -0,0 +1,14 @@
+streamlit>=1.28
+langchain>=0.0.217
+openai>=1.2
+duckduckgo-search
+anthropic>=0.3.0
+trubrics>=1.4.3
+streamlit-feedback
+langchain-community
+chromadb
+python-decouple
+langchain_google_genai
+langchain-deepseek
+sentence_transformers
+watchdog
app/streamlit/tests/_test_chatbot.py (new file, 44 lines)
@@ -0,0 +1,44 @@
+import pytest
+import chromadb
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_deepseek import ChatDeepSeek
+from langchain_community.llms.moonshot import Moonshot
+
+import sys
+sys.path.append(".")
+import streamlit as st
+from unittest.mock import patch
+from Chatbot import CHAT_MODEL_PROVIDER, INPUT_CHROMADB_LOCAL, COLLECTION_NAME, cot_template, answer_template
+
+@pytest.fixture(autouse=True)
+def mock_session_state():
+    with patch.object(st, "session_state", {"messages": []}):
+        yield
+
+def test_prompt_templates():
+    # Test that the prompt templates are correctly formatted
+    assert "documents_text" in cot_template
+    assert "question" in cot_template
+    assert "cot" in answer_template
+    assert "question" in answer_template
+
+def test_chromadb_connection():
+    # Test that the ChromaDB client is initialized correctly
+    chroma_client = chromadb.PersistentClient(path=INPUT_CHROMADB_LOCAL)
+    collection = chroma_client.get_collection(name=COLLECTION_NAME)
+    assert collection is not None
+
+@pytest.mark.skipif(CHAT_MODEL_PROVIDER not in ["deepseek", "gemini", "moonshot"], reason="requires a valid CHAT_MODEL_PROVIDER")
+def test_llm_initialization():
+    # Test that the correct LLM is initialized based on the CHAT_MODEL_PROVIDER environment variable
+    if CHAT_MODEL_PROVIDER == "deepseek":
+        llm = ChatDeepSeek(model="deepseek-chat")
+        assert isinstance(llm, ChatDeepSeek)
+    elif CHAT_MODEL_PROVIDER == "gemini":
+        llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
+        assert isinstance(llm, ChatGoogleGenerativeAI)
+    elif CHAT_MODEL_PROVIDER == "moonshot":
+        llm = Moonshot(model="moonshot-v1-128k")
+        assert isinstance(llm, Moonshot)
+        llm = Moonshot(model="moonshot-v1-128k")
+        assert isinstance(llm, Moonshot)
@@ -1,39 +0,0 @@
-import pytest
-import streamlit as st
-from unittest.mock import patch
-
-# add app/streamlit to sys.path
-import sys
-sys.path.insert(0, "/Users/leehongkai/projects/aimingmed/aimingmed-ai/app/streamlit")
-
-from unittest.mock import patch, MagicMock
-
-
-def test_title():
-    with patch("streamlit.title") as mock_title, \
-        patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_title.assert_called_once_with("💬 RAG AI for Medical Guideline")
-
-def test_caption():
-    with patch("streamlit.caption") as mock_caption, \
-        patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_caption.assert_called()
-
-def test_chat_input():
-    with patch("streamlit.chat_input", return_value="test_prompt") as mock_chat_input, \
-        patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_chat_input.assert_called_once()
-
-def test_chat_message():
-    with patch("streamlit.chat_message") as mock_chat_message, \
-        patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        with patch("streamlit.chat_input", return_value="test_prompt"):
-            import Chatbot
-            st.session_state["messages"] = []
-            mock_chat_message.assert_called()