diff --git a/.github/workflows/app-testing.yml b/.github/workflows/app-testing.yml
new file mode 100644
index 0000000..5ab69c0
--- /dev/null
+++ b/.github/workflows/app-testing.yml
@@ -0,0 +1,36 @@
+name: App testing
+
+on:
+  push:
+    branches: [ "develop" ]
+  pull_request:
+    branches: [ "develop" ]
+
+permissions:
+  contents: read
+
+jobs:
+  streamlit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Install dependencies
+        run: |
+          cd app/streamlit
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      - uses: streamlit/streamlit-app-action@v0.0.3
+        with:
+          app-path: app/streamlit/Chatbot.py
+          ruff: true
+          skip-smoke: true
+          pytest-args: -v --junit-xml=test-results.xml
+      - if: always()
+        uses: pmeier/pytest-results-action@v0.6.0
+        with:
+          path: test-results.xml
+          summary: true
+          display-options: fEX
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..d171c99
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,7 @@
+{
+    "python.testing.pytestArgs": [
+        "app"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 8c7869f..bec4ca9 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,7 @@
-## Important note:
+[![App testing](https://github.com/aimingmed/aimingmed-ai/actions/workflows/app-testing.yml/badge.svg?branch=develop)](https://github.com/aimingmed/aimingmed-ai/actions/workflows/app-testing.yml)
+
+## Important note:
+
 No data or output should be uploaded to this repo. Please make use of the .gitignore template in the root directory if you have a folder/directory containing datasets. The folders/directories currently being ignored from git push are data/ and output/, recursively.
 
 ## Configure Hooks
@@ -12,4 +15,3 @@ To set up the hooks for only this Repo run `git config core.hooksPath ./.hooks/`
 
 ## Please enter your general Project description here
 ## If you don't need all folder feel free to delete them
-
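Note on the workflow above: `skip-smoke: true` turns off streamlit-app-action's built-in smoke test, so coverage comes entirely from the repository's own pytest suite. For reference, a minimal sketch of an equivalent AppTest-based smoke check (the timeout value is illustrative, not taken from this repo):

    from streamlit.testing.v1 import AppTest

    def test_smoke():
        # Boot the app headlessly and fail on any uncaught exception
        at = AppTest.from_file("Chatbot.py").run(timeout=30)
        assert not at.exception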
diff --git a/app/docker-compose.yml b/app/docker-compose.yml
index 69ae235..fb7640a 100644
--- a/app/docker-compose.yml
+++ b/app/docker-compose.yml
@@ -3,8 +3,8 @@ version: "3.9"
 services:
   streamlit:
     build: ./streamlit
+    platform: linux/amd64
     ports:
       - "8501:8501"
     volumes:
       - ./llmops/src/rag_cot/chroma_db:/app/llmops/src/rag_cot/chroma_db
-
diff --git a/app/llmops/components/get_documents/run.py b/app/llmops/components/get_documents/run.py
index 0e097a5..6e0c22e 100644
--- a/app/llmops/components/get_documents/run.py
+++ b/app/llmops/components/get_documents/run.py
@@ -16,7 +16,7 @@ def go(args):
     zip_path = os.path.join(args.path_document_folder, f"{args.document_folder}.zip")
     shutil.make_archive(zip_path.replace('.zip', ''), 'zip', args.path_document_folder, args.document_folder)
 
-    with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id) as run:
+    with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id):
 
         existing_params = mlflow.get_run(mlflow.active_run().info.run_id).data.params
         if 'artifact_description' not in existing_params:
diff --git a/app/llmops/components/wandb_utils/log_artifact.py b/app/llmops/components/wandb_utils/log_artifact.py
index c242c60..67275de 100644
--- a/app/llmops/components/wandb_utils/log_artifact.py
+++ b/app/llmops/components/wandb_utils/log_artifact.py
@@ -1,6 +1,4 @@
 import wandb
-import mlflow
-
 
 def log_artifact(artifact_name, artifact_type, artifact_description, filename, wandb_run):
     """
diff --git a/app/llmops/main.py b/app/llmops/main.py
index 3e9aacb..670a7fa 100644
--- a/app/llmops/main.py
+++ b/app/llmops/main.py
@@ -1,11 +1,8 @@
-import json
-
 import mlflow
 import tempfile
 import os
 import hydra
 from omegaconf import DictConfig
-from decouple import config
 
 _steps = [
     "get_documents",
@@ -27,7 +24,7 @@ def go(config: DictConfig):
     active_steps = steps_par.split(",") if steps_par != "all" else _steps
 
     # Move to a temporary directory
-    with tempfile.TemporaryDirectory() as tmp_dir:
+    with tempfile.TemporaryDirectory():
 
         if "get_documents" in active_steps:
             # Download file and load in W&B
diff --git a/app/llmops/src/etl_chromadb_scanned_pdf/run.py b/app/llmops/src/etl_chromadb_scanned_pdf/run.py
index f282826..472fe38 100644
--- a/app/llmops/src/etl_chromadb_scanned_pdf/run.py
+++ b/app/llmops/src/etl_chromadb_scanned_pdf/run.py
@@ -10,8 +10,6 @@
 import shutil
 import chromadb
 # from openai import OpenAI
-from typing import List
-import numpy as np
 import pytesseract as pt
 from pdf2image import convert_from_path
 from langchain.schema import Document
diff --git a/app/llmops/tests/src/chain_of_thought/test_run.py b/app/llmops/tests/src/chain_of_thought/test_run.py
deleted file mode 100644
index fb0a73e..0000000
--- a/app/llmops/tests/src/chain_of_thought/test_run.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import pytest
-from unittest.mock import patch, MagicMock
-import sys
-sys.path.append("/Users/leehongkai/projects/aimingmed/aimingmed-ai/app/llmops")
-from src.chain_of_thought import run
-
-def test_go():
-    # Create mock arguments
-    args = MagicMock()
-    args.query = "test_query"
-    args.input_chromadb_artifact = "test_artifact"
-    args.embedding_model = "test_embedding_model"
-    args.chat_model_provider = "gemini"
-
-    # Mock wandb.init and other external dependencies
-    with patch("wandb.init") as mock_wandb_init, \
-         patch("chromadb.PersistentClient") as mock_chromadb_client, \
-         patch("sentence_transformers.SentenceTransformer") as mock_sentence_transformer, \
-         patch("langchain_google_genai.ChatGoogleGenerativeAI") as mock_chat_google_generative_ai:
-
-        # Configure the mocks
-        mock_wandb_init.return_value = MagicMock()
-        mock_chromadb_client.return_value = MagicMock()
-        mock_sentence_transformer.return_value = MagicMock()
-        mock_chat_google_generative_ai.return_value = MagicMock()
-
-        # Call the go function
-        run.go(args)
-
-        # Add assertions to validate the behavior of the go function
-        assert mock_wandb_init.called
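The `get_documents` change above drops the unused `as run` binding: inside the context manager the active run stays reachable through `mlflow.active_run()`, which the body already uses. A minimal sketch of that pattern (the parameter name and value here are illustrative, not the component's actual ones):

    import mlflow

    with mlflow.start_run():
        # No `as run` needed: the run is reachable via the module-level accessor
        run_id = mlflow.active_run().info.run_id
        params = mlflow.get_run(run_id).data.params
        if "artifact_description" not in params:
            mlflow.log_param("artifact_description", "zipped source documents")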
diff --git a/app/streamlit/Chatbot.py b/app/streamlit/Chatbot.py
index beb7f08..16266d4 100644
--- a/app/streamlit/Chatbot.py
+++ b/app/streamlit/Chatbot.py
@@ -1,5 +1,4 @@
 import os
-import subprocess
 import streamlit as st
 import chromadb
 from decouple import config
diff --git a/app/streamlit/Dockerfile b/app/streamlit/Dockerfile
index b0e9b15..b335644 100644
--- a/app/streamlit/Dockerfile
+++ b/app/streamlit/Dockerfile
@@ -2,9 +2,9 @@ FROM python:3.11-slim
 
 WORKDIR /app/streamlit
 
-COPY Pipfile Pipfile.lock ./
+COPY requirements.txt ./
 
-RUN pip install pipenv && pipenv install --system --deploy
+RUN pip install --no-cache-dir -r requirements.txt
 
 COPY Chatbot.py .
 COPY .env .
diff --git a/app/streamlit/app_test.py b/app/streamlit/app_test.py
new file mode 100644
index 0000000..dce15fd
--- /dev/null
+++ b/app/streamlit/app_test.py
@@ -0,0 +1,61 @@
+import datetime
+from unittest.mock import patch
+from streamlit.testing.v1 import AppTest
+from openai.types.chat import ChatCompletionMessage
+from openai.types.chat.chat_completion import ChatCompletion, Choice
+
+
+# See https://github.com/openai/openai-python/issues/715#issuecomment-1809203346
+def create_chat_completion(response: str, role: str = "assistant") -> ChatCompletion:
+    return ChatCompletion(
+        id="foo",
+        model="gpt-3.5-turbo",
+        object="chat.completion",
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(
+                    content=response,
+                    role=role,
+                ),
+            )
+        ],
+        created=int(datetime.datetime.now().timestamp()),
+    )
+
+
+# @patch("langchain_deepseek.ChatDeepSeek.__call__")
+# @patch("langchain_google_genai.ChatGoogleGenerativeAI.invoke")
+# @patch("langchain_community.llms.moonshot.Moonshot.__call__")
+# def test_Chatbot(moonshot_llm, gemini_llm, deepseek_llm):
+#     at = AppTest.from_file("Chatbot.py").run()
+#     assert not at.exception
+
+#     QUERY = "What is the best treatment for hypertension?"
+#     RESPONSE = "The best treatment for hypertension is..."
+
+#     deepseek_llm.return_value.content = RESPONSE
+#     gemini_llm.return_value.content = RESPONSE
+#     moonshot_llm.return_value = RESPONSE
+
+#     at.chat_input[0].set_value(QUERY).run()
+
+#     assert any(mock.called for mock in [deepseek_llm, gemini_llm, moonshot_llm])
+#     assert at.chat_message[1].markdown[0].value == QUERY
+#     assert at.chat_message[2].markdown[0].value == RESPONSE
+#     assert at.chat_message[2].avatar == "assistant"
+#     assert not at.exception
+
+
+@patch("langchain.llms.OpenAI.__call__")
+def test_Langchain_Quickstart(langchain_llm):
+    at = AppTest.from_file("pages/3_Langchain_Quickstart.py").run()
+    assert at.info[0].value == "Please add your OpenAI API key to continue."
+
+    RESPONSE = "1. The best way to learn how to code is by practicing..."
+    langchain_llm.return_value = RESPONSE
+    at.sidebar.text_input[0].set_value("sk-...")
+    at.button[0].set_value(True).run()
+    print(at)
+    assert at.info[0].value == RESPONSE
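The `create_chat_completion` helper above builds a fully typed `ChatCompletion`, so code under test that reads `response.choices[0].message.content` works unchanged against a stub. A sketch of how it would typically be wired up, following the patch target suggested in the linked openai-python issue:

    from unittest.mock import patch

    def test_with_stubbed_completion():
        with patch("openai.resources.chat.Completions.create") as mock_create:
            mock_create.return_value = create_chat_completion("stubbed answer")
            # exercise code that calls client.chat.completions.create(...) here;
            # it receives the typed stub instead of hitting the API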
diff --git a/app/streamlit/pages/3_Langchain_Quickstart.py b/app/streamlit/pages/3_Langchain_Quickstart.py
new file mode 100644
index 0000000..38c820f
--- /dev/null
+++ b/app/streamlit/pages/3_Langchain_Quickstart.py
@@ -0,0 +1,22 @@
+import streamlit as st
+from langchain.llms import OpenAI
+
+st.title("🦜🔗 Langchain Quickstart App")
+
+with st.sidebar:
+    openai_api_key = st.text_input("OpenAI API Key", type="password")
+    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
+
+
+def generate_response(input_text):
+    llm = OpenAI(temperature=0.7, openai_api_key=openai_api_key)
+    st.info(llm(input_text))
+
+
+with st.form("my_form"):
+    text = st.text_area("Enter text:", "What are 3 key pieces of advice for learning how to code?")
+    submitted = st.form_submit_button("Submit")
+    if not openai_api_key:
+        st.info("Please add your OpenAI API key to continue.")
+    elif submitted:
+        generate_response(text)
diff --git a/app/streamlit/requirements-dev.txt b/app/streamlit/requirements-dev.txt
new file mode 100644
index 0000000..8635be6
--- /dev/null
+++ b/app/streamlit/requirements-dev.txt
@@ -0,0 +1,5 @@
+black==23.3.0
+mypy==1.4.1
+pre-commit==3.3.3
+watchdog
+pytest
diff --git a/app/streamlit/requirements.txt b/app/streamlit/requirements.txt
new file mode 100644
index 0000000..a7df356
--- /dev/null
+++ b/app/streamlit/requirements.txt
@@ -0,0 +1,14 @@
+streamlit>=1.28
+langchain>=0.0.217
+openai>=1.2
+duckduckgo-search
+anthropic>=0.3.0
+trubrics>=1.4.3
+streamlit-feedback
+langchain-community
+chromadb
+python-decouple
+langchain_google_genai
+langchain-deepseek
+sentence_transformers
+watchdog
\ No newline at end of file
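The test module below imports `CHAT_MODEL_PROVIDER`, `INPUT_CHROMADB_LOCAL` and `COLLECTION_NAME` from `Chatbot`; given the `python-decouple` dependency above, these are presumably module-level settings read from `.env`. A minimal sketch of that pattern (the default values are illustrative, not the app's actual ones):

    from decouple import config

    # Illustrative defaults only; the real values live in app/streamlit/.env
    CHAT_MODEL_PROVIDER = config("CHAT_MODEL_PROVIDER", default="gemini")
    INPUT_CHROMADB_LOCAL = config("INPUT_CHROMADB_LOCAL", default="./chroma_db")
    COLLECTION_NAME = config("COLLECTION_NAME", default="guidelines")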
diff --git a/app/streamlit/tests/_test_chatbot.py b/app/streamlit/tests/_test_chatbot.py
new file mode 100644
index 0000000..2540a9e
--- /dev/null
+++ b/app/streamlit/tests/_test_chatbot.py
@@ -0,0 +1,42 @@
+import pytest
+import chromadb
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_deepseek import ChatDeepSeek
+from langchain_community.llms.moonshot import Moonshot
+
+import sys
+sys.path.append(".")
+import streamlit as st
+from unittest.mock import patch
+from Chatbot import CHAT_MODEL_PROVIDER, INPUT_CHROMADB_LOCAL, COLLECTION_NAME, cot_template, answer_template
+
+@pytest.fixture(autouse=True)
+def mock_session_state():
+    with patch.object(st, "session_state", {"messages": []}):
+        yield
+
+def test_prompt_templates():
+    # Test that the prompt templates are correctly formatted
+    assert "documents_text" in cot_template
+    assert "question" in cot_template
+    assert "cot" in answer_template
+    assert "question" in answer_template
+
+def test_chromadb_connection():
+    # Test that the ChromaDB client is initialized correctly
+    chroma_client = chromadb.PersistentClient(path=INPUT_CHROMADB_LOCAL)
+    collection = chroma_client.get_collection(name=COLLECTION_NAME)
+    assert collection is not None
+
+@pytest.mark.skipif(CHAT_MODEL_PROVIDER not in ["deepseek", "gemini", "moonshot"], reason="requires a valid CHAT_MODEL_PROVIDER")
+def test_llm_initialization():
+    # Test that the correct LLM is initialized based on the CHAT_MODEL_PROVIDER environment variable
+    if CHAT_MODEL_PROVIDER == "deepseek":
+        llm = ChatDeepSeek(model="deepseek-chat")
+        assert isinstance(llm, ChatDeepSeek)
+    elif CHAT_MODEL_PROVIDER == "gemini":
+        llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
+        assert isinstance(llm, ChatGoogleGenerativeAI)
+    elif CHAT_MODEL_PROVIDER == "moonshot":
+        llm = Moonshot(model="moonshot-v1-128k")
+        assert isinstance(llm, Moonshot)
\ No newline at end of file
diff --git a/app/streamlit/tests/test_chatbot.py b/app/streamlit/tests/test_chatbot.py
deleted file mode 100644
index 97ad3d1..0000000
--- a/app/streamlit/tests/test_chatbot.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import pytest
-import streamlit as st
-from unittest.mock import patch
-
-# add app/streamlit to sys.path
-import sys
-sys.path.insert(0, "/Users/leehongkai/projects/aimingmed/aimingmed-ai/app/streamlit")
-
-from unittest.mock import patch, MagicMock
-
-
-def test_title():
-    with patch("streamlit.title") as mock_title, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_title.assert_called_once_with("💬 RAG AI for Medical Guideline")
-
-def test_caption():
-    with patch("streamlit.caption") as mock_caption, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_caption.assert_called()
-
-def test_chat_input():
-    with patch("streamlit.chat_input", return_value="test_prompt") as mock_chat_input, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_chat_input.assert_called_once()
-
-def test_chat_message():
-    with patch("streamlit.chat_message") as mock_chat_message, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        with patch("streamlit.chat_input", return_value="test_prompt"):
-            import Chatbot
-            st.session_state["messages"] = []
-            mock_chat_message.assert_called()
\ No newline at end of file
diff --git a/notebooks/notebook.ipynb b/notebooks/notebook.ipynb
deleted file mode 100644
index e69de29..0000000
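To reproduce the CI test step locally with the same flags the workflow passes via `pytest-args`, one option is to invoke pytest programmatically from `app/streamlit`:

    import pytest

    if __name__ == "__main__":
        # Mirrors the workflow's pytest-args: -v --junit-xml=test-results.xml
        raise SystemExit(pytest.main(["-v", "--junit-xml=test-results.xml"]))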