Merge pull request #14 from aimingmed/develop

badge
Hong Kai LEE 2025-03-06 19:28:32 +08:00 committed by GitHub
commit 5118f7dabc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 198 additions and 85 deletions

.github/workflows/app-testing.yml (new file, +36)

@@ -0,0 +1,36 @@
+name: App testing
+
+on:
+  push:
+    branches: [ "develop" ]
+  pull_request:
+    branches: [ "develop" ]
+
+permissions:
+  contents: read
+
+jobs:
+  streamlit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Install dependencies
+        run: |
+          cd app/streamlit
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      - uses: streamlit/streamlit-app-action@v0.0.3
+        with:
+          app-path: app/streamlit/Chatbot.py
+          ruff: true
+          skip-smoke: true
+          pytest-args: -v --junit-xml=test-results.xml
+      - if: always()
+        uses: pmeier/pytest-results-action@v0.6.0
+        with:
+          path: test-results.xml
+          summary: true
+          display-options: fEX
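Since skip-smoke is set, the action's built-in smoke check is disabled and test coverage comes from the repo's own pytest suite (app_test.py, added below). For orientation, a minimal AppTest smoke test of the kind being skipped could look like this sketch (the test name is hypothetical; the path mirrors app-path above):

from streamlit.testing.v1 import AppTest

def test_app_smoke():
    # Render the app once headlessly; fail on any uncaught exception.
    at = AppTest.from_file("app/streamlit/Chatbot.py").run()
    assert not at.exception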

.vscode/settings.json (new file, +7)

@@ -0,0 +1,7 @@
+{
+    "python.testing.pytestArgs": [
+        "app"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
+}

README.md

@@ -1,4 +1,7 @@
+[![App testing](https://github.com/aimingmed/aimingmed-ai/actions/workflows/app-testing.yml/badge.svg?branch=develop)](https://github.com/aimingmed/aimingmed-ai/actions/workflows/app-testing.yml)
+
+
 ## Important note:
 No data or output should be uploaded to this repo. Please make use of the .gitignore template in the root directory if you have a folder/directory containing datasets. The contents currently ignored from git push are data/ and output/, recursively.

 ## Configure Hooks
@@ -12,4 +15,3 @@ To set up the hooks for only this Repo run `git config core.hooksPath ./.hooks/`

 ## Please enter your general Project description here
 ## If you don't need all folders, feel free to delete them

app/docker-compose.yml

@@ -3,8 +3,8 @@ version: "3.9"
 services:
   streamlit:
     build: ./streamlit
+    platform: linux/amd64
     ports:
       - "8501:8501"
     volumes:
       - ./llmops/src/rag_cot/chroma_db:/app/llmops/src/rag_cot/chroma_db


@@ -16,7 +16,7 @@ def go(args):
     zip_path = os.path.join(args.path_document_folder, f"{args.document_folder}.zip")
     shutil.make_archive(zip_path.replace('.zip', ''), 'zip', args.path_document_folder, args.document_folder)

-    with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id) as run:
+    with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id):
         existing_params = mlflow.get_run(mlflow.active_run().info.run_id).data.params
         if 'artifact_description' not in existing_params:
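The dropped `as run` binding was unused; the run is recovered through mlflow.active_run() on the following line instead. A minimal sketch of that pattern, assuming an experiment named "development" already exists:

import mlflow

with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("development").experiment_id):
    # No bound name is needed: start_run() sets the active run globally.
    run_id = mlflow.active_run().info.run_id
    existing_params = mlflow.get_run(run_id).data.params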


@@ -1,6 +1,4 @@
-import wandb
-import mlflow

 def log_artifact(artifact_name, artifact_type, artifact_description, filename, wandb_run):
     """


@@ -1,11 +1,8 @@
 import json
-import mlflow
 import tempfile
 import os
 import hydra
 from omegaconf import DictConfig
-from decouple import config

 _steps = [
     "get_documents",
@@ -27,7 +24,7 @@ def go(config: DictConfig):
     active_steps = steps_par.split(",") if steps_par != "all" else _steps

     # Move to a temporary directory
-    with tempfile.TemporaryDirectory() as tmp_dir:
+    with tempfile.TemporaryDirectory():
         if "get_documents" in active_steps:
             # Download file and load in W&B


@@ -10,8 +10,6 @@ import shutil
 import chromadb
 # from openai import OpenAI
 from typing import List
-import numpy as np
-import pytesseract as pt
 from pdf2image import convert_from_path
 from langchain.schema import Document


@@ -1,31 +0,0 @@
-import pytest
-from unittest.mock import patch, MagicMock
-
-import sys
-sys.path.append("/Users/leehongkai/projects/aimingmed/aimingmed-ai/app/llmops")
-
-from src.chain_of_thought import run
-
-def test_go():
-    # Create mock arguments
-    args = MagicMock()
-    args.query = "test_query"
-    args.input_chromadb_artifact = "test_artifact"
-    args.embedding_model = "test_embedding_model"
-    args.chat_model_provider = "gemini"
-
-    # Mock wandb.init and other external dependencies
-    with patch("wandb.init") as mock_wandb_init, \
-         patch("chromadb.PersistentClient") as mock_chromadb_client, \
-         patch("sentence_transformers.SentenceTransformer") as mock_sentence_transformer, \
-         patch("langchain_google_genai.ChatGoogleGenerativeAI") as mock_chat_google_generative_ai:
-        # Configure the mocks
-        mock_wandb_init.return_value = MagicMock()
-        mock_chromadb_client.return_value = MagicMock()
-        mock_sentence_transformer.return_value = MagicMock()
-        mock_chat_google_generative_ai.return_value = MagicMock()
-
-        # Call the go function
-        run.go(args)
-        # Add assertions to validate the behavior of the go function
-        assert mock_wandb_init.called

app/streamlit/Chatbot.py

@@ -1,5 +1,4 @@
 import os
-import subprocess
 import streamlit as st
 import chromadb
 from decouple import config

app/streamlit/Dockerfile

@@ -2,9 +2,9 @@ FROM python:3.11-slim

 WORKDIR /app/streamlit

-COPY Pipfile Pipfile.lock ./
+COPY requirements.txt ./

-RUN pip install pipenv && pipenv install --system --deploy
+RUN pip install --no-cache-dir -r requirements.txt

 COPY Chatbot.py .
 COPY .env .

app/streamlit/app_test.py (new file, +61)

@@ -0,0 +1,61 @@
+import datetime
+from unittest.mock import patch
+
+from streamlit.testing.v1 import AppTest
+from openai.types.chat import ChatCompletionMessage
+from openai.types.chat.chat_completion import ChatCompletion, Choice
+
+
+# See https://github.com/openai/openai-python/issues/715#issuecomment-1809203346
+def create_chat_completion(response: str, role: str = "assistant") -> ChatCompletion:
+    return ChatCompletion(
+        id="foo",
+        model="gpt-3.5-turbo",
+        object="chat.completion",
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(
+                    content=response,
+                    role=role,
+                ),
+            )
+        ],
+        created=int(datetime.datetime.now().timestamp()),
+    )
+
+
+# @patch("langchain_deepseek.ChatDeepSeek.__call__")
+# @patch("langchain_google_genai.ChatGoogleGenerativeAI.invoke")
+# @patch("langchain_community.llms.moonshot.Moonshot.__call__")
+# def test_Chatbot(moonshot_llm, gemini_llm, deepseek_llm):
+#     at = AppTest.from_file("Chatbot.py").run()
+#     assert not at.exception
+
+#     QUERY = "What is the best treatment for hypertension?"
+#     RESPONSE = "The best treatment for hypertension is..."
+#     deepseek_llm.return_value.content = RESPONSE
+#     gemini_llm.return_value.content = RESPONSE
+#     moonshot_llm.return_value = RESPONSE
+#     at.chat_input[0].set_value(QUERY).run()
+#     assert any(mock.called for mock in [deepseek_llm, gemini_llm, moonshot_llm])
+#     assert at.chat_message[1].markdown[0].value == QUERY
+#     assert at.chat_message[2].markdown[0].value == RESPONSE
+#     assert at.chat_message[2].avatar == "assistant"
+#     assert not at.exception
+
+
+@patch("langchain.llms.OpenAI.__call__")
+def test_Langchain_Quickstart(langchain_llm):
+    at = AppTest.from_file("pages/3_Langchain_Quickstart.py").run()
+    assert at.info[0].value == "Please add your OpenAI API key to continue."
+
+    RESPONSE = "1. The best way to learn how to code is by practicing..."
+    langchain_llm.return_value = RESPONSE
+    at.sidebar.text_input[0].set_value("sk-...")
+    at.button[0].set_value(True).run()
+    print(at)
+    assert at.info[0].value == RESPONSE
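create_chat_completion builds a fully typed ChatCompletion so OpenAI responses can be faked without any network call (see the linked issue above). A hedged usage sketch; the patch target is an assumption about the openai 1.x package layout, not something app_test.py itself does:

from unittest.mock import patch

with patch("openai.resources.chat.completions.Completions.create") as mock_create:
    mock_create.return_value = create_chat_completion("Hello!")
    # Anything that calls client.chat.completions.create(...) inside this
    # block now receives the canned ChatCompletion instead of a live response.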

app/streamlit/pages/3_Langchain_Quickstart.py (new file, +22)

@@ -0,0 +1,22 @@
+import streamlit as st
+from langchain.llms import OpenAI
+
+st.title("🦜🔗 Langchain Quickstart App")
+
+with st.sidebar:
+    openai_api_key = st.text_input("OpenAI API Key", type="password")
+    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
+
+
+def generate_response(input_text):
+    llm = OpenAI(temperature=0.7, openai_api_key=openai_api_key)
+    st.info(llm(input_text))
+
+
+with st.form("my_form"):
+    text = st.text_area("Enter text:", "What are 3 key advice for learning how to code?")
+    submitted = st.form_submit_button("Submit")
+    if not openai_api_key:
+        st.info("Please add your OpenAI API key to continue.")
+    elif submitted:
+        generate_response(text)
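Note that llm(input_text) uses the legacy __call__ interface, which is exactly what the test above patches (langchain.llms.OpenAI.__call__). On newer LangChain releases the same helper could use the non-deprecated invoke method; a sketch, assuming a LangChain version that provides it:

def generate_response(input_text):
    llm = OpenAI(temperature=0.7, openai_api_key=openai_api_key)
    # invoke() replaces direct __call__ in current LangChain APIs
    st.info(llm.invoke(input_text))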


@@ -0,0 +1,5 @@
+black==23.3.0
+mypy==1.4.1
+pre-commit==3.3.3
+watchdog
+pytest

app/streamlit/requirements.txt (new file, +14)

@@ -0,0 +1,14 @@
+streamlit>=1.28
+langchain>=0.0.217
+openai>=1.2
+duckduckgo-search
+anthropic>=0.3.0
+trubrics>=1.4.3
+streamlit-feedback
+langchain-community
+chromadb
+python-decouple
+langchain_google_genai
+langchain-deepseek
+sentence_transformers
+watchdog


@@ -0,0 +1,44 @@
+import pytest
+import chromadb
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_deepseek import ChatDeepSeek
+from langchain_community.llms.moonshot import Moonshot
+
+import sys
+sys.path.append(".")
+
+import streamlit as st
+from unittest.mock import patch
+from Chatbot import CHAT_MODEL_PROVIDER, INPUT_CHROMADB_LOCAL, COLLECTION_NAME, cot_template, answer_template
+
+
+@pytest.fixture(autouse=True)
+def mock_session_state():
+    with patch.object(st, "session_state", {"messages": []}):
+        yield
+
+
+def test_prompt_templates():
+    # Test that the prompt templates are correctly formatted
+    assert "documents_text" in cot_template
+    assert "question" in cot_template
+    assert "cot" in answer_template
+    assert "question" in answer_template
+
+
+def test_chromadb_connection():
+    # Test that the ChromaDB client is initialized correctly
+    chroma_client = chromadb.PersistentClient(path=INPUT_CHROMADB_LOCAL)
+    collection = chroma_client.get_collection(name=COLLECTION_NAME)
+    assert collection is not None
+
+
+@pytest.mark.skipif(CHAT_MODEL_PROVIDER not in ["deepseek", "gemini", "moonshot"], reason="requires a valid CHAT_MODEL_PROVIDER")
+def test_llm_initialization():
+    # Test that the correct LLM is initialized based on the CHAT_MODEL_PROVIDER environment variable
+    if CHAT_MODEL_PROVIDER == "deepseek":
+        llm = ChatDeepSeek(model="deepseek-chat")
+        assert isinstance(llm, ChatDeepSeek)
+    elif CHAT_MODEL_PROVIDER == "gemini":
+        llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
+        assert isinstance(llm, ChatGoogleGenerativeAI)
+    elif CHAT_MODEL_PROVIDER == "moonshot":
+        llm = Moonshot(model="moonshot-v1-128k")
+        assert isinstance(llm, Moonshot)
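test_chromadb_connection assumes the persisted chroma_db directory already exists (the same one the docker-compose volume above mounts), so it will error in a bare checkout. A hedged guard one could add, assuming INPUT_CHROMADB_LOCAL is a local filesystem path:

import os

requires_chromadb = pytest.mark.skipif(
    not os.path.isdir(INPUT_CHROMADB_LOCAL),
    reason="persisted ChromaDB directory not available",
)

Applying @requires_chromadb to test_chromadb_connection would then skip it cleanly wherever the database has not been built.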


@@ -1,39 +0,0 @@
-import pytest
-import streamlit as st
-from unittest.mock import patch
-
-# add app/streamlit to sys.path
-import sys
-sys.path.insert(0, "/Users/leehongkai/projects/aimingmed/aimingmed-ai/app/streamlit")
-
-from unittest.mock import patch, MagicMock
-
-def test_title():
-    with patch("streamlit.title") as mock_title, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_title.assert_called_once_with("💬 RAG AI for Medical Guideline")
-
-def test_caption():
-    with patch("streamlit.caption") as mock_caption, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_caption.assert_called()
-
-def test_chat_input():
-    with patch("streamlit.chat_input", return_value="test_prompt") as mock_chat_input, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        import Chatbot
-        st.session_state["messages"] = []
-        mock_chat_input.assert_called_once()
-
-def test_chat_message():
-    with patch("streamlit.chat_message") as mock_chat_message, \
-         patch("streamlit.session_state", new_callable=MagicMock) as mock_session_state:
-        with patch("streamlit.chat_input", return_value="test_prompt"):
-            import Chatbot
-            st.session_state["messages"] = []
-            mock_chat_message.assert_called()