diff --git a/app/llmops/config.yaml b/app/llmops/config.yaml
index 33383ea..4ea1d94 100644
--- a/app/llmops/config.yaml
+++ b/app/llmops/config.yaml
@@ -11,13 +11,14 @@ etl:
   embedding_model: paraphrase-multilingual-mpnet-base-v2
 
 rag:
   run_id_chromadb: None
-  chat_model_provider: gemini
+  chat_model_provider: deepseek
 testing:
   query: "如何治疗乳腺癌?"
 evaluation:
-  evaluation_dataset_csv_path: "../../../../data/qa_datasets.csv"
+  evaluation_dataset_csv_path: "../../../../data/qa_dataset_01.csv"
   evaluation_dataset_column_question: question
   evaluation_dataset_column_answer: answer
   ls_chat_model_provider:
-    - gemini
+    - deepseek
+    - moonshot
\ No newline at end of file
diff --git a/app/llmops/src/rag_adaptive_evaluation/run.py b/app/llmops/src/rag_adaptive_evaluation/run.py
index 4acc4c7..275b4d3 100644
--- a/app/llmops/src/rag_adaptive_evaluation/run.py
+++ b/app/llmops/src/rag_adaptive_evaluation/run.py
@@ -3,6 +3,8 @@ import logging
 import argparse
 import mlflow
 import shutil
+import langsmith
+
 from decouple import config
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_deepseek import ChatDeepSeek
@@ -10,6 +12,7 @@ from langchain_community.llms.moonshot import Moonshot
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores.chroma import Chroma
+
 from typing import List
 from typing_extensions import TypedDict
 
 
@@ -489,13 +492,16 @@ def go_evaluation(args):
 
     # Programmatically create a dataset in LangSmith
     client = Client()
-    dataset = client.create_dataset(
-        dataset_name = dataset_name,
-        description = "A sample dataset in LangSmith."
-    )
-
-    # Add examples to the dataset
-    client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
+    try:
+        # Create a dataset
+        dataset = client.create_dataset(
+            dataset_name = dataset_name,
+            description = "An evaluation dataset in LangSmith."
+        )
+        # Add examples to the dataset
+        client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
+    except langsmith.utils.LangSmithConflictError:
+        pass
 
     args.ls_chat_model_evaluator = None if args.ls_chat_model_evaluator == 'None' else args.ls_chat_model_evaluator.split(',')
 
@@ -521,7 +527,7 @@ def go_evaluation(args):
     # After running the evaluation, a link will be provided to view the results in langsmith
     experiment_results = client.evaluate(
         target,
-        data = "Sample dataset",
+        data = dataset_name,
         evaluators = ls_evaluators,
         experiment_prefix = "first-eval-in-langsmith",
         max_concurrency = 1,
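
Note on the try/except introduced in go_evaluation: client.create_dataset raises langsmith.utils.LangSmithConflictError when a dataset with the same name already exists, so catching that error makes the evaluation step safe to re-run. A minimal standalone sketch of the same pattern, assuming LANGSMITH_API_KEY is set in the environment; the dataset name and example rows below are hypothetical placeholders, not values taken from run.py:

import langsmith
from langsmith import Client

client = Client()  # picks up LANGSMITH_API_KEY from the environment

# Hypothetical placeholders; run.py builds these from the evaluation CSV.
dataset_name = "qa_dataset_01"
inputs = [{"question": "如何治疗乳腺癌?"}]   # "How should breast cancer be treated?"
outputs = [{"answer": "(reference answer text)"}]

try:
    # First run: create the dataset and populate it with QA examples.
    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="An evaluation dataset in LangSmith.",
    )
    client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
except langsmith.utils.LangSmithConflictError:
    # Later runs: a dataset with this name already exists; leave it unchanged.
    pass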
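
The related change from data = "Sample dataset" to data = dataset_name in client.evaluate(...) is what makes swallowing the conflict safe: LangSmith resolves the evaluation dataset by the same name on every run, whether it was just created or already existed, instead of pointing at a hard-coded dataset that may not correspond to the configured evaluation_dataset_csv_path.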