mirror of
https://github.com/aimingmed/aimingmed-ai.git
synced 2026-02-08 08:13:20 +08:00
Ready for dataset evaluation
This commit is contained in:
parent
86a2c1a055
commit
afbb34079a
@ -11,13 +11,14 @@ etl:
|
|||||||
embedding_model: paraphrase-multilingual-mpnet-base-v2
|
embedding_model: paraphrase-multilingual-mpnet-base-v2
|
||||||
rag:
|
rag:
|
||||||
run_id_chromadb: None
|
run_id_chromadb: None
|
||||||
chat_model_provider: gemini
|
chat_model_provider: deepseek
|
||||||
testing:
|
testing:
|
||||||
query: "如何治疗乳腺癌?"
|
query: "如何治疗乳腺癌?"
|
||||||
evaluation:
|
evaluation:
|
||||||
evaluation_dataset_csv_path: "../../../../data/qa_datasets.csv"
|
evaluation_dataset_csv_path: "../../../../data/qa_dataset_01.csv"
|
||||||
evaluation_dataset_column_question: question
|
evaluation_dataset_column_question: question
|
||||||
evaluation_dataset_column_answer: answer
|
evaluation_dataset_column_answer: answer
|
||||||
ls_chat_model_provider:
|
ls_chat_model_provider:
|
||||||
- gemini
|
- gemini
|
||||||
|
- deepseek
|
||||||
- moonshot
|
- moonshot
|
||||||
@ -3,6 +3,8 @@ import logging
|
|||||||
import argparse
|
import argparse
|
||||||
import mlflow
|
import mlflow
|
||||||
import shutil
|
import shutil
|
||||||
|
import langsmith
|
||||||
|
|
||||||
from decouple import config
|
from decouple import config
|
||||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||||
from langchain_deepseek import ChatDeepSeek
|
from langchain_deepseek import ChatDeepSeek
|
||||||
@ -10,6 +12,7 @@ from langchain_community.llms.moonshot import Moonshot
|
|||||||
from langchain_huggingface import HuggingFaceEmbeddings
|
from langchain_huggingface import HuggingFaceEmbeddings
|
||||||
from langchain_community.vectorstores.chroma import Chroma
|
from langchain_community.vectorstores.chroma import Chroma
|
||||||
|
|
||||||
|
|
||||||
from typing import List
|
from typing import List
|
||||||
from typing_extensions import TypedDict
|
from typing_extensions import TypedDict
|
||||||
|
|
||||||
@ -489,13 +492,16 @@ def go_evaluation(args):
|
|||||||
# Programmatically create a dataset in LangSmith
|
# Programmatically create a dataset in LangSmith
|
||||||
client = Client()
|
client = Client()
|
||||||
|
|
||||||
dataset = client.create_dataset(
|
try:
|
||||||
dataset_name = dataset_name,
|
# Create a dataset
|
||||||
description = "A sample dataset in LangSmith."
|
dataset = client.create_dataset(
|
||||||
)
|
dataset_name = dataset_name,
|
||||||
|
description = "An evaluation dataset in LangSmith."
|
||||||
# Add examples to the dataset
|
)
|
||||||
client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
|
# Add examples to the dataset
|
||||||
|
client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
|
||||||
|
except langsmith.utils.LangSmithConflictError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
args.ls_chat_model_evaluator = None if args.ls_chat_model_evaluator == 'None' else args.ls_chat_model_evaluator.split(',')
|
args.ls_chat_model_evaluator = None if args.ls_chat_model_evaluator == 'None' else args.ls_chat_model_evaluator.split(',')
|
||||||
@ -521,7 +527,7 @@ def go_evaluation(args):
|
|||||||
# After running the evaluation, a link will be provided to view the results in langsmith
|
# After running the evaluation, a link will be provided to view the results in langsmith
|
||||||
experiment_results = client.evaluate(
|
experiment_results = client.evaluate(
|
||||||
target,
|
target,
|
||||||
data = "Sample dataset",
|
data = dataset_name,
|
||||||
evaluators = ls_evaluators,
|
evaluators = ls_evaluators,
|
||||||
experiment_prefix = "first-eval-in-langsmith",
|
experiment_prefix = "first-eval-in-langsmith",
|
||||||
max_concurrency = 1,
|
max_concurrency = 1,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user