update for now

This commit is contained in:
leehk 2025-03-13 21:23:36 +08:00
parent b6ca6ac677
commit 86a2c1a055
4 changed files with 99 additions and 53 deletions

View File

@ -9,8 +9,15 @@ etl:
path_document_folder: "../../../../data"
run_id_documents: None
embedding_model: paraphrase-multilingual-mpnet-base-v2
prompt_engineering:
rag:
run_id_chromadb: None
chat_model_provider: gemini
testing:
query: "如何治疗乳腺癌?"
query_evaluation_dataset_csv_path: "../../../../data/qa_datasets.csv"
evaluation:
evaluation_dataset_csv_path: "../../../../data/qa_datasets.csv"
evaluation_dataset_column_question: question
evaluation_dataset_column_answer: answer
ls_chat_model_provider:
- gemini
- moonshot

View File

@ -104,7 +104,7 @@ def go(config: DictConfig):
)
if "rag_cot_evaluation" in active_steps:
if config["prompt_engineering"]["run_id_chromadb"] == "None":
if config["rag"]["run_id_chromadb"] == "None":
# Look for run_id that has artifact logged as documents
run_id = None
client = mlflow.tracking.MlflowClient()
@ -119,22 +119,22 @@ def go(config: DictConfig):
if run_id is None:
raise ValueError("No run_id found with artifact logged as documents")
else:
run_id = config["prompt_engineering"]["run_id_chromadb"]
run_id = config["rag"]["run_id_chromadb"]
_ = mlflow.run(
os.path.join(hydra.utils.get_original_cwd(), "src", "rag_cot_evaluation"),
"main",
parameters={
"query": config["prompt_engineering"]["query"],
"query": config["testing"]["query"],
"input_chromadb_artifact": f'runs:/{run_id}/chromadb/chroma_db.zip',
"embedding_model": config["etl"]["embedding_model"],
"chat_model_provider": config["prompt_engineering"]["chat_model_provider"]
"chat_model_provider": config["rag"]["chat_model_provider"]
},
)
if "rag_adaptive_evaluation" in active_steps:
if config["prompt_engineering"]["run_id_chromadb"] == "None":
if config["rag"]["run_id_chromadb"] == "None":
# Look for run_id that has artifact logged as documents
run_id = None
client = mlflow.tracking.MlflowClient()
@ -149,17 +149,20 @@ def go(config: DictConfig):
if run_id is None:
raise ValueError("No run_id found with artifact logged as documents")
else:
run_id = config["prompt_engineering"]["run_id_chromadb"]
run_id = config["rag"]["run_id_chromadb"]
_ = mlflow.run(
os.path.join(hydra.utils.get_original_cwd(), "src", "rag_adaptive_evaluation"),
"main",
parameters={
"query": config["prompt_engineering"]["query"],
"query_evaluation_dataset_csv_path": config["prompt_engineering"]["query_evaluation_dataset_csv_path"],
"query": config["testing"]["query"],
"evaluation_dataset_csv_path": config["evaluation"]["evaluation_dataset_csv_path"],
"evaluation_dataset_column_question": config["evaluation"]["evaluation_dataset_column_question"],
"evaluation_dataset_column_answer": config["evaluation"]["evaluation_dataset_column_answer"],
"input_chromadb_artifact": f'runs:/{run_id}/chromadb/chroma_db.zip',
"embedding_model": config["etl"]["embedding_model"],
"chat_model_provider": config["prompt_engineering"]["chat_model_provider"]
"chat_model_provider": config["rag"]["chat_model_provider"],
"ls_chat_model_evaluator": ','.join(config["evaluation"]["ls_chat_model_provider"]) if config["evaluation"]["ls_chat_model_provider"] is not None else 'None',
},
)
@ -169,10 +172,10 @@ def go(config: DictConfig):
os.path.join(hydra.utils.get_original_cwd(), "components", "test_rag_cot"),
"main",
parameters={
"query": config["prompt_engineering"]["query"],
"query": config["testing"]["query"],
"input_chromadb_local": os.path.join(hydra.utils.get_original_cwd(), "src", "rag_cot_evaluation", "chroma_db"),
"embedding_model": config["etl"]["embedding_model"],
"chat_model_provider": config["prompt_engineering"]["chat_model_provider"]
"chat_model_provider": config["rag"]["chat_model_provider"]
},
)

View File

@ -9,10 +9,18 @@ entry_points:
description: Query to run
type: string
query_evaluation_dataset_csv_path:
evaluation_dataset_csv_path:
description: query evaluation dataset csv path
type: string
evaluation_dataset_column_question:
description: query evaluation dataset column question
type: string
evaluation_dataset_column_answer:
description: query evaluation dataset column groundtruth
type: string
input_chromadb_artifact:
description: Fully-qualified name for the input artifact
type: string
@ -24,10 +32,18 @@ entry_points:
chat_model_provider:
description: Fully-qualified name for the chat model provider
type: string
ls_chat_model_evaluator:
description: list of chat model providers for evaluation
type: string
command: >-
python run.py --query {query} \
--query_evaluation_dataset_csv_path {query_evaluation_dataset_csv_path} \
--evaluation_dataset_csv_path {evaluation_dataset_csv_path} \
--evaluation_dataset_column_question {evaluation_dataset_column_question} \
--evaluation_dataset_column_answer {evaluation_dataset_column_answer} \
--input_chromadb_artifact {input_chromadb_artifact} \
--embedding_model {embedding_model} \
--chat_model_provider {chat_model_provider}
--chat_model_provider {chat_model_provider} \
--ls_chat_model_evaluator {ls_chat_model_evaluator}

View File

@ -474,61 +474,60 @@ def go(args):
return {"response": value["generation"]}
def go_evaluation(args):
    """Evaluate the RAG pipeline on a CSV question/answer dataset via LangSmith.

    The CSV at ``args.evaluation_dataset_csv_path`` is loaded with pandas, its
    question/answer columns are registered as a LangSmith dataset named after
    the CSV file, and ``go`` is run on every question.  The answers are scored
    by the correctness and faithfulness evaluators of each provider listed in
    ``args.ls_chat_model_evaluator``.

    Args:
        args: Parsed command-line namespace.  Attributes read here:
            ``evaluation_dataset_csv_path`` (falsy value disables evaluation),
            ``evaluation_dataset_column_question``,
            ``evaluation_dataset_column_answer`` and
            ``ls_chat_model_evaluator`` (comma-separated provider names, or
            the literal string ``'None'``).  The namespace is not modified.

    Returns:
        The object returned by ``client.evaluate``, or ``None`` when no
        evaluation dataset path was supplied.

    Raises:
        KeyError: If a configured column is missing from the CSV (raised by
            pandas).
    """
    if not args.evaluation_dataset_csv_path:
        return None

    import warnings

    import pandas as pd

    df = pd.read_csv(args.evaluation_dataset_csv_path)
    # The dataset is named after the CSV file (text before the first dot).
    dataset_name = os.path.basename(args.evaluation_dataset_csv_path).split('.')[0]

    # Build the LangSmith inputs and reference outputs from the two columns.
    questions = df[args.evaluation_dataset_column_question].tolist()
    answers = df[args.evaluation_dataset_column_answer].tolist()
    inputs = [{"question": question} for question in questions]
    outputs = [{"answer": answer} for answer in answers]

    client = Client()
    # Creating a dataset whose name already exists fails, which would break
    # every re-run of this step; reuse the existing dataset in that case.
    # NOTE(review): a reused dataset keeps its previously uploaded examples,
    # so changes to the CSV are not picked up until it is deleted or renamed.
    if not client.has_dataset(dataset_name=dataset_name):
        dataset = client.create_dataset(
            dataset_name=dataset_name,
            description="A sample dataset in LangSmith.",
        )
        client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)

    # Parse the provider list into a local so ``args`` stays untouched and the
    # function can be called again with the same namespace.
    raw_providers = args.ls_chat_model_evaluator
    if raw_providers is None or raw_providers == 'None':
        providers = []
    elif isinstance(raw_providers, str):
        providers = [name.strip() for name in raw_providers.split(',') if name.strip()]
    else:
        providers = list(raw_providers)

    # Each provider contributes its correctness evaluator, then faithfulness.
    evaluators_by_provider = {
        'moonshot': (moonshot_evaluator_correctness, moonshot_evaluator_faithfulness),
        'deepseek': (deepseek_evaluator_correctness, deepseek_evaluator_faithfulness),
        'gemini': (gemini_evaluator_correctness, gemini_evaluator_faithfulness),
    }
    ls_evaluators = []
    for provider in providers:
        if provider in evaluators_by_provider:
            ls_evaluators.extend(evaluators_by_provider[provider])
        else:
            # Surface configuration typos instead of silently dropping them.
            warnings.warn(f"Unknown evaluator provider {provider!r}; skipping.")

    def target(inputs: dict) -> dict:
        """Run the pipeline on one dataset example's question."""
        new_args = argparse.Namespace(**vars(args))
        new_args.query = inputs["question"]
        return go(new_args)

    # After running the evaluation, a link will be provided to view the
    # results in LangSmith.  Evaluate against the dataset registered above
    # rather than a hard-coded dataset name.
    experiment_results = client.evaluate(
        target,
        data=dataset_name,
        evaluators=ls_evaluators,
        experiment_prefix="first-eval-in-langsmith",
        max_concurrency=1,
    )
    return experiment_results
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Adaptive AG")
@ -541,12 +540,26 @@ if __name__ == "__main__":
)
parser.add_argument(
"--query_evaluation_dataset_csv_path",
"--evaluation_dataset_csv_path",
type=str,
help="Path to the query evaluation dataset",
default=None,
)
parser.add_argument(
"--evaluation_dataset_column_question",
type=str,
help="Column name for the questions in the evaluation dataset",
default="question",
)
parser.add_argument(
"--evaluation_dataset_column_answer",
type=str,
help="Column name for the groundtruth answers in the evaluation dataset",
default="groundtruth",
)
parser.add_argument(
"--input_chromadb_artifact",
type=str,
@ -568,7 +581,14 @@ if __name__ == "__main__":
help="Chat model provider"
)
parser.add_argument(
"--ls_chat_model_evaluator",
type=str,
help="list of Chat model providers for evaluation",
required=False,
default="None"
)
args = parser.parse_args()
# go(args)
go_evaluation(args)