Merge pull request #24 from aimingmed/feature/prompt-reengineering-to-improve6qas

Feature/prompt reengineering to improve6qas
This commit is contained in:
Hong Kai LEE 2025-04-02 16:39:41 +08:00 committed by GitHub
commit 3692c22241
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 38 additions and 10 deletions

View File

@ -6,8 +6,9 @@ Use the vectorstore for questions on these topics. Otherwise, use web-search.
# Prompt for the retrieval grader: decides whether a retrieved document is
# relevant to the user question. The model must reply with a binary 'yes'/'no'.
# NOTE(review): reconstructed from a mangled diff hunk — the old closing line
# and the new replacement lines were interleaved; confirm against the repo.
system_retriever_grader = """You are a grader assessing relevance of a retrieved document to a user question. \n
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
You must make sure to read carefully that the document contains a sentence or chunk of sentences that is exactly related but not closely related to the question subject (e.g. must be the exact disease or subject in question). \n
The goal is to filter out erroneous retrievals. \n
Must return a binary score 'yes' or 'no' to indicate whether the document is relevant to the question."""
# Prompt for the hallucination grader: checks whether an LLM generation is
# grounded in / supported by the retrieved facts; expects a binary 'yes'/'no'.
system_hallucination_grader = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
@ -18,6 +19,25 @@ system_answer_grader = """You are a grader assessing whether an answer addresses
# Prompt for the question re-writer: rephrases the user's question into a form
# better suited for vectorstore retrieval. (Fixed missing verb: "You a" -> "You are a".)
system_question_rewriter = """You are a question re-writer that converts an input question to a better version that is optimized \n
for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
# Prompt for answering a question from retrieved context. The model first plans
# the scope of its answer (that reasoning is excluded from the final answer),
# then answers strictly from the provided context; medical questions get
# extra structure (staged/multidisciplinary treatment options for cancers).
# {question} and {context} are filled in by a PromptTemplate at runtime.
# (Fixed typos: "availalbe" -> "available", "layout out" -> "lay out",
# "advices" -> "advice", plus minor grammar in the instructions.)
qa_prompt_template = """You are an expert at answering questions based on the following retrieved context.\n
Before answering the question, you must have your own thought process about the general scopes to cover when answering this question, step-by-step. Do not include this thought process in the answer.\n
Then, given your thought process, you must read the provided context carefully and extract the relevant information.\n
If the question is a medical question, you must answer the question in a medical way and assume that the audience is a junior doctor or a medical student: \n
1. For cancer diseases, you must include comprehensive treatment advice that encompasses multidisciplinary treatment options, including but not limited to surgery, chemotherapy, radiology, internal medicine (drugs), nutritional ratio (protein), etc. You must lay out the treatment options like what are the first-line, second-line treatment etc.\n
2. For cancer diseases, don't consider context that is not primary tumor/cancer related, unless the question specifically mentions it is secondary tumor/cancer related.\n
3. If the question didn't state the stage of the cancer disease, you must reply with treatment options for each stage of the cancer disease, if they are available in the provided context. If they are not available in the provided context, give a general one.\n
You must not use any information that is not present in the provided context to answer the question. Make sure to remove information not present in the provided context.\n
If you don't know the answer, just say that you don't know.\n
Provide the answer in a concise and organized manner. \n
Question: {question} \n
Context: {context} \n
Answer:
"""
# Evaluation
CORRECTNESS_PROMPT = """You are an impartial judge. Evaluate Student Answer against Ground Truth for conceptual similarity and correctness.

View File

@ -18,11 +18,14 @@ from typing_extensions import TypedDict
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate
from langchain.schema import Document
from pprint import pprint
from langgraph.graph import END, StateGraph, START
from langsmith import Client
from data_models import (
RouteQuery,
GradeDocuments,
@ -34,7 +37,8 @@ from prompts_library import (
system_retriever_grader,
system_hallucination_grader,
system_answer_grader,
system_question_rewriter
system_question_rewriter,
qa_prompt_template
)
from evaluators import (
@ -141,18 +145,22 @@ def go(args):
##########################################
### Generate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
# Prompt
prompt = hub.pull("rlm/rag-prompt")
# Create a PromptTemplate with the given prompt
new_prompt_template = PromptTemplate(
input_variables=["context", "question"],
template=qa_prompt_template,
)
# Post-processing
def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)
# Create a new HumanMessagePromptTemplate with the new PromptTemplate
new_human_message_prompt_template = HumanMessagePromptTemplate(
prompt=new_prompt_template
)
prompt_qa = ChatPromptTemplate.from_messages([new_human_message_prompt_template])
# Chain
rag_chain = prompt | llm | StrOutputParser()
rag_chain = prompt_qa | llm | StrOutputParser()
##########################################