# main.py
import os

import wandb
from config import GOOGLE_API_KEY, WANDB_API_KEY, LANGSMITH_API_KEY, LANGCHAIN_PROJECT
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.callbacks import LangChainTracer
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Set LangSmith environment variables (LANGCHAIN_TRACING_V2 enables LangSmith tracing)
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = LANGCHAIN_PROJECT
os.environ["LANGCHAIN_API_KEY"] = LANGSMITH_API_KEY

# Initialize Weights & Biases
wandb.login(key=WANDB_API_KEY)
run = wandb.init(project=LANGCHAIN_PROJECT, entity="aimingmed")

# Initialize the LangSmith tracer and the Gemini chat model
tracer = LangChainTracer()
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001", google_api_key=GOOGLE_API_KEY, callbacks=[tracer])

# Example usage of Gemini API
prompt_template = PromptTemplate(template="Write a short poem about the sun.", input_variables=[])
chain = LLMChain(llm=llm, prompt=prompt_template)
response = chain.run({})

# print(response)

import time
import uuid
from datetime import datetime, timezone

from langsmith import Client
from langsmith.evaluation import EvaluationResult

# Initialize LangSmith client (the API key is read from config above)
client = Client(api_key=LANGSMITH_API_KEY)
project_name = "my-gemini-evaluation"  # Your LangSmith project name

def evaluate_gemini_response(prompt, expected_response, gemini_response=None):
    """Evaluates Gemini's response against an expected response and logs to LangSmith."""

    # 1. Create a run via the LangSmith client (manual logging). The run id is generated
    #    locally so the run can be updated and feedback attached to it later.
    run_id = uuid.uuid4()
    client.create_run(
        id=run_id,
        name="gemini-evaluation",
        run_type="chain",
        project_name=project_name,
        inputs={"prompt": prompt, "expected_response": expected_response},  # Log inputs here
    )

    try:  # Use a try-except block for proper error handling
        start_time = time.time()

        # If no response was supplied, call Gemini through LangChain, passing the prompt
        # in as a template variable.
        if gemini_response is None:
            prompt_template = PromptTemplate(template="{prompt}", input_variables=["prompt"])
            chain = LLMChain(llm=llm, prompt=prompt_template)
            gemini_response = chain.run({"prompt": prompt})

        latency = time.time() - start_time

        # 2. End the run (important!): log outputs, latency, and the end time.
        client.update_run(
            run_id,
            outputs={"gemini_response": gemini_response, "latency_seconds": latency},
            end_time=datetime.now(timezone.utc),
        )

        # 3. Evaluate and log the result as feedback on the run.
        evaluation_result = evaluate_response(expected_response, gemini_response)
        client.create_feedback(
            run_id,
            key=evaluation_result.key,
            score=evaluation_result.score,
            comment=evaluation_result.comment,
        )

        return evaluation_result

    except Exception as e:  # Handle exceptions
        # Log the error in LangSmith and close the run.
        client.update_run(run_id, error=str(e), end_time=datetime.now(timezone.utc))
        print(f"Error during Gemini call or evaluation: {e}")
        return None  # Or handle the error as needed

def evaluate_response(expected_response, gemini_response):
    """Performs the actual evaluation logic. Customize this!"""

    # Example 1: Exact match (simple, but often not realistic)
    if expected_response.strip().lower() == gemini_response.strip().lower():
        score = 1.0  # Perfect match
        feedback = "Exact match!"
    # Example 2: Keyword overlap (more flexible)
    elif any(keyword in gemini_response.lower() for keyword in expected_response.lower().split()):
        score = 0.7  # Partial match (adjust score as needed)
        feedback = "Keyword overlap."
    # Example 3: Semantic similarity (requires an external library/API) - see the sketch
    # after this function.
    else:
        score = 0.0
        feedback = "No match."

    # Create a LangSmith EvaluationResult object; the key names the metric in LangSmith.
    evaluation_result = EvaluationResult(
        key="correctness",
        score=score,
        value=gemini_response,  # The actual response being evaluated
        comment=feedback,
        # Other metadata you might want to attach, like the expected response
        evaluator_info={"expected_response": expected_response},
    )

    return evaluation_result

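
# Optional sketch for "Example 3" in evaluate_response(): semantic similarity scoring.
# This helper is an illustration only and is not called anywhere above. It assumes the
# sentence-transformers package is installed; the function name and the
# "all-MiniLM-L6-v2" model are example choices, not part of the original script.
def semantic_similarity_score(expected_response, gemini_response):
    """Returns the cosine similarity between the two texts (roughly 0.0 to 1.0)."""
    from sentence_transformers import SentenceTransformer, util  # optional dependency, imported lazily

    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    embeddings = embedder.encode([expected_response, gemini_response], convert_to_tensor=True)
    return float(util.cos_sim(embeddings[0], embeddings[1]).item())

# A score from this helper could replace the keyword-overlap heuristic above, e.g.
# score = semantic_similarity_score(expected_response, gemini_response).
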
# Example usage:
prompt = "Translate 'Hello, world!' to French."
expected_response = "Bonjour le monde !"
gemini_response = "Bonjour monde !"  # Or get the actual response from Gemini

evaluation = evaluate_gemini_response(prompt, expected_response, gemini_response)
print(f"Evaluation Score: {evaluation.score}, Feedback: {evaluation.comment}")

# Another example
prompt = "What is the capital of France?"
expected_response = "Paris"
gemini_response = "The capital of France is Paris."

evaluation = evaluate_gemini_response(prompt, expected_response, gemini_response)
print(f"Evaluation Score: {evaluation.score}, Feedback: {evaluation.comment}")

wandb.finish()