import os
from datetime import datetime
from operator import itemgetter
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import RecursiveUrlLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Chroma
from parea import Parea
from parea.utils.trace_integrations.langchain import PareaAILangchainTracer
from parea.evals.general import answer_matches_target_llm_grader_factory
from parea.evals.rag import context_query_relevancy_factory, percent_target_supported_by_context_factory
from parea.evals import EvalFuncTuple, run_evals_in_thread_and_log
from parea.schemas import Log
load_dotenv()

# The Parea client must be instantiated to enable tracing and evals
p = Parea(api_key=os.getenv("PAREA_API_KEY"))

# Populated by format_docs() so main() can attach the retrieved context to the eval Log
CONTEXT = None


def format_docs(docs) -> str:
    """Join the retrieved documents into one context string and stash it in the global CONTEXT."""
    global CONTEXT  # Saved so main() can pass the retrieved context to the evals
    context = "\n\n".join(doc.page_content for doc in docs)
    CONTEXT = context
    return context


raw_documents = RecursiveUrlLoader("https://en.wikipedia.org/wiki/New_York_City").load()
transformed = Html2TextTransformer().transform_documents(raw_documents)
documents = TokenTextSplitter(model_name="gpt-3.5-turbo", chunk_size=2000, chunk_overlap=200).split_documents(transformed)
retriever = Chroma.from_documents(documents, OpenAIEmbeddings()).as_retriever()
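# NOTE: as_retriever() performs similarity search over the in-memory Chroma index;
# at the time of writing the default is top k=4, tunable via search_kwargs={"k": ...}.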
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful documentation Q&A assistant, trained to answer questions from the provided context."
            "\nThe current time is {time}.\n\nRelevant documents will be retrieved in the following messages.",
        ),
        ("system", "{context}"),
        ("human", "{question}"),
    ]
).partial(time=str(datetime.now()))
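# .partial() pre-fills {time} once at import, so each invocation only supplies {context}
# and {question}; note the timestamp is therefore fixed for the lifetime of the process.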
model = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)
response_generator = prompt | model | StrOutputParser()
chain = (
    # The runnable map routes the input question to the retriever (to build the context)
    # and also passes it through unchanged, matching the prompt's remaining input variables
    {
        "context": itemgetter("question") | retriever | format_docs,
        "question": itemgetter("question"),
    }
    | response_generator
)
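# Example invocation; StrOutputParser makes the chain return a plain string:
#   chain.invoke({"question": "What borough is Central Park in?"})  # illustrative question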
# Get started with Parea's auto evaluation metrics for RAG
EVALS = [
    EvalFuncTuple(name="matches_target", func=answer_matches_target_llm_grader_factory(question_field="question")),
    EvalFuncTuple(name="relevancy", func=context_query_relevancy_factory(question_field="question", context_fields=["context"])),
    EvalFuncTuple(name="supported_by_context", func=percent_target_supported_by_context_factory(question_field="question", context_fields=["context"])),
]
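# Each factory above returns an eval function that scores a parea.schemas.Log;
# the scores are numeric (typically 0-1, as in the sample results shown in main below).
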
def main():
    handler = PareaAILangchainTracer()
    question = "What is the population of New York City as of 2020?"
    output = chain.invoke({"question": question}, config={"callbacks": [handler]})
    # Get the parent trace id from the tracer
    parent_trace_id = handler.get_parent_trace_id()
    # Build the Log object the evaluation metric functions consume
    log = Log(inputs={"question": question, "context": CONTEXT}, output=output, target="8,804,190")
    # Parea helper that runs the evaluation metrics in a background thread to avoid blocking
    run_evals_in_thread_and_log(trace_id=str(parent_trace_id), log=log, eval_funcs=EVALS, verbose=True)
    # ### Eval Results ###
    # NamedEvaluationScore(name='matches_target', score=1.0)
    # NamedEvaluationScore(name='relevancy', score=0.0053)
    # NamedEvaluationScore(name='supported_by_context', score=1.0)
    # View trace at: https://app.parea.ai/logs/detailed/{parent_trace_id}
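

# Entry point: without this guard the script would define main() but never call it
if __name__ == "__main__":
    main()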