from langchain_openai import ChatOpenAI
from langchain_community.document_loaders.csv_loader import CSVLoader
import pandas as pd

# Chat model shared by both the no-RAG chain and the RAG chain below.
llm = ChatOpenAI(model="gpt-4o-mini")
13 How to Load CSV
https://python.langchain.com/v0.2/docs/how_to/document_loader_csv
13.1 Approach No RAG
# Hospital phone directory: columns "Description" and "Telephone"
# (see the markdown dump of the table further down).
telephone_df = pd.read_csv("data/TelephoneDepartment.csv")
telephone_df.head()
Description | Telephone | |
---|---|---|
0 | Counter US at ER | 0338 |
1 | Tech CT at ER | 0350, 47115 |
2 | Radiology Interpretation Room | 0346, 47968 |
3 | Ask Team ER | 1182 |
4 | ER Team 1 | 0301 |
# Render the whole directory as a markdown table — this exact string is
# interpolated into the no-RAG system prompt below.
print(telephone_df.to_markdown())
| | Description | Telephone |
|---:|:------------------------------|:------------|
| 0 | Counter US at ER | 0338 |
| 1 | Tech CT at ER | 0350, 47115 |
| 2 | Radiology Interpretation Room | 0346, 47968 |
| 3 | Ask Team ER | 1182 |
| 4 | ER Team 1 | 0301 |
| 5 | ER Team 2 | 0303 |
| 6 | ER Team 3 | 0334 |
| 7 | Resusitation 1 | 1107 |
| 8 | Resusitation 2 | 2282 |
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# No-RAG approach: inline the ENTIRE phone book into the system prompt as an
# f-string, so the model always sees every row (no retrieval step).
system_prompt = f"""You are an assistant for searching hospital telephone number. Use the following <phone_book> to answer human request. If you don't know the answer, just say that you don't know. Keep the answer concise.
<phone_book>
{telephone_df.to_markdown()}
</phone_book>
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
# Preview the rendered messages (system prompt + human turn) for a dummy input.
prompt.invoke({"input": "H"}).to_messages()
[SystemMessage(content="You are an assistant for searching hospital telephone number. Use the following <phone_book> to answer human request. If you don't know the answer, just say that you don't know. Keep the answer concise.\n<phone_book>\n| | Description | Telephone |\n|---:|:------------------------------|:------------|\n| 0 | Counter US at ER | 0338 |\n| 1 | Tech CT at ER | 0350, 47115 |\n| 2 | Radiology Interpretation Room | 0346, 47968 |\n| 3 | Ask Team ER | 1182 |\n| 4 | ER Team 1 | 0301 |\n| 5 | ER Team 2 | 0303 |\n| 6 | ER Team 3 | 0334 |\n| 7 | Resusitation 1 | 1107 |\n| 8 | Resusitation 2 | 2282 |\n</phone_book>\n"),
HumanMessage(content='H')]
# LCEL pipeline: prompt -> model -> plain string.
# NOTE(review): StrOutputParser is only imported in a later cell (the
# rag_chain_1 cell) — run that import first, or add it to the imports at top.
chain = prompt | llm | StrOutputParser()
chain.invoke("Resus 1")
'The telephone number for Resuscitation 1 is 1107.'
chain.invoke("Resus 1, 2")
'Resusitation 1: 1107 \nResusitation 2: 2282'
chain.invoke("ER3")
'The telephone number for ER Team 3 is 0334.'
13.2 Approch with RAG
# RAG approach: load each CSV row as one Document; source_column="Description"
# puts the row's Description into the document's "source" metadata.
loader = CSVLoader(file_path="data/TelephoneDepartment.csv", source_column="Description")
data = loader.load()

for record in data[:2]:
    print(record)
page_content='Description: Counter US at ER
Telephone: 0338' metadata={'source': 'Counter US at ER', 'row': 0}
page_content='Description: Tech CT at ER
Telephone: 0350, 47115' metadata={'source': 'Tech CT at ER', 'row': 1}
13.2.1 Index
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embed every row into a Chroma vector store and retrieve the top-3 most
# similar rows per query.
vectorstore = Chroma.from_documents(documents=data, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retriever.invoke("Resus 1")
[Document(metadata={'row': 7, 'source': 'Resusitation 1'}, page_content='Description: Resusitation 1\nTelephone: 1107'),
Document(metadata={'row': 8, 'source': 'Resusitation 2'}, page_content='Description: Resusitation 2\nTelephone: 2282'),
Document(metadata={'row': 4, 'source': 'ER Team 1'}, page_content='Description: ER Team 1\nTelephone: 0301')]
13.2.2 Prompt
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# RAG prompt: unlike the no-RAG version, this is a plain (non-f) string —
# {phone_book} stays a template slot filled by the retriever at run time.
system_prompt = """You are an assistant for searching hospital telephone number. Use the following <phone_book> to answer human request. If you don't know the answer, just say that you don't know. Keep the answer concise.
<phone_book>
{phone_book}
</phone_book>
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
13.2.3 Chain 1
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# LCEL RAG pipeline: the dict runs its values in parallel — the retriever
# fills {phone_book} with the top-k documents while RunnablePassthrough
# forwards the raw query into {input}.
rag_chain_1 = (
    {"phone_book": retriever, "input": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

res1 = rag_chain_1.invoke("Resus 1")
print(res1)
The telephone number for Resusitation 1 is 1107.
rag_chain_1.invoke("Resus 1, 2")
'Resusitation 1: 1107 \nResusitation 2: 2282'
rag_chain_1.invoke("ER3")
'The telephone number for ER Team 3 is 0334.'
13.3 Compare
# Wall-clock comparison (IPython %time magic): single-row lookup.
# The RAG chain adds an embedding + retrieval round-trip before the LLM call.
print("Simple")
%time chain.invoke("Resus 1")
print("\nRAG")
%time rag_chain_1.invoke("Resus 1")
Simple
CPU times: user 25.8 ms, sys: 4.24 ms, total: 30 ms
Wall time: 998 ms
RAG
CPU times: user 50.3 ms, sys: 5.28 ms, total: 55.5 ms
Wall time: 2.15 s
'The telephone number for Resusitation 1 is 1107.'
# Same comparison for a two-row query; k=3 retrieval still covers both rows.
print("Simple")
%time chain.invoke("Resus 1, 2")
print("\nRAG")
%time rag_chain_1.invoke("Resus 1, 2")
Simple
CPU times: user 24.7 ms, sys: 3.32 ms, total: 28 ms
Wall time: 2 s
RAG
CPU times: user 54 ms, sys: 6.21 ms, total: 60.2 ms
Wall time: 1.84 s
'Resusitation 1: 1107 \nResusitation 2: 2282'