from langchain_openai import ChatOpenAI
from langchain_community.document_loaders.csv_loader import CSVLoader
import pandas as pd

# Chat model shared by both the no-RAG chain and the RAG chain below.
llm = ChatOpenAI(model="gpt-4o-mini")
13 How to Load CSV
https://python.langchain.com/v0.2/docs/how_to/document_loader_csv
13.1 Approach No RAG
# Hospital phone directory: columns "Description" and "Telephone"
# (see the markdown dump of the table further down).
telephone_df = pd.read_csv("data/TelephoneDepartment.csv")
telephone_df.head()
Description | Telephone | |
---|---|---|
0 | Counter US at ER | 0338 |
1 | Tech CT at ER | 0350, 47115 |
2 | Radiology Interpretation Room | 0346, 47968 |
3 | Ask Team ER | 1182 |
4 | ER Team 1 | 0301 |
# Render the whole directory as a markdown table — this exact string is
# interpolated into the no-RAG system prompt below.
print(telephone_df.to_markdown())
| | Description | Telephone |
|---:|:------------------------------|:------------|
| 0 | Counter US at ER | 0338 |
| 1 | Tech CT at ER | 0350, 47115 |
| 2 | Radiology Interpretation Room | 0346, 47968 |
| 3 | Ask Team ER | 1182 |
| 4 | ER Team 1 | 0301 |
| 5 | ER Team 2 | 0303 |
| 6 | ER Team 3 | 0334 |
| 7 | Resusitation 1 | 1107 |
| 8 | Resusitation 2 | 2282 |
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# No-RAG approach: inline the ENTIRE phone book into the system prompt as an
# f-string, so the model always sees every row (no retrieval step).
system_prompt = f"""You are an assistant for searching hospital telephone number. Use the following <phone_book> to answer human request. If you don't know the answer, just say that you don't know. Keep the answer concise.
<phone_book>
{telephone_df.to_markdown()}
</phone_book>
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
# Preview the rendered messages (system prompt + human turn) for a dummy input.
prompt.invoke({"input": "H"}).to_messages()
[SystemMessage(content="You are an assistant for searching hospital telephone number. Use the following <phone_book> to answer human request. If you don't know the answer, just say that you don't know. Keep the answer concise.\n<phone_book>\n| | Description | Telephone |\n|---:|:------------------------------|:------------|\n| 0 | Counter US at ER | 0338 |\n| 1 | Tech CT at ER | 0350, 47115 |\n| 2 | Radiology Interpretation Room | 0346, 47968 |\n| 3 | Ask Team ER | 1182 |\n| 4 | ER Team 1 | 0301 |\n| 5 | ER Team 2 | 0303 |\n| 6 | ER Team 3 | 0334 |\n| 7 | Resusitation 1 | 1107 |\n| 8 | Resusitation 2 | 2282 |\n</phone_book>\n"),
HumanMessage(content='H')]
# LCEL pipeline: prompt -> model -> plain string.
# NOTE(review): StrOutputParser is only imported in a later cell (the
# rag_chain_1 cell) — run that import first, or add it to the imports at top.
chain = prompt | llm | StrOutputParser()
chain.invoke("Resus 1")
'The telephone number for Resuscitation 1 is 1107.'
chain.invoke("Resus 1, 2")
'Resusitation 1: 1107 \nResusitation 2: 2282'
chain.invoke("ER3")
'The telephone number for ER Team 3 is 0334.'
13.2 Approch with RAG
# RAG approach: load each CSV row as one Document; source_column="Description"
# puts the row's Description into the document's "source" metadata.
loader = CSVLoader(file_path="data/TelephoneDepartment.csv", source_column="Description")
data = loader.load()

for record in data[:2]:
    print(record)
page_content='Description: Counter US at ER
Telephone: 0338' metadata={'source': 'Counter US at ER', 'row': 0}
page_content='Description: Tech CT at ER
Telephone: 0350, 47115' metadata={'source': 'Tech CT at ER', 'row': 1}
13.2.1 Index
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embed every row into a Chroma vector store and retrieve the top-3 most
# similar rows per query.
vectorstore = Chroma.from_documents(documents=data, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retriever.invoke("Resus 1")
[Document(metadata={'row': 7, 'source': 'Resusitation 1'}, page_content='Description: Resusitation 1\nTelephone: 1107'),
Document(metadata={'row': 8, 'source': 'Resusitation 2'}, page_content='Description: Resusitation 2\nTelephone: 2282'),
Document(metadata={'row': 4, 'source': 'ER Team 1'}, page_content='Description: ER Team 1\nTelephone: 0301')]
13.2.2 Prompt
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# RAG prompt: unlike the no-RAG version, this is a plain (non-f) string —
# {phone_book} stays a template slot filled by the retriever at run time.
system_prompt = """You are an assistant for searching hospital telephone number. Use the following <phone_book> to answer human request. If you don't know the answer, just say that you don't know. Keep the answer concise.
<phone_book>
{phone_book}
</phone_book>
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
13.2.3 Chain 1
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# LCEL RAG pipeline: the dict runs its values in parallel — the retriever
# fills {phone_book} with the top-k documents while RunnablePassthrough
# forwards the raw query into {input}.
rag_chain_1 = (
    {"phone_book": retriever, "input": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

res1 = rag_chain_1.invoke("Resus 1")
print(res1)
The telephone number for Resusitation 1 is 1107.
rag_chain_1.invoke("Resus 1, 2")
'Resusitation 1: 1107 \nResusitation 2: 2282'
rag_chain_1.invoke("ER3")
'The telephone number for ER Team 3 is 0334.'
13.3 Compare
# Wall-clock comparison (IPython %time magic): single-row lookup.
# The RAG chain adds an embedding + retrieval round-trip before the LLM call.
print("Simple")
%time chain.invoke("Resus 1")
print("\nRAG")
%time rag_chain_1.invoke("Resus 1")
Simple
CPU times: user 25.8 ms, sys: 4.24 ms, total: 30 ms
Wall time: 998 ms
RAG
CPU times: user 50.3 ms, sys: 5.28 ms, total: 55.5 ms
Wall time: 2.15 s
'The telephone number for Resusitation 1 is 1107.'
# Same comparison for a two-row query; k=3 retrieval still covers both rows.
print("Simple")
%time chain.invoke("Resus 1, 2")
print("\nRAG")
%time rag_chain_1.invoke("Resus 1, 2")
Simple
CPU times: user 24.7 ms, sys: 3.32 ms, total: 28 ms
Wall time: 2 s
RAG
CPU times: user 54 ms, sys: 6.21 ms, total: 60.2 ms
Wall time: 1.84 s
'Resusitation 1: 1107 \nResusitation 2: 2282'