10  How to Q&A over CSVs

import pandas as pd
from langchain_openai import ChatOpenAI

10.1 LLM

llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

10.2 Read CSV

df = pd.read_csv("data/Titanic-Dataset.csv")
print(df.shape)
print(df.columns.tolist())
(891, 12)
['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']

10.3 SQL

import os
from langchain_community.utilities import SQLDatabase
from sqlalchemy import create_engine

if not os.path.exists("titanic.db"):
    engine = create_engine("sqlite:///titanic.db")
    df.to_sql("titanic", engine, index=False)
db = SQLDatabase(engine=engine)
print(db.dialect)
sqlite
print(db.get_usable_table_names())
print(db.run("SELECT * FROM titanic WHERE Age < 2;"))
['titanic']
[(79, 1, 2, 'Caldwell, Master. Alden Gates', 'male', 0.83, 0, 2, '248738', 29.0, None, 'S'), (165, 0, 3, 'Panula, Master. Eino Viljami', 'male', 1.0, 4, 1, '3101295', 39.6875, None, 'S'), (173, 1, 3, 'Johnson, Miss. Eleanor Ileen', 'female', 1.0, 1, 1, '347742', 11.1333, None, 'S'), (184, 1, 2, 'Becker, Master. Richard F', 'male', 1.0, 2, 1, '230136', 39.0, 'F4', 'S'), (306, 1, 1, 'Allison, Master. Hudson Trevor', 'male', 0.92, 1, 2, '113781', 151.55, 'C22 C26', 'S'), (382, 1, 3, 'Nakid, Miss. Maria ("Mary")', 'female', 1.0, 0, 2, '2653', 15.7417, None, 'C'), (387, 0, 3, 'Goodwin, Master. Sidney Leonard', 'male', 1.0, 5, 2, 'CA 2144', 46.9, None, 'S'), (470, 1, 3, 'Baclini, Miss. Helene Barbara', 'female', 0.75, 2, 1, '2666', 19.2583, None, 'C'), (645, 1, 3, 'Baclini, Miss. Eugenie', 'female', 0.75, 2, 1, '2666', 19.2583, None, 'C'), (756, 1, 2, 'Hamalainen, Master. Viljo', 'male', 0.67, 1, 1, '250649', 14.5, None, 'S'), (789, 1, 3, 'Dean, Master. Bertram Vere', 'male', 1.0, 1, 2, 'C.A. 2315', 20.575, None, 'S'), (804, 1, 3, 'Thomas, Master. Assad Alexander', 'male', 0.42, 0, 1, '2625', 8.5167, None, 'C'), (828, 1, 2, 'Mallet, Master. Andre', 'male', 1.0, 0, 2, 'S.C./PARIS 2079', 37.0042, None, 'C'), (832, 1, 2, 'Richards, Master. George Sibley', 'male', 0.83, 1, 1, '29106', 18.75, None, 'S')]

10.4 SQL Agent

from langchain_community.agent_toolkits import create_sql_agent

agent_executor = create_sql_agent(llm, db=db, agent_type="openai-tools", verbose=True)
res = agent_executor.invoke({"input": "what's the average age of survivors"})
res


> Entering new SQL Agent Executor chain...

Invoking: `sql_db_list_tables` with `{}`


titanic
Invoking: `sql_db_schema` with `{'table_names': 'titanic'}`



CREATE TABLE titanic (
    "PassengerId" BIGINT, 
    "Survived" BIGINT, 
    "Pclass" BIGINT, 
    "Name" TEXT, 
    "Sex" TEXT, 
    "Age" FLOAT, 
    "SibSp" BIGINT, 
    "Parch" BIGINT, 
    "Ticket" TEXT, 
    "Fare" FLOAT, 
    "Cabin" TEXT, 
    "Embarked" TEXT
)

/*
3 rows from titanic table:
PassengerId Survived    Pclass  Name    Sex Age SibSp   Parch   Ticket  Fare    Cabin   Embarked
1   0   3   Braund, Mr. Owen Harris male    22.0    1   0   A/5 21171   7.25    None    S
2   1   1   Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38.0    1   0   PC 17599    71.2833 C85 C
3   1   3   Heikkinen, Miss. Laina  female  26.0    0   0   STON/O2. 3101282    7.925   None    S
*/
Invoking: `sql_db_query` with `{'query': 'SELECT AVG(Age) AS Average_Age FROM titanic WHERE Survived = 1'}`


[(28.343689655172415,)]The average age of survivors in the Titanic dataset is approximately 28.34 years.

> Finished chain.
{'input': "what's the average age of survivors",
 'output': 'The average age of survivors in the Titanic dataset is approximately 28.34 years.'}
res
{'input': "what's the average age of survivors",
 'output': 'The average age of survivors in the Titanic dataset is approximately 28.34 years.'}