11  PDF Q&A System (Nike)

import getpass
import os
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Make sure the OpenAI client can authenticate. The key is requested
# interactively only when it is not already present in the environment, so
# it never has to be written into the notebook itself.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

# Chat model used to generate the final answers.
llm = ChatOpenAI(model="gpt-4o-mini")

11.0.1 Load PDF

from langchain_community.document_loaders import PyPDFLoader

# Relative path of the PDF (Nike Form 10-K) that the Q&A system is built on.
file_path = "pdf/414759-1-_5_Nike-NPS-Combo_Form-10-K_WR.pdf"
loader = PyPDFLoader(file_path)

# Read the PDF into a list of documents; each one exposes `page_content`
# and a `metadata` dict.
docs = loader.load()

# Show how many documents the loader produced.
print(len(docs))
106
## Example content
# Preview the first 250 characters of the document at index 1.
# The slice starts at 0 (`[:250]`); starting at 1 would silently drop the
# first character of the text.
for doc in docs[1:2]:
    print(doc.page_content[:250])
NITED STATES
SECURITIES AND EXCHANGE COMMISSION
Washington, D.C. 20549
FORM 10-K 
(Mark One)
☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934
FOR THE FISCAL YEAR ENDED MAY 31, 2023 
OR
☐TRANSITION REPORT PURSUAN
print(docs[0].metadata)
{'source': 'pdf/414759-1-_5_Nike-NPS-Combo_Form-10-K_WR.pdf', 'page': 0}

11.0.2 Split & Indexing

# Split the loaded documents into overlapping chunks so that each chunk is
# small enough to embed and to pass to the model as context.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Deterministic per-chunk IDs (source, page, position in `splits`).
# Without explicit IDs every run of this cell adds a brand-new copy of each
# chunk to the persisted collection, and the retriever then returns the same
# passage several times. With stable IDs a re-run overwrites the existing
# entries instead of duplicating them.
# NOTE: a "db" directory already filled by earlier runs with auto-generated
# IDs still contains those old copies; delete it once to start clean.
split_ids = [
    f"{split.metadata.get('source')}:{split.metadata.get('page')}:{index}"
    for index, split in enumerate(splits)
]

vectorstore = Chroma.from_documents(documents=splits,
                                    embedding=OpenAIEmbeddings(),
                                    ids=split_ids,
                                    # Save
                                    persist_directory="db")


# Now we can load the persisted database from disk, and use it as normal.
# The embedding function has to be supplied again when reopening the store:
# it is what turns each query into a vector, and it must be the same model
# that was used to index the chunks (OpenAIEmbeddings) for the vectors to be
# comparable.
vectorstore = Chroma(persist_directory="db",
                     embedding_function=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()
# Bare expression: shows the retriever's repr when run as a notebook cell.
retriever
VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x13709bbb0>)

11.0.3 Prompt

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answering model. The trailing {context}
# placeholder is a template variable that receives the retrieved text.
system_prompt = (
    "You are an assistant for question-answering tasks. Use the following "
    "pieces of retrieved context to answer the question. If you don't know "
    "the answer, say that you don't know. Use three sentences maximum and "
    "keep the answer concise.\n\n{context}"
)

# Two-message chat template: the system instructions, followed by the
# user's question supplied through the {input} variable.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

11.0.4 Chain

# Build the answering chain from the chat model and the prompt template;
# it takes the documents passed as `context` together with the `input`
# question.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Bare expression: shows the chain's repr when run as a notebook cell.
question_answer_chain
RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.\n\n{context}")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x1304ed2d0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x1304ef0a0>, model_name='gpt-4o-mini', openai_api_key=SecretStr('**********'), openai_proxy='')
| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})
# Wire the retriever in front of the answering chain to form the full
# retrieval-augmented pipeline.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
# The question is passed under the "input" key, matching the {input}
# placeholder of the prompt.
results = rag_chain.invoke({"input": "What was Nike's revenue in 2023?"})
# Bare expression: shows the result mapping when run as a notebook cell.
results
{'input': "What was Nike's revenue in 2023?",
 'context': [Document(metadata={'page': 36, 'source': 'pdf/414759-1-_5_Nike-NPS-Combo_Form-10-K_WR.pdf'}, page_content='FISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTS\nThe following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and \nmajor product line:\nFISCAL 2023 COMPARED TO FISCAL 2022\n•NIKE, Inc. Revenues were $51.2 billion in fiscal 2023, which increased 10% and 16% compared to fiscal 2022 on a reported \nand currency-neutral basis, respectively. The increase was due to higher revenues in North America, Europe, Middle East & \nAfrica ("EMEA"), APLA and Greater China, which contributed approximately 7, 6, 2 and 1 percentage points to NIKE, Inc. \nRevenues, respectively. \n•NIKE Brand revenues, which represented over 90% of NIKE, Inc. Revenues,  increased  10% and 16% on a reported and \ncurrency-neutral basis, respectively. This increase was primarily due to higher revenues in Men\'s, the Jordan Brand, \nWomen\'s and Kids\' which grew 17%, 35%,11% and 10%, respectively, on a wholesale equivalent basis.'),
  Document(metadata={'page': 36, 'source': 'pdf/414759-1-_5_Nike-NPS-Combo_Form-10-K_WR.pdf'}, page_content='FISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTS\nThe following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and \nmajor product line:\nFISCAL 2023 COMPARED TO FISCAL 2022\n•NIKE, Inc. Revenues were $51.2 billion in fiscal 2023, which increased 10% and 16% compared to fiscal 2022 on a reported \nand currency-neutral basis, respectively. The increase was due to higher revenues in North America, Europe, Middle East & \nAfrica ("EMEA"), APLA and Greater China, which contributed approximately 7, 6, 2 and 1 percentage points to NIKE, Inc. \nRevenues, respectively. \n•NIKE Brand revenues, which represented over 90% of NIKE, Inc. Revenues,  increased  10% and 16% on a reported and \ncurrency-neutral basis, respectively. This increase was primarily due to higher revenues in Men\'s, the Jordan Brand, \nWomen\'s and Kids\' which grew 17%, 35%,11% and 10%, respectively, on a wholesale equivalent basis.'),
  Document(metadata={'page': 36, 'source': 'pdf/414759-1-_5_Nike-NPS-Combo_Form-10-K_WR.pdf'}, page_content='FISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTS\nThe following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and \nmajor product line:\nFISCAL 2023 COMPARED TO FISCAL 2022\n•NIKE, Inc. Revenues were $51.2 billion in fiscal 2023, which increased 10% and 16% compared to fiscal 2022 on a reported \nand currency-neutral basis, respectively. The increase was due to higher revenues in North America, Europe, Middle East & \nAfrica ("EMEA"), APLA and Greater China, which contributed approximately 7, 6, 2 and 1 percentage points to NIKE, Inc. \nRevenues, respectively. \n•NIKE Brand revenues, which represented over 90% of NIKE, Inc. Revenues,  increased  10% and 16% on a reported and \ncurrency-neutral basis, respectively. This increase was primarily due to higher revenues in Men\'s, the Jordan Brand, \nWomen\'s and Kids\' which grew 17%, 35%,11% and 10%, respectively, on a wholesale equivalent basis.'),
  Document(metadata={'page': 31, 'source': 'pdf/414759-1-_5_Nike-NPS-Combo_Form-10-K_WR.pdf'}, page_content='to-end technology foundation, which we believe will further accelerate our digital transformation. W e believe this unified approach \nwill accelerate growth and unlock more efficiency for our business, while driving speed and responsiveness as we serve \nconsumers globally.\nFINANCIAL HIGHLIGHTS \n•In fiscal 2023, NIKE, Inc. achieved record Revenues of $51.2 billion, which increased 10% and 16% on a reported and \ncurrency-neutral basis, respectively \n•NIKE Direct revenues grew 14% from $18.7 billion in fiscal 2022 to $21.3 billion in fiscal 2023, and represented \napproximately 44% of total NIKE Brand revenues for fiscal 2023\n•Gross margin for the fiscal year decreased 250 basis points to 43.5% primarily driven by higher product costs, higher \nmarkdowns and unfavorable changes in foreign currency exchange rates, partially of fset by strategic pricing actions\n•Inventories as of May 31, 2023 were $8.5 billion, flat compared to the prior year, driven by the actions we took throughout')],
 'answer': "Nike's revenue in fiscal 2023 was $51.2 billion."}
# Preview the first 250 characters of every retrieved chunk.
# The slice starts at 0 (`[:250]`); starting at 1 would silently drop the
# first character of each chunk.
for doc in results["context"]:
    print(doc.page_content[:250])
ISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTS
The following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and 
major product line:
FISCAL 2023 COMPARED TO FISCAL 2022
•NIKE, Inc. Revenues were $51.
o-end technology foundation, which we believe will further accelerate our digital transformation. W e believe this unified approach 
will accelerate growth and unlock more efficiency for our business, while driving speed and responsiveness as we ser
ORTH AMERICA
(Dollars in millions) FISCAL 2023 FISCAL 2022 % CHANGE% CHANGE 
EXCLUDING 
CURRENCY 
CHANGES FISCAL 2021 % CHANGE% CHANGE 
EXCLUDING 
CURRENCY 
CHANGES
Revenues by:
Footwear $ 14,897 $ 12,228  22 %  22 % $ 11,644  5 %  5 %
Apparel  5,94
UROPE, MIDDLE EAST & AFRICA
(Dollars in millions) FISCAL 2023 FISCAL 2022 % CHANGE% CHANGE 
EXCLUDING 
CURRENCY 
CHANGES FISCAL 2021 % CHANGE% CHANGE 
EXCLUDING 
CURRENCY 
CHANGES
Revenues by:
Footwear $ 8,260 $ 7,388  12 %  25 % $ 6,970  6 %  9 %
A