from langchain_core.documents import Document
7 Vector Store and Retriever (Basic)
From: vector store and retriever
7.1 Documents
# Sample corpus: five short pet facts. Each Document carries its text in
# `page_content` and the name of the document it came from under the
# "source" metadata key.
documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"source": "fish-pets-doc"},
    ),
    Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"source": "bird-pets-doc"},
    ),
    Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"source": "mammal-pets-doc"},
    ),
]
7.2 Vector Store
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
# Build a Chroma vector store from the sample documents, using OpenAI
# embeddings to vectorize each document's text.
vectorstore = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings(),
)
# Inspect the concrete class of the store that was created.
type(vectorstore)
langchain_chroma.vectorstores.Chroma
7.2.1 Similarity Search
- Synchronously and asynchronously;
- By string query and by vector;
- With and without returning similarity scores;
- By similarity
# Query the store with a plain string; the matching documents are returned
# without scores.
vectorstore.similarity_search("cat")
[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Rabbits are social animals that need plenty of space to hop around.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Parrots are intelligent birds capable of mimicking human speech.', metadata={'source': 'bird-pets-doc'})]
Async
# Async variant of the string query above. A bare top-level `await` is only
# valid where an event loop is already running (e.g. a notebook cell); in a
# plain script this call must live inside an `async def`.
await vectorstore.asimilarity_search("cat")
[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Rabbits are social animals that need plenty of space to hop around.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Parrots are intelligent birds capable of mimicking human speech.', metadata={'source': 'bird-pets-doc'})]
Return scores:
# Note that providers implement different scores; Chroma here
# returns a distance metric that should vary inversely with
# similarity.
# Each result is a (Document, score) pair.
vectorstore.similarity_search_with_score("cat")
[(Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
0.37532728910446167),
(Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
0.4833085536956787),
(Document(page_content='Rabbits are social animals that need plenty of space to hop around.', metadata={'source': 'mammal-pets-doc'}),
0.49588823318481445),
(Document(page_content='Parrots are intelligent birds capable of mimicking human speech.', metadata={'source': 'bird-pets-doc'}),
0.49741730093955994)]
Return documents based on similarity to an embedded query:
# Embed the query text first, then search the store by that vector
# instead of by the raw string.
embedding = OpenAIEmbeddings().embed_query("cat")
vectorstore.similarity_search_by_vector(embedding)
[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Rabbits are social animals that need plenty of space to hop around.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Parrots are intelligent birds capable of mimicking human speech.', metadata={'source': 'bird-pets-doc'})]
7.3 Retrievers
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda
# Wrap the store's similarity_search method in a RunnableLambda so it can be
# driven through the Runnable interface (here: `.batch`), with k bound to 1.
retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)  # select top result

# Run one search per query string.
retriever.batch(["cat", "shark"])
[[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'})],
[Document(page_content='Goldfish are popular pets for beginners, requiring relatively simple care.', metadata={'source': 'fish-pets-doc'})]]
# Same retrieval as above, but built with the vector store's own
# `as_retriever` helper: similarity search, keeping only the top hit (k=1).
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

# Run one search per query string.
retriever.batch(["cat", "shark"])
[[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'})],
[Document(page_content='Goldfish are popular pets for beginners, requiring relatively simple care.', metadata={'source': 'fish-pets-doc'})]]
7.4 Full Example of Retriever
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
# Chat model used to answer the question.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

# Prompt template with two placeholders: the user's question and the
# retrieved context the answer must be based on.
message = """
Answer this question using the provided context only.
{question}
Context:
{context}
"""

# Single-message chat prompt: the template above is sent as the human turn.
prompt = ChatPromptTemplate.from_messages([("human", message)])

# Render the prompt with dummy values to inspect the final text.
# NOTE(review): "message" is not a placeholder in the template, and "HI" does
# not appear in the rendered output shown below, so this extra key appears to
# be ignored — confirm and drop it if so.
print(
    prompt.invoke(
        {
            "question": "your question",
            "context": "your context",
            "message": "HI",
        }
    ).to_string()
)
Human:
Answer this question using the provided context only.
your question
Context:
your context
# RAG pipeline: the input string is sent to the retriever to fill "context"
# and passed through unchanged as "question"; the resulting dict feeds the
# prompt, whose output is piped to the chat model.
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm

response = rag_chain.invoke("tell me about cats")
print(response.content)
Cats are independent pets that often enjoy their own space.
# Ask a second question through the same chain and print the model's answer.
response2 = rag_chain.invoke("What's special about cats?")
print(response2.content)
Cats are special because they are independent pets that often enjoy their own space.