from langchain_core.documents import Document11 Vector Store and Retriver (Basic)
From: vector store and retriver
11.1 Documents
documents = [
Document(
page_content="Dogs are great companions, known for their loyalty and friendliness.",
metadata={"source": "mammal-pets-doc"},
),
Document(
page_content="Cats are independent pets that often enjoy their own space.",
metadata={"source": "mammal-pets-doc"},
),
Document(
page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
metadata={"source": "fish-pets-doc"},
),
Document(
page_content="Parrots are intelligent birds capable of mimicking human speech.",
metadata={"source": "bird-pets-doc"},
),
Document(
page_content="Rabbits are social animals that need plenty of space to hop around.",
metadata={"source": "mammal-pets-doc"},
),
]11.2 Vector Store
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
vectorstore = Chroma.from_documents(
documents,
embedding=OpenAIEmbeddings(),
)type(vectorstore)langchain_chroma.vectorstores.Chroma
11.2.1 Similarity Search
- Synchronously and asynchronously;
- By string query and by vector;
- With and without returning similarity scores;
- By similarity
vectorstore.similarity_search("cat")[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Rabbits are social animals that need plenty of space to hop around.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Parrots are intelligent birds capable of mimicking human speech.', metadata={'source': 'bird-pets-doc'})]
Async
await vectorstore.asimilarity_search("cat")[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Rabbits are social animals that need plenty of space to hop around.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Parrots are intelligent birds capable of mimicking human speech.', metadata={'source': 'bird-pets-doc'})]
Return scores:
# Note that providers implement different scores; Chroma here
# returns a distance metric that should vary inversely with
# similarity.
vectorstore.similarity_search_with_score("cat")[(Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
0.37532728910446167),
(Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
0.4833085536956787),
(Document(page_content='Rabbits are social animals that need plenty of space to hop around.', metadata={'source': 'mammal-pets-doc'}),
0.49588823318481445),
(Document(page_content='Parrots are intelligent birds capable of mimicking human speech.', metadata={'source': 'bird-pets-doc'}),
0.49741730093955994)]
Return documents based on similarity to a embedded query:
embedding = OpenAIEmbeddings().embed_query("cat")
vectorstore.similarity_search_by_vector(embedding)[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Rabbits are social animals that need plenty of space to hop around.', metadata={'source': 'mammal-pets-doc'}),
Document(page_content='Parrots are intelligent birds capable of mimicking human speech.', metadata={'source': 'bird-pets-doc'})]
11.3 Retrievers
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda
retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1) # select top result
retriever.batch(["cat", "shark"])[[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'})],
[Document(page_content='Goldfish are popular pets for beginners, requiring relatively simple care.', metadata={'source': 'fish-pets-doc'})]]
retriever = vectorstore.as_retriever(
search_type="similarity",
search_kwargs={"k": 1},
)
retriever.batch(["cat", "shark"])[[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'})],
[Document(page_content='Goldfish are popular pets for beginners, requiring relatively simple care.', metadata={'source': 'fish-pets-doc'})]]
11.4 Full Example of Retriver
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
message = """
Answer this question using the provided context only.
{question}
Context:
{context}
"""
prompt = ChatPromptTemplate.from_messages([("human", message)])
print(prompt.invoke({"question": "your question",
"context": "your context",
"message": "HI"}).to_string())Human:
Answer this question using the provided context only.
your question
Context:
your context
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llmresponse = rag_chain.invoke("tell me about cats")
print(response.content)Cats are independent pets that often enjoy their own space.
response2 = rag_chain.invoke("What's special about cats?")
print(response2.content)Cats are special because they are independent pets that often enjoy their own space.