Retrieval-Augmented Generation (RAG) combines retrieval systems with generative AI to create applications that provide accurate, source-backed responses using your own data.
## What is RAG?

```python
# Traditional LLM - Limited knowledge
response = llm("What is our company's refund policy?")
# May hallucinate or give a generic answer
# RAG Approach:
# 1. Retrieve relevant documents from your knowledge base
# 2. Include them in the prompt
# 3. LLM generates accurate answer based on your data
```
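
The same three steps can be written out by hand, without any framework. Here is a minimal sketch, assuming hypothetical `knowledge_base.search` and `llm` helpers that stand in for whatever vector store and model you use:

```python
# Hand-rolled version of the three RAG steps.
# `knowledge_base` and `llm` are hypothetical stand-ins,
# not a specific library API.

def answer_with_rag(question, knowledge_base, llm, k=3):
    # 1. Retrieve the k most relevant chunks for the question
    relevant_chunks = knowledge_base.search(question, top_k=k)

    # 2. Include them in the prompt as context
    context = "\n\n".join(chunk.text for chunk in relevant_chunks)
    prompt = (
        f"Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}"
    )

    # 3. The LLM generates an answer grounded in your data
    return llm(prompt)
```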
## Building a Basic RAG System

```python
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
# Step 1: Load documents
loader = DirectoryLoader('./docs', glob="**/*.txt")
documents = loader.load()
# Step 2: Split into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,   # max characters per chunk
    chunk_overlap=200  # overlap so context isn't cut mid-thought
)
chunks = text_splitter.split_documents(documents)
# Step 3: Create embeddings and vector store
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(chunks, embeddings)
# Step 4: Create retrieval chain
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",  # "stuff" packs all retrieved chunks into one prompt
    retriever=vectorstore.as_retriever()
)
# Step 5: Ask questions
result = qa_chain.run("What is the company's remote work policy?")
print(result)
```
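
Before relying on the chain, it helps to sanity-check what the retriever actually returns for a query. The vector store's `similarity_search` method shows the raw chunks:

```python
# Inspect the raw chunks retrieved for a query
docs = vectorstore.similarity_search(
    "What is the company's remote work policy?",
    k=3
)
for doc in docs:
    print(doc.metadata.get("source"), "->", doc.page_content[:100])
```

If the right chunks don't show up here, tune the chunk size or retrieval settings before blaming the LLM.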
## Advanced RAG with Citations

```python
from langchain.chains import RetrievalQAWithSourcesChain
# Create chain that returns sources
qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    llm=OpenAI(temperature=0),  # temperature 0 keeps factual answers stable
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)
# Get answer with sources
result = qa_with_sources({"question": "What are the benefits?"})
print(f"Answer: {result['answer']}")
print(f"Sources: {result['sources']}")
```
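
The `sources` string is assembled from each document's `source` metadata key, which `DirectoryLoader` fills in with the file path. If you construct documents by hand, set that key yourself so citations work:

```python
from langchain.schema import Document

# The sources chain cites documents via their "source" metadata key
doc = Document(
    page_content="Employees receive 20 vacation days per year.",
    metadata={"source": "hr-handbook.txt"}
)
```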
## Custom Retrieval Logic

```python
# Retrieve more documents for complex questions
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5}  # return top 5 chunks
)
# MMR (Maximum Marginal Relevance) for diversity
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "fetch_k": 20}  # fetch 20 candidates, keep 5 diverse ones
)
```
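
A third option is filtering by similarity score, so weak matches are dropped entirely instead of padding the prompt. The `0.8` threshold below is an example value to tune against your own data:

```python
# Only keep chunks whose similarity score clears a threshold
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.8}
)
```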
## Production RAG System

```python
class ProductionRAG:
    def __init__(self, docs_path):
        # Build the index once at startup, then reuse it for every question
        self.vectorstore = self.build_vectorstore(docs_path)
        self.qa_chain = self.create_qa_chain()

    def build_vectorstore(self, docs_path):
        loader = DirectoryLoader(docs_path)
        documents = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        chunks = text_splitter.split_documents(documents)
        embeddings = OpenAIEmbeddings()
        return Chroma.from_documents(
            chunks,
            embeddings,
            persist_directory="./chroma_db"  # persist the index to disk
        )

    def create_qa_chain(self):
        return RetrievalQAWithSourcesChain.from_chain_type(
            llm=OpenAI(temperature=0),
            chain_type="stuff",
            retriever=self.vectorstore.as_retriever(
                search_kwargs={"k": 3}
            )
        )

    def ask(self, question):
        return self.qa_chain({"question": question})
# Use it
rag = ProductionRAG("./company_docs")
response = rag.ask("What is the vacation policy?")
print(response["answer"], response["sources"])
```
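
Because the index is persisted to `./chroma_db`, a restarted process can reload it instead of re-embedding every document. A sketch of that startup path:

```python
# Reload the persisted index instead of rebuilding it
embeddings = OpenAIEmbeddings()
vectorstore = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embeddings
)
```

Depending on your Chroma and LangChain versions, you may also need to call `vectorstore.persist()` after building the index for it to survive a restart.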
RAG grounds AI answers in your specific data rather than the model's training set alone. Essential for business applications!