You already have a RAG pipeline using LangGraph and Pinecone. Now we’re turning it into a real backend your frontend can call.
You’ll build:
- Data ingestion (your AI memory)
- FastAPI backend
- React-ready endpoint
- Full local test setup
Project Structure
│── api.py
│── lgdemo.py
│── ingest_docs.py
│── requirements.txt
│── .env
│── data/
│── .gitignore
Step 1 – Set Up VS Code + Virtual Environment
cd rag-project
code .
python -m venv venv
Activate Virtual Environment
PowerShell
CMD
Mac/Linux/Git Bash
Common Errors (Fix Fast)
PowerShell error:
Fix:
Then:
Do NOT press anything — wait until it fully completes. It will take a couple of minutes to complete.
Python not recognized
- Reinstall Python
- Check “Add to PATH”
Confirm It Worked
Step 2 – Initialize Git + GitHub
Create .gitignore:
.env
__pycache__/
git commit -m "Initial RAG FastAPI setup"
Create repo on GitHub then:
git branch -M main
git push -u origin main
Step 3 – Install Dependencies + requirements.txt
Install:
Generate requirements file:
👉 This allows you (or your server) to reinstall everything later:
Step 4 – Environment Variables
PINECONE_API_KEY=your_key
PINECONE_INDEX_NAME=rag-index
PINECONE_ENV=us-east-1
Step 5 – Add Your Data (TXT + PDF)
Create data folder
Create TXT file (Notepad)
- Right-click → New → Text Document
- Add content:
- Save as:
Create PDF (from Notepad)
- Open Notepad
- Add content
- File → Print
- Select Microsoft Print to PDF
- Save as:
Move files into /data
What This Does
This becomes your AI’s knowledge base
Step 6 – Update ingest_docs.py (Make It API-Friendly)
File: ingest_docs.py
Python code:
# Loads documents → converts to embeddings → stores in Pinecone
import os
import uuid
from dotenv import load_dotenv
from openai import OpenAI
from pinecone import Pinecone
from pypdf import PdfReader
# Pull OPENAI_API_KEY / PINECONE_* settings from .env into the environment.
load_dotenv()
# Module-level clients, created once: OpenAI for embeddings, Pinecone for storage.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
# Handle to the target index named by PINECONE_INDEX_NAME (must already exist).
index = pc.Index(os.getenv("PINECONE_INDEX_NAME"))
# Folder scanned by ingest_all() for .txt / .pdf source documents.
DATA_FOLDER = "data"
def load_txt(path):
    """Read a UTF-8 text file and return its entire contents as one string."""
    with open(path, encoding="utf-8") as handle:
        return handle.read()
def load_pdf(path):
    """Extract text from every page of a PDF, newline-separated.

    Pages whose extraction yields nothing are skipped.
    """
    extracted = [page.extract_text() for page in PdfReader(path).pages]
    return "".join(page_text + "\n" for page_text in extracted if page_text)
def chunk_text(text, chunk_size=500, overlap=100):
    """Split `text` into overlapping chunks.

    Each chunk is up to `chunk_size` characters; consecutive chunks share
    `overlap` characters so sentences cut at a boundary still appear whole
    in at least one chunk. Whitespace-only chunks are dropped.

    Raises:
        ValueError: if `overlap >= chunk_size` — the original code would
            advance by a non-positive step and loop forever.
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    step = chunk_size - overlap
    chunks, start = [], 0
    while start < len(text):
        piece = text[start:start + chunk_size].strip()
        if piece:
            chunks.append(piece)
        start += step
    return chunks
def embed_batch(texts):
    """Embed a batch of strings in a single API call, preserving input order."""
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    return [item.embedding for item in response.data]
def process_file(file_path):
    """Ingest one document: load, chunk, embed, and upsert into Pinecone.

    Supported types are .txt and .pdf; anything else (and files whose text
    is empty after extraction) is silently skipped.

    FIX: the original compared extensions case-sensitively, so `.TXT` /
    `.PDF` files (common on Windows) were silently ignored.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".txt":
        text = load_txt(file_path)
    elif ext == ".pdf":
        text = load_pdf(file_path)
    else:
        return
    if not text.strip():
        return
    chunks = chunk_text(text)
    embeddings = embed_batch(chunks)
    # One vector per chunk; the raw chunk text rides along as metadata so
    # the query side can rebuild context from matches.
    vectors = [
        {
            "id": str(uuid.uuid4()),
            "values": emb,
            "metadata": {"text": chunk},
        }
        for chunk, emb in zip(chunks, embeddings)
    ]
    index.upsert(vectors)
    print(f"Inserted {len(vectors)} chunks")
def ingest_all():
    """Feed every entry in DATA_FOLDER through process_file."""
    for entry in os.listdir(DATA_FOLDER):
        process_file(os.path.join(DATA_FOLDER, entry))


if __name__ == "__main__":
    ingest_all()
Run Ingestion
👉 Loads your data into Pinecone
Step 7 – Update lgdemo.py (Make It API-Friendly)
✅ What changed:
- Removed input()
- Accept query as a parameter
- Return the answer instead of printing it
File: lgdemo.py
Python code:
# ============================================
# RAG PIPELINE (API VERSION)
# ============================================
# Query flow: embed query → Pinecone similarity search → LLM answer,
# orchestrated as a LangGraph state machine.
import os
from typing import TypedDict, List
from dotenv import load_dotenv
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec
from langgraph.graph import StateGraph, END
# Load OPENAI/PINECONE settings from .env before any client is constructed.
load_dotenv()
class GraphState(TypedDict):
    """State dict threaded through the LangGraph pipeline nodes."""
    # Raw user question.
    query: str
    # Embedding vector computed for the query.
    embedding: List[float]
    # Pinecone match objects returned by the vector search.
    results: list
    # Final LLM-generated answer text.
    answer: str
class EmbeddingService:
    """Thin wrapper around the OpenAI embeddings endpoint."""

    def __init__(self, client):
        # Injected OpenAI client — shared with the rest of the service layer.
        self.client = client

    def embed(self, text):
        """Return the embedding vector for a single string."""
        response = self.client.embeddings.create(
            model="text-embedding-3-small",
            input=text,
        )
        return response.data[0].embedding
class PineconeService:
    """Owns the Pinecone index; creates it on first run if it is missing."""

    def __init__(self):
        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
        self.index_name = os.getenv("PINECONE_INDEX_NAME")
        known = {idx.name for idx in self.pc.list_indexes()}
        if self.index_name not in known:
            # dimension=1536 matches text-embedding-3-small's output size.
            self.pc.create_index(
                name=self.index_name,
                dimension=1536,
                metric="cosine",
                spec=ServerlessSpec(
                    cloud="aws",
                    region=os.getenv("PINECONE_ENV"),
                ),
            )
        self.index = self.pc.Index(self.index_name)

    def query(self, vector):
        """Return the top-3 matches (with metadata) for an embedding vector."""
        response = self.index.query(
            vector=vector,
            top_k=3,
            include_metadata=True,
        )
        return response.matches
class LLMService:
    """Generates answers with an OpenAI chat model, grounded in retrieved context."""

    def __init__(self):
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    def generate(self, query, context):
        """Ask the model to answer `query` using only `context`."""
        prompt = f"""
Use the context below to answer the question.
Context:
{context}
Question:
{query}
"""
        completion = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message.content
class LangGraphService:
    """Wires embed → search → answer into a compiled LangGraph app."""

    def __init__(self):
        openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.embedder = EmbeddingService(openai_client)
        self.vector = PineconeService()
        self.llm = LLMService()
        self.graph = StateGraph(GraphState)
        self._build()
        # Compile once; ask() reuses the compiled app for every query.
        self.app = self.graph.compile()

    def embed_query(self, state):
        """Node: turn the query text into an embedding vector."""
        return {"embedding": self.embedder.embed(state["query"])}

    def vector_search(self, state):
        """Node: fetch the top matches for the query embedding."""
        return {"results": self.vector.query(state["embedding"])}

    def generate_answer(self, state):
        """Node: build a context string from the matches and ask the LLM."""
        snippets = [match.metadata["text"] for match in state["results"]]
        answer = self.llm.generate(state["query"], "\n".join(snippets))
        return {"answer": answer}

    def _build(self):
        """Register nodes and edges: embed → search → answer → END."""
        self.graph.add_node("embed", self.embed_query)
        self.graph.add_node("search", self.vector_search)
        self.graph.add_node("answer", self.generate_answer)
        self.graph.set_entry_point("embed")
        self.graph.add_edge("embed", "search")
        self.graph.add_edge("search", "answer")
        self.graph.add_edge("answer", END)

    def ask(self, query: str):
        """Run the compiled graph for `query` and return the answer text."""
        final_state = self.app.invoke({"query": query})
        return final_state["answer"]
Step 8 – Create FastAPI Backend
File: api.py
Python code:
# ============================================
# FASTAPI LAYER FOR RAG SYSTEM
# ============================================
from fastapi.middleware.cors import CORSMiddleware
from fastapi import FastAPI
from pydantic import BaseModel
from lgdemo import LangGraphService

# FastAPI application instance.
app = FastAPI()

# RAG service is built once at startup (constructs the graph and the
# Pinecone/OpenAI clients) and shared across all requests.
rag = LangGraphService()


class QueryRequest(BaseModel):
    """Request body schema for POST /ask."""
    question: str


# CORS is wide open for local development so the React dev server can call us.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # allow React
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Use this for production with specific origins:
# allow_origins=[
#     "http://localhost:3000",
#     "https://yourdomain.com"
# ]
# Root test endpoint
@app.get("/")
def home():
    """Health check: confirms the API process is up and reachable."""
    return {"message": "RAG API is running"}
# Main endpoint
@app.post("/ask")
def ask_question(request: QueryRequest):
    """Run the RAG pipeline on the posted question and echo both back."""
    result = rag.ask(request.question)
    return {
        "question": request.question,
        "answer": result,
    }
Step 9 – Run the API Server
👉 Open in browser:
👉 API docs (Swagger UI):
Step 10 – Test API
Example POST request
{
  "question": "What is LangGraph?"
}
Example response
{
  "question": "What is LangGraph?",
  "answer": "LangGraph is a framework..."
}
Step 11 – Install React (If You Don’t Have It Yet)
1. Install Node.js (Required for React)
React runs on Node, so install it first:
👉 Download from: Node.js
Install Steps (Windows)
- Download the LTS version
- Run installer
- ✅ Check: “Add to PATH”
- Click Next → Finish
2. Verify Installation
Open a new terminal in VS Code and run:
npm -v
👉 You should see version numbers like:
10.x.x
If you see “not recognized” → restart VS Code or your computer.
3. Create React App (Frontend)
Inside your project root (rag-project):
What this does:
- Installs React
- Creates full frontend project
- Sets up everything automatically
4. Go into the React Folder
5. Install Dependencies (just in case)
6. Start React App
👉 This will:
- Launch browser automatically
- Open:
7. Confirm It Works
You should see:
👉 React logo spinning
👉 “Edit src/App.js”
8. Folder You Just Created
│── src/
│── public/
│── package.json
Install Required Package (Markdown)
Run this inside /frontend:
Step 12 – Replace App.js with a custom React script to create a Chatbot UI:
File: frontend/src/App.js
import { useState, useEffect, useRef } from "react";
import ReactMarkdown from "react-markdown";function App() {
const [messages, setMessages] = useState(() => {
const saved = localStorage.getItem("chat_history");
return saved ? JSON.parse(saved) : [];
});
const [input, setInput] = useState("");
const [loading, setLoading] = useState(false);
const bottomRef = useRef(null);
// Save chat history
useEffect(() => {
localStorage.setItem("chat_history", JSON.stringify(messages));
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
}, [messages]);
// Fake streaming effect
function streamText(text, callback) {
let index = 0;
let current = "";
const interval = setInterval(() => {
current += text[index];
index++;
callback(current);
if (index >= text.length) {
clearInterval(interval);
}
}, 15); // speed (lower = faster)
}
async function sendMessage() {
if (!input.trim() || loading) return;
const userMessage = { role: "user", content: input };
setMessages((prev) => [...prev, userMessage]);
const question = input;
setInput("");
setLoading(true);
try {
const res = await fetch("http://127.0.0.1:8000/ask", {
method: "POST",
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({ question })
});
const data = await res.json();
// Add empty assistant message first
setMessages((prev) => [
...prev,
{ role: "assistant", content: "" }
]);
// Stream into last message
streamText(data.answer, (partial) => {
setMessages((prev) => {
const updated = [...prev];
updated[updated.length - 1].content = partial;
return updated;
});
});
} catch (err) {
setMessages((prev) => [
...prev,
{ role: "assistant", content: "Error connecting to API." }
]);
}
setLoading(false);
}
function handleKeyPress(e) {
if (e.key === "Enter" && !e.shiftKey) {
e.preventDefault();
sendMessage();
}
}
function clearChat() {
localStorage.removeItem("chat_history");
setMessages([]);
}
return (
<div style={styles.container}>
<div style={styles.header}>
<h1>RAG AI Chat</h1>
<button onClick={clearChat} style={styles.clearBtn}>
Clear
</button>
</div>
<div style={styles.chatBox}>
{messages.map((msg, index) => (
<div
key={index}
style={{
...styles.message,
alignSelf:
msg.role === "user" ? "flex-end" : "flex-start",
background:
msg.role === "user" ? "#6366f1" : "#1f2937",
color: "#fff"
}}
>
<ReactMarkdown>{msg.content}</ReactMarkdown>
</div>
))}
{loading && <div style={styles.loading}>Thinking...</div>}
<div ref={bottomRef} />
</div>
<div style={styles.inputArea}>
<textarea
value={input}
onChange={(e) => setInput(e.target.value)}
onKeyDown={handleKeyPress}
placeholder="Ask something..."
style={styles.input}
rows={2}
/>
<button
onClick={sendMessage}
style={styles.button}
disabled={loading}
>
{loading ? "..." : "Send"}
</button>
</div>
</div>
);
}
// Inline style objects for the chat UI (dark theme).
const styles = {
// Full-page wrapper: centered column, dark background.
container: {
maxWidth: "800px",
margin: "0 auto",
padding: "20px",
fontFamily: "Arial",
color: "#fff",
background: "#111827",
minHeight: "100vh"
},
// Title row with the Clear button pushed to the far edge.
header: {
display: "flex",
justifyContent: "space-between",
alignItems: "center"
},
// Red "Clear" button in the header.
clearBtn: {
background: "#ef4444",
color: "#fff",
border: "none",
padding: "6px 10px",
borderRadius: "5px",
cursor: "pointer"
},
// Scrollable message list; column flex so bubbles can align left/right.
chatBox: {
display: "flex",
flexDirection: "column",
height: "500px",
overflowY: "auto",
border: "1px solid #374151",
padding: "10px",
borderRadius: "10px",
marginBottom: "10px",
background: "#111827"
},
// Base chat bubble; background/alignment are overridden per role inline.
message: {
padding: "10px",
margin: "5px 0",
borderRadius: "10px",
maxWidth: "75%",
lineHeight: "1.4"
},
// Row holding the textarea and the Send button.
inputArea: {
display: "flex",
gap: "10px"
},
// Question textarea; flex: 1 makes it fill the remaining row width.
input: {
flex: 1,
padding: "10px",
borderRadius: "5px",
border: "1px solid #374151",
background: "#1f2937",
color: "#fff"
},
// Indigo "Send" button.
button: {
padding: "10px 15px",
background: "#6366f1",
color: "#fff",
border: "none",
borderRadius: "5px",
cursor: "pointer"
},
// "Thinking..." indicator shown while awaiting the API response.
loading: {
fontStyle: "italic",
color: "#9ca3af"
}
};
export default App;
Run the script: npm start
Tip: Make sure to ask a question that is answered by one of the docs ingested into Pinecone, to confirm the RAG part works!

