Lab 022: RAG Pipeline with GitHub Models + pgvector
What You'll Learn
- Spin up pgvector (PostgreSQL + vectors) locally via Docker
- Generate embeddings with GitHub Models (text-embedding-3-small) — free
- Ingest documents: chunk, embed, store in pgvector
- Query with semantic similarity search (the <=> cosine operator)
- Build a full RAG answer pipeline (retrieve → augment → generate)
Introduction
In Lab 006 you learned RAG theory. Here you build the real thing — a working RAG system using only free tools: GitHub Models for embeddings + generation, and pgvector running locally in Docker.
Pre-built dataset included
This lab uses the OutdoorGear sample dataset — 25 products, FAQs, and company policies, ready to ingest.
📥 data/products.csv · data/knowledge-base.json
Prerequisites
- Docker Desktop — free: https://www.docker.com/products/docker-desktop
- Python 3.11+
- GITHUB_TOKEN set (from Lab 013)
Lab Exercise
Step 1: Start pgvector with Docker
# Start a disposable PostgreSQL 16 container with the pgvector extension preinstalled.
# The database name and credentials here must match the psycopg2.connect() calls
# in the Python scripts below (ragdb / postgres / ragpass on localhost:5432).
docker run -d \
--name pgvector-rag \
-e POSTGRES_PASSWORD=ragpass \
-e POSTGRES_DB=ragdb \
-p 5432:5432 \
pgvector/pgvector:pg16
Verify it's running: docker ps --filter name=pgvector-rag should list the container with status "Up".
Step 2: Create the database schema
# setup_db.py
# Creates the RAG schema: the pgvector extension, a documents table, and an
# approximate-nearest-neighbour index for cosine similarity search.
import psycopg2

conn = psycopg2.connect(
    host="localhost", port=5432,
    dbname="ragdb", user="postgres", password="ragpass"
)
try:
    # `with conn:` commits on success and rolls back on error;
    # `with conn.cursor():` closes the cursor either way.
    with conn, conn.cursor() as cur:
        # Enable pgvector (idempotent).
        cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
        cur.execute("""
            CREATE TABLE IF NOT EXISTS documents (
                id SERIAL PRIMARY KEY,
                title TEXT NOT NULL,
                content TEXT NOT NULL,
                embedding vector(1536),  -- text-embedding-3-small dimension
                source TEXT
            );
        """)
        # ivfflat trades exact search for speed; lists=10 suits a tiny demo
        # corpus. NOTE(review): ivfflat clusters are derived from existing
        # rows, so for best recall the index is normally (re)built AFTER
        # ingesting data — fine to skip for a 25-document lab.
        cur.execute("""
            CREATE INDEX IF NOT EXISTS documents_embedding_idx
            ON documents USING ivfflat (embedding vector_cosine_ops)
            WITH (lists = 10);
        """)
finally:
    conn.close()
print("Database ready.")
Step 3: Ingest documents from the sample dataset
The sample dataset lives in the repo. Download it (or use the URL directly):
curl -O https://raw.githubusercontent.com/lcarli/AI-LearningHub/main/data/products.csv
curl -O https://raw.githubusercontent.com/lcarli/AI-LearningHub/main/data/knowledge-base.json
# ingest.py
# Loads the sample dataset, embeds every document with GitHub Models, and
# inserts the rows into the pgvector `documents` table created by setup_db.py.
import os, csv, json, psycopg2
from openai import OpenAI

# GitHub Models exposes an OpenAI-compatible endpoint; a GitHub PAT in the
# GITHUB_TOKEN env var is the only credential needed (free tier).
client = OpenAI(
    base_url="https://models.inference.ai.azure.com",
    api_key=os.environ["GITHUB_TOKEN"],
)
def build_documents() -> list[dict]:
    """Load products.csv and knowledge-base.json into a flat list of documents.

    Each document is a dict with keys ``title``, ``content`` and ``source``
    (one of ``product-catalog``, ``policies``, ``faq``, ``guide``), ready to
    be embedded and inserted into the ``documents`` table.
    """
    docs: list[dict] = []
    # --- Products from CSV ---
    # newline="" is the csv-module requirement for correct quoting/newline
    # handling; explicit UTF-8 avoids platform-dependent default encodings.
    with open("products.csv", encoding="utf-8", newline="") as f:
        for p in csv.DictReader(f):
            docs.append({
                "title": f"{p['name']} — Product Info",
                # One flat text blob per product: this whole string is what
                # gets embedded, so keep the searchable facts in it.
                "content": (
                    f"{p['name']} ({p['category']}/{p['subcategory']}). "
                    f"SKU: {p['sku']}. Price: ${p['price']}. "
                    f"In stock: {p['in_stock']}. Weight: {p['weight_kg']}kg. "
                    f"Rating: {p['rating']}/5. {p['description']}"
                ),
                "source": "product-catalog",
            })
    # --- Policies, FAQs, and guides from JSON ---
    with open("knowledge-base.json", encoding="utf-8") as f:
        kb = json.load(f)
    for section in kb["sections"].values():
        docs.append({
            "title": section["title"],
            "content": section["content"],
            "source": "policies",
        })
    for faq in kb["faqs"]:
        docs.append({
            "title": f"FAQ: {faq['question']}",
            "content": f"Q: {faq['question']}\nA: {faq['answer']}",
            "source": "faq",
        })
    for guide in kb["product_guides"]:
        docs.append({
            "title": guide["title"],
            "content": guide["content"],
            "source": "guide",
        })
    return docs
def get_embedding(text: str) -> list[float]:
    """Embed *text* with GitHub Models' text-embedding-3-small model."""
    result = client.embeddings.create(model="text-embedding-3-small", input=text)
    return result.data[0].embedding
documents = build_documents()
print(f"Prepared {len(documents)} documents to ingest")

# Context managers guarantee the cursor closes and the transaction commits
# (or rolls back on error) even if an embedding call fails mid-ingest.
# NOTE(review): re-running this script inserts duplicates — the table has no
# uniqueness constraint; TRUNCATE documents first to re-ingest cleanly.
conn = psycopg2.connect(
    host="localhost", port=5432,
    dbname="ragdb", user="postgres", password="ragpass"
)
try:
    with conn, conn.cursor() as cur:
        for doc in documents:
            print(f" Embedding: {doc['title'][:60]}")
            embedding = get_embedding(doc["content"])
            cur.execute(
                "INSERT INTO documents (title, content, embedding, source) VALUES (%s, %s, %s, %s)",
                (doc["title"], doc["content"], embedding, doc["source"]),
            )
finally:
    conn.close()
print(f"\n✅ Ingested {len(documents)} documents.")
Step 4: Query with semantic search
# search.py
# Semantic similarity search over the ingested documents: embed the query,
# then rank rows by pgvector cosine distance.
import os, psycopg2
from openai import OpenAI

# Same GitHub Models endpoint and token as ingest.py — queries MUST be
# embedded with the same model that embedded the documents.
client = OpenAI(
    base_url="https://models.inference.ai.azure.com",
    api_key=os.environ["GITHUB_TOKEN"],
)
def search(query: str, top_k: int = 3) -> list[dict]:
    """Return the ``top_k`` documents most similar to *query*.

    Each hit is a dict with ``title``, ``content``, ``source`` and a
    ``similarity`` score computed as 1 - cosine distance (pgvector ``<=>``),
    so higher means more similar.
    """
    # Embed the query with the same model used at ingest time.
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=query,
    )
    query_embedding = response.data[0].embedding
    # Cosine similarity search in pgvector. The context managers release the
    # cursor and finish the transaction even if the query raises; the
    # try/finally closes the connection in all cases (the original leaked it
    # on error).
    conn = psycopg2.connect(
        host="localhost", port=5432,
        dbname="ragdb", user="postgres", password="ragpass"
    )
    try:
        with conn, conn.cursor() as cur:
            cur.execute("""
                SELECT title, content, source,
                       1 - (embedding <=> %s::vector) AS similarity
                FROM documents
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            """, (query_embedding, query_embedding, top_k))
            return [
                {"title": title, "content": content,
                 "source": source, "similarity": similarity}
                for title, content, source, similarity in cur.fetchall()
            ]
    finally:
        conn.close()
# Test
# Smoke-test retrieval: prints "[similarity] title" for the top matches.
results = search("how waterproof are the boots?")
for r in results:
    print(f"[{r['similarity']:.3f}] {r['title']}")
Step 5: The RAG answer pipeline
# rag.py
# The full RAG loop: retrieve relevant documents (search.py), augment the
# prompt with them, and generate a grounded answer with a chat model.
import os
from openai import OpenAI
from search import search

# Same GitHub Models endpoint/token as the other scripts.
client = OpenAI(
    base_url="https://models.inference.ai.azure.com",
    api_key=os.environ["GITHUB_TOKEN"],
)
def answer(question: str) -> str:
    """Answer *question* via the RAG loop: retrieve → augment → generate."""
    # 1. Retrieve the most relevant documents.
    hits = search(question, top_k=3)

    # 2. Augment: fold the hits into one context string the model can cite.
    snippets = []
    for hit in hits:
        snippets.append(
            f"**{hit['title']}** (similarity: {hit['similarity']:.2f})\n{hit['content']}"
        )
    context = "\n\n".join(snippets)

    # 3. Generate. temperature=0 keeps answers deterministic; the system
    # prompt constrains the model to the retrieved context.
    system_prompt = (
        "You are a helpful outdoor gear assistant. "
        "Answer questions using ONLY the provided context. "
        "If the context doesn't contain the answer, say so honestly. "
        "Always cite which document your answer comes from."
    )
    user_prompt = f"Context:\n{context}\n\nQuestion: {question}"
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    return response.choices[0].message.content
# Test the full pipeline end-to-end: each question should be answered from
# the ingested dataset (policies, products, FAQs).
questions = [
    "Can I return hiking boots I've already worn?",
    "How much does express shipping cost?",
    "What certification does the harness have?",
    "What are the tent dimensions?",
]
for q in questions:
    print(f"\n❓ {q}")
    print(f"💬 {answer(q)}")
    print("─" * 60)
Understanding the Results
Similarity scores
- > 0.85 — very strong match (the document directly answers the question)
- 0.70–0.85 — related (might be relevant)
- < 0.70 — weak match (probably not helpful, consider filtering)
Add a threshold filter to avoid using low-confidence documents:
# This fragment belongs inside answer(): retrieve extra candidates, keep only
# confident matches, and bail out early when nothing clears the threshold.
docs = [d for d in search(question, top_k=5) if d["similarity"] > 0.75]
if not docs:
    return "I don't have information about that in my knowledge base."
Cleanup
When you're done, remove the container (and its data): docker rm -f pgvector-rag
Next Steps
- Agentic RAG (query rewriting, multi-hop): → Lab 026 — Agentic RAG Pattern
- RAG with Semantic Kernel: → Lab 023 — SK Plugins, Memory & Planners
- Production pgvector on Azure: → Lab 031 — pgvector on Azure