Construire un système RAG (Retrieval Augmented Generation) avec PostgreSQL + pgvector + Claude/OpenAI en 2026.
Voir notre guide sur les extensions PostgreSQL.
Architecture RAG
- Indexation : documents → chunks → embeddings → pgvector
- Query : question utilisateur → embedding → similarity search → top K chunks
- Génération : Claude/GPT-4 reçoit la question + chunks → réponse contextualisée
Schéma SQL
-- Enable pgvector (provides the vector type and distance operators).
CREATE EXTENSION IF NOT EXISTS vector;
-- One row per source document (file, URL, ticket, ...).
CREATE TABLE documents (
id serial PRIMARY KEY,
source varchar(255),
title text,
created_at timestamp DEFAULT now() -- NOTE(review): timestamptz is usually preferable — confirm timezone requirements
);
-- One row per text chunk; embedding dimension 1536 matches
-- OpenAI text-embedding-3-small used in the indexing code below.
CREATE TABLE chunks (
id serial PRIMARY KEY,
document_id integer REFERENCES documents(id) ON DELETE CASCADE, -- deleting a document removes its chunks
content text NOT NULL,
embedding vector(1536) NOT NULL,
metadata jsonb,
created_at timestamp DEFAULT now()
);
-- Index HNSW pour recherche rapide
-- HNSW approximate-nearest-neighbor index; vector_cosine_ops pairs with
-- the <=> (cosine distance) operator used by the query below.
CREATE INDEX chunks_embedding_idx ON chunks
USING hnsw (embedding vector_cosine_ops);
Indexation (Node.js)
import OpenAI from "openai";
import { db } from "./db";
const openai = new OpenAI();
/**
 * Splits `text` into chunks of at most ~maxLen characters, breaking only
 * on sentence boundaries (., !, ?) so no sentence is cut in half.
 *
 * A single sentence longer than `maxLen` becomes its own oversized chunk.
 * Kept `async` for backward compatibility with existing `await` callers,
 * even though the body does no asynchronous work.
 *
 * @param text - Raw document text.
 * @param maxLen - Soft maximum chunk length in characters (default 1000).
 * @returns Non-empty chunks, in document order.
 */
async function chunkText(text: string, maxLen = 1000): Promise<string[]> {
  // Split after sentence-ending punctuation followed by whitespace.
  const sentences = text.split(/(?<=[.!?])\s+/);
  const chunks: string[] = [];
  let current = "";
  for (const s of sentences) {
    // Flush when adding this sentence would overflow. The `current &&`
    // guard avoids pushing an empty chunk when a single sentence already
    // exceeds maxLen (bug in the original).
    if (current && (current + " " + s).length > maxLen) {
      chunks.push(current);
      current = s;
    } else {
      // Join with a single separating space. The original prepended " "
      // even to the first sentence, giving every chunk a leading space.
      current = current ? current + " " + s : s;
    }
  }
  if (current) chunks.push(current);
  return chunks;
}
/**
 * Indexes a document: stores its metadata row, splits the text into
 * sentence-aligned chunks, embeds all chunks in a single API call, then
 * bulk-inserts the chunk rows.
 *
 * @param source - Origin identifier (URL, filename, ...).
 * @param title - Human-readable document title.
 * @param text - Full document text to index.
 */
async function indexDocument(source: string, title: string, text: string) {
  const [doc] = await db.insert(documents).values({ source, title }).returning();
  const chunks = await chunkText(text);
  if (chunks.length === 0) return; // empty text → nothing to embed
  // One embeddings request for the whole document: the API accepts an
  // array input and returns one embedding per element.
  const embeddings = await openai.embeddings.create({
    model: "text-embedding-3-small",
    input: chunks,
  });
  // Pair each embedding with its chunk via the `index` field rather than
  // array position — the API documents `index` as the authoritative link.
  const rows = embeddings.data.map((e) => ({
    document_id: doc.id,
    content: chunks[e.index],
    embedding: e.embedding,
  }));
  // Single bulk insert instead of one round trip per chunk.
  // NOTE(review): document + chunks are not atomic here — wrap both
  // inserts in db.transaction(...) if the client supports it, so a failed
  // chunk insert cannot leave an orphan documents row.
  await db.insert(chunksTable).values(rows);
}
Query RAG
import Anthropic from "@anthropic-ai/sdk";
const claude = new Anthropic();
/**
 * Answers a question with RAG: embeds the question, retrieves the 5 most
 * similar chunks from pgvector, and asks Claude to answer from that
 * context only.
 *
 * @param question - User question (French expected by the prompt).
 * @returns The model's context-grounded answer text.
 * @throws If the first response block is not a text block.
 */
async function ragQuery(question: string): Promise<string> {
  // 1. Embed the question with the SAME model used at indexing time —
  //    similarity across different embedding models is meaningless.
  const qEmbed = await openai.embeddings.create({
    model: "text-embedding-3-small",
    input: question,
  });
  // 2. Top 5 chunks by cosine distance (<=>, served by the
  //    vector_cosine_ops HNSW index). pgvector expects a '[1,2,...]'
  //    literal for the ::vector cast; interpolating the raw JS number[]
  //    produces an invalid parameter (bug in the original), so serialize
  //    it with JSON.stringify first.
  const vectorLiteral = JSON.stringify(qEmbed.data[0].embedding);
  const results = await db.execute(sql`
    SELECT content, document_id
    FROM chunks
    ORDER BY embedding <=> ${vectorLiteral}::vector
    LIMIT 5
  `);
  const context = results.map((r) => r.content).join("\n\n---\n\n");
  // 3. Generate the answer, constrained to the retrieved context.
  const response = await claude.messages.create({
    model: "claude-sonnet-4-6",
    max_tokens: 1024,
    messages: [{
      role: "user",
      content: `Contexte :\n${context}\n\nQuestion : ${question}\n\nRéponds uniquement avec les informations du contexte. En français.`,
    }],
  });
  // The Messages API returns a union of block types (text, tool_use, ...);
  // guard instead of assuming content[0].text exists.
  const first = response.content[0];
  if (first.type !== "text") {
    throw new Error(`Unexpected response block type: ${first.type}`);
  }
  return first.text;
}
Cas d’usage Afrique
- FAQ intelligente sur catalogue produits PME
- Assistant comptable qui répond depuis vos manuels comptables
- Support client basé sur historique tickets
- Recherche dans documents juridiques BCEAO/UEMOA