Ce que vous saurez faire à la fin
- Utiliser les Window Functions pour analyses comparatives
- Écrire des CTE (WITH) pour lisibilité et performance
- Piloter des agrégations avancées (ROLLUP, CUBE, GROUPING SETS)
- Construire des requêtes RFM et pivots manuels
- Optimiser avec EXPLAIN ANALYZE et les index adaptés
Durée : 3 heures. Pré-requis : PostgreSQL ou équivalent, connaissance des JOIN basiques.
Étape 1 — RANK, DENSE_RANK, ROW_NUMBER
SELECT
region,
commercial,
ca,
RANK() OVER (PARTITION BY region ORDER BY ca DESC) AS rang,
DENSE_RANK() OVER (PARTITION BY region ORDER BY ca DESC) AS rang_dense,
ROW_NUMBER() OVER (PARTITION BY region ORDER BY ca DESC) AS ligne
FROM ventes_agregees;
- RANK : laisse des trous en cas d’égalité (1, 2, 2, 4).
- DENSE_RANK : pas de trou (1, 2, 2, 3).
- ROW_NUMBER : unique, arbitraire en cas d’égalité (1, 2, 3, 4).
Étape 2 — LAG, LEAD pour comparaisons temporelles
SELECT
mois,
ca,
LAG(ca) OVER (ORDER BY mois) AS ca_m_1,
LEAD(ca) OVER (ORDER BY mois) AS ca_m_p1,
ca - LAG(ca) OVER (ORDER BY mois) AS evolution,
ROUND(100.0 * (ca - LAG(ca) OVER (ORDER BY mois))
/ NULLIF(LAG(ca) OVER (ORDER BY mois), 0), 1) AS evol_pct
FROM ventes_mois
ORDER BY mois;
Étape 3 — Moyenne mobile et cumul
SELECT
mois,
ca,
SUM(ca) OVER (ORDER BY mois) AS cumul_annuel,
AVG(ca) OVER (ORDER BY mois
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moy_mobile_3m,
AVG(ca) OVER (ORDER BY mois
ROWS BETWEEN 11 PRECEDING AND CURRENT ROW) AS moy_mobile_12m
FROM ventes_mois;
Étape 4 — CTE (WITH)
WITH top_clients AS (
SELECT client_id, SUM(montant) AS total
FROM factures
WHERE annee = 2026 AND statut = 'payee'
GROUP BY client_id
ORDER BY total DESC
LIMIT 10
),
top_produits AS (
SELECT produit_id, SUM(qte * pu) AS total
FROM lignes_facture
GROUP BY produit_id
ORDER BY total DESC
LIMIT 10
)
SELECT c.nom, tc.total AS ca_client,
(SELECT string_agg(p.nom, ', ')
FROM top_produits tp JOIN produits p ON p.id = tp.produit_id) AS top_produits
FROM top_clients tc
JOIN clients c ON c.id = tc.client_id;
Étape 5 — CTE récursive
-- Organigramme: tous les subordonnés d'un manager
WITH RECURSIVE equipe AS (
SELECT id, nom, manager_id, 1 AS niveau
FROM employes
WHERE id = 42
UNION ALL
SELECT e.id, e.nom, e.manager_id, eq.niveau + 1
FROM employes e
JOIN equipe eq ON e.manager_id = eq.id
)
SELECT repeat(' ', niveau - 1) || nom AS hierarchie
FROM equipe
ORDER BY niveau, nom;
Étape 6 — Pivot manuel
SELECT
region,
SUM(CASE WHEN mois = 1 THEN ca END) AS jan,
SUM(CASE WHEN mois = 2 THEN ca END) AS fev,
SUM(CASE WHEN mois = 3 THEN ca END) AS mar,
SUM(CASE WHEN mois = 4 THEN ca END) AS avr,
SUM(ca) AS total
FROM ventes
WHERE annee = 2026
GROUP BY region
ORDER BY total DESC;
Étape 7 — crosstab (PostgreSQL)
CREATE EXTENSION IF NOT EXISTS tablefunc;
SELECT * FROM crosstab(
'SELECT region, mois, SUM(ca)
FROM ventes WHERE annee=2026
GROUP BY 1,2
ORDER BY 1,2',
'SELECT generate_series(1,12)'
) AS t(region TEXT,
jan NUMERIC, fev NUMERIC, mar NUMERIC, avr NUMERIC,
mai NUMERIC, jun NUMERIC, jul NUMERIC, aou NUMERIC,
sep NUMERIC, oct NUMERIC, nov NUMERIC, dec NUMERIC);
Étape 8 — Segmentation RFM (marketing)
WITH rfm AS (
SELECT
client_id,
(CURRENT_DATE - MAX(date_commande)) AS recence_jours,
COUNT(*) AS frequence,
SUM(montant) AS montant_total
FROM commandes
WHERE date_commande >= CURRENT_DATE - INTERVAL '2 years'
GROUP BY client_id
)
SELECT
client_id,
recence_jours,
frequence,
montant_total,
NTILE(5) OVER (ORDER BY recence_jours DESC) AS r_score,
NTILE(5) OVER (ORDER BY frequence) AS f_score,
NTILE(5) OVER (ORDER BY montant_total) AS m_score,
CASE
WHEN NTILE(5) OVER (ORDER BY recence_jours DESC) >= 4
AND NTILE(5) OVER (ORDER BY frequence) >= 4 THEN 'Champion'
WHEN NTILE(5) OVER (ORDER BY recence_jours DESC) = 1 THEN 'A reactiver'
WHEN NTILE(5) OVER (ORDER BY frequence) = 1 THEN 'Nouveau'
ELSE 'Regulier'
END AS segment
FROM rfm;
Étape 9 — LATERAL JOIN
-- Pour chaque client, les 3 dernières commandes
SELECT c.nom, t.*
FROM clients c
CROSS JOIN LATERAL (
SELECT date_commande, montant
FROM commandes
WHERE client_id = c.id
ORDER BY date_commande DESC
LIMIT 3
) t
ORDER BY c.nom, t.date_commande DESC;
Étape 10 — GROUPING SETS, ROLLUP, CUBE
-- ROLLUP: totaux hiérarchiques
SELECT region, commercial, SUM(ca)
FROM ventes
GROUP BY ROLLUP(region, commercial);
-- Produit: (region,commercial), (region), (grand total)
-- CUBE: toutes les combinaisons
SELECT region, commercial, SUM(ca)
FROM ventes
GROUP BY CUBE(region, commercial);
-- GROUPING SETS: niveaux spécifiques
SELECT region, commercial, SUM(ca)
FROM ventes
GROUP BY GROUPING SETS ((region), (commercial), ());
Étape 11 — JSON/JSONB
SELECT
payload->>'user' AS user,
(payload->>'montant')::NUMERIC AS montant,
payload->'tags' @> '["promo"]' AS a_promo,
jsonb_array_length(payload->'lignes') AS nb_lignes,
payload #>> '{adresse,ville}' AS ville
FROM evenements
WHERE payload @> '{"type":"order"}';
-- Extraire et grouper
SELECT payload->>'categorie' AS cat, COUNT(*)
FROM evenements
GROUP BY 1;
Étape 12 — Window avec PARTITION
-- Pourcentage du total par région
SELECT
region,
commercial,
ca,
ca / SUM(ca) OVER (PARTITION BY region) AS pct_region,
ca / SUM(ca) OVER () AS pct_total
FROM ventes_agregees;
Étape 13 — EXPLAIN ANALYZE
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT c.nom, SUM(f.montant)
FROM clients c
JOIN factures f ON f.client_id = c.id
WHERE f.date_emission >= '2026-01-01'
GROUP BY c.id, c.nom
HAVING SUM(f.montant) > 500000
ORDER BY SUM(f.montant) DESC;
Points à analyser :
- Seq Scan sur grande table : index manquant, ajoutez-en un
- Nested Loop avec beaucoup de lignes : envisagez Hash Join (augmentez work_mem)
- actual time très différent de cost estimé : lancez
ANALYZE table - Bitmap Heap Scan : souvent optimal pour multi-critères
Étape 14 — Index partiels et fonctionnels
-- Index partiel (uniquement sur lignes actives)
CREATE INDEX idx_factures_actives
ON factures(client_id, date_emission DESC)
WHERE statut = 'envoyee';
-- Index fonctionnel (pour recherche insensible à la casse)
CREATE INDEX idx_clients_email_lower
ON clients(LOWER(email));
-- Utilisation
SELECT * FROM clients WHERE LOWER(email) = LOWER('CONTACT@sarl.sn');
Étape 15 — Optimisations avancées
-- Matérialiser un CTE coûteux en table temporaire
CREATE TEMPORARY TABLE tmp_stats AS
SELECT client_id, SUM(montant) AS total
FROM factures
GROUP BY client_id;
CREATE INDEX ON tmp_stats(total);
-- Vue matérialisée (rafraîchie manuellement)
CREATE MATERIALIZED VIEW mv_ca_mensuel AS
SELECT date_trunc('month', date_emission) AS mois, SUM(montant) AS ca
FROM factures
WHERE statut = 'payee'
GROUP BY 1;
CREATE INDEX ON mv_ca_mensuel(mois);
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_ca_mensuel;
Checklist SQL analyste
✓ Window functions pour comparaisons/classements
✓ CTE (WITH) pour lisibilité sur requêtes complexes
✓ NTILE pour segmentation en quantiles
✓ LATERAL JOIN pour top-N par groupe
✓ Pivot manuel avec CASE WHEN pour reporting
✓ Index partiels sur filtres fréquents
✓ EXPLAIN ANALYZE avant push en prod
✓ ANALYZE régulier pour stats à jour
✓ Vues matérialisées pour dashboards lents