19-step SQL analysis on 100,000 Brazilian e-commerce orders using Google BigQuery — covering revenue, delivery performance, customer behaviour, and product category insights.
This analysis covers a Brazilian e-commerce dataset with 100,000 orders across 8 relational tables — customers, orders, order items, payments, reviews, products, sellers, and geolocation. The goal: extract actionable business intelligence using only SQL in Google BigQuery.
| Table | Key Columns | Records |
|---|---|---|
| orders | order_id, customer_id, order_status, timestamps | 99,441 |
| customers | customer_id, customer_unique_id, state, city | 99,441 |
| order_items | order_id, product_id, seller_id, price, freight | 112,650+ |
| payments | order_id, payment_type, installments, value | 103,886 |
| order_reviews | order_id, review_score, comment | 99,224 |
| products | product_id, category, dimensions, weight | 32,951 |
| sellers | seller_id, zip, city, state | 3,095 |
| geolocation | zip_code_prefix, lat, lng, city, state | 1M+ |
-- Customer spend segmentation using CTEs WITH customer_spend AS ( SELECT c.customer_unique_id, ROUND(SUM(p.payment_value), 2) AS total_spent, COUNT(DISTINCT o.order_id) AS total_orders, ROUND(SUM(p.payment_value) / COUNT(DISTINCT o.order_id), 2) AS avg_spend FROM orders o JOIN customers c ON o.customer_id = c.customer_id JOIN payments p ON o.order_id = p.order_id WHERE o.order_status = 'delivered' GROUP BY c.customer_unique_id ), segmented AS ( SELECT *, CASE WHEN avg_spend <= 100 THEN 'Low Spender' WHEN avg_spend <= 500 THEN 'Medium Spender' ELSE 'High Spender' END AS spend_segment FROM customer_spend ) SELECT spend_segment, COUNT(*) AS customers FROM segmented GROUP BY spend_segment ORDER BY customers DESC; -- Medium: 45,167 (48%) · Low: 44,226 (47%) · High: 3,964 (4%)
-- Year-on-Year revenue growth (Jan–Aug comparison) WITH yearly AS ( SELECT EXTRACT(YEAR FROM o.order_purchase_timestamp) AS year, ROUND(SUM(p.payment_value), 2) AS total_payment FROM orders o JOIN payments p ON o.order_id = p.order_id WHERE o.order_status = 'delivered' AND EXTRACT(MONTH FROM order_purchase_timestamp) BETWEEN 1 AND 8 AND EXTRACT(YEAR FROM order_purchase_timestamp) IN (2017, 2018) GROUP BY year ) SELECT MAX(CASE WHEN year=2017 THEN total_payment END) AS y2017, MAX(CASE WHEN year=2018 THEN total_payment END) AS y2018, ROUND((MAX(CASE WHEN year=2018 ...) - MAX(CASE WHEN year=2017 ...)) / MAX(CASE WHEN year=2017 ...) * 100, 2) AS pct_increase FROM yearly; -- 2017: R$3.47M → 2018: R$8.45M → 143.33% growth