DuckDB Cheatsheet

One-page DuckDB cheatsheet — installation, CSV/JSON, SQL, performance, Python, and more

🦆 DuckDB Cheatsheet

One-page quick reference — Installation, data import/export, SQL queries, JSON, performance, Python integration, and common patterns.


1️⃣ Installation

MethodCommand
Homebrew (macOS)brew install duckdb
pip (Python)pip install duckdb
Dockerdocker run --rm -it duckdb/duckdb
CLI (Linux/macOS binary)curl -fsSL https://install.duckdb.org | sh
-- Verify installation
SELECT version();
-- ┌──────────────┐
-- │  version()   │
-- ├──────────────┤
-- │ v1.3.0       │
-- └──────────────┘

2️⃣ CSV & Parquet Import/Export

-- Read CSV (auto-detect schema)
SELECT * FROM read_csv_auto('data.csv');

-- Read multiple CSV files at once
SELECT * FROM read_csv_auto('data/*.csv', header=true, delim=',');

-- Export to CSV
COPY (SELECT * FROM users WHERE active = true) TO 'active_users.csv'
  WITH (HEADER, DELIMITER ',');

-- Read Parquet
SELECT * FROM read_parquet('sales.parquet');

-- Export to Parquet (compressed, columnar)
COPY orders TO 'orders.parquet' (FORMAT PARQUET);

-- Combine CSV + Parquet in a single query
SELECT * FROM read_csv_auto('recent.csv')
UNION ALL
SELECT * FROM read_parquet('archive.parquet');

3️⃣ SQL Queries

SELECT & WHERE

SELECT name, salary, department
FROM employees
WHERE salary > 80000
  AND department IN ('Engineering', 'Product')
ORDER BY salary DESC
LIMIT 10;

GROUP BY with HAVING

SELECT department, COUNT(*) AS headcount, ROUND(AVG(salary), 0) AS avg_sal
FROM employees
GROUP BY department
HAVING COUNT(*) > 5
ORDER BY avg_sal DESC;

Window Functions

SELECT name, department, salary,
  RANK()       OVER (PARTITION BY department ORDER BY salary DESC) AS rank,
  DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dense_rank,
  LAG(salary)  OVER (PARTITION BY department ORDER BY salary)      AS prev_salary,
  AVG(salary)  OVER (PARTITION BY department)                      AS dept_avg
FROM employees;

JOIN Types

-- INNER / LEFT / RIGHT / FULL OUTER / CROSS / ANTI / SEMI
SELECT e.name, d.name AS dept
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.id;

-- ANTI JOIN: employees without a department
SELECT * FROM employees e
ANTI JOIN departments d ON e.dept_id = d.id;

-- SEMI JOIN: employees whose dept is in list
SELECT * FROM employees e
SEMI JOIN departments d ON e.dept_id = d.id;

4️⃣ Performance Tips

-- 🔍 Explain + Analyze to see query plan & timing
EXPLAIN ANALYZE
SELECT department, SUM(salary) FROM employees GROUP BY department;

-- 🧊 Materialized CTEs (avoid recomputation)
WITH high_earners AS MATERIALIZED (
  SELECT * FROM employees WHERE salary > 100000
)
SELECT department, COUNT(*) FROM high_earners GROUP BY department;

-- 📊 Create indexes on filtered/joined columns
CREATE INDEX idx_employees_dept ON employees(dept_id);
CREATE INDEX idx_employees_salary ON employees(salary);

-- 🚀 Use ART index for point lookups
SET enable_progress_bar = false;
SELECT * FROM employees WHERE id = 42;  -- uses ART index

-- 🧠 Sample for quick exploration on large datasets
SELECT * FROM huge_table USING SAMPLE 1%;

5️⃣ JSON Processing

-- Read JSON files directly
SELECT * FROM read_json_auto('logs.json');

-- Parse inline JSON
SELECT json_extract('{"name": "Alice", "age": 30}', '$.name');
-- "Alice"

-- Extract multiple fields with json_transform
SELECT json_transform(
  '{"name":"Alice","address":{"city":"NYC"}}',
  '{"name":"VARCHAR","city":"$.address.city"}'
);

-- VARIANT type: store mixed-type JSON
CREATE TABLE logs AS
SELECT '{"event":"click","userId":42}'::VARIANT AS data
UNION ALL
SELECT '{"event":"pageview","duration":2.5}'::VARIANT;

-- Query VARIANT column with nested access
SELECT data['event']::VARCHAR AS event_type FROM logs;

-- JSON array → rows with UNNEST
SELECT unnest(json_transform(
  '[{"x":1},{"x":2}]',
  '["VARCHAR"]'
)) AS items;

6️⃣ Python Integration

import duckdb
import pandas as pd

# In-memory database
con = duckdb.connect()

# Query pandas DataFrames directly
df = pd.DataFrame({"id": [1, 2, 3], "value": [10, 20, 30]})
result = con.execute("SELECT * FROM df WHERE value > 15").fetchdf()
print(result)
#    id  value
# 0   2     20
# 1   3     30

# Register a DataFrame as a named view
con.register("my_view", df)
con.execute("SELECT AVG(value) FROM my_view").fetchone()
# (20.0,)

# Create table from DataFrame
con.execute("CREATE TABLE my_table AS SELECT * FROM df")

# Efficient bulk insert from Python objects
con.executemany(
    "INSERT INTO my_table VALUES (?, ?)",
    [(4, 40), (5, 50), (6, 60)]
)

# Write query results to CSV/Parquet from Python
con.execute("COPY (SELECT * FROM df) TO 'output.parquet' (FORMAT PARQUET)")

7️⃣ Common Patterns

PIVOT / UNPIVOT

-- PIVOT: rows → columns
PIVOT sales ON quarter USING SUM(amount)
  GROUP BY product;

-- UNPIVOT: columns → rows
UNPivot monthly_sales
  ON jan, feb, mar
  INTO NAME month VALUE amount;

LIST & STRUCT

-- Aggregate into a list
SELECT department, LIST(name ORDER BY name) AS employees
FROM employees GROUP BY department;

-- Build a struct
SELECT {'name': name, 'salary': salary, 'level': 'senior'} AS profile
FROM employees WHERE salary > 120000;

UNNEST (array → rows)

-- Flatten a list column
SELECT department, unnest(projects) AS project
FROM employee_projects;

-- Generate a date series
SELECT unnest(generate_series(DATE '2024-01-01', DATE '2024-01-10', INTERVAL 1 DAY)) AS dt;

Friendly Quantities

SELECT
  total_sales,
  total_sales // 1e6 AS sales_millions,
  ROUND(total_sales / 1e6, 2) AS sales_millions_rounded
FROM quarterly_report;

8️⃣ 💰 Monetization Ideas

Turn DuckDB skills into income — check out our detailed guides:

PostTopic
DuckDB as new jqReplace jq for JSON processing in data pipelines
DuckDB Online ShellBuild a web-based SQL service
DuckDB x Delta LakePower analytics with lakehouse architecture
DuckDB + DeepSeek / SmallPondDistributed data processing
DuckDB Cross-Database JoinsMulti-source data federation
DuckDB Spatial / GeometryLocation analytics with built-in spatial
DuckLake v1 IntroData lakehouse on DuckDB

📬 Subscribe to Olap Studio

Get the latest DuckDB tips, benchmarks, and monetization strategies delivered to your inbox.

👉 Subscribe via Email — or visit our Contact page.


Last updated: 2026-05-11 · DuckDB v1.3

📺 Watch video tutorials → Olap Studio YouTube

Subscribe for more DuckDB & AI automation tutorials

Built with Hugo
Theme Stack designed by Jimmy

⚠️ This site is an independent community project, not affiliated with, endorsed by, or sponsored by the DuckDB Foundation or official DuckDB project.

"DuckDB" is a registered trademark of the DuckDB Foundation. This site uses the name solely for factual description purposes.

All content is for educational and community promotion purposes only and does not constitute any commercial service.