Featured image of post DuckDB SQL 语法速查指南:从 SELECT 到 PIVOT 的完整参考

DuckDB SQL 语法速查指南:从 SELECT 到 PIVOT 的完整参考

DuckDB SQL 语法完整速查指南,涵盖 SELECT、GROUP BY、窗口函数、CTE、UNION、PIVOT 等核心语法,以及 DuckDB 特有的 GROUP BY ALL、COLUMNS()、EXCLUDE、REPLACE 等高级特性。

DuckDB SQL 语法概述

DuckDB 的 SQL 语法基于 PostgreSQL 标准,并在其基础上做了大量针对分析场景的增强。本文覆盖了 DuckDB SQL 语法的核心内容,包括标准 SQL 操作和 DuckDB 独有语法特性。

如果你是 SQL 新手,建议先阅读 DuckDB 入门教程;如果已经有一定基础,本文可作为日常使用的 DuckDB SQL 语法速查手册。


数据定义语言 (DDL)

创建表

-- 标准建表
CREATE TABLE employees (
    id INTEGER PRIMARY KEY,
    name VARCHAR(100),
    department VARCHAR,
    salary DECIMAL(10,2),
    hire_date DATE
);

-- 从查询结果创建表
CREATE TABLE high_earners AS
SELECT * FROM employees WHERE salary > 100000;

-- 临时表(会话结束后自动删除)
CREATE TEMP TABLE temp_results AS
SELECT department, AVG(salary) AS avg_salary
FROM employees GROUP BY department;

修改表结构

-- 添加列
ALTER TABLE employees ADD COLUMN email VARCHAR;

-- 删除列
ALTER TABLE employees DROP COLUMN email;

-- 重命名列
ALTER TABLE employees RENAME COLUMN salary TO base_salary;

数据查询语言 (DQL) — SELECT

基础 SELECT 语法

-- 基本查询
SELECT name, department, salary
FROM employees
WHERE department = 'Engineering'
ORDER BY salary DESC
LIMIT 10;

DuckDB 特有的 SELECT 增强

DuckDB 提供了一些标准 SQL 中没有的便捷语法:

-- GROUP BY ALL:自动按 SELECT 中所有非聚合列分组
SELECT department, year(hire_date) AS hire_year, COUNT(*)
FROM employees
GROUP BY ALL;
-- 等价于 GROUP BY department, year(hire_date)

-- COLUMNS():对多个列应用相同表达式
SELECT COLUMNS('salary|bonus') * 1.1 AS salary_increase
FROM employees;

-- EXCLUDE:排除指定列
SELECT * EXCLUDE (salary, ssn)
FROM employees;

-- REPLACE:替换指定列的表达式
SELECT * REPLACE (salary * 1.1 AS salary)
FROM employees;

数据操作语言 (DML)

INSERT

-- 单行插入
INSERT INTO employees VALUES (5, 'Eve', 'Engineering', 130000, '2026-01-15');

-- 多行插入
INSERT INTO employees VALUES
    (6, 'Frank', 'Sales', 90000, '2026-02-01'),
    (7, 'Grace', 'Marketing', 95000, '2026-02-15');

-- 从查询插入
INSERT INTO high_earners
SELECT * FROM employees WHERE salary > 100000;

-- 从文件插入(DuckDB 特色)
INSERT INTO employees
SELECT * FROM read_csv_auto('new_employees.csv');

UPDATE 和 DELETE

-- 更新
UPDATE employees
SET salary = salary * 1.05
WHERE department = 'Engineering' AND salary < 100000;

-- 删除
DELETE FROM employees WHERE id = 5;

聚合函数与 GROUP BY

标准聚合

SELECT
    department,
    COUNT(*) AS employee_count,
    SUM(salary) AS total_salary,
    AVG(salary) AS avg_salary,
    MIN(salary) AS min_salary,
    MAX(salary) AS max_salary
FROM employees
GROUP BY department
HAVING COUNT(*) > 1
ORDER BY avg_salary DESC;

高级聚合

-- 中位数
SELECT department, MEDIAN(salary) AS median_salary
FROM employees GROUP BY department;

-- 百分位数
SELECT department,
    PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY salary) AS q1,
    PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY salary) AS q3
FROM employees GROUP BY department;

-- 多种统计量
SELECT department,
    AVG(salary) AS mean,
    STDDEV(sample(salary)) AS stddev,
    SKEWNESS(salary) AS skew,
    KURTOSIS(salary) AS kurt
FROM employees GROUP BY department;

窗口函数

基本窗口函数

SELECT
    name,
    department,
    salary,
    -- 排名
    ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rank,
    -- 累计和
    SUM(salary) OVER (PARTITION BY department ORDER BY name) AS running_total,
    -- 全局聚合
    AVG(salary) OVER () AS company_avg,
    -- 与平均值差距
    salary - AVG(salary) OVER () AS diff_from_avg
FROM employees;

滑动窗口

SELECT
    date,
    amount,
    -- 前3天移动平均
    AVG(amount) OVER (
        ORDER BY date
        ROWS BETWEEN 3 PRECEDING AND CURRENT ROW
    ) AS moving_avg_3d,
    -- 年初至今累计
    SUM(amount) OVER (
        PARTITION BY year(date)
        ORDER BY date
    ) AS ytd_total
FROM daily_sales;

窗口过滤与排序

-- 窗口函数中使用 FILTER
SELECT
    department,
    AVG(salary) AS avg_all,
    AVG(salary) FILTER (WHERE salary > 100000) AS avg_high_only
FROM employees
GROUP BY department;

公用表表达式 (CTE)

基础 CTE

WITH department_stats AS (
    SELECT
        department,
        AVG(salary) AS avg_dept_salary
    FROM employees
    GROUP BY department
)
SELECT
    e.name,
    e.department,
    e.salary,
    d.avg_dept_salary,
    e.salary - d.avg_dept_salary AS salary_diff
FROM employees e
JOIN department_stats d ON e.department = d.department
WHERE e.salary > d.avg_dept_salary
ORDER BY salary_diff DESC;

递归 CTE

-- 生成日期序列
WITH RECURSIVE dates AS (
    SELECT '2026-01-01'::DATE AS date
    UNION ALL
    SELECT date + 1
    FROM dates
    WHERE date < '2026-01-31'
)
SELECT * FROM dates;

-- 组织树查询
WITH RECURSIVE org_tree AS (
    SELECT id, name, manager_id, 1 AS level
    FROM org_chart
    WHERE manager_id IS NULL
    UNION ALL
    SELECT e.id, e.name, e.manager_id, t.level + 1
    FROM org_chart e
    JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree ORDER BY level, name;

UNION 和集合操作

-- UNION (去重)
SELECT name, department FROM current_employees
UNION
SELECT name, department FROM former_employees
ORDER BY name;

-- UNION ALL (保留重复,更快)
SELECT region, revenue FROM sales_q1
UNION ALL
SELECT region, revenue FROM sales_q2;

-- INTERSECT 和 EXCEPT
SELECT product FROM products_2025
INTERSECT
SELECT product FROM products_2026;
-- 返回2025和2026都有的产品

SELECT product FROM products_2025
EXCEPT
SELECT product FROM products_2026;
-- 返回2025有但2026没有的产品

PIVOT / UNPIVOT

PIVOT:行转列

-- 将不同部门的工资统计转置为列
PIVOT employees
ON department
USING AVG(salary) AS avg_salary,
      COUNT(*) AS count
GROUP BY hire_year;

-- 使用 PIVOT 的 SQL 标准语法
SELECT *
FROM (SELECT department, salary FROM employees)
PIVOT (
    AVG(salary)
    FOR department IN ('Engineering', 'Sales', 'Marketing')
) AS p;

UNPIVOT:列转行

-- 将季度列转置为行
UNPIVOT quarterly_sales
ON q1, q2, q3, q4
INTO
    NAME quarter
    VALUE revenue;

DuckDB 特有语法与函数

列表和结构体

-- 列表操作
SELECT [1, 2, 3] AS numbers,
       list_value(1, 2, 3) AS also_numbers,
       list_sort([3, 1, 2]) AS sorted;

-- 结构体
SELECT {'name': 'Alice', 'salary': 120000} AS employee,
       (employee).name AS name;

-- UNNEST:展开嵌套数据
SELECT name, unnest(skills) AS skill
FROM (VALUES ('Alice', ['SQL', 'Python', 'Java'])) AS t(name, skills);

日期时间函数

SELECT
    CURRENT_DATE AS today,
    DATE_TRUNC('month', '2026-05-21'::DATE) AS month_start,
    DATE_DIFF('month', '2026-01-01'::DATE, '2026-12-31'::DATE) AS months_diff,
    DATE_ADD('2026-01-01'::DATE, INTERVAL 3 MONTH) AS three_months_later,
    EXTRACT(YEAR FROM '2026-05-21'::DATE) AS year;

字符串函数

SELECT
    UPPER('hello') AS upper,
    LOWER('HELLO') AS lower,
    LENGTH('DuckDB') AS len,
    CONCAT('Hello', ' ', 'DuckDB') AS greeting,
    SPLIT_PART('a,b,c', ',', 2) AS second,
    REGEXP_MATCHES('[email protected]', '\w+@\w+\.\w+') AS is_email;

性能优化提示

-- 1. 使用 EXPLAIN 查看执行计划
EXPLAIN SELECT * FROM employees WHERE department = 'Engineering';

-- 2. 创建索引加速过滤
CREATE INDEX idx_emp_dept ON employees(department);

-- 3. 限制并行度
SET threads = 4;

-- 4. 调整内存
SET memory_limit = '8GB';

-- 5. 使用 Parquet 替代 CSV
COPY (SELECT * FROM employees) TO 'employees.parquet' (FORMAT PARQUET);

架构图

相关文章


📘 博客: https://duckdblab.org #DuckDB #SQL语法 #SQL教程 #数据分析

📺 Watch video tutorials → DuckDB Lab YouTube

Subscribe for more DuckDB & AI automation tutorials

使用 Hugo 构建
主题 StackJimmy 设计