Building a DuckDB MCP Server: Connect AI Agents to Your Data
TL;DR: Model Context Protocol (MCP) enables AI agents to interact with external data sources. This guide shows you how to build a production-ready MCP server that connects AI agents to DuckDB for real-time analytics, data exploration, and automated reporting.
What is MCP?
Model Context Protocol (MCP) is an open standard that allows AI applications to securely connect to external data sources. Think of it as “USB-C for AI” — a standardized interface that lets any AI agent talk to any data source.
┌─────────────────────────────────────────────────┐
│ MCP Architecture │
│ │
│ [AI Agent] ──MCP Protocol──> [MCP Server] │
│ (Claude, etc.) (DuckDB + Tools) │
│ │
│ Standardized: │
│ • Tool definitions │
│ • Resource access │
│ • Prompt templates │
│ • Secure authentication │
└─────────────────────────────────────────────────┘
Prerequisites
Required Packages
pip install mcp duckdb fastapi uvicorn pydantic
Environment Setup
export DUCKDB_PATH="./analytics.duckdb"
export MCP_HOST="0.0.0.0"
export MCP_PORT=8080
export API_KEY="your-secret-key"
Step 1: Basic MCP Server
Server Initialization
from mcp.server.fastmcp import FastMCP
import duckdb
import os
# Initialize MCP server
mcp = FastMCP(
name="duckdb-analytics",
version="1.0.0",
description="DuckDB analytics server for AI agents"
)
# Global DuckDB connection
db_path = os.getenv("DUCKDB_PATH", "./analytics.duckdb")
con = duckdb.connect(db_path)
# Health check endpoint
@mcp.tool()
def health_check() -> dict:
"""Check the health of the DuckDB server."""
result = con.execute("SELECT 1 as healthy").fetchone()
return {
"status": "healthy" if result[0] == 1 else "unhealthy",
"database": db_path,
"duckdb_version": con.execute("SELECT version()").fetchone()[0]
}
if __name__ == "__main__":
mcp.run()
Running the Server
# Start MCP server
python server.py
# Or with uvicorn for production
uvicorn server:app --host 0.0.0.0 --port 8080
Step 2: Analytics Tools
Query Execution Tool
from pydantic import BaseModel, Field
from typing import Optional
class QueryRequest(BaseModel):
sql: str = Field(..., description="SQL query to execute")
limit: Optional[int] = Field(1000, description="Maximum rows to return")
format: Optional[str] = Field("json", description="Output format: json, csv, markdown")
@mcp.tool()
def execute_query(request: QueryRequest) -> dict:
"""Execute a SQL query against the DuckDB database."""
try:
# Validate query (prevent dangerous operations)
if any(keyword in request.sql.upper() for keyword in ['DROP', 'DELETE', 'TRUNCATE']):
return {"error": "Dangerous operations are not allowed"}
# Execute query with limit
result = con.execute(f"{request.sql} LIMIT {request.limit}")
columns = [desc[0] for desc in result.description]
rows = result.fetchall()
# Convert to requested format
if request.format == "csv":
import io
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(columns)
writer.writerows(rows)
return {"data": output.getvalue()}
elif request.format == "markdown":
markdown = "| " + " | ".join(columns) + " |\n"
markdown += "| " + " | ".join(["---"] * len(columns)) + " |\n"
for row in rows:
markdown += "| " + " | ".join(str(v) for v in row) + " |\n"
return {"data": markdown}
else: # JSON
return {
"columns": columns,
"data": [dict(zip(columns, row)) for row in rows],
"row_count": len(rows)
}
except Exception as e:
return {"error": str(e)}
Schema Discovery Tool
class SchemaRequest(BaseModel):
table_pattern: Optional[str] = Field(None, description="Filter tables by pattern")
@mcp.tool()
def get_schema(request: SchemaRequest) -> dict:
"""Discover database schema and table structures."""
try:
# Get all tables
tables_query = """
SELECT table_name, table_schema, table_type
FROM information_schema.tables
WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
{% if pattern %}AND table_name LIKE '{{ pattern }}'{% endif %}
ORDER BY table_name
"""
if request.table_pattern:
tables = con.execute(
tables_query.replace("{% if pattern %}...", f"AND table_name LIKE '%{request.table_pattern}%'"),
tables_query.replace("{% if pattern %}...", "")
).fetchall()
else:
tables = con.execute(tables_query).fetchall()
# Get column details for each table
schema = {}
for table in tables:
table_name = table[0]
columns = con.execute(f"""
SELECT column_name, data_type, is_nullable
FROM information_schema.columns
WHERE table_name = '{table_name}'
ORDER BY ordinal_position
""").fetchall()
schema[table_name] = {
"columns": [
{"name": col[0], "type": col[1], "nullable": col[2] == 'YES'}
for col in columns
],
"row_count": con.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]
}
return {"tables": len(schema), "schema": schema}
except Exception as e:
return {"error": str(e)}
Data Profiling Tool
class ProfileRequest(BaseModel):
table_name: str = Field(..., description="Table to profile")
columns: Optional[list] = Field(None, description="Specific columns to profile")
@mcp.tool()
def profile_table(request: ProfileRequest) -> dict:
"""Generate statistical profile for a table."""
try:
# Get column statistics
columns = request.columns or con.execute(f"""
SELECT column_name FROM information_schema.columns
WHERE table_name = '{request.table_name}'
""").fetchall()
profile = {}
for col_tuple in columns:
col_name = col_tuple[0] if isinstance(col_tuple, tuple) else col_tuple
stats = con.execute(f"""
SELECT
COUNT(*) as total_rows,
COUNT(DISTINCT "{col_name}") as distinct_values,
SUM(CASE WHEN "{col_name}" IS NULL THEN 1 ELSE 0 END) as null_count,
MIN("{col_name}") as min_value,
MAX("{col_name}") as max_value,
AVG("{col_name}") as avg_value,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY "{col_name}") as median,
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "{col_name}") as p95
FROM {request.table_name}
""").fetchone()
profile[col_name] = {
"total_rows": stats[0],
"distinct_values": stats[1],
"null_count": stats[2],
"null_percentage": round(stats[2] / stats[0] * 100, 2) if stats[0] > 0 else 0,
"min": stats[3],
"max": stats[4],
"avg": stats[5],
"median": stats[6],
"p95": stats[7]
}
return {"table": request.table_name, "profile": profile}
except Exception as e:
return {"error": str(e)}
Step 3: Resource Access
Define Data Resources
from mcp.types import Resource, TextResourceContents
@mcp.resource("duckdb://orders/schema")
def get_orders_schema() -> str:
"""Schema definition for the orders table."""
return """
Table: orders
Columns:
- order_id: VARCHAR (PK)
- customer_id: VARCHAR
- order_date: TIMESTAMP
- amount: DECIMAL(10,2)
- status: VARCHAR
- payment_method: VARCHAR
"""
@mcp.resource("duckdb://products/schema")
def get_products_schema() -> str:
"""Schema definition for the products table."""
return """
Table: products
Columns:
- product_id: VARCHAR (PK)
- name: VARCHAR
- category: VARCHAR
- price: DECIMAL(10,2)
- stock_quantity: INTEGER
"""
@mcp.resource("duckdb://analytics/dashboard")
def get_dashboard_data() -> str:
"""Aggregated analytics data for dashboard."""
result = con.execute("""
SELECT
DATE_TRUNC('month', order_date) as month,
COUNT(*) as total_orders,
SUM(amount) as total_revenue,
AVG(amount) as avg_order_value
FROM orders
GROUP BY 1
ORDER BY 1 DESC
LIMIT 12
""").fetchall()
return str(result)
Step 4: Prompt Templates
Create Reusable Prompts
@mcp.prompt()
def analyze_sales_trends() -> str:
"""Analyze sales trends and identify patterns."""
return """
Analyze the sales data and provide insights on:
1. Monthly revenue trends
2. Top performing product categories
3. Customer segmentation by purchase frequency
4. Anomalies in sales patterns
Use the following SQL queries to gather the data:
- SELECT DATE_TRUNC('month', order_date), SUM(amount) FROM orders GROUP BY 1
- SELECT category, SUM(amount) FROM orders JOIN products ON ... GROUP BY 1
- SELECT customer_id, COUNT(*) as order_count FROM orders GROUP BY 1
"""
@mcp.prompt()
def generate_executive_report() -> str:
"""Generate an executive summary report."""
return """
Generate a concise executive report covering:
- Key performance indicators (KPIs)
- Month-over-month growth rates
- Top 5 products by revenue
- Customer retention metrics
- Recommendations for improvement
Keep the report under 500 words and use bullet points for readability.
"""
Step 5: Authentication and Security
API Key Authentication
from functools import wraps
import hmac
def require_api_key(f):
"""Decorator to enforce API key authentication."""
@wraps(f)
async def wrapper(*args, **kwargs):
api_key = os.getenv("API_KEY")
if not api_key:
return {"error": "API key not configured"}
# In production, use proper JWT or OAuth2
# This is a simplified example
return f(*args, **kwargs)
return wrapper
@mcp.tool()
@require_api_key
def secure_query(request: QueryRequest) -> dict:
"""Execute a secured SQL query."""
# Additional security checks here
return execute_query(request)
Query Validation
def validate_sql(sql: str) -> bool:
"""Validate SQL query for safety."""
dangerous_keywords = ['DROP', 'DELETE', 'TRUNCATE', 'ALTER', 'CREATE', 'GRANT']
sql_upper = sql.upper()
for keyword in dangerous_keywords:
if keyword in sql_upper:
return False
# Check for SQL injection attempts
if any(char in sql for char in [';', '--', '/*', '*/']):
return False
return True
Step 6: Testing the Server
Test Queries
# Test health check
curl http://localhost:8080/tools/health_check
# Test schema discovery
curl http://localhost:8080/tools/get_schema \
-d '{"table_pattern": "orders"}'
# Test query execution
curl http://localhost:8080/tools/execute_query \
-d '{
"sql": "SELECT category, SUM(amount) as revenue FROM orders JOIN products ON orders.product_id = products.id GROUP BY 1 ORDER BY 2 DESC LIMIT 10",
"limit": 100,
"format": "json"
}'
# Test data profiling
curl http://localhost:8080/tools/profile_table \
-d '{"table_name": "orders"}'
Integration with Claude
# In Claude Code or Claude Desktop
# Add MCP server configuration:
{
"mcpServers": {
"duckdb-analytics": {
"command": "python",
"args": ["/path/to/server.py"],
"env": {
"DUCKDB_PATH": "/path/to/analytics.duckdb"
}
}
}
}
Step 7: Production Deployment
Docker Setup
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
EXPOSE 8080
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8080"]
Kubernetes Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
name: duckdb-mcp-server
spec:
replicas: 1
selector:
matchLabels:
app: duckdb-mcp
template:
metadata:
labels:
app: duckdb-mcp
spec:
containers:
- name: mcp-server
image: duckdb-mcp:latest
ports:
- containerPort: 8080
env:
- name: DUCKDB_PATH
value: "/data/analytics.duckdb"
- name: API_KEY
valueFrom:
secretKeyRef:
name: duckdb-secret
key: api-key
volumeMounts:
- name: data-volume
mountPath: /data
volumes:
- name: data-volume
persistentVolumeClaim:
claimName: duckdb-pvc
Conclusion
Building an MCP server with DuckDB enables AI agents to:
- Query data naturally — using SQL or natural language
- Discover schema — automatically understand data structure
- Profile data — get statistical insights on demand
- Generate reports — automate analytics workflows
This creates a powerful foundation for AI-powered analytics that scales from prototyping to production.
For production deployments, always implement proper authentication, input validation, and monitoring.