Oct 20, 2018 · 4 min read
Optimizing PostgreSQL for High-Traffic Applications
PostgreSQL is a powerful relational database, but it requires proper optimization to handle high-traffic production workloads. This guide covers essential techniques to maximize performance.
Understanding Query Performance
Before optimizing, you need to understand what's slow. PostgreSQL provides excellent tools for this.
Using EXPLAIN ANALYZE
```sql
EXPLAIN ANALYZE
SELECT u.name, COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at > '2024-01-01'
GROUP BY u.id, u.name
ORDER BY order_count DESC
LIMIT 10;
```
Key metrics to watch:
- Execution Time: Total time to run the query
- Planning Time: Time spent planning the query
- Seq Scan: Sequential scans (usually bad for large tables)
- Index Scan: Using an index (usually good)
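Beyond plain ANALYZE, the BUFFERS option is worth knowing: it adds shared-buffer hit and read counts to each plan node, which helps distinguish data served from cache from data read off disk:

```sql
EXPLAIN (ANALYZE, BUFFERS)
SELECT id, name FROM users WHERE created_at > '2024-01-01';
```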
Indexing Strategies
Indexes are crucial for query performance.
B-Tree Indexes (Default)
```sql
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_orders_created_at ON orders(created_at);
```
Composite Indexes
```sql
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
```
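Column order matters: a composite index can serve any query that filters on a leftmost prefix of its columns, but not one that skips the leading column. With the index above:

```sql
-- Served by idx_orders_user_status:
SELECT * FROM orders WHERE user_id = 42 AND status = 'shipped';
SELECT * FROM orders WHERE user_id = 42;        -- leftmost prefix

-- Not served efficiently (the leading column user_id is absent):
SELECT * FROM orders WHERE status = 'shipped';
```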
Partial Indexes
```sql
CREATE INDEX idx_active_users ON users(email) WHERE active = true;
```
Index Maintenance
```sql
-- Rebuild indexes
REINDEX TABLE users;

-- Remove unused indexes
DROP INDEX IF EXISTS idx_old_unused;
```
Query Optimization Techniques
1. Avoid SELECT *
```sql
-- Bad
SELECT * FROM users WHERE id = 1;

-- Good
SELECT id, name, email FROM users WHERE id = 1;
```
2. Use LIMIT for Large Result Sets
```sql
SELECT id, name FROM users ORDER BY created_at DESC LIMIT 100;
```
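Note that paging deeper with a large OFFSET still scans and discards all the preceding rows. Keyset (cursor) pagination avoids that by filtering on the last row the client saw. A sketch, with the `created_at`/`id` values of the previous page's last row as placeholders:

```sql
-- First page
SELECT id, name FROM users
ORDER BY created_at DESC, id DESC
LIMIT 100;

-- Next page: pass in the last row from the previous page
SELECT id, name FROM users
WHERE (created_at, id) < ('2024-06-01 12:00:00', 4182)
ORDER BY created_at DESC, id DESC
LIMIT 100;
```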
3. Optimize JOINs
```sql
-- Ensure foreign keys are indexed
CREATE INDEX idx_orders_user_id ON orders(user_id);

-- Use INNER JOIN when possible (often faster than LEFT JOIN,
-- since the planner has more join strategies available)
SELECT u.name, o.total
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
```
4. Avoid N+1 Queries
```javascript
// Bad: N+1 queries
const users = await db.query('SELECT * FROM users');
for (const user of users.rows) {
  const orders = await db.query('SELECT * FROM orders WHERE user_id = $1', [user.id]);
}

// Good: single query with JOIN
// (users with no orders get [null] as their orders array)
const result = await db.query(`
  SELECT u.*, json_agg(o.*) AS orders
  FROM users u
  LEFT JOIN orders o ON u.id = o.user_id
  GROUP BY u.id
`);
```
Connection Pooling
Never create a new connection for each query.
```javascript
const { Pool } = require('pg');

const pool = new Pool({
  host: 'localhost',
  database: 'myapp',
  max: 20,                       // maximum number of connections
  idleTimeoutMillis: 30000,
  connectionTimeoutMillis: 2000,
});

// Use the pool
async function getUser(id) {
  const result = await pool.query('SELECT * FROM users WHERE id = $1', [id]);
  return result.rows[0];
}
```
Configuration Tuning
Key settings in postgresql.conf (the values below assume a dedicated 16 GB server; scale them to your hardware):

```conf
# Memory settings
shared_buffers = 4GB               # ~25% of total RAM
effective_cache_size = 12GB        # ~75% of total RAM
work_mem = 64MB                    # per sort/hash operation
maintenance_work_mem = 1GB         # for VACUUM, CREATE INDEX

# Checkpoint settings
checkpoint_completion_target = 0.9
wal_buffers = 16MB
max_wal_size = 4GB

# Query planning
random_page_cost = 1.1             # for SSDs
effective_io_concurrency = 200     # for SSDs

# Connections
max_connections = 200
```
Caching Strategies
1. Application-Level Caching
```javascript
const Redis = require('ioredis');
const redis = new Redis();

async function getUser(id) {
  // Try cache first
  const cached = await redis.get(`user:${id}`);
  if (cached) return JSON.parse(cached);

  // Fall back to the database
  const result = await pool.query('SELECT * FROM users WHERE id = $1', [id]);
  const user = result.rows[0];

  // Cache for 1 hour (skip caching when the user doesn't exist)
  if (user) {
    await redis.setex(`user:${id}`, 3600, JSON.stringify(user));
  }
  return user;
}
```
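One common refinement: if many keys are written at the same moment with the same TTL, they all expire together and the database absorbs a burst of misses at once. A small helper that randomizes each TTL spreads those expirations out (the `jitteredTtl` name and the 10% default spread are illustrative choices, not part of ioredis):

```javascript
// Add random jitter (by default up to 10%) to a base TTL so that keys
// cached at the same instant do not all expire at the same instant.
function jitteredTtl(baseSeconds, spread = 0.1) {
  const extra = Math.floor(baseSeconds * spread * Math.random());
  return baseSeconds + extra;
}
```

Used with the cache write above, this becomes `redis.setex(key, jitteredTtl(3600), value)`, giving each key a lifetime between 3600 and ~3960 seconds.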
2. Materialized Views
```sql
CREATE MATERIALIZED VIEW user_order_stats AS
SELECT u.id, u.name,
       COUNT(o.id) AS total_orders,
       SUM(o.total) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;

-- Refresh periodically
REFRESH MATERIALIZED VIEW user_order_stats;
```
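A plain REFRESH locks the view against reads while it rebuilds. If the view has a unique index, you can refresh it without blocking readers:

```sql
-- CONCURRENTLY requires a unique index on the materialized view
CREATE UNIQUE INDEX idx_user_order_stats_id ON user_order_stats(id);

REFRESH MATERIALIZED VIEW CONCURRENTLY user_order_stats;
```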
Monitoring and Maintenance
Regular VACUUM
```sql
-- Autovacuum runs automatically (enabled by default),
-- but you can also vacuum specific tables manually:
VACUUM ANALYZE users;
VACUUM ANALYZE orders;
```
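To check whether autovacuum is keeping up, look at the dead-tuple counts per table:

```sql
SELECT relname, n_live_tup, n_dead_tup, last_autovacuum
FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC
LIMIT 10;
```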
Monitor Slow Queries
First, enable slow query logging in postgresql.conf:

```conf
log_min_duration_statement = 1000   # log queries taking > 1 second
```

For aggregate statistics, use the pg_stat_statements extension (add it to shared_preload_libraries, restart, then run CREATE EXTENSION pg_stat_statements):

```sql
SELECT query, calls, total_time, mean_time
FROM pg_stat_statements
ORDER BY mean_time DESC
LIMIT 10;
```
Check Index Usage
```sql
SELECT schemaname, relname, indexrelname,
       idx_scan, idx_tup_read, idx_tup_fetch
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY schemaname, relname;
```
Partitioning Large Tables
For tables with millions of rows:
```sql
-- Create partitioned table
CREATE TABLE orders (
    id SERIAL,
    user_id INTEGER,
    created_at TIMESTAMP,
    total DECIMAL
) PARTITION BY RANGE (created_at);

-- Create partitions
CREATE TABLE orders_2024_q1 PARTITION OF orders
    FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');
CREATE TABLE orders_2024_q2 PARTITION OF orders
    FOR VALUES FROM ('2024-04-01') TO ('2024-07-01');
```
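Rows are routed to the matching partition automatically on INSERT, and queries that filter on the partition key only touch the relevant partitions (partition pruning), which you can confirm with EXPLAIN (the sample values below are illustrative):

```sql
INSERT INTO orders (user_id, created_at, total)
VALUES (42, '2024-02-15', 99.50);   -- lands in orders_2024_q1

-- Only orders_2024_q1 should appear in this plan:
EXPLAIN SELECT * FROM orders
WHERE created_at >= '2024-01-01' AND created_at < '2024-04-01';
```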
Conclusion
PostgreSQL optimization is an ongoing process. Monitor your queries, analyze slow operations, and continuously refine your indexes and configuration. With these techniques, you can handle millions of requests efficiently.
Written by Anant Kumar
Systems Engineer & Full Stack Developer