What are the partition types in MariaDB and how to create and manage partitioned tables? - 面试题

MariaDB table partitioning is a technique that splits large tables into smaller, more manageable parts, significantly improving query performance and management efficiency.

1. Partition Types

RANGE Partitioning

sql
-- Partition by date range
CREATE TABLE orders (
    id INT PRIMARY KEY,
    order_date DATE,
    customer_id INT,
    amount DECIMAL(10,2)
) PARTITION BY RANGE (YEAR(order_date)) (
    PARTITION p2022 VALUES LESS THAN (2023),
    PARTITION p2023 VALUES LESS THAN (2024),
    PARTITION p2024 VALUES LESS THAN (2025),
    PARTITION pmax VALUES LESS THAN MAXVALUE
);

-- Partition by numeric range
CREATE TABLE users (
    id INT PRIMARY KEY,
    username VARCHAR(50),
    age INT
) PARTITION BY RANGE (age) (
    PARTITION p0 VALUES LESS THAN (18),
    PARTITION p1 VALUES LESS THAN (30),
    PARTITION p2 VALUES LESS THAN (50),
    PARTITION p3 VALUES LESS THAN MAXVALUE
);

LIST Partitioning

sql
-- Partition by discrete value list
CREATE TABLE orders (
    id INT PRIMARY KEY,
    order_date DATE,
    region VARCHAR(20),
    amount DECIMAL(10,2)
) PARTITION BY LIST COLUMNS(region) (
    PARTITION p_east VALUES IN ('New York', 'Boston', 'Philadelphia'),
    PARTITION p_west VALUES IN ('Los Angeles', 'San Francisco', 'Seattle'),
    PARTITION p_midwest VALUES IN ('Chicago', 'Detroit', 'Cleveland'),
    PARTITION p_south VALUES IN ('Atlanta', 'Miami', 'Dallas')
);

HASH Partitioning

sql
-- Hash partitioning (evenly distribute data)
CREATE TABLE products (
    id INT PRIMARY KEY,
    name VARCHAR(100),
    category_id INT,
    price DECIMAL(10,2)
) PARTITION BY HASH(id) PARTITIONS 4;

-- Linear hash partitioning (faster but may not be evenly distributed)
CREATE TABLE orders (
    id INT PRIMARY KEY,
    customer_id INT,
    order_date DATE,
    amount DECIMAL(10,2)
) PARTITION BY LINEAR HASH(customer_id) PARTITIONS 8;

KEY Partitioning

sql
-- KEY partitioning (similar to HASH, uses primary or unique key)
CREATE TABLE users (
    id INT PRIMARY KEY,
    username VARCHAR(50),
    email VARCHAR(100)
) PARTITION BY KEY(id) PARTITIONS 4;

2. Partition Management

Adding Partitions

sql
-- Add new RANGE partition
ALTER TABLE orders ADD PARTITION (
    PARTITION p2025 VALUES LESS THAN (2026)
);

-- Add new LIST partition
ALTER TABLE orders ADD PARTITION (
    PARTITION p_other VALUES IN ('Other', 'Unknown')
);

-- Increase HASH/KEY partition count
ALTER TABLE products REORGANIZE PARTITION (
    PARTITION p0, PARTITION p1, PARTITION p2, PARTITION p3
) INTO (
    PARTITION p0, PARTITION p1, PARTITION p2, 
    PARTITION p3, PARTITION p4, PARTITION p5
);

Dropping Partitions

sql
-- Drop partition (also deletes data)
ALTER TABLE orders DROP PARTITION p2022;

-- Remove all partitions (convert to regular table)
ALTER TABLE orders REMOVE PARTITIONING;

Merging Partitions

sql
-- Merge RANGE partitions
ALTER TABLE orders REORGANIZE PARTITION p2022, p2023 INTO (
    PARTITION p2022_2023 VALUES LESS THAN (2024)
);

3. Querying Partitions

sql
-- View partition information
SELECT 
    PARTITION_NAME,
    PARTITION_METHOD,
    PARTITION_EXPRESSION,
    PARTITION_DESCRIPTION,
    TABLE_ROWS
FROM information_schema.PARTITIONS
WHERE TABLE_NAME = 'orders';

-- Query specific partition
SELECT * FROM orders PARTITION (p2023);

-- Use partition pruning for optimized queries
SELECT * FROM orders WHERE order_date >= '2023-01-01' AND order_date < '2024-01-01';

4. Partition Maintenance

sql
-- Check partition
ALTER TABLE orders CHECK PARTITION p2023;

-- Optimize partition
ALTER TABLE orders OPTIMIZE PARTITION p2023;

-- Analyze partition
ALTER TABLE orders ANALYZE PARTITION p2023;

-- Repair partition
ALTER TABLE orders REPAIR PARTITION p2023;

5. Partition Indexes

sql
-- Create local index (independent index for each partition)
CREATE INDEX idx_customer_id ON orders(customer_id);

-- Create global index (MariaDB 10.3+)
CREATE UNIQUE INDEX idx_global ON orders(id);

6. Partition Use Cases

Time Series Data

sql
CREATE TABLE logs (
    id BIGINT PRIMARY KEY,
    log_time TIMESTAMP,
    message TEXT,
    level VARCHAR(10)
) PARTITION BY RANGE (UNIX_TIMESTAMP(log_time)) (
    PARTITION p2024_q1 VALUES LESS THAN (UNIX_TIMESTAMP('2024-04-01')),
    PARTITION p2024_q2 VALUES LESS THAN (UNIX_TIMESTAMP('2024-07-01')),
    PARTITION p2024_q3 VALUES LESS THAN (UNIX_TIMESTAMP('2024-10-01')),
    PARTITION p2024_q4 VALUES LESS THAN (UNIX_TIMESTAMP('2025-01-01'))
);

Large Table Archiving

sql
-- Regularly archive old data
ALTER TABLE orders DROP PARTITION p2022;

7. Partition Considerations

Partition Key Selection: Choose columns frequently used in queries as partition keys
Partition Count: Avoid too many partitions, recommended 10-100 partitions
Primary Key Constraint: Primary key must include partition key
Unique Index: Unique index must include partition key
Data Distribution: Ensure even data distribution across partitions
Maintenance Cost: Partitioned tables require additional maintenance operations

Partitioning is an effective method for handling large data volumes, and proper usage can significantly improve query performance and management efficiency.