Distributed ID Generator

分布式ID生成器

Distributed ID Generator

  • UUID
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    package main

    import (
    "testing"

    "github.com/google/uuid"
    )

    func BenchmarkUUID(t *testing.B) {
    for i := 0; i < t.N; i++ {
    _ = uuid.New()
    }
    }

    ~/Downloads/uuid_demo via 🐹 v1.17 took 13s
    ➜ go test -bench=.
    goos: darwin
    goarch: amd64
    pkg: uuid_demo
    cpu: Intel(R) Core(TM) i5-5257U CPU @ 2.70GHz
    BenchmarkUUID-4 1390710 844.7 ns/op
    PASS
    ok uuid_demo 2.351s

    # UUIDs are 128-bit hexadecimal numbers that are globally unique

    # 优点:
    1. 生成足够简单,本地生成无网络消耗,具有唯一性
    # 缺点:
    1. 无序的字符串,不具备趋势自增特性
    2. 没有具体的业务含义
    3. 长度过长16字节128位,36位长度的字符串,存储以及查询数据库性能消耗较大,数据库建议主键尽量越短越好,作为数据库主键UUID的无序性会导致数据位置频繁变动,严重影响性能
  • 基于数据库自增ID

    基于数据库的auto_increment自增ID完成可以充当分布式ID

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    # 创建数据表
    mysql> CREATE TABLE SEQUENCE_ID (id bigint(20) unsigned NOT NULL auto_increment, value char(10) NOT NULL default '', PRIMARY KEY (id)) ENGINE=MyISAM;
    Query OK, 0 rows affected, 1 warning (0.08 sec)

    # MySQL使用存储过程插入数据
    delimiter $$
    create procedure bench_insert(count int unsigned)
    BEGIN
    declare num int unsigned default 1;
    declare c char(10) default repeat('c',10);
    START TRANSACTION;
    while num <= count DO
    insert into SEQUENCE_ID(`value`) values(c);
    set num=num+1;
    end while;
    COMMIT;
    END$$ delimiter ;


    CALL bench_insert(10000);

    drop PROCEDURE bench_insert;

    # 当需要ID的时候,向表中插入记录返回主键ID.

    # 优点:
    1. 实现简单、ID单调自增、数值类型查询速度快

    缺点:
    1. DB单点存在宕机风险,无法扛住高并发场景
  • 基于数据库集群模式

    多数据库实例单独生产自增ID

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    # 设置启始值和自增布长
    DB1:
    set @@auto_increment_offset = 1; -- 起始值
    set @@auto_increment_increment = 2; -- 步长
    DB2:
    set @@auto_increment_offset = 2; -- 起始值
    set @@auto_increment_increment = 2; -- 步长

    # 优点:
    解决DB单点问题

    # 缺点:
    不利于后续扩容,而且实际上单个数据库自身压力还是很大,依旧无法满足高并发场景
  • 基于数据库的号段模式

    号段模式是分布式ID生成器主流实现方式之一,批量获取自增ID,每次从数据库取出一个号段范围,具体业务服务将本号段生成的自增ID加载内存

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
      mysql> CREATE TABLE id_generator (id int(10) NOT NULL, max_id bigint(20) NOT NULL COMMENT '当前最大id', step int(20) NOT NULL COMMENT '号段的布长', biz_type int(20) NOT NULL COMMENT '业务类型', version int(20) NOT NULL COMMENT '版本号', PRIMARY KEY(`id`));
    Query OK, 0 rows affected, 5 warnings (0.11 sec)

    mysql> describe id_generator;
    +----------+--------+------+-----+---------+-------+
    | Field | Type | Null | Key | Default | Extra |
    +----------+--------+------+-----+---------+-------+
    | id | int | NO | PRI | NULL | |
    | max_id | bigint | NO | | NULL | | -- 当前最大的可用id
    | step | int | NO | | NULL | | -- 代表号段的长度
    | biz_type | int | NO | | NULL | | -- 不同业务类型
    | version | int | NO | | NULL | | -- 乐观锁,每次都更新version,保证并发时数据的正确性
    +----------+--------+------+-----+---------+-------+
    5 rows in set (0.04 sec)

    update id_generator set max_id = #{max_id+step}, version = version + 1 where version = # {version} and biz_type = XXX
  • 基于Redis模式

    利用redis Incr 原子性自增

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21

    172.30.1.23:6002> set seq_id 1
    OK
    172.30.1.23:6002> incr seq_id
    (integer) 2
    172.30.1.23:6002> get seq_id
    "2"
    172.30.1.23:6002>

    # Benchmark
    ubuntu in ~ at 3BPlus took 4s
    ➜ redis-benchmark -n 1000000 -t set,get,incr -P 16 -q -h 127.0.0.1 -p 6001 --cluster
    Cluster has 3 master nodes:

    Master 0: a733c21d3b735b9d026eb4d462ef6b367d8ebb98 172.30.1.23:6002
    Master 1: 9c35a4e211f6534861ed768dba592e85539b1377 172.30.1.23:6003
    Master 2: a901e497cb72819cf0765e9e4eb16c36399c437b 172.30.1.23:6001

    SET: 47001.32 requests per second, p50=15.199 msec
    GET: 101936.80 requests per second, p50=5.679 msec
    INCR: 46496.49 requests per second, p50=13.879 msec -- 并发5万/s
  • twitter snowflake

    雪花算法 (Snowflake) Twitter内部分布式采用的ID生成算法

    1
    2
    3
    4
    5
    6
    7
    8
    # The default Twitter format shown below.
    +--------------------------------------------------------------------------+
    | 1 Bit Unused | 41 Bit Timestamp | 10 Bit NodeID | 12 Bit Sequence ID |
    +--------------------------------------------------------------------------+

    41 bits: store a timestamp with millisecond precision
    10 bits: store a node id - a range from 0 through 1023
    12 bits: store a sequence number - a range from 0 through 4095