Skip to content

Commit 31d1fb8

Browse files
committed
distinguish versions
1 parent fb9566e commit 31d1fb8

20 files changed

+425
-0
lines changed
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

13/Dockerfile

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# ref: https://github.com/docker-library/postgres/issues/340
2+
FROM postgres:13 AS extension_builder
3+
4+
# The extension source is already downloaded (vendored under lib/); copy it into the build stage.
5+
# Create the directory that receives the extension source. The path is
# absolute, so no `cd` is needed, and -p creates the parent in one step.
RUN mkdir -p /external_extensions/sequential-uuids
6+
COPY lib/sequential-uuids/ /external_extensions/sequential-uuids/
7+
WORKDIR /external_extensions/sequential-uuids/
8+
9+
# Optional: switch sources.list to a faster Debian mirror. The PostgreSQL apt source is still slow; not needed if a proxy is used.
10+
# RUN echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ buster main contrib non-free" > /etc/apt/sources.list
11+
# RUN echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ buster-updates main contrib non-free" >> /etc/apt/sources.list
12+
# RUN echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ buster-backports main contrib non-free" >> /etc/apt/sources.list
13+
# RUN echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian-security buster/updates main contrib non-free" >> /etc/apt/sources.list
14+
15+
16+
# Install the compiler toolchain and PostgreSQL server headers needed to build
# the extension. apt-get is used for both steps (apt does not offer a stable
# CLI for scripts), and the package lists are removed in the same layer.
# Only the headers for this image's major version are installed; PG_MAJOR is
# expected to be set by the official postgres base image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        libicu-dev \
        "postgresql-server-dev-${PG_MAJOR}" \
    && rm -rf /var/lib/apt/lists/*
17+
# Start from a clean tree, then build and install the extension through the
# Makefile in the current WORKDIR.
RUN make clean \
    && make install
18+
19+
FROM postgres:13
20+
# If you use the alpine image, the paths below are different!
21+
22+
# run find / -name 'sequential_uuids*' to find newly compiled files and copy to next stage
23+
COPY --from=extension_builder /usr/lib/postgresql/13/lib /usr/lib/postgresql/13/lib
24+
COPY --from=extension_builder /usr/share/postgresql/13/extension /usr/share/postgresql/13/extension
25+
26+
# Afterwards, in SQL:
27+
# CREATE EXTENSION sequential_uuids;

13/lib/sequential-uuids/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.o
2+
*.so

13/lib/sequential-uuids/LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2018 Tomas Vondra
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

13/lib/sequential-uuids/META.json

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"name": "sequential_uuids",
3+
"abstract": "UUID generators with sequential patterns, which helps to reduce random I/O patterns associated with regular entirely-random UUID.",
4+
"description": "Regular random UUIDs are distributed uniformly over the whole range of possible values. This results in poor locality when inserting data into indexes - all index leaf pages are equally likely to be hit, forcing the whole index into memory. With small indexes that's fine, but once the index size exceeds shared buffers (or RAM), the cache hit ratio quickly deteriorates. The main goal of the two generators implemented by this extension is generating UUIDs in a more sequential pattern, but without reducing the randomness too much (which could increase the probability of collision and predictability of the generated UUIDs). This idea is not new; it is essentially what the UUID Wikipedia article calls COMB (combined-time GUID).",
5+
"version": "1.0.1",
6+
"maintainer": "Tomas Vondra <[email protected]>",
7+
"license": "mit",
8+
"prereqs": {
9+
"runtime": {
10+
"requires": {
11+
"PostgreSQL": "10.0.0"
12+
}
13+
}
14+
},
15+
"provides": {
16+
"sequential_uuids": {
17+
"file": "sequential_uuids--1.0.1.sql",
18+
"docfile" : "README.md",
19+
"version": "1.0.1"
20+
}
21+
},
22+
"resources": {
23+
"repository": {
24+
"url": "https://github.com/tvondra/sequential-uuids.git",
25+
"web": "http://github.com/tvondra/sequential-uuids",
26+
"type": "git"
27+
}
28+
},
29+
"tags" : ["UUID", "generator"],
30+
"meta-spec": {
31+
"version": "1.0.0",
32+
"url": "http://pgxn.org/meta/spec.txt"
33+
},
34+
"release_status" : "stable"
35+
}

13/lib/sequential-uuids/Makefile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# sequential_uuids/Makefile
2+
#
3+
# Copyright (c) 2014 Citus Data, Inc.
4+
#
5+
6+
MODULE_big = sequential_uuids
7+
8+
OBJS = sequential_uuids.o
9+
10+
EXTENSION = sequential_uuids
11+
DATA = sequential_uuids--1.0.1.sql
12+
13+
PG_CONFIG = pg_config
14+
PGXS := $(shell $(PG_CONFIG) --pgxs)
15+
include $(PGXS)
16+
17+
# Fallback: if MAJORVERSION is still undefined after including PGXS, derive it
# from VERSION by stripping the last dot-suffix (e.g. "9.6.1" -> "9.6").
ifndef MAJORVERSION
18+
MAJORVERSION := $(basename $(VERSION))
19+
endif

13/lib/sequential-uuids/README.md

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
Sequential UUID generators
2+
==========================
3+
4+
This PostgreSQL extension implements two UUID generators with sequential
5+
patterns, which helps to reduce random I/O patterns associated with
6+
regular entirely-random UUID.
7+
8+
Regular random UUIDs are distributed uniformly over the whole range of
9+
possible values. This results in poor locality when inserting data into
10+
indexes - all index leaf pages are equally likely to be hit, forcing
11+
the whole index into memory. With small indexes that's fine, but once
12+
the index size exceeds shared buffers (or RAM), the cache hit ratio
13+
quickly deteriorates.
14+
15+
Compare this to sequences and timestamps, which have a more sequential
16+
pattern and the new data almost always end up in the right-most part of
17+
the index (new sequence value is larger than all preceding values, same
18+
for timestamp). This results in a nicer and cache-friendlier behavior,
19+
but the values are predictable and may easily collide across machines.
20+
21+
The main goal of the two generators implemented by this extension, is
22+
generating UUIDS in a more sequential pattern, but without reducing the
23+
randomness too much (which could increase the probability of collision
24+
and predictability of the generated UUIDs). This idea is not new, and
25+
is described in more detail in the references below.
26+
27+
This idea is pretty much what the UUID wikipedia article [1] calls COMB
28+
(combined-time GUID) and is more thoroughly explained in [2].
29+
30+
31+
Generators
32+
----------
33+
34+
The extension provides two functions generating sequential UUIDs using
35+
either a sequence or timestamp.
36+
37+
* `uuid_sequence_nextval(sequence regclass, block_size int default 65536, block_count int default 65536) RETURNS uuid`
38+
39+
* `uuid_time_nextval(interval_length int default 60, interval_count int default 65536) RETURNS uuid`
40+
41+
The default values for parameters are selected to work well for a range
42+
of workloads. See the next section explaining the design for additional
43+
information about the meaning of those parameters.
44+
45+
46+
Design
47+
------
48+
49+
The easiest way to make UUIDs more sequential is to use some sequential
50+
value as a prefix. For example, we might take a sequence or a timestamp
51+
and add random data until we have 16B in total. The resulting values
52+
would be almost perfectly sequential, but there are two issues with it:
53+
54+
* reduction of randomness - E.g. with a sequence producing bigint values
55+
this would reduce the randomness from 16B to 8B. Timestamps do reduce
56+
the randomness in a similar way, depending on the accuracy. This
57+
increases both the collision probability and predictability (e.g. it
58+
allows determining which UUIDs were generated close to each other, and
59+
perhaps the exact timestamp).
60+
61+
* bloat - If the values only grow, this may result in bloat in indexes
62+
after deleting historical data. This is a well-known issue e.g. with
63+
indexes on timestamps in log tables.
64+
65+
To address both of these issues, the implemented generators are designed
66+
to wrap around regularly, either after generating a certain number of UUIDs
67+
or after some amount of time. In both cases, the UUIDs are generated in blocks
68+
and have the form of
69+
70+
(block ID; random data)
71+
72+
The size of the block ID depends on the number of blocks and is fixed
73+
(depends on generator parameters). For example with the default 64k
74+
blocks we need 2 bytes to store it. The block ID increments regularly,
75+
and eventually wraps around.
76+
77+
For sequence-based generators the block size is determined by number of
78+
UUIDs generated. For example we may use blocks of 256 values, in which
79+
case the two-byte block ID may be computed like this:
80+
81+
(nextval('s') / 256) % 65536
82+
83+
So the generator wraps-around every ~16M UUIDs (because 256 * 65536).
84+
85+
For timestamp-based generators, the block size is defined as interval
86+
length, with the default value 60 seconds. As the default number of
87+
blocks is 64k (same as for sequence-based generators), the block ID may be
88+
computed like this
89+
90+
(timestamp / 60) % 65536
91+
92+
Which means the generator wraps around every ~45 days.
93+
94+
95+
Supported Releases
96+
------------------
97+
98+
Currently, this extension works only on releases since PostgreSQL 10. It
99+
can be made working on older releases with some minor code tweaks if
100+
someone wants to spend a bit of time on that.
101+
102+
103+
[1] https://en.wikipedia.org/wiki/Universally_unique_identifier
104+
105+
[2] http://www.informit.com/articles/article.aspx?p=25862

13/lib/sequential-uuids/sequential_uuids--1.0--1.0.1.sql

Whitespace-only changes.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/* sequential_uuids.sql */
2+
3+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
4+
\echo Use "CREATE EXTENSION sequential_uuids" to load this file. \quit
5+
6+
-- Sequence-based generator: binds the SQL function to the C symbol
-- 'uuid_sequence_nextval' in the extension's shared library.
-- Arguments: a sequence (regclass), block_size and block_count (both default
-- to 65536). Returns a uuid. STRICT: any NULL argument yields NULL.
-- NOTE(review): declared PARALLEL SAFE although it presumably advances the
-- given sequence - confirm this marking is intended.
CREATE FUNCTION uuid_sequence_nextval(regclass, block_size int default 65536, block_count int default 65536) RETURNS uuid
7+
AS 'MODULE_PATHNAME', 'uuid_sequence_nextval'
8+
LANGUAGE C STRICT PARALLEL SAFE;
9+
10+
-- Timestamp-based generator: binds the SQL function to the C symbol
-- 'uuid_time_nextval' in the extension's shared library.
-- Arguments: interval_length (default 60) and interval_count (default 65536).
-- Returns a uuid. STRICT: any NULL argument yields NULL.
CREATE FUNCTION uuid_time_nextval(interval_length int default 60, interval_count int default 65536) RETURNS uuid
11+
AS 'MODULE_PATHNAME', 'uuid_time_nextval'
12+
LANGUAGE C STRICT PARALLEL SAFE;

0 commit comments

Comments
 (0)