Skip to content

Commit f642a78

Browse files
committed
add lib
1 parent 2d452af commit f642a78

File tree

9 files changed

+398
-0
lines changed

9 files changed

+398
-0
lines changed

lib/sequential-uuids/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.o
2+
*.so

lib/sequential-uuids/LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2018 Tomas Vondra
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

lib/sequential-uuids/META.json

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"name": "sequential_uuids",
3+
"abstract": "UUID generators with sequential patterns, which helps to reduce random I/O patterns associated with regular entirely-random UUID.",
4+
"description": "Regular random UUIDs are distributed uniformly over the whole range of possible values. This results in poor locality when inserting data into indexes - all index leaf pages are equally likely to be hit, forcing the whole index into memory. With small indexes that's fine, but once the index size exceeds shared buffers (or RAM), the cache hit ratio quickly deteriorates. The main goal of the two generators implemented by this extension is generating UUIDs in a more sequential pattern, but without reducing the randomness too much (which could increase the probability of collision and predictability of the generated UUIDs). This idea is not new; it is essentially what the UUID Wikipedia article calls COMB (combined-time GUID).",
5+
"version": "1.0.1",
6+
"maintainer": "Tomas Vondra <[email protected]>",
7+
"license": "mit",
8+
"prereqs": {
9+
"runtime": {
10+
"requires": {
11+
"PostgreSQL": "10.0.0"
12+
}
13+
}
14+
},
15+
"provides": {
16+
"sequential_uuids": {
17+
"file": "sequential_uuids--1.0.1.sql",
18+
"docfile" : "README.md",
19+
"version": "1.0.1"
20+
}
21+
},
22+
"resources": {
23+
"repository": {
24+
"url": "https://github.com/tvondra/sequential-uuids.git",
25+
"web": "http://github.com/tvondra/sequential-uuids",
26+
"type": "git"
27+
}
28+
},
29+
"tags" : ["UUID", "generator"],
30+
"meta-spec": {
31+
"version": "1.0.0",
32+
"url": "http://pgxn.org/meta/spec.txt"
33+
},
34+
"release_status" : "stable"
35+
}

lib/sequential-uuids/Makefile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# sequential_uuids/Makefile
2+
#
3+
# Copyright (c) 2014 Citus Data, Inc.
4+
#
5+
6+
# Name of the shared library to build; its object files are listed in OBJS.
MODULE_big = sequential_uuids
7+
8+
OBJS = sequential_uuids.o
9+
10+
# Extension name and the SQL install script shipped with it.
EXTENSION = sequential_uuids
11+
DATA = sequential_uuids--1.0.1.sql
12+
13+
# Ask pg_config where the PGXS makefile lives and include it.
PG_CONFIG = pg_config
14+
PGXS := $(shell $(PG_CONFIG) --pgxs)
15+
include $(PGXS)
16+
17+
# If MAJORVERSION is still undefined after the include above, derive it
# from VERSION (presumably set by PGXS -- confirm) by stripping the suffix.
ifndef MAJORVERSION
18+
MAJORVERSION := $(basename $(VERSION))
19+
endif

lib/sequential-uuids/README.md

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
Sequential UUID generators
2+
==========================
3+
4+
This PostgreSQL extension implements two UUID generators with sequential
5+
patterns, which helps to reduce random I/O patterns associated with
6+
regular entirely-random UUID.
7+
8+
Regular random UUIDs are distributed uniformly over the whole range of
9+
possible values. This results in poor locality when inserting data into
10+
indexes - all index leaf pages are equally likely to be hit, forcing
11+
the whole index into memory. With small indexes that's fine, but once
12+
the index size exceeds shared buffers (or RAM), the cache hit ratio
13+
quickly deteriorates.
14+
15+
Compare this to sequences and timestamps, which have a more sequential
16+
pattern and the new data almost always end up in the right-most part of
17+
the index (new sequence value is larger than all preceding values, same
18+
for timestamp). This results in a nicer and cache-friendlier behavior,
19+
but the values are predictable and may easily collide across machines.
20+
21+
The main goal of the two generators implemented by this extension, is
22+
generating UUIDs in a more sequential pattern, but without reducing the
23+
randomness too much (which could increase the probability of collision
24+
and predictability of the generated UUIDs). This idea is not new, and
25+
has been described before.
26+
27+
This idea is pretty much what the UUID wikipedia article [1] calls COMB
28+
(combined-time GUID) and is more thoroughly explained in [2].
29+
30+
31+
Generators
32+
----------
33+
34+
The extension provides two functions generating sequential UUIDs using
35+
either a sequence or timestamp.
36+
37+
* `uuid_sequence_nextval(sequence regclass, block_size int default 65536, block_count int default 65536) RETURNS uuid`
38+
39+
* `uuid_time_nextval(interval_length int default 60, interval_count int default 65536) RETURNS uuid`
40+
41+
The default values for parameters are selected to work well for a range
42+
of workloads. See the next section explaining the design for additional
43+
information about the meaning of those parameters.
44+
45+
46+
Design
47+
------
48+
49+
The easiest way to make UUIDs more sequential is to use some sequential
50+
value as a prefix. For example, we might take a sequence or a timestamp
51+
and add random data until we have 16B in total. The resulting values
52+
would be almost perfectly sequential, but there are two issues with it:
53+
54+
* reduction of randomness - E.g. with a sequence producing bigint values
55+
this would reduce the randomness from 16B to 8B. Timestamps do reduce
56+
the randomness in a similar way, depending on the accuracy. This
57+
increases both the collision probability and predictability (e.g. it
58+
allows determining which UUIDs were generated close to each other, and
59+
perhaps the exact timestamp).
60+
61+
* bloat - If the values only grow, this may result in bloat in indexes
62+
after deleting historical data. This is a well-known issue e.g. with
63+
indexes on timestamps in log tables.
64+
65+
To address both of these issues, the implemented generators are designed
66+
to wrap-around regularly, either after generating certain number of UUIDs
67+
or some amount of time. In both cases, the UUIDs are generated in blocks
68+
and have the form of
69+
70+
(block ID; random data)
71+
72+
The size of the block ID depends on the number of blocks and is fixed
73+
(depends on generator parameters). For example with the default 64k
74+
blocks we need 2 bytes to store it. The block ID increments regularly,
75+
and eventually wraps around.
76+
77+
For sequence-based generators the block size is determined by number of
78+
UUIDs generated. For example we may use blocks of 256 values, in which
79+
case the two-byte block ID may be computed like this:
80+
81+
(nextval('s') / 256) % 65536
82+
83+
So the generator wraps-around every ~16M UUIDs (because 256 * 65536).
84+
85+
For timestamp-based generators, the block size is defined as interval
86+
length, with the default value 60 seconds. As the default number of
87+
blocks is 64k (same as for sequence-based generators), the block ID may be
88+
computed like this
89+
90+
(timestamp / 60) % 65536
91+
92+
Which means the generator wraps around every ~45 days.
93+
94+
95+
Supported Releases
96+
------------------
97+
98+
Currently, this extension works only on releases since PostgreSQL 10. It
99+
can be made working on older releases with some minor code tweaks if
100+
someone wants to spend a bit of time on that.
101+
102+
103+
[1] https://en.wikipedia.org/wiki/Universally_unique_identifier
104+
105+
[2] http://www.informit.com/articles/article.aspx?p=25862

lib/sequential-uuids/sequential_uuids--1.0--1.0.1.sql

Whitespace-only changes.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/* sequential_uuids.sql */
2+
3+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
4+
\echo Use "CREATE EXTENSION sequential_uuids" to load this file. \quit
5+
6+
-- Sequence-based sequential UUID generator.
--
-- Arguments: the sequence to read (regclass), block_size (number of UUIDs
-- sharing one prefix, default 65536) and block_count (number of distinct
-- prefixes before wrapping around, default 65536).
--
-- The function advances the sequence, so it must not be labeled
-- PARALLEL SAFE: PostgreSQL requires functions that access sequences to be
-- marked PARALLEL UNSAFE, otherwise they may be run in a parallel worker
-- where nextval() is not allowed.
CREATE FUNCTION uuid_sequence_nextval(regclass, block_size int default 65536, block_count int default 65536) RETURNS uuid
AS 'MODULE_PATHNAME', 'uuid_sequence_nextval'
LANGUAGE C STRICT PARALLEL UNSAFE;
9+
10+
-- Timestamp-based sequential UUID generator.
--
-- interval_length is the number of seconds during which generated UUIDs
-- share the same prefix (default 60); interval_count is the number of
-- distinct prefixes before wrapping around (default 65536).
CREATE FUNCTION uuid_time_nextval(
    interval_length integer DEFAULT 60,
    interval_count  integer DEFAULT 65536
)
RETURNS uuid
AS 'MODULE_PATHNAME', 'uuid_time_nextval'
LANGUAGE C
STRICT
PARALLEL SAFE;
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* sequential_uuids.c
4+
* generators of sequential UUID values based on sequence/timestamp
5+
*
6+
*
7+
* Currently, this only works on PostgreSQL 10. Adding support for older
8+
* releases is possible, but it would require solving a couple issues:
9+
*
10+
* 1) pg_uuid_t hidden in uuid.c (can be solved by local struct definition)
11+
*
12+
* 2) pg_strong_random not available (can fallback to random, probably)
13+
*
14+
* 3) functions defined as PARALLEL SAFE, which fails on pre-9.6 releases
15+
*
16+
*-------------------------------------------------------------------------
17+
*/
18+
#include <sys/time.h>
19+
#include <sys/types.h>
20+
#include <unistd.h>
21+
22+
#include "postgres.h"
23+
24+
#include "catalog/namespace.h"
25+
#include "commands/sequence.h"
26+
#include "utils/uuid.h"
27+
28+
PG_MODULE_MAGIC;
29+
30+
PG_FUNCTION_INFO_V1(uuid_sequence_nextval);
31+
PG_FUNCTION_INFO_V1(uuid_time_nextval);
32+
33+
/*
34+
* uuid_sequence_nextval
35+
* generate sequential UUID using a sequence
36+
*
37+
* The sequence-based sequential UUID generator define the group size
38+
* and group count based on number of UUIDs generated.
39+
*
40+
* The block_size (65546 by default) determines the number of UUIDs with
41+
* the same prefix, and block_count (65536 by default) determines the
42+
* number of blocks before wrapping around to 0. This means that with
43+
* the default values, the generator wraps around every ~2B UUIDs.
44+
*
45+
* You may increase (or rather decrease) the parameters if needed, e.g,
46+
* by lowering the block size to 256, in wich case the cycle interval
47+
* is only 16M values.
48+
*/
49+
Datum
50+
uuid_sequence_nextval(PG_FUNCTION_ARGS)
51+
{
52+
int i;
53+
int64 val;
54+
Oid relid = PG_GETARG_OID(0);
55+
int32 block_size = PG_GETARG_INT32(1);
56+
int32 block_count = PG_GETARG_INT32(2);
57+
int64 prefix_bytes;
58+
pg_uuid_t *uuid;
59+
unsigned char *p;
60+
61+
/* some basic sanity checks */
62+
if (block_size < 0)
63+
ereport(ERROR,
64+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
65+
errmsg("block size must be a positive integer")));
66+
67+
if (block_count < 0)
68+
ereport(ERROR,
69+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
70+
errmsg("number of blocks must be a positive integer")));
71+
72+
/* count the number of bytes to keep from the sequence value */
73+
prefix_bytes = 0;
74+
while (block_count > 1)
75+
{
76+
block_count /= 256;
77+
prefix_bytes++;
78+
}
79+
80+
/*
81+
* Read the next value from the sequence and get rid of the least
82+
* significant bytes.
83+
*/
84+
val = nextval_internal(relid, true);
85+
val /= block_size;
86+
87+
p = (unsigned char *) &val;
88+
89+
uuid = palloc(sizeof(pg_uuid_t));
90+
91+
/* copy the desired number of (least significant) bytes as prefix */
92+
for (i = 0; i < prefix_bytes; i++)
93+
uuid->data[i] = p[prefix_bytes - 1 - i];
94+
95+
/* generate the remaining bytes as random (use strong generator) */
96+
if(!pg_strong_random(uuid->data + prefix_bytes, UUID_LEN - prefix_bytes))
97+
ereport(ERROR,
98+
(errcode(ERRCODE_INTERNAL_ERROR),
99+
errmsg("could not generate random values")));
100+
101+
/*
102+
* Set the UUID version flags according to "version 4" (pseudorandom)
103+
* UUID, see http://tools.ietf.org/html/rfc4122#section-4.4
104+
*
105+
* This does reduce the randomness a bit, because it determines the
106+
* value of certain bits, but that should be negligible (certainly
107+
* compared to the reduction due to prefix).
108+
*
109+
* UUID v4 is probably the safest choice here. There is v1 which is
110+
* time-based, but it includes MAC address (which we don't use) and
111+
* works with very special timestamp (starting at 1582 etc.). So we
112+
* just use v4 and claim this is pseudorandom.
113+
*/
114+
uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */
115+
uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
116+
117+
PG_RETURN_UUID_P(uuid);
118+
}
119+
120+
/*
121+
* uuid_time_nextval
122+
* generate sequential UUID using current time
123+
*
124+
* The timestamp-based sequential UUID generator define the group size
125+
* and group count based on data extracted from current timestamp.
126+
*
127+
* The interval_length (60 seconds by default) is defined as number of
128+
* seconds where UUIDs share the same prefix). The prefix length is
129+
* determined by the number of intervals (65536 by default, i.e. 2B).
130+
* With these parameters the generator wraps around every ~45 days.
131+
*/
132+
Datum
133+
uuid_time_nextval(PG_FUNCTION_ARGS)
134+
{
135+
int i;
136+
struct timeval tv;
137+
int64 val;
138+
pg_uuid_t *uuid;
139+
int32 interval_length = PG_GETARG_INT32(0);
140+
int32 interval_count = PG_GETARG_INT32(1);
141+
int64 prefix_bytes;
142+
unsigned char *p;
143+
144+
/* some basic sanity checks */
145+
if (interval_length < 1)
146+
ereport(ERROR,
147+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
148+
errmsg("length of interval must be a positive integer")));
149+
150+
if (interval_count < 1)
151+
ereport(ERROR,
152+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
153+
errmsg("number of intervals must be a positive integer")));
154+
155+
if (gettimeofday(&tv, NULL) != 0)
156+
elog(ERROR, "gettimeofday call failed");
157+
158+
val = (tv.tv_sec / interval_length);
159+
160+
/* count the number of bytes to keep from the timestamp */
161+
prefix_bytes = 0;
162+
while (interval_count > 1)
163+
{
164+
interval_count /= 256;
165+
prefix_bytes++;
166+
}
167+
168+
p = (unsigned char *) &val;
169+
170+
uuid = palloc(sizeof(pg_uuid_t));
171+
172+
/* copy the desired number of (least significant) bytes as prefix */
173+
for (i = 0; i < prefix_bytes; i++)
174+
uuid->data[i] = p[prefix_bytes - 1 - i];
175+
176+
/* generate the remaining bytes as random (use strong generator) */
177+
if(!pg_strong_random(uuid->data + prefix_bytes, UUID_LEN - prefix_bytes))
178+
ereport(ERROR,
179+
(errcode(ERRCODE_INTERNAL_ERROR),
180+
errmsg("could not generate random values")));
181+
182+
/*
183+
* Set the UUID version flags according to "version 4" (pseudorandom)
184+
* UUID, see http://tools.ietf.org/html/rfc4122#section-4.4
185+
*
186+
* This does reduce the randomness a bit, because it determines the
187+
* value of certain bits, but that should be negligible (certainly
188+
* compared to the reduction due to prefix).
189+
*
190+
* UUID v4 is probably the safest choice here. There is v1 which is
191+
* time-based, but it includes MAC address (which we don't use) and
192+
* works with very special timestamp (starting at 1582 etc.). So we
193+
* just use v4 and claim this is pseudorandom.
194+
*/
195+
uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */
196+
uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
197+
198+
PG_RETURN_UUID_P(uuid);
199+
}

0 commit comments

Comments
 (0)