Skip to content

Commit 6658624

Browse files
authored
PARTITION KEY support (asg017#122)
* initial pass at PARTITION KEY support. * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now
1 parent ee36547 commit 6658624

16 files changed

+1522
-245
lines changed

.github/workflows/test.yaml

+80-55
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,100 @@ on:
33
push:
44
branches:
55
- main
6+
- partition-by
67
permissions:
78
contents: read
89
jobs:
910
build-linux-x86_64-extension:
1011
runs-on: ubuntu-20.04
1112
steps:
1213
- uses: actions/checkout@v4
13-
- uses: actions/setup-python@v5
14+
- uses: astral-sh/setup-uv@v3
1415
with:
15-
python-version: "3.12"
16+
enable-cache: true
1617
- run: ./scripts/vendor.sh
1718
- run: make loadable static
18-
- run: pip install pytest numpy; make test-loadable
19+
- run: uv sync --directory tests
20+
- run: make test-loadable python=./tests/.venv/bin/python
1921
- uses: actions/upload-artifact@v4
2022
with:
2123
name: sqlite-vec-linux-x86_64-extension
2224
path: dist/*
25+
build-macos-x86_64-extension:
26+
runs-on: macos-12
27+
steps:
28+
- uses: actions/checkout@v4
29+
- uses: astral-sh/setup-uv@v3
30+
with:
31+
enable-cache: true
32+
- run: uv python install 3.12
33+
- run: ./scripts/vendor.sh
34+
- run: make loadable static
35+
- run: uv sync --directory tests
36+
- run: make test-loadable python=./tests/.venv/bin/python
37+
- uses: actions/upload-artifact@v4
38+
with:
39+
name: sqlite-vec-macos-x86_64-extension
40+
path: dist/*
41+
build-macos-aarch64-extension:
42+
runs-on: macos-14
43+
steps:
44+
- uses: actions/checkout@v4
45+
- uses: astral-sh/setup-uv@v3
46+
with:
47+
enable-cache: true
48+
- run: ./scripts/vendor.sh
49+
- run: make loadable static
50+
- run: uv sync --directory tests
51+
- run: make test-loadable python=./tests/.venv/bin/python
52+
- uses: actions/upload-artifact@v4
53+
with:
54+
name: sqlite-vec-macos-aarch64-extension
55+
path: dist/*
56+
build-windows-x86_64-extension:
57+
runs-on: windows-2019
58+
steps:
59+
- uses: actions/checkout@v4
60+
- uses: ilammy/msvc-dev-cmd@v1
61+
- uses: astral-sh/setup-uv@v3
62+
with:
63+
enable-cache: true
64+
- run: ./scripts/vendor.sh
65+
shell: bash
66+
- run: make sqlite-vec.h
67+
- run: mkdir dist
68+
- run: cl.exe /fPIC -shared /W4 /Ivendor/ /O2 /LD sqlite-vec.c -o dist/vec0.dll
69+
- run: uv sync --directory tests
70+
- run: make test-loadable python=./tests/.venv/Scripts/python.exe
71+
shell: bash
72+
- uses: actions/upload-artifact@v4
73+
with:
74+
name: sqlite-vec-windows-x86_64-extension
75+
path: dist/*
76+
build-linux-aarch64-extension:
77+
runs-on: ubuntu-latest
78+
steps:
79+
- uses: green-coding-solutions/eco-ci-energy-estimation@v4
80+
with:
81+
task: start-measurement
82+
- uses: actions/checkout@v4
83+
with:
84+
version: "latest"
85+
- run: sudo apt-get install gcc-arm-linux-gnueabihf
86+
- run: ./scripts/vendor.sh
87+
- run: make sqlite-vec.h
88+
- run: make CC=arm-linux-gnueabihf-gcc loadable static
89+
- uses: actions/upload-artifact@v4
90+
with:
91+
name: sqlite-vec-linux-aarch64-extension
92+
path: dist/*
93+
- uses: green-coding-solutions/eco-ci-energy-estimation@v4
94+
with:
95+
task: get-measurement
96+
label: "all"
97+
- uses: green-coding-solutions/eco-ci-energy-estimation@v4
98+
with:
99+
task: display-results
23100
build-android-extensions:
24101
runs-on: ubuntu-latest
25102
strategy:
@@ -98,58 +175,6 @@ jobs:
98175
with:
99176
name: sqlite-vec-${{ matrix.platforms.name }}-extension
100177
path: dist/*
101-
build-macos-x86_64-extension:
102-
runs-on: macos-12
103-
steps:
104-
- uses: actions/checkout@v4
105-
- run: ./scripts/vendor.sh
106-
- run: make loadable static
107-
- run: /usr/local/opt/python@3/libexec/bin/python -m pip install --break-system-packages pytest numpy; make test-loadable python=/usr/local/opt/python@3/libexec/bin/python
108-
- uses: actions/upload-artifact@v4
109-
with:
110-
name: sqlite-vec-macos-x86_64-extension
111-
path: dist/*
112-
build-macos-aarch64-extension:
113-
runs-on: macos-14
114-
steps:
115-
- uses: actions/checkout@v4
116-
- run: ./scripts/vendor.sh
117-
- run: make loadable static
118-
- run: /opt/homebrew/opt/python3/libexec/bin/python -m pip install pytest numpy --break-system-packages; make test-loadable python=/opt/homebrew/opt/python3/libexec/bin/python
119-
- uses: actions/upload-artifact@v4
120-
with:
121-
name: sqlite-vec-macos-aarch64-extension
122-
path: dist/*
123-
build-windows-x86_64-extension:
124-
runs-on: windows-2019
125-
steps:
126-
- uses: actions/checkout@v4
127-
- uses: ilammy/msvc-dev-cmd@v1
128-
- uses: actions/setup-python@v5
129-
with:
130-
python-version: "3.12"
131-
- run: ./scripts/vendor.sh
132-
shell: bash
133-
- run: make sqlite-vec.h
134-
- run: mkdir dist
135-
- run: cl.exe /fPIC -shared /W4 /Ivendor/ /O2 /LD sqlite-vec.c -o dist/vec0.dll
136-
- run: pip install pytest numpy; make test-loadable
137-
- uses: actions/upload-artifact@v4
138-
with:
139-
name: sqlite-vec-windows-x86_64-extension
140-
path: dist/*
141-
build-linux-aarch64-extension:
142-
runs-on: ubuntu-latest
143-
steps:
144-
- uses: actions/checkout@v4
145-
- run: sudo apt-get install gcc-arm-linux-gnueabihf
146-
- run: ./scripts/vendor.sh
147-
- run: make sqlite-vec.h
148-
- run: make CC=arm-linux-gnueabihf-gcc loadable static
149-
- uses: actions/upload-artifact@v4
150-
with:
151-
name: sqlite-vec-linux-aarch64-extension
152-
path: dist/*
153178
build-wasm32-emscripten:
154179
runs-on: ubuntu-latest
155180
steps:

.gitignore

-1
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,3 @@ sqlite-vec.h
2626
tmp/
2727

2828
poetry.lock
29-
pyproject.toml

ARCHITECTURE.md

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
## `vec0`
2+
3+
### idxStr
4+
5+
The `vec0` idxStr is a string composed of a single "header" character and 0 or
6+
more "blocks" of 4 characters each.
7+
8+
The "header" character denotes the type of query plan, as determined by the
9+
`enum vec0_query_plan` values. The current possible values are:
10+
11+
| Name | Value | Description |
12+
| -------------------------- | ----- | ---------------------------------------------------------------------- |
13+
| `VEC0_QUERY_PLAN_FULLSCAN` | `'1'` | Perform a full-scan on all rows |
14+
| `VEC0_QUERY_PLAN_POINT` | `'2'` | Perform a single-lookup point query for the provided rowid |
15+
| `VEC0_QUERY_PLAN_KNN` | `'3'` | Perform a KNN-style query on the provided query vector and parameters. |
16+
17+
Each 4-character "block" is associated with a corresponding value in `argv[]`. For example, the 1st block, at byte offsets `1-4` (inclusive), is associated with `argv[1]`; the 2nd block, at byte offsets `5-8` (inclusive), is associated with `argv[2]`; and so on. Each block describes what kind of value or filter the given `argv[i]` value is.
18+
19+
20+
#### `VEC0_IDXSTR_KIND_KNN_MATCH` (`'{'`)
21+
22+
`argv[i]` is the query vector of the KNN query.
23+
24+
The remaining 3 characters of the block are `_` fillers.
25+
26+
#### `VEC0_IDXSTR_KIND_KNN_K` (`'}'`)
27+
28+
`argv[i]` is the limit/k value of the KNN query.
29+
30+
The remaining 3 characters of the block are `_` fillers.
31+
32+
#### `VEC0_IDXSTR_KIND_KNN_ROWID_IN` (`'['`)
33+
34+
`argv[i]` is the optional `rowid in (...)` value, and must be handled with [`sqlite3_vtab_in_first()` /
35+
`sqlite3_vtab_in_next()`](https://www.sqlite.org/c3ref/vtab_in_first.html).
36+
37+
The remaining 3 characters of the block are `_` fillers.
38+
39+
#### `VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT` (`']'`)
40+
41+
`argv[i]` is a "constraint" on a specific partition key.
42+
43+
The second character of the block denotes which partition key to filter on, using `A` to denote the first partition key column, `B` for the second, etc. It is encoded with `'A' + partition_idx` and can be decoded with `c - 'A'`.
44+
45+
The third character of the block denotes which operator is used in the constraint. It will be one of the values of `enum vec0_partition_operator`, as only a subset of operations are supported on partition keys.
46+
47+
The fourth character of the block is a `_` filler.
48+
49+
50+
#### `VEC0_IDXSTR_KIND_POINT_ID` (`'!'`)
51+
52+
`argv[i]` is the value of the rowid or id to match against for the point query.
53+
54+
The remaining 3 characters of the block are `_` fillers.

Makefile

+5-2
Original file line numberDiff line numberDiff line change
@@ -185,13 +185,16 @@ publish-release:
185185

186186
# -k test_vec0_update
187187
test-loadable: loadable
188-
$(PYTHON) -m pytest -vv -s -x tests/test-loadable.py
188+
$(PYTHON) -m pytest -vv -s -x tests/test-*.py
189189

190190
test-loadable-snapshot-update: loadable
191191
$(PYTHON) -m pytest -vv tests/test-loadable.py --snapshot-update
192192

193193
test-loadable-watch:
194-
watchexec -w sqlite-vec.c -w tests/test-loadable.py -w Makefile --clear -- make test-loadable
194+
watchexec --exts c,py,Makefile --clear -- make test-loadable
195+
196+
test-unit:
197+
$(CC) tests/test-unit.c sqlite-vec.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit
195198

196199
site-dev:
197200
npm --prefix site run dev

TODO

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# partition
2+
3+
- [ ] UPDATE on partition key values
4+
- remove previous row from chunk, insert into new one?
5+
- [ ] proper sqlite3_vtab_nochange / sqlite3_value_nochange handling

0 commit comments

Comments
 (0)