Merge branch 'main' into test-all-types-duckdb
JelteF authored Feb 14, 2025
2 parents 18b655d + 64685a4 commit b259a10
Showing 49 changed files with 810 additions and 280 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_and_test.yaml
@@ -22,7 +22,7 @@ jobs:
- name: Install clang-format and ruff
run: python3 -m pip install -r dev_requirements.txt
- name: Run clang-format
-        run: git clang-format refs/remotes/origin/main --diff
+        run: find src include -iname '*.hpp' -o -iname '*.h' -o -iname '*.cpp' -o -iname '*.c' | xargs git clang-format --diff origin/main
- name: Run ruff check
run: ruff check --output-format=github .
- name: Run ruff format
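The new clang-format invocation scopes formatting to the C/C++ sources under `src/` and `include/` instead of every file `git clang-format` can see. A minimal sketch of how the `find | xargs` pipeline selects files (the scratch directory and file names are hypothetical, and `git clang-format` itself is not invoked here):

```shell
# Hypothetical scratch tree to illustrate the file selection:
mkdir -p /tmp/fmt_demo/src /tmp/fmt_demo/include
touch /tmp/fmt_demo/src/scan.cpp /tmp/fmt_demo/src/util.c \
      /tmp/fmt_demo/include/scan.hpp /tmp/fmt_demo/src/notes.md

# The -iname tests are OR-ed together, so a file matching any of the
# four extensions is listed; notes.md is filtered out:
find /tmp/fmt_demo/src /tmp/fmt_demo/include \
    -iname '*.hpp' -o -iname '*.h' -o -iname '*.cpp' -o -iname '*.c' | sort
```

In the workflow, the resulting list is piped into `xargs git clang-format --diff origin/main`, so only C/C++ files in those two directories are checked.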
104 changes: 91 additions & 13 deletions .github/workflows/docker.yaml
@@ -1,5 +1,9 @@
name: Build Docker

permissions:
id-token: write
contents: write

on:
push:
tags: ["v*"]
@@ -13,40 +17,114 @@ on:
workflow_dispatch:

jobs:
docker:
name: Build Docker
runs-on: ubuntu-24.04
docker_build:
name: Build Docker image for Postgres ${{ matrix.postgres }} on ${{ matrix.runner }}
strategy:
matrix:
postgres: ["14", "15", "16", "17"]
runner: ["ubuntu-24.04", "ubuntu-24.04-arm"]

runs-on: ${{ matrix.runner }}

env:
BUILDKIT_PROGRESS: plain
POSTGRES_VERSION: ${{ matrix.postgres }}
outputs:
branch_tag: ${{ steps.params.outputs.branch_tag }}
target_repo: ${{ steps.params.outputs.target_repo }}
steps:
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: pgduckdb
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Checkout pg_duckdb extension code
uses: actions/checkout@v4
with:
submodules: "recursive"
- name: Set env

- name: Compute job parameters
id: params
run: |
echo "POSTGRES_VERSION=${{ matrix.postgres }}" >> $GITHUB_ENV
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
# Tag is XX-YYYYY-<branch>-latest so 16 + branch name length
# since maximum docker tag is 128 characters, we need to truncate the branch name to 112
BRANCH=$(echo "${{ github.head_ref || github.ref_name }}" \
| sed 's/[^a-zA-Z0-9\-\.]/-/g' \
| cut -c 1-112 | tr '[:upper:]' '[:lower:]' \
| sed -e 's/-*$//')
# Set platform depending on which runner we're using
if [ "${{ matrix.runner }}" = "ubuntu-24.04" ]; then
PLATFORM=amd64
else
PLATFORM=arm64
fi
# If main or tag, then push to `pgduckdb/pgduckdb`
git fetch --tags --force
if [ "$BRANCH" = "main" ] || git rev-parse --verify $BRANCH^{tag} > /dev/null 2>&1; then
TARGET_REPO='pgduckdb/pgduckdb'
else
TARGET_REPO='pgduckdb/ci-builds'
fi
echo "platform=$PLATFORM" >> "$GITHUB_OUTPUT"
echo "branch_tag=$BRANCH" >> "$GITHUB_OUTPUT"
echo "target_repo=$TARGET_REPO" >> "$GITHUB_OUTPUT"
echo "latest_image=pgduckdb/ci-builds:${{ matrix.postgres }}-${PLATFORM}-${BRANCH}-latest" >> "$GITHUB_OUTPUT"
- name: Attempt to pull previous image
run: |
docker pull ${{ steps.params.outputs.latest_image }} || true
docker pull moby/buildkit:buildx-stable-1
- name: Set up Docker buildx
uses: docker/setup-buildx-action@v3
with:
platforms: linux/amd64,linux/arm64
platforms: linux/${{ steps.params.outputs.platform }}

- name: docker bake
uses: docker/bake-action@v5
with:
targets: pg_duckdb_${{ matrix.postgres }}
push: true
set: |
*.platform=linux/amd64,linux/arm64
*.cache-to=type=gha,mode=max
*.cache-from=type=gha
postgres.tags=pgduckdb/pgduckdb:${{ matrix.postgres }}-${{ github.sha }}
${{ !contains(github.ref_name, '/') && format('postgres.tags=pgduckdb/pgduckdb:{0}-{1}', matrix.postgres, github.ref_name) || '' }}
*.platform=linux/${{ steps.params.outputs.platform }}
*.cache-from=type=registry,ref=${{ steps.params.outputs.latest_image }}
*.cache-from=type=gha,scope=${{ github.workflow }}
*.cache-to=type=gha,mode=max,scope=${{ github.workflow }}
postgres.tags=pgduckdb/ci-builds:${{ matrix.postgres }}-${{ steps.params.outputs.platform }}-${{ github.sha }}
postgres.tags=${{ steps.params.outputs.latest_image }}
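The branch-sanitization pipeline in the "Compute job parameters" step above can be exercised on its own. A sketch with a hypothetical branch name (the 112-character `cut` keeps the final tag under Docker's 128-character limit once the version/platform prefix and `-latest` suffix are added):

```shell
# Hypothetical branch name containing characters Docker tags reject:
RAW='Feature/Add_JSON-support!'

# Same pipeline as the workflow: squash disallowed characters to '-',
# truncate to 112 characters, lowercase, then strip trailing dashes.
BRANCH=$(echo "$RAW" \
  | sed 's/[^a-zA-Z0-9\-\.]/-/g' \
  | cut -c 1-112 | tr '[:upper:]' '[:lower:]' \
  | sed -e 's/-*$//')
echo "$BRANCH"   # feature-add-json-support
```

The trailing-dash strip matters because a sanitized-then-truncated name can end in `-`, which would otherwise produce an awkward `...--latest` tag.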
docker_merge:
name: Merge Docker image for Postgres ${{ matrix.postgres }}
strategy:
matrix:
postgres: ["14", "15", "16", "17"]

runs-on: ubuntu-24.04
needs: docker_build
steps:
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: pgduckdb
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Merge images
run: |
docker pull --platform linux/amd64 pgduckdb/ci-builds:${{ matrix.postgres }}-amd64-${{ github.sha }}
docker pull --platform linux/arm64 pgduckdb/ci-builds:${{ matrix.postgres }}-arm64-${{ github.sha }}
BRANCH="${{ needs.docker_build.outputs.branch_tag }}"
TARGET_REPO="${{ needs.docker_build.outputs.target_repo }}"
echo "Will push merged image to '${TARGET_REPO}'."
docker buildx imagetools create \
--tag ${TARGET_REPO}:${{ matrix.postgres }}-${BRANCH} \
--tag pgduckdb/ci-builds:${{ matrix.postgres }}-${{ github.sha }} \
pgduckdb/ci-builds:${{ matrix.postgres }}-amd64-${{ github.sha }} \
pgduckdb/ci-builds:${{ matrix.postgres }}-arm64-${{ github.sha }}
docker buildx imagetools inspect pgduckdb/ci-builds:${{ matrix.postgres }}-${{ github.sha }}
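The `docker_merge` job stitches the two per-architecture images into one multi-arch manifest. A sketch of the image references involved, using hypothetical values for the matrix version and commit (`docker buildx imagetools create` needs registry access, so only the naming scheme is shown):

```shell
# Hypothetical inputs: Postgres major version, commit SHA, and the
# outputs forwarded from the docker_build job.
PG=17; SHA=b259a10; BRANCH=main; TARGET_REPO=pgduckdb/pgduckdb

# Per-architecture source images pushed by docker_build:
echo "amd64 source: pgduckdb/ci-builds:${PG}-amd64-${SHA}"
echo "arm64 source: pgduckdb/ci-builds:${PG}-arm64-${SHA}"

# Tags the merged multi-arch manifest receives:
echo "branch tag:   ${TARGET_REPO}:${PG}-${BRANCH}"
echo "sha tag:      pgduckdb/ci-builds:${PG}-${SHA}"
```

Because `BRANCH` is `main` in this sketch, `target_repo` resolves to `pgduckdb/pgduckdb`; for any other non-tag branch the merged image stays in `pgduckdb/ci-builds`.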
65 changes: 65 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,68 @@
# 0.3.1 (2025-02-13)

## Fixed

- Fixed CI so docker images are built and pushed to Docker Hub for tags. ([#589])

[#589]: https://github.com/duckdb/pg_duckdb/pull/589

# 0.3.0 (2025-02-13)

## Added

- Support using Postgres indexes and reading from partitioned tables. ([#477])
- The `AS (id bigint, name text)` syntax is no longer supported when using `read_parquet`, `iceberg_scan`, etc. The new syntax is as follows: ([#531])

```sql
SELECT * FROM read_parquet('file.parquet');
SELECT r['id'], r['name'] FROM read_parquet('file.parquet') r WHERE r['age'] > 21;
```

- Add a `duckdb.query` function which allows using DuckDB query syntax in Postgres. ([#531])
- Support the `approx_count_distinct` DuckDB aggregate. ([#499])
- Support the `bytea` (aka blob), `uhugeint`, `jsonb`, `timestamp_ns`, `timestamp_ms`, `timestamp_s` & `interval` types. ([#511], [#525], [#513], [#534], [#573])
- Support DuckDB [json functions and aggregates](https://duckdb.org/docs/data/json/json_functions.html). ([#546])
- Add support for the `duckdb.allow_community_extensions` setting.
- We have an official logo! 🎉 ([#575])

## Changed

- Update to DuckDB 1.2.0. ([#548])
- Allow executing `duckdb.raw_query`, `duckdb.cache_info`, `duckdb.cache_delete` and `duckdb.recycle_db` as non-superusers. ([#572])
- Only sync MotherDuck catalogs when there is DuckDB query activity. ([#582])

## Fixed

- Correctly parse parameter lists in `COPY` commands. This allows using `PARTITION_BY` as one of the `COPY` options. ([#465])
- Correctly read cache metadata for files larger than 4GB. ([#494])
- Fix bug in parameter handling for prepared statements and PL/pgSQL functions. ([#491])
- Fix comparisons and operators on the `timestamp with timezone` field by enabling DuckDB's `icu` extension by default. ([#512])
- Allow using `read_parquet` functions when not using superuser privileges. ([#550])
- Fix some case insensitivity issues when reading from Postgres tables. ([#563])
- Fix case where cancel requests (e.g. triggered by pressing Ctrl+C in `psql`) would be ignored. ([#548], [#584], [#587])

[#477]: https://github.com/duckdb/pg_duckdb/pull/477
[#531]: https://github.com/duckdb/pg_duckdb/pull/531
[#499]: https://github.com/duckdb/pg_duckdb/pull/499
[#511]: https://github.com/duckdb/pg_duckdb/pull/511
[#525]: https://github.com/duckdb/pg_duckdb/pull/525
[#513]: https://github.com/duckdb/pg_duckdb/pull/513
[#534]: https://github.com/duckdb/pg_duckdb/pull/534
[#573]: https://github.com/duckdb/pg_duckdb/pull/573
[#546]: https://github.com/duckdb/pg_duckdb/pull/546
[#575]: https://github.com/duckdb/pg_duckdb/pull/575
[#548]: https://github.com/duckdb/pg_duckdb/pull/548
[#572]: https://github.com/duckdb/pg_duckdb/pull/572
[#582]: https://github.com/duckdb/pg_duckdb/pull/582
[#465]: https://github.com/duckdb/pg_duckdb/pull/465
[#494]: https://github.com/duckdb/pg_duckdb/pull/494
[#491]: https://github.com/duckdb/pg_duckdb/pull/491
[#512]: https://github.com/duckdb/pg_duckdb/pull/512
[#550]: https://github.com/duckdb/pg_duckdb/pull/550
[#563]: https://github.com/duckdb/pg_duckdb/pull/563
[#584]: https://github.com/duckdb/pg_duckdb/pull/584
[#587]: https://github.com/duckdb/pg_duckdb/pull/587

# 0.2.0 (2024-12-10)

## Added
2 changes: 1 addition & 1 deletion Dockerfile
@@ -27,7 +27,7 @@ USER postgres
# Selectively copy the files that we need. Sadly we need separate COPY commands
# for each directory, because docker puts only the contents of the source
# directory into the target directory, and not the directory itself too.
-COPY --chown=postgres:postgres Makefile Makefile.global pg_duckdb.control .
+COPY --chown=postgres:postgres Makefile Makefile.global pg_duckdb.control ./
COPY --chown=postgres:postgres .git/modules/third_party/duckdb/HEAD .git/modules/third_party/duckdb/HEAD
COPY --chown=postgres:postgres sql sql
COPY --chown=postgres:postgres src src
4 changes: 2 additions & 2 deletions Makefile
@@ -13,7 +13,7 @@ OBJS += $(subst .c,.o, $(C_SRCS))
# set to `make` to disable ninja
DUCKDB_GEN ?= ninja
# used to know what version of extensions to download
-DUCKDB_VERSION = v1.1.3
+DUCKDB_VERSION = v1.2.0
# duckdb build tweaks
DUCKDB_CMAKE_VARS = -DBUILD_SHELL=0 -DBUILD_PYTHON=0 -DBUILD_UNITTESTS=0
# set to 1 to disable asserts in DuckDB. This is particularly useful in combination with MotherDuck.
@@ -119,7 +119,7 @@ lintcheck:
ruff check

format:
-	git clang-format origin/main
+	find src include -iname '*.hpp' -o -iname '*.h' -o -iname '*.cpp' -o -iname '*.c' | xargs git clang-format origin/main
ruff format

format-all:
22 changes: 12 additions & 10 deletions README.md
@@ -1,8 +1,11 @@
<p align="center">
-  <img width="500" src="logo.png" alt="temporary logo" />
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="logo-dark.svg">
+    <img width="800" src="logo-light.svg" alt="pg_duckdb logo" />
+  </picture>
</p>

-0.2.0 release is here 🎉 Please [try](#installation) it out!
+0.3.0 release is here 🎉 Please [try](#installation) it out!

# pg_duckdb: Official Postgres extension for DuckDB

@@ -19,9 +22,9 @@ See our [official documentation][docs] for further details.
- If DuckDB cannot support the query for any reason, execution falls back to Postgres.
- Read and Write support for object storage (AWS S3, Azure, Cloudflare R2, or Google GCS):
- Read parquet, CSV and JSON files:
-    - `SELECT n FROM read_parquet('s3://bucket/file.parquet') AS (n int)`
-    - `SELECT n FROM read_csv('s3://bucket/file.csv') AS (n int)`
-    - `SELECT n FROM read_json('s3://bucket/file.json') AS (n int)`
+    - `SELECT * FROM read_parquet('s3://bucket/file.parquet')`
+    - `SELECT r['id'], r['name'] FROM read_csv('s3://bucket/file.csv') r`
+    - `SELECT count(*) FROM read_json('s3://bucket/file.json')`
- You can pass globs and arrays to these functions, just like in DuckDB
- Enable the DuckDB Iceberg extension using `SELECT duckdb.install_extension('iceberg')` and read Iceberg files with `iceberg_scan`.
- Enable the DuckDB Delta extension using `SELECT duckdb.install_extension('delta')` and read Delta files with `delta_scan`.
@@ -32,8 +35,8 @@ See our [official documentation][docs] for further details.

```sql
COPY (
-    SELECT count(*), name
-    FROM read_parquet('s3://bucket/file.parquet') AS (name text)
+    SELECT count(*), r['name']
+    FROM read_parquet('s3://bucket/file.parquet') r
GROUP BY name
ORDER BY count DESC
) TO 's3://bucket/results.parquet';
@@ -149,9 +152,8 @@ Querying data stored in Parquet, CSV, JSON, Iceberg and Delta format can be done
3. Perform analytics on your data.

```sql
-SELECT SUM(price) AS total, item_id
-FROM read_parquet('s3://your-bucket/purchases.parquet')
-AS (price float, item_id int)
+SELECT SUM(r['price']) AS total, r['item_id']
+FROM read_parquet('s3://your-bucket/purchases.parquet') r
GROUP BY item_id
ORDER BY total DESC
LIMIT 100;
