Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support the PostgreSQL domain #532

Merged
merged 11 commits into from
Feb 24, 2025
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pg_duckdb was developed in collaboration with our partners, [Hydra][] and [Mothe
See our [official documentation][docs] for further details.

- `SELECT` queries executed by the DuckDB engine can directly read Postgres tables. (If you only query Postgres tables you need to run `SET duckdb.force_execution TO true`, see the **IMPORTANT** section above for details)
- Able to read [data types](https://www.postgresql.org/docs/current/datatype.html) that exist in both Postgres and DuckDB. The following data types are supported: numeric, character, binary, date/time, boolean, uuid, json, and arrays.
- Able to read [data types](https://www.postgresql.org/docs/current/datatype.html) that exist in both Postgres and DuckDB. The following data types are supported: numeric, character, binary, date/time, boolean, uuid, json, domain, and arrays.
- If DuckDB cannot support the query for any reason, execution falls back to Postgres.
- Read and Write support for object storage (AWS S3, Azure, Cloudflare R2, or Google GCS):
- Read parquet, CSV and JSON files:
Expand Down
3 changes: 2 additions & 1 deletion docs/types.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Able to read many [data types](https://www.postgresql.org/docs/current/datatype.
- `boolean`
- `uuid`
- `json`/`jsonb`
- `domain`
- `arrays` for all of the above types, but see limitations below about multi-dimensional arrays

## Known limitations
Expand All @@ -31,7 +32,7 @@ to fix these limitations:
-- This configures the column to be a 3-dimensional array of text
ALTER TABLE s ALTER COLUMN a SET DATA TYPE text[][][];
```

9. For the `domain` actually, during the execution of the INSERT operation, the check regarding `domain` is conducted by PostgreSQL rather than DuckDB. When we execute the SELECT operation and the type of the queried field is a `domain`, we will convert it to the corresponding base type and let DuckDB handle it.
## Special types

pg_duckdb introduces a few special Postgres types. You shouldn't create these types explicitly and normally you don't need to know about their existence, but they might show up in error messages from Postgres. These are explained below:
Expand Down
3 changes: 3 additions & 0 deletions include/pgduckdb/pg/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@

namespace pgduckdb::pg {
bool IsArrayType(Oid type_oid);
bool IsDomainType(Oid type_oid);
bool IsArrayDomainType(Oid type_oid);
Oid GetBaseDuckColumnType(Oid attribute_typoid);
}
36 changes: 36 additions & 0 deletions src/pg/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
extern "C" {
#include "postgres.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "catalog/pg_type.h"
#include "executor/tuptable.h"
}

namespace pgduckdb::pg {
Expand All @@ -13,4 +16,37 @@ IsArrayType(Oid type_oid) {
return PostgresFunctionGuard(get_element_type, type_oid) != InvalidOid;
}

bool
IsDomainType(Oid type_oid) {
return PostgresFunctionGuard(get_typtype, type_oid) == TYPTYPE_DOMAIN;
}

bool
IsArrayDomainType(Oid type_oid) {
bool is_array_domain = false;
if (IsDomainType(type_oid)) {
if (PostgresFunctionGuard(get_base_element_type, type_oid) != InvalidOid) {
is_array_domain = true;
}
}
return is_array_domain;
}

Oid
GetBaseDuckColumnType(Oid attribute_typoid) {
std::lock_guard<std::recursive_mutex> lock(pgduckdb::GlobalProcessLock::GetLock());
Oid typoid = attribute_typoid;
if (get_typtype(attribute_typoid) == TYPTYPE_DOMAIN) {
/* It is a domain type that needs to be reduced to its base type */
typoid = getBaseType(attribute_typoid);
} else if (type_is_array(attribute_typoid)) {
Oid eltoid = get_base_element_type(attribute_typoid);
if (OidIsValid(eltoid) && get_typtype(eltoid) == TYPTYPE_DOMAIN) {
/* When the member type of an array is domain, you need to build a base array type */
typoid = get_array_type(getBaseType(eltoid));
}
}
return typoid;
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bunch of these functions can throw Postgres errors (like get_typtype and getBaseType). They all need to be wrapped in PostgresFunctionGuard. It's probably easiest to create a small wrapper function like this:

Suggested change
Oid
GetBaseDuckColumnType(Oid attribute_typoid) {
std::lock_guard<std::recursive_mutex> lock(pgduckdb::GlobalProcessLock::GetLock());
Oid typoid = attribute_typoid;
if (get_typtype(attribute_typoid) == TYPTYPE_DOMAIN) {
/* It is a domain type that needs to be reduced to its base type */
typoid = getBaseType(attribute_typoid);
} else if (type_is_array(attribute_typoid)) {
Oid eltoid = get_base_element_type(attribute_typoid);
if (OidIsValid(eltoid) && get_typtype(eltoid) == TYPTYPE_DOMAIN) {
/* When the member type of an array is domain, you need to build a base array type */
typoid = get_array_type(getBaseType(eltoid));
}
}
return typoid;
}
static Oid
GetBaseDuckColumnType_C(Oid attribute_typoid) {
std::lock_guard<std::recursive_mutex> lock(pgduckdb::GlobalProcessLock::GetLock());
Oid typoid = attribute_typoid;
if (get_typtype(attribute_typoid) == TYPTYPE_DOMAIN) {
/* It is a domain type that needs to be reduced to its base type */
typoid = getBaseType(attribute_typoid);
} else if (type_is_array(attribute_typoid)) {
Oid eltoid = get_base_element_type(attribute_typoid);
if (OidIsValid(eltoid) && get_typtype(eltoid) == TYPTYPE_DOMAIN) {
/* When the member type of an array is domain, you need to build a base array type */
typoid = get_array_type(getBaseType(eltoid));
}
}
return typoid;
}
Oid
GetBaseDuckColumnType(Oid attribute_type_oid) {
return PostgresFunctionGuard(GetBaseDuckColumnType_C, attribute_type_oid);
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, thank you very much for your guidance. I feel that the lock in the c function seems to have no effect at this time, so I try to remove it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Totally agreed.


} // namespace pgduckdb::pg
9 changes: 6 additions & 3 deletions src/pgduckdb_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ extern "C" {
}

#include "pgduckdb/pgduckdb_detoast.hpp"
#include "pgduckdb/pgduckdb_process_lock.hpp"

namespace pgduckdb {

Expand Down Expand Up @@ -909,7 +910,8 @@ numeric_typmod_scale(int32 typmod) {

duckdb::LogicalType
ConvertPostgresToBaseDuckColumnType(Form_pg_attribute &attribute) {
switch (attribute->atttypid) {
Oid typoid = pg::GetBaseDuckColumnType(attribute->atttypid);
switch (typoid) {
case BOOLOID:
case BOOLARRAYOID:
return duckdb::LogicalTypeId::BOOLEAN;
Expand Down Expand Up @@ -983,9 +985,10 @@ ConvertPostgresToBaseDuckColumnType(Form_pg_attribute &attribute) {
duckdb::LogicalType
ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) {
auto base_type = ConvertPostgresToBaseDuckColumnType(attribute);

if (!pg::IsArrayType(attribute->atttypid)) {
return base_type;
if (!pg::IsArrayDomainType(attribute->atttypid)) {
return base_type;
}
}

auto dimensions = attribute->attndims;
Expand Down
95 changes: 95 additions & 0 deletions test/regression/expected/domain.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
create domain domainvarchar varchar(5) check (value is not null);
create domain domainnumeric numeric(8,2) check (value is not null);
create domain domainint4 int4 check (value > 0);
create domain domaintext text check (value is not null);
-- Test tables using domains
create table basictest
( testint4 domainint4
, testtext domaintext
, testvarchar domainvarchar
, testnumeric domainnumeric
);
-- In fact, when we carry out an INSERT operation, the check regarding "domain" is conducted by PostgreSQL rather than DuckDB.
-- When we execute a SELECT operation and the field of the query is "domain", we will convert it to the corresponding base type
-- and let DuckDB handle it.
INSERT INTO basictest values ('88', 'haha', 'short', '123.12'); -- Good
INSERT INTO basictest values ('88', 'haha', 'short text', '123.12'); -- Bad varchar
ERROR: value too long for type character varying(5)
INSERT INTO basictest values ('88', 'haha', 'short', '123.1212'); -- Truncate numeric
-- not support. It will be converted to the following statement
-- SELECT ('-5'::integer)::domainint4 AS domainint4 FROM pgduckdb.xxx.basictest
SELECT (-5)::domainint4 FROM basictest;
WARNING: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Catalog Error: Type with name domainint4 does not exist!
Did you mean "tinyint"?
ERROR: value for domain domainint4 violates check constraint "domainint4_check"
select * from basictest;
testint4 | testtext | testvarchar | testnumeric
----------+----------+-------------+-------------
88 | haha | short | 123.12
88 | haha | short | 123.12
(2 rows)

select testtext || testvarchar as concat, testnumeric + 42 as sum
from basictest;
concat | sum
-----------+---------
hahashort | 165.120
hahashort | 165.120
(2 rows)

select * from basictest where testtext = 'haha';
testint4 | testtext | testvarchar | testnumeric
----------+----------+-------------+-------------
88 | haha | short | 123.12
88 | haha | short | 123.12
(2 rows)

select * from basictest where testvarchar = 'short';
testint4 | testtext | testvarchar | testnumeric
----------+----------+-------------+-------------
88 | haha | short | 123.12
88 | haha | short | 123.12
(2 rows)

-- array_domain
create domain domain_int_array as INT[];
CREATE TABLE domain_int_array_1d(a domain_int_array);
INSERT INTO domain_int_array_1d SELECT CAST(a as domain_int_array) FROM (VALUES
('{1, 2, 3}'),
(NULL),
('{4, 5, NULL, 7}'),
('{}')
) t(a);
SELECT * FROM domain_int_array_1d;
a
--------------
{1,2,3}

{4,5,NULL,7}
{}
(4 rows)

CREATE TABLE domain_int_array_2d(a domainint4[]);
INSERT INTO domain_int_array_2d SELECT CAST(a as domain_int_array) FROM (VALUES
('{1, 2, 3}'),
(NULL),
('{4, 5, NULL, 7}'),
('{}')
) t(a);
SELECT * FROM domain_int_array_2d;
a
--------------
{1,2,3}

{4,5,NULL,7}
{}
(4 rows)

drop table domain_int_array_2d;
drop table domain_int_array_1d;
drop domain domain_int_array;
drop table basictest;
drop domain domainvarchar restrict;
drop domain domainnumeric restrict;
drop domain domainint4 restrict;
drop domain domaintext;
1 change: 1 addition & 0 deletions test/regression/schedule
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ test: hugeint_conversion
test: read_functions
test: duckdb_only_functions
test: duckdb_recycle
test: domain
test: cte
test: create_schema
test: create_table_as
Expand Down
60 changes: 60 additions & 0 deletions test/regression/sql/domain.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
create domain domainvarchar varchar(5) check (value is not null);
create domain domainnumeric numeric(8,2) check (value is not null);
create domain domainint4 int4 check (value > 0);
create domain domaintext text check (value is not null);

-- Test tables using domains
create table basictest
( testint4 domainint4
, testtext domaintext
, testvarchar domainvarchar
, testnumeric domainnumeric
);

-- In fact, when we carry out an INSERT operation, the check regarding "domain" is conducted by PostgreSQL rather than DuckDB.
-- When we execute a SELECT operation and the field of the query is "domain", we will convert it to the corresponding base type
-- and let DuckDB handle it.
INSERT INTO basictest values ('88', 'haha', 'short', '123.12'); -- Good
INSERT INTO basictest values ('88', 'haha', 'short text', '123.12'); -- Bad varchar
INSERT INTO basictest values ('88', 'haha', 'short', '123.1212'); -- Truncate numeric

-- not support. It will be converted to the following statement
-- SELECT ('-5'::integer)::domainint4 AS domainint4 FROM pgduckdb.xxx.basictest
SELECT (-5)::domainint4 FROM basictest;

select * from basictest;

select testtext || testvarchar as concat, testnumeric + 42 as sum
from basictest;

select * from basictest where testtext = 'haha';
select * from basictest where testvarchar = 'short';

-- array_domain
create domain domain_int_array as INT[];
CREATE TABLE domain_int_array_1d(a domain_int_array);
INSERT INTO domain_int_array_1d SELECT CAST(a as domain_int_array) FROM (VALUES
('{1, 2, 3}'),
(NULL),
('{4, 5, NULL, 7}'),
('{}')
) t(a);
SELECT * FROM domain_int_array_1d;

CREATE TABLE domain_int_array_2d(a domainint4[]);
INSERT INTO domain_int_array_2d SELECT CAST(a as domain_int_array) FROM (VALUES
('{1, 2, 3}'),
(NULL),
('{4, 5, NULL, 7}'),
('{}')
) t(a);
SELECT * FROM domain_int_array_2d;

drop table domain_int_array_2d;
drop table domain_int_array_1d;
drop domain domain_int_array;
drop table basictest;
drop domain domainvarchar restrict;
drop domain domainnumeric restrict;
drop domain domainint4 restrict;
drop domain domaintext;