Skip to content

Commit 858a83e

Browse files
authored
Update the library for Data Package (v2) (#13)
* Added sources.version * Added sources to DCAT mapper * Added `contributor.given/familyName` * Replace `contributor.role` by `contributor.roles` * Added contributor.role mapper * Support resource.url (v0) * Support field.format (v0) * Renamed `IDict` to `IData` * Rebased on mode_validator for compat * Updated Table Dialect * Added `schema.fieldsMatch` * Moved `field` model to its own folder * Simplified foreignKey * Better separate models/types * Added `schema.uniqueKeys` * Added `list` field type * Added `constraints.jsonSchema` * Support `groupChart` for integers * Added exclusive constraints * Support `primaryKey` from v1 * Improved compat code * Support `foreignKeys` from v1 * Removed profile rules * Removed metadata profile * Added changelog * Simplified dialect defaults * Updated profiles * Removed version from compat * Rebased on `$schema` property * Removed profile model * Fixed plugin tests * Updated actions * Fixed tests * Fixed typo * Updated changelog * Fixed CI tests * Fixed CI tests
1 parent 8f1a9e9 commit 858a83e

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

59 files changed

+8028
-670
lines changed

.github/workflows/general.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
- name: Prepare variables
3434
run: cp .env.example .env
3535
- name: Test software
36-
run: hatch run ci:test +py=${{ matrix.py || matrix.python-version }}
36+
run: hatch run +py=${{ matrix.py || matrix.python-version }} ci:test
3737
- name: Report coverage
3838
uses: codecov/codecov-action@v2
3939

@@ -55,7 +55,7 @@ jobs:
5555
run: cp .env.example .env
5656
- name: Test software
5757
# https://stackoverflow.com/questions/9678408/cant-install-psycopg2-with-pip-in-virtualenv-on-mac-os-x-10-7
58-
run: LDFLAGS=`echo $(pg_config --ldflags)` hatch run ci:test
58+
run: LDFLAGS=`echo $(pg_config --ldflags)` hatch run +py=3.10 ci:test
5959

6060
# Test (Windows)
6161

@@ -74,7 +74,7 @@ jobs:
7474
- name: Prepare variables
7575
run: cp .env.example .env
7676
- name: Test software
77-
run: hatch run ci:test
77+
run: hatch run +py=3.10 ci:test
7878

7979
# Deploy
8080

data/package-custom-profile.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"profile": "https://raw.githubusercontent.com/frictionlessdata/frictionless-py/main/data/profiles/required.json",
2+
"$schema": "https://raw.githubusercontent.com/frictionlessdata/frictionless-py/main/data/profiles/required.json",
33
"name": "name",
44
"resources": [
55
{

docs/changelog.md

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Changelog
2+
3+
This document covers the main `dplib-py` releases:
4+
5+
## v0.7
6+
7+
- Updated to Data Package (v2)
8+
9+
## v0.6
10+
11+
- Initial public release

dplib/actions/dialect/check.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from ..metadata.check import check_metadata
99

1010

11-
def check_dialect(dialect: Union[str, types.IDict, Dialect]) -> List[MetadataError]:
11+
def check_dialect(dialect: Union[str, types.IData, Dialect]) -> List[MetadataError]:
1212
"""Check the validity of a Table Dialect descriptor
1313
1414
This validates the descriptor against the JSON Schema profiles to ensure

dplib/actions/metadata/check.py

+17-13
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,32 @@
22

33
from typing import List, Union
44

5-
from ... import types
5+
from ... import settings, types
66
from ...errors.metadata import MetadataError
77
from ...helpers.data import read_data
8-
from ...helpers.path import is_remote_path
9-
from ...helpers.profile import check_metadata_against_jsonschema, read_profile
10-
from ...models import Profile
8+
from ...helpers.profile import check_profile
119

1210

1311
def check_metadata(
14-
metadata: Union[str, types.IDict], *, type: str
12+
metadata: Union[str, types.IData], *, type: types.IMetadataType
1513
) -> List[MetadataError]:
1614
if isinstance(metadata, str):
1715
metadata = read_data(metadata)
1816

19-
# Base profile
20-
profile = Profile.from_dict(read_profile(metadata_type=type))
21-
errors = check_metadata_against_jsonschema(metadata, profile.jsonSchema)
17+
# Get default profile
18+
if type == "dialect":
19+
default_profile = settings.PROFILE_DEFAULT_DIALECT
20+
elif type == "package":
21+
default_profile = settings.PROFILE_DEFAULT_PACKAGE
22+
elif type == "resource":
23+
default_profile = settings.PROFILE_DEFAULT_RESOURCE
24+
elif type == "schema":
25+
default_profile = settings.PROFILE_DEFAULT_SCHEMA
26+
else:
27+
raise ValueError(f"Invalid metadata type: {type}")
2228

23-
# Custom profile
24-
custom_profile = metadata.get("profile")
25-
if custom_profile and is_remote_path(custom_profile):
26-
custom_profile = Profile.from_path(custom_profile)
27-
errors += check_metadata_against_jsonschema(metadata, custom_profile.jsonSchema)
29+
# Validate metadata
30+
profile = metadata.get("$schema", default_profile)
31+
errors = check_profile(metadata=metadata, profile=profile)
2832

2933
return errors

dplib/actions/package/__spec__/test_check.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,8 @@ def test_check_package_invalid_dereferencing():
2323
errors = check_package("data/package-invalid-dereferencing.json")
2424
assert len(errors) == 1
2525
error = errors[0]
26-
assert (
27-
error.full_message == "[/resources/0/dialect/delimiter] 1 is not of type 'string'"
28-
)
26+
# TODO: extend error path so it shows the full path from the package root
27+
assert error.full_message == "[/delimiter] 1 is not of type 'string'"
2928

3029

3130
@pytest.mark.vcr

dplib/actions/package/check.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from ..metadata.check import check_metadata
1111

1212

13-
def check_package(package: Union[str, types.IDict, Package]) -> List[MetadataError]:
13+
def check_package(package: Union[str, types.IData, Package]) -> List[MetadataError]:
1414
"""Check the validity of a Data Package descriptor
1515
1616
This validates the descriptor against the JSON Schema profiles to ensure
@@ -30,13 +30,15 @@ def check_package(package: Union[str, types.IDict, Package]) -> List[MetadataErr
3030
basepath = package.basepath
3131
package = package.to_dict()
3232

33-
# Dereference resources[].dialect/schema
33+
# Validate (including nested descriptors)
34+
errors = check_metadata(package, type="package")
3435
resources = package.get("resources", [])
3536
if isinstance(resources, list):
3637
for resource in resources: # type: ignore
37-
for name in ["dialect", "schema"]:
38-
value = resource.get(name) # type: ignore
39-
if value and isinstance(value, str):
40-
resource[name] = read_data(value, basepath=basepath)
38+
for type in ["dialect", "schema"]:
39+
value = resource.get(type) # type: ignore
40+
if isinstance(value, str):
41+
metadata = read_data(value, basepath=basepath)
42+
errors.extend(check_metadata(metadata, type=type)) # type: ignore
4143

42-
return check_metadata(package, type="package")
44+
return errors

dplib/actions/resource/__spec__/test_check.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def test_check_resource_invalid_dereferencing():
2121
errors = check_resource("data/resource-invalid-dereferencing.json")
2222
assert len(errors) == 1
2323
error = errors[0]
24-
assert error.full_message == "[/dialect/delimiter] 1 is not of type 'string'"
24+
# TODO: extend error path so it shows the full path from the resource root
25+
assert error.full_message == "[/delimiter] 1 is not of type 'string'"
2526

2627

2728
def test_check_resource_from_model():

dplib/actions/resource/check.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from ..metadata.check import check_metadata
1111

1212

13-
def check_resource(resource: Union[str, types.IDict, Resource]) -> List[MetadataError]:
13+
def check_resource(resource: Union[str, types.IData, Resource]) -> List[MetadataError]:
1414
"""Check the validity of a Data Resource descriptor
1515
1616
This validates the descriptor against the JSON Schema profiles to ensure
@@ -30,10 +30,12 @@ def check_resource(resource: Union[str, types.IDict, Resource]) -> List[Metadata
3030
basepath = resource.basepath
3131
resource = resource.to_dict()
3232

33-
# Dereference dialect/schema
34-
for name in ["dialect", "schema"]:
35-
value = resource.get(name)
36-
if value and isinstance(value, str):
37-
resource[name] = read_data(value, basepath=basepath)
33+
# Validate (including nested descriptors)
34+
errors = check_metadata(resource, type="resource")
35+
for type in ["dialect", "schema"]:
36+
value = resource.get(type)
37+
if isinstance(value, str):
38+
metadata = read_data(value, basepath=basepath)
39+
errors.extend(check_metadata(metadata, type=type)) # type: ignore
3840

39-
return check_metadata(resource, type="resource")
41+
return errors

dplib/actions/schema/check.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from ..metadata.check import check_metadata
99

1010

11-
def check_schema(schema: Union[str, types.IDict, Schema]) -> List[MetadataError]:
11+
def check_schema(schema: Union[str, types.IData, Schema]) -> List[MetadataError]:
1212
"""Check the validity of a Table Schema descriptor
1313
1414
This validates the descriptor against the JSON Schema profiles to ensure

dplib/helpers/data.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,22 @@
1212

1313
def read_data(
1414
path: str, *, format: Optional[str] = None, basepath: Optional[str] = None
15-
) -> types.IDict:
15+
) -> types.IData:
1616
if not format:
1717
format = infer_format(path, raise_missing=True)
1818
text = read_file(path, basepath=basepath)
1919
data = load_data(text, format=format)
2020
return data
2121

2222

23-
def write_data(path: str, data: types.IDict, *, format: Optional[str] = None):
23+
def write_data(path: str, data: types.IData, *, format: Optional[str] = None):
2424
if not format:
2525
format = infer_format(path, raise_missing=True)
2626
text = dump_data(data, format=format)
2727
write_file(path, text)
2828

2929

30-
def load_data(text: str, *, format: str) -> types.IDict:
30+
def load_data(text: str, *, format: str) -> types.IData:
3131
try:
3232
if format == "json":
3333
return json.loads(text)
@@ -39,7 +39,7 @@ def load_data(text: str, *, format: str) -> types.IDict:
3939
raise Error(f"Cannot load data from text with format: {format}")
4040

4141

42-
def dump_data(data: types.IDict, *, format: str) -> str:
42+
def dump_data(data: types.IData, *, format: str) -> str:
4343
try:
4444
if format == "json":
4545
return json.dumps(data, indent=2)
@@ -51,7 +51,7 @@ def dump_data(data: types.IDict, *, format: str) -> str:
5151
raise Error(f"Cannot dump data to text with format: {format}")
5252

5353

54-
def clean_data(data: types.IDict):
54+
def clean_data(data: types.IData):
5555
for key, value in list(data.items()):
5656
if isinstance(value, dict):
5757
clean_data(value) # type: ignore

dplib/helpers/profile.py

+42-27
Original file line numberDiff line numberDiff line change
@@ -6,44 +6,59 @@
66

77
from jsonschema.validators import validator_for # type: ignore
88

9-
from .. import types
9+
from .. import settings, types
1010
from ..error import Error
1111
from ..errors.metadata import MetadataError
1212
from .data import load_data
1313
from .file import read_file
1414

15+
# TODO: implement additional user-side profile caching
1516

16-
def select_profile(*, metadata_type: types.IMetadataType) -> str:
17-
if metadata_type == "package":
18-
return "data-package"
19-
elif metadata_type == "resource":
20-
return "data-resource"
21-
elif metadata_type == "dialect":
22-
return "table-dialect"
23-
elif metadata_type == "schema":
24-
return "table-schema"
25-
raise Error(f'Invalid metadata type "{metadata_type}"')
17+
18+
def check_profile(*, metadata: types.IData, profile: str) -> List[MetadataError]:
19+
# Prepare validator
20+
jsonSchema = read_profile(profile=profile)
21+
Validator = validator_for(jsonSchema) # type: ignore
22+
validator = Validator(jsonSchema) # type: ignore
23+
24+
# Validate metadata
25+
errors: List[MetadataError] = []
26+
for validation_error in validator.iter_errors(metadata): # type: ignore
27+
errors.append(MetadataError(validation_error)) # type: ignore
28+
29+
return errors
2630

2731

2832
@lru_cache
29-
def read_profile(*, metadata_type: types.IMetadataType) -> types.IDict:
30-
format = "json"
31-
name = select_profile(metadata_type=metadata_type)
32-
path = os.path.join(os.path.dirname(__file__), "..", "profiles", f"{name}.{format}")
33+
def read_profile(*, profile: str) -> types.IData:
34+
parts = parse_profile(profile)
35+
36+
# Replace with builtin copy
37+
if parts:
38+
version, filename = parts
39+
profile = os.path.join(settings.PROFILE_BASEDIR, version, filename)
40+
41+
# Read jsonSchema
3342
try:
34-
text = read_file(path)
35-
data = load_data(text, format=format)
43+
text = read_file(profile)
44+
data = load_data(text, format="json")
3645
except Exception:
37-
raise Error(f'Cannot read profile "{name}" at "{path}"')
46+
raise Error(f'Cannot read profile: "{profile}"')
47+
3848
return data
3949

4050

41-
def check_metadata_against_jsonschema(
42-
metadata: types.IDict, jsonSchema: types.IDict
43-
) -> List[MetadataError]:
44-
Validator = validator_for(jsonSchema) # type: ignore
45-
validator = Validator(jsonSchema) # type: ignore
46-
errors: List[MetadataError] = []
47-
for validation_error in validator.iter_errors(metadata): # type: ignore
48-
errors.append(MetadataError(validation_error)) # type: ignore
49-
return errors
51+
def parse_profile(profile: str):
52+
parts = profile.rsplit("/", 3)
53+
54+
# Ensure builtin copy exists
55+
if len(parts) != 3:
56+
return None
57+
if parts[0] != settings.PROFILE_BASEURL:
58+
return None
59+
if parts[1] not in os.listdir(settings.PROFILE_BASEDIR):
60+
return None
61+
if parts[2] not in os.listdir(os.path.join(settings.PROFILE_BASEDIR, parts[1])):
62+
return None
63+
64+
return parts[1], parts[2]

dplib/model.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import pprint
44
import warnings
5-
from functools import cached_property
65
from typing import Optional
76

87
from pydantic import BaseModel
@@ -22,8 +21,8 @@ def __str__(self) -> str:
2221
def __repr__(self) -> str:
2322
return pprint.pformat(self.to_dict(), sort_dicts=False)
2423

25-
@cached_property
26-
def custom(self) -> types.IDict:
24+
@property
25+
def custom(self) -> types.IData:
2726
assert self.model_extra is not None
2827
return self.model_extra
2928

@@ -63,7 +62,7 @@ def to_dict(self):
6362
return data
6463

6564
@classmethod
66-
def from_dict(cls, data: types.IDict, *, basepath: Optional[str] = None) -> Self:
65+
def from_dict(cls, data: types.IData, *, basepath: Optional[str] = None) -> Self:
6766
if basepath and cls.model_fields.get("basepath"):
6867
data["basepath"] = basepath
6968
return cls(**data)

dplib/models/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from .contributor import Contributor
22
from .dialect import Dialect
3+
from .field import Constraints, Field
34
from .license import License
45
from .package import Package
5-
from .profile import Profile
66
from .resource import Resource
7-
from .schema import Constraints, Field, ForeignKey, ForeignKeyReference, Schema
7+
from .schema import ForeignKey, ForeignKeyReference, Schema
88
from .source import Source

dplib/models/contributor.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,32 @@
1-
from typing import Optional
1+
from typing import List, Optional
22

3+
import pydantic
4+
5+
from .. import types
36
from ..model import Model
47

58

69
class Contributor(Model):
710
title: Optional[str] = None
11+
givenName: Optional[str] = None
12+
familyName: Optional[str] = None
813
path: Optional[str] = None
914
email: Optional[str] = None
10-
role: Optional[str] = None
15+
roles: List[str] = []
1116
organization: Optional[str] = None
17+
18+
# Compat
19+
20+
@pydantic.model_validator(mode="before")
21+
@classmethod
22+
def compat(cls, data: types.IData):
23+
if not isinstance(data, dict): # type: ignore
24+
return data
25+
26+
# contributor.role
27+
if not data.get("roles"):
28+
role = data.pop("role", None)
29+
if role:
30+
data["roles"] = [role]
31+
32+
return data

0 commit comments

Comments
 (0)