Skip to content

Commit 4ea9d94

Browse files
lazebnyi, octavia-squidington-iii, aaronsteers
authored
feat(low-code): add items and property mappings to dynamic schemas (#256)
Co-authored-by: octavia-squidington-iii <[email protected]> Co-authored-by: Aaron ("AJ") Steers <[email protected]>
1 parent 0d22bdd commit 4ea9d94

File tree

6 files changed

+123
-16
lines changed

6 files changed

+123
-16
lines changed

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1800,6 +1800,19 @@ definitions:
18001800
$parameters:
18011801
type: object
18021802
additionalProperties: true
1803+
ComplexFieldType:
  title: Schema Field Type
  description: (This component is experimental. Use at your own risk.) Represents a complex field type.
  type: object
  required:
    - field_type
  properties:
    # Name of the target type that the field is mapped to.
    field_type:
      title: Field Type
      description: Name of the target type, for example "array".
      type: string
    # Element type; the runtime component rejects `items` unless field_type is "array".
    items:
      title: Items
      description: Type of the elements, given as a type name or as a nested ComplexFieldType. Only allowed when field_type is "array".
      anyOf:
        - type: string
        - "$ref": "#/definitions/ComplexFieldType"
18031816
TypesMap:
18041817
title: Types Map
18051818
description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
@@ -1814,6 +1827,7 @@ definitions:
18141827
- type: array
18151828
items:
18161829
type: string
1830+
- "$ref": "#/definitions/ComplexFieldType"
18171831
current_type:
18181832
anyOf:
18191833
- type: string

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -736,8 +736,13 @@ class HttpResponseFilter(BaseModel):
736736
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
737737

738738

739+
class ComplexFieldType(BaseModel):
    """Declarative model of a field type that can carry a nested element type.

    Attributes are documented inline. The self-reference in ``items`` is
    written as a string so the class body does not depend on postponed
    annotation evaluation; the module-level
    ``ComplexFieldType.update_forward_refs()`` call resolves it.
    """

    # Name of the target type.
    field_type: str
    # Element type: either a type name or another ComplexFieldType (nesting allowed).
    items: Optional[Union[str, "ComplexFieldType"]] = None
742+
743+
739744
class TypesMap(BaseModel):
740-
target_type: Union[str, List[str]]
745+
target_type: Union[str, List[str], ComplexFieldType]
741746
current_type: Union[str, List[str]]
742747
condition: Optional[str] = None
743748

@@ -2260,6 +2265,7 @@ class DynamicDeclarativeStream(BaseModel):
22602265
)
22612266

22622267

2268+
ComplexFieldType.update_forward_refs()
22632269
CompositeErrorHandler.update_forward_refs()
22642270
DeclarativeSource1.update_forward_refs()
22652271
DeclarativeSource2.update_forward_refs()

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@
133133
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
134134
CheckStream as CheckStreamModel,
135135
)
136+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
137+
ComplexFieldType as ComplexFieldTypeModel,
138+
)
136139
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
137140
ComponentMappingDefinition as ComponentMappingDefinitionModel,
138141
)
@@ -429,6 +432,7 @@
429432
SimpleRetrieverTestReadDecorator,
430433
)
431434
from airbyte_cdk.sources.declarative.schema import (
435+
ComplexFieldType,
432436
DefaultSchemaLoader,
433437
DynamicSchemaLoader,
434438
InlineSchemaLoader,
@@ -572,6 +576,7 @@ def _init_mappings(self) -> None:
572576
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
573577
SchemaTypeIdentifierModel: self.create_schema_type_identifier,
574578
TypesMapModel: self.create_types_map,
579+
ComplexFieldTypeModel: self.create_complex_field_type,
575580
JwtAuthenticatorModel: self.create_jwt_authenticator,
576581
LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
577582
ListPartitionRouterModel: self.create_list_partition_router,
@@ -1894,10 +1899,26 @@ def create_inline_schema_loader(
18941899
) -> InlineSchemaLoader:
18951900
return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
18961901

1897-
@staticmethod
1898-
def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
1902+
def create_complex_field_type(
    self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
) -> ComplexFieldType:
    """Build the runtime ``ComplexFieldType`` from its declarative model.

    Args:
        model: Parsed ``ComplexFieldType`` model.
        config: Connector config, forwarded when a nested model has to be built.
        **kwargs: Accepted for signature compatibility with the other factory
            methods; not used here.

    Returns:
        A ``ComplexFieldType`` whose ``items`` is the original string (or
        ``None``), or a component built from the nested model.
    """
    nested = model.items
    # Only a nested model needs to go through the factory; plain type names
    # (and None) are passed through untouched.
    items = (
        self._create_component_from_model(model=nested, config=config)
        if isinstance(nested, ComplexFieldTypeModel)
        else nested
    )

    return ComplexFieldType(field_type=model.field_type, items=items)
1912+
1913+
def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
    """Build the runtime ``TypesMap`` from its declarative model.

    Args:
        model: Parsed ``TypesMap`` model.
        config: Connector config, forwarded when ``target_type`` is a complex
            type model that has to be built.
        **kwargs: Accepted for signature compatibility with the other factory
            methods; not used here.

    Returns:
        A ``TypesMap`` whose ``target_type`` is either the original value or
        the component built from a ``ComplexFieldType`` model.
    """
    target_type = (
        self._create_component_from_model(model=model.target_type, config=config)
        if isinstance(model.target_type, ComplexFieldTypeModel)
        else model.target_type
    )

    return TypesMap(
        target_type=target_type,
        current_type=model.current_type,
        # A missing condition is replaced by the literal string "True".
        condition=model.condition if model.condition is not None else "True",
    )

airbyte_cdk/sources/declarative/schema/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from airbyte_cdk.sources.declarative.schema.default_schema_loader import DefaultSchemaLoader
66
from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import (
7+
ComplexFieldType,
78
DynamicSchemaLoader,
89
SchemaTypeIdentifier,
910
TypesMap,
@@ -18,6 +19,7 @@
1819
"SchemaLoader",
1920
"InlineSchemaLoader",
2021
"DynamicSchemaLoader",
22+
"ComplexFieldType",
2123
"TypesMap",
2224
"SchemaTypeIdentifier",
2325
]

airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from airbyte_cdk.sources.source import ExperimentalClassWarning
1919
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
2020

21-
AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
21+
AIRBYTE_DATA_TYPES: Mapping[str, MutableMapping[str, Any]] = {
2222
"string": {"type": ["null", "string"]},
2323
"boolean": {"type": ["null", "boolean"]},
2424
"date": {"type": ["null", "string"], "format": "date"},
@@ -45,14 +45,33 @@
4545
}
4646

4747

48+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass(frozen=True)
class ComplexFieldType:
    """
    Identifies a complex field type: a target type that may carry a nested element type.
    """

    # Name of the target type.
    field_type: str
    # Element type: a type name or a nested ComplexFieldType.
    items: Optional[Union[str, "ComplexFieldType"]] = None

    def __post_init__(self) -> None:
        """
        Enforces that `items` is only used when `field_type` is "array".

        Raises:
            ValueError: If a truthy `items` is given for a non-array `field_type`.
        """
        # `items` is valid only for array target types
        if self.items and self.field_type != "array":
            raise ValueError("'items' can only be used when 'field_type' is an array.")
65+
66+
4867
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
4968
@dataclass(frozen=True)
5069
class TypesMap:
5170
"""
5271
Represents a mapping between a current type and its corresponding target type.
5372
"""
5473

55-
target_type: Union[List[str], str]
74+
target_type: Union[List[str], str, ComplexFieldType]
5675
current_type: Union[List[str], str]
5776
condition: Optional[str]
5877

@@ -135,8 +154,9 @@ def get_json_schema(self) -> Mapping[str, Any]:
135154
transformed_properties = self._transform(properties, {})
136155

137156
return {
138-
"$schema": "http://json-schema.org/draft-07/schema#",
157+
"$schema": "https://json-schema.org/draft-07/schema#",
139158
"type": "object",
159+
"additionalProperties": True,
140160
"properties": transformed_properties,
141161
}
142162

@@ -188,18 +208,36 @@ def _get_type(
188208
first_type = self._get_airbyte_type(mapped_field_type[0])
189209
second_type = self._get_airbyte_type(mapped_field_type[1])
190210
return {"oneOf": [first_type, second_type]}
211+
191212
elif isinstance(mapped_field_type, str):
192213
return self._get_airbyte_type(mapped_field_type)
214+
215+
elif isinstance(mapped_field_type, ComplexFieldType):
216+
return self._resolve_complex_type(mapped_field_type)
217+
193218
else:
194219
raise ValueError(
195220
f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
196221
)
197222

223+
def _resolve_complex_type(self, complex_type: "ComplexFieldType") -> Mapping[str, Any]:
    """
    Resolves a complex field type into its schema definition.

    Args:
        complex_type: The complex type to resolve; its `items` may be a type
            name, another complex type, or empty.

    Returns:
        The definition for `field_type` as returned by `_get_airbyte_type`.
        When `items` is set, a new mapping is returned that also carries an
        "items" entry with the (recursively) resolved element definition.
    """
    resolved = self._get_airbyte_type(complex_type.field_type)

    items = complex_type.items
    if not items:
        return resolved

    items_schema = (
        self._get_airbyte_type(items)
        if isinstance(items, str)
        else self._resolve_complex_type(items)
    )

    # Build a fresh mapping instead of assigning into `resolved`: whether
    # `_get_airbyte_type` hands back a private copy is not visible from here,
    # and writing "items" into a shared definition would leak into every
    # later lookup of the same type.
    return {**resolved, "items": items_schema}
235+
198236
def _replace_type_if_not_valid(
199237
self,
200238
field_type: Union[List[str], str],
201239
raw_schema: MutableMapping[str, Any],
202-
) -> Union[List[str], str]:
240+
) -> Union[List[str], str, ComplexFieldType]:
203241
"""
204242
Replaces a field type if it matches a type mapping in `types_map`.
205243
"""
@@ -216,7 +254,7 @@ def _replace_type_if_not_valid(
216254
return field_type
217255

218256
@staticmethod
219-
def _get_airbyte_type(field_type: str) -> Mapping[str, Any]:
257+
def _get_airbyte_type(field_type: str) -> MutableMapping[str, Any]:
220258
"""
221259
Maps a field type to its corresponding Airbyte type definition.
222260
"""

unit_tests/sources/declarative/schema/test_dynamic_schema_loader.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,17 @@
8484
"schema_pointer": ["fields"],
8585
"key_pointer": ["name"],
8686
"type_pointer": ["type"],
87-
"types_mapping": [{"target_type": "string", "current_type": "singleLineText"}],
87+
"types_mapping": [
88+
{"target_type": "string", "current_type": "singleLineText"},
89+
{
90+
"target_type": {
91+
"field_type": "array",
92+
"items": {"field_type": "array", "items": "integer"},
93+
},
94+
"current_type": "formula",
95+
"condition": "{{ raw_schema['result']['type'] == 'customInteger' }}",
96+
},
97+
],
8898
},
8999
},
90100
},
@@ -150,7 +160,8 @@ def dynamic_schema_loader(mock_retriever, mock_schema_type_identifier):
150160
]
151161
),
152162
{
153-
"$schema": "http://json-schema.org/draft-07/schema#",
163+
"$schema": "https://json-schema.org/draft-07/schema#",
164+
"additionalProperties": True,
154165
"type": "object",
155166
"properties": {
156167
"name": {"type": ["null", "string"]},
@@ -171,7 +182,8 @@ def dynamic_schema_loader(mock_retriever, mock_schema_type_identifier):
171182
]
172183
),
173184
{
174-
"$schema": "http://json-schema.org/draft-07/schema#",
185+
"$schema": "https://json-schema.org/draft-07/schema#",
186+
"additionalProperties": True,
175187
"type": "object",
176188
"properties": {
177189
"name": {"type": ["null", "string"]},
@@ -191,7 +203,8 @@ def dynamic_schema_loader(mock_retriever, mock_schema_type_identifier):
191203
]
192204
),
193205
{
194-
"$schema": "http://json-schema.org/draft-07/schema#",
206+
"$schema": "https://json-schema.org/draft-07/schema#",
207+
"additionalProperties": True,
195208
"type": "object",
196209
"properties": {
197210
"address": {
@@ -204,7 +217,8 @@ def dynamic_schema_loader(mock_retriever, mock_schema_type_identifier):
204217
# Test case: Empty record set
205218
iter([]),
206219
{
207-
"$schema": "http://json-schema.org/draft-07/schema#",
220+
"$schema": "https://json-schema.org/draft-07/schema#",
221+
"additionalProperties": True,
208222
"type": "object",
209223
"properties": {},
210224
},
@@ -242,7 +256,8 @@ def test_dynamic_schema_loader_invalid_type(dynamic_schema_loader):
242256

243257
def test_dynamic_schema_loader_manifest_flow():
244258
expected_schema = {
245-
"$schema": "http://json-schema.org/draft-07/schema#",
259+
"$schema": "https://json-schema.org/draft-07/schema#",
260+
"additionalProperties": True,
246261
"type": "object",
247262
"properties": {
248263
"id": {"type": ["null", "integer"]},
@@ -314,7 +329,8 @@ def test_dynamic_schema_loader_with_type_conditions():
314329
]["types_mapping"].append({"target_type": "array", "current_type": "formula"})
315330

316331
expected_schema = {
317-
"$schema": "http://json-schema.org/draft-07/schema#",
332+
"$schema": "https://json-schema.org/draft-07/schema#",
333+
"additionalProperties": True,
318334
"type": "object",
319335
"properties": {
320336
"id": {"type": ["null", "integer"]},
@@ -324,6 +340,10 @@ def test_dynamic_schema_loader_with_type_conditions():
324340
"currency": {"type": ["null", "number"]},
325341
"salary": {"type": ["null", "number"]},
326342
"working_days": {"type": ["null", "array"]},
343+
"avg_salary": {
344+
"type": ["null", "array"],
345+
"items": {"type": ["null", "array"], "items": {"type": ["null", "integer"]}},
346+
},
327347
},
328348
}
329349
source = ConcurrentDeclarativeSource(
@@ -365,6 +385,12 @@ def test_dynamic_schema_loader_with_type_conditions():
365385
{"name": "FirstName", "type": "string"},
366386
{"name": "Description", "type": "singleLineText"},
367387
{"name": "Salary", "type": "formula", "result": {"type": "number"}},
388+
{
389+
"name": "AvgSalary",
390+
"type": "formula",
391+
"result": {"type": "customInteger"},
392+
},
393+
{"name": "Currency", "type": "formula", "result": {"type": "currency"}},
368394
{"name": "Currency", "type": "formula", "result": {"type": "currency"}},
369395
{"name": "WorkingDays", "type": "formula"},
370396
]

0 commit comments

Comments
 (0)