Skip to content

fix: (CDK) (Manifest) - Deduplicate common components to shared + ref #447

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 22 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a488ab3
deduplication version 1
bazarnov Mar 26, 2025
7d910ee
deduplication version 2
bazarnov Mar 26, 2025
691d16a
updated duplicates collection
bazarnov Mar 27, 2025
081e7a8
deduplicate most frequent tags, use existing refs if definitions.shar…
bazarnov Mar 31, 2025
180af86
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
bazarnov Mar 31, 2025
138b607
formatted"
bazarnov Mar 31, 2025
f10e601
updated to account type for the given duplicated key
bazarnov Mar 31, 2025
66fe38e
add the reduce_commons: true, for Connector Builder case
bazarnov Mar 31, 2025
8798042
enabled the reduce_commons: True for Connector Builder case
bazarnov Mar 31, 2025
1d425ee
refactorred and cleaned up the code, moved to use the class instead
bazarnov Apr 1, 2025
06b183a
formatted
bazarnov Apr 1, 2025
1fa891c
formatted
bazarnov Apr 1, 2025
00e31a7
cleaned up
bazarnov Apr 1, 2025
a5aba82
added the dedicated tests
bazarnov Apr 1, 2025
e017e92
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
bazarnov Apr 1, 2025
0e8394f
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
bazarnov Apr 2, 2025
9f7d498
formatted
bazarnov Apr 2, 2025
6ec240a
updated normalizer
bazarnov Apr 8, 2025
acdecdb
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
bazarnov Apr 8, 2025
5f5c6b1
attempt to fix the Connector Builder tests
bazarnov Apr 8, 2025
e97afa5
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
bazarnov Apr 11, 2025
be3bab1
revert test
bazarnov Apr 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1583,6 +1583,7 @@ definitions:
title: URL Base
description: The base URL (scheme and host, e.g. "https://api.example.com") to match.
type: string
sharable: True
url_path_pattern:
title: URL Path Pattern
description: A regular expression pattern to match the URL path.
Expand Down Expand Up @@ -1841,6 +1842,7 @@ definitions:
- "{{ config['base_url'] or 'https://app.posthog.com'}}/api"
- "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups"
- "https://example.com/api/v1/resource/{{ next_page_token['id'] }}"
sharable: True
path:
title: URL Path
description: Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.
Expand Down Expand Up @@ -1872,6 +1874,7 @@ definitions:
- "$ref": "#/definitions/SessionTokenAuthenticator"
- "$ref": "#/definitions/LegacySessionTokenAuthenticator"
- "$ref": "#/definitions/SelectiveAuthenticator"
sharable: True
error_handler:
title: Error Handler
description: Error handler component that defines how to handle errors.
Expand Down
49 changes: 32 additions & 17 deletions airbyte_cdk/sources/declarative/manifest_declarative_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import (
ManifestComponentTransformer,
)
from airbyte_cdk.sources.declarative.parsers.manifest_normalizer import (
ManifestNormalizer,
)
from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import (
ManifestReferenceResolver,
)
Expand All @@ -57,6 +60,24 @@
from airbyte_cdk.utils.traced_exception import AirbyteTracedException


def _get_declarative_component_schema() -> Dict[str, Any]:
try:
raw_component_schema = pkgutil.get_data(
"airbyte_cdk", "sources/declarative/declarative_component_schema.yaml"
)
if raw_component_schema is not None:
declarative_component_schema = yaml.load(raw_component_schema, Loader=yaml.SafeLoader)
return declarative_component_schema # type: ignore
else:
raise RuntimeError(
"Failed to read manifest component json schema required for deduplication"
)
except FileNotFoundError as e:
raise FileNotFoundError(
f"Failed to read manifest component json schema required for deduplication: {e}"
)


class ManifestDeclarativeSource(DeclarativeSource):
"""Declarative source defined by a manifest of low-code components that define source connector behavior"""

Expand All @@ -78,6 +99,8 @@ def __init__(
component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked.
"""
self.logger = logging.getLogger(f"airbyte.{self.name}")

self._declarative_component_schema = _get_declarative_component_schema()
# For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing
manifest = dict(source_config)
if "type" not in manifest:
Expand All @@ -87,6 +110,14 @@ def __init__(
self.components_module: ModuleType | None = get_registered_components_module(config=config)

resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest)

if emit_connector_builder_messages:
# reduce commonalities in the manifest after the references have been resolved,
# used mostly for Connector Builder use cases.
resolved_source_config = ManifestNormalizer(
resolved_source_config, self._declarative_component_schema
).normalize()

propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters(
"", resolved_source_config, {}
)
Expand Down Expand Up @@ -266,22 +297,6 @@ def _validate_source(self) -> None:
"""
Validates the connector manifest against the declarative component schema
"""
try:
raw_component_schema = pkgutil.get_data(
"airbyte_cdk", "sources/declarative/declarative_component_schema.yaml"
)
if raw_component_schema is not None:
declarative_component_schema = yaml.load(
raw_component_schema, Loader=yaml.SafeLoader
)
else:
raise RuntimeError(
"Failed to read manifest component json schema required for validation"
)
except FileNotFoundError as e:
raise FileNotFoundError(
f"Failed to read manifest component json schema required for validation: {e}"
)

streams = self._source_config.get("streams")
dynamic_streams = self._source_config.get("dynamic_streams")
Expand All @@ -291,7 +306,7 @@ def _validate_source(self) -> None:
)

try:
validate(self._source_config, declarative_component_schema)
validate(self._source_config, self._declarative_component_schema)
except ValidationError as e:
raise ValidationError(
"Validation against json schema defined in declarative_component_schema.yaml schema failed"
Expand Down
9 changes: 9 additions & 0 deletions airbyte_cdk/sources/declarative/parsers/custom_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,12 @@ class UndefinedReferenceException(Exception):

def __init__(self, path: str, reference: str) -> None:
super().__init__(f"Undefined reference {reference} from {path}")


class ManifestNormalizationException(Exception):
    """
    Signals that a manifest could not be normalized (deduplicated).

    The supplied message is prefixed with "Failed to deduplicate manifest: ".
    """

    def __init__(self, message: str) -> None:
        super().__init__(f"Failed to deduplicate manifest: {message}")
Loading
Loading