
feat: destination discover PoC and adding sync modes #527

Open · wants to merge 22 commits into base: main
128 changes: 78 additions & 50 deletions airbyte_cdk/destinations/destination.py
@@ -10,24 +10,93 @@
from typing import Any, Iterable, List, Mapping

import orjson
from airbyte_protocol_dataclasses.models import (
AirbyteMessage,
ConfiguredAirbyteCatalog,
DestinationCatalog,
Type,
)

from airbyte_cdk.connector import Connector
from airbyte_cdk.exception_handler import init_uncaught_exception_handler
from airbyte_cdk.models import (
AirbyteMessage,
AirbyteMessageSerializer,
ConfiguredAirbyteCatalog,
ConfiguredAirbyteCatalogSerializer,
Type,
)
from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit
from airbyte_cdk.utils.traced_exception import AirbyteTracedException

logger = logging.getLogger("airbyte")


def parse_args(args: List[str]) -> argparse.Namespace:
"""
:param args: commandline arguments
:return:
"""

parent_parser = argparse.ArgumentParser(add_help=False)
parent_parser.add_argument(
"--debug", action="store_true", help="enables detailed debug logs related to the sync"
)
main_parser = argparse.ArgumentParser()
subparsers = main_parser.add_subparsers(title="commands", dest="command")

# spec
subparsers.add_parser(
"spec", help="outputs the json configuration specification", parents=[parent_parser]
)

# check
check_parser = subparsers.add_parser(
"check", help="checks the config can be used to connect", parents=[parent_parser]
)
required_check_parser = check_parser.add_argument_group("required named arguments")
required_check_parser.add_argument(
"--config", type=str, required=True, help="path to the json configuration file"
)

# discover
discover_parser = subparsers.add_parser(
"discover",
help="discover the objects available in the destination",
parents=[parent_parser],
)
required_discover_parser = discover_parser.add_argument_group("required named arguments")
required_discover_parser.add_argument(
"--config", type=str, required=True, help="path to the json configuration file"
)

# write
write_parser = subparsers.add_parser(
"write", help="Writes data to the destination", parents=[parent_parser]
)
write_required = write_parser.add_argument_group("required named arguments")
write_required.add_argument(
"--config", type=str, required=True, help="path to the JSON configuration file"
)
write_required.add_argument(
"--catalog", type=str, required=True, help="path to the configured catalog JSON file"
)

parsed_args = main_parser.parse_args(args)
cmd = parsed_args.command
if not cmd:
raise Exception("No command entered. ")
elif cmd not in ["spec", "check", "discover", "write"]:
# This is technically dead code since parse_args() would fail if this was the case
# But it's non-obvious enough to warrant placing it here anyways
raise Exception(f"Unknown command entered: {cmd}")

return parsed_args


class Destination(Connector, ABC):
VALID_CMDS = {"spec", "check", "write"}
VALID_CMDS = {"spec", "check", "discover", "write"}

def discover(self) -> DestinationCatalog:
"""Implement to define what objects are available in the destination"""
raise NotImplementedError("Discover method is not implemented")

@abstractmethod
def write(
@@ -68,52 +137,9 @@ def _run_write(
)
logger.info("Writing complete.")

def parse_args(self, args: List[str]) -> argparse.Namespace:
"""
:param args: commandline arguments
:return:
"""

parent_parser = argparse.ArgumentParser(add_help=False)
main_parser = argparse.ArgumentParser()
subparsers = main_parser.add_subparsers(title="commands", dest="command")

# spec
subparsers.add_parser(
"spec", help="outputs the json configuration specification", parents=[parent_parser]
)

# check
check_parser = subparsers.add_parser(
"check", help="checks the config can be used to connect", parents=[parent_parser]
)
required_check_parser = check_parser.add_argument_group("required named arguments")
required_check_parser.add_argument(
"--config", type=str, required=True, help="path to the json configuration file"
)

# write
write_parser = subparsers.add_parser(
"write", help="Writes data to the destination", parents=[parent_parser]
)
write_required = write_parser.add_argument_group("required named arguments")
write_required.add_argument(
"--config", type=str, required=True, help="path to the JSON configuration file"
)
write_required.add_argument(
"--catalog", type=str, required=True, help="path to the configured catalog JSON file"
)

parsed_args = main_parser.parse_args(args)
cmd = parsed_args.command
if not cmd:
raise Exception("No command entered. ")
elif cmd not in ["spec", "check", "write"]:
# This is technically dead code since parse_args() would fail if this was the case
# But it's non-obvious enough to warrant placing it here anyways
raise Exception(f"Unknown command entered: {cmd}")

return parsed_args
@staticmethod
def parse_args(args: List[str]) -> argparse.Namespace:
return parse_args(args)
Comment on lines +140 to +142
Contributor:
In terms of patterns, I think my preference here, and proposed best practice (eventually), is that we make this a class method on the base connector class. I would call it "launch", and then every connector class could invoke itself. In theory, all sources and destinations could share the same implementation across connectors of the same type, and unless the connector needs something special (a code smell anyway), the "cls" input arg should be all you need in order to instantiate a connector using CLI args.

I don't think we need to tackle all of that scope in this PR, but your implementation is already very close to what I think is the ideal state, so I'll mention it here as a non-blocking proposal.

Contributor Author:

That makes sense to me. I started diverging a bit from the current solution because run_cmd is not static, so we need to instantiate the object before calling it, but the way we instantiate the destination depends on which protocol method is called (see this piece of code for an example).

So do we agree that the launch method would be static? If so, I think we are moving in the right direction and I can start adding this in later changes. If not, I'll need more information about what you had in mind.

Contributor (@aaronsteers, May 6, 2025):

@maxi297 - The difference between a static method and a class method for this use case is just that a class method knows what class it is (static methods don't get a cls input), and that class method implementations can be inherited by subclasses. So, yes, agreed the method is static in the sense that it doesn't need the object, but in order not to have to implement it on each class, I think making it a class method is slightly cleaner in the long run. Sorry if I'm not good at explaining this, and please don't block on my comment - either way, it's a step in the right direction, I think, if the class knows how to instantiate itself, without needing an external class or function in order to be instantiated. 👍
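
For illustration, that inheritance point could be sketched like this (hypothetical BaseConnector and DestinationFoo names, not CDK classes):

    from typing import List


    class BaseConnector:
        @classmethod
        def launch(cls, args: List[str]) -> None:
            # cls resolves to whichever subclass was invoked, so one shared
            # implementation can instantiate any connector type from CLI args.
            connector = cls()
            print(f"launching {type(connector).__name__} with {args}")


    class DestinationFoo(BaseConnector):
        pass


    # DestinationFoo.launch(["spec"]) instantiates DestinationFoo, not
    # BaseConnector, without DestinationFoo implementing anything itself.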

Contributor:

@maxi297 - Here's the early implementation I wrote for S3 a while back...

    @classmethod
    def launch(cls, args: list[str] | None = None) -> None:
        """Launch the source using the provided CLI args.

        If no args are provided, the launch args will be inferred automatically.

        In the future, we should consider moving this method to the Connector base class,
        so that all sources and destinations can launch themselves and so none of this
        code needs to live in the connector itself.
        """
        args = args or sys.argv[1:]
        catalog_path = AirbyteEntrypoint.extract_catalog(args)
        # TODO: Delete if not needed:
        # config_path = AirbyteEntrypoint.extract_config(args)
        # state_path = AirbyteEntrypoint.extract_state(args)

        source = cls.create(
            configured_catalog_path=Path(catalog_path) if catalog_path else None,
        )
        # The following function will wrap the execution in proper error handling.
        # Failures prior to here may not emit proper Airbyte TRACE or CONNECTION_STATUS messages.
        launch(
            source=source,
            args=args,
        )

This wasn't a very good implementation, but you can see the generic cls.create() ref.

Where cls.create() was defined as:

    @classmethod
    def create(
        cls,
        *,
        configured_catalog_path: Path | str | None = None,
    ) -> SourceS3:
        """Create a new instance of the source.
        ...
        # A bunch of hacky stuff here.
        ...
        return cls(
            # These are the defaults for the source. No need for a caller to change them:
            stream_reader=SourceS3StreamReader(),
            spec_class=Config,
            cursor_cls=Cursor,
            # This is needed early. (We also will provide it again later.)
            catalog=configured_catalog,
            # These will be provided later, after we have wrapped proper error handling.
            config=None,
            state=None,
        )

Contributor:

Another problem with our current design is that we ask for config, catalog, etc. in the constructor, but then pass them again during invocation of 'check', 'discover', etc. I think it would be better to not need these in the constructor at all, in which case we greatly simplify the process of creating a connector class, and we put all code that can fail in a code path that can properly message about any failures.

Again - hopefully this is helpful for long-term thinking, but I don't think it needs to block the current PR.

Contributor Author:

classmethod makes sense, yes!

And I would personally prefer to have these as part of the constructor: if we don't, each method will need to instantiate the objects it needs every time it is called, or have some kind of intelligent accessor that checks whether the field has already been instantiated and, if not, instantiates it. That feels complex compared to just instantiating the connector properly in launch (see the sketch below). WDYT?
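
For illustration, the two options being weighed might look like this (a rough sketch; ApiClient and the destination names are hypothetical):

    from functools import cached_property
    from typing import Any, Mapping


    class ApiClient:  # hypothetical stand-in for a destination's real client
        def __init__(self, config: Mapping[str, Any]) -> None:
            self._config = config


    # Option A: dependencies built once in the constructor (the preference above)
    class DestinationFoo:
        def __init__(self, config: Mapping[str, Any]) -> None:
            self._client = ApiClient(config)


    # Option B: argument-free constructor plus a lazy, cached accessor
    class DestinationBar:
        config: Mapping[str, Any] = {}

        @cached_property
        def _client(self) -> ApiClient:
            # built on first access; every method must go through this accessor
            return ApiClient(self.config)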


def run_cmd(self, parsed_args: argparse.Namespace) -> Iterable[AirbyteMessage]:
cmd = parsed_args.command
@@ -137,6 +163,8 @@ def run_cmd(self, parsed_args: argparse.Namespace) -> Iterable[AirbyteMessage]:

if cmd == "check":
yield self._run_check(config=config)
elif cmd == "discover":
yield AirbyteMessage(type=Type.DESTINATION_CATALOG, destination_catalog=self.discover())
elif cmd == "write":
# Wrap in UTF-8 to override any other input encodings
wrapped_stdin = io.TextIOWrapper(sys.stdin.buffer, encoding="utf-8")
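For context, a destination opting into the new command would override discover() along these lines. This is a hedged sketch: the diff does not show DestinationOperation's fields, so the object_name, sync_mode, and json_schema names below are assumptions to verify against airbyte_protocol_dataclasses.

    from airbyte_cdk.destinations import Destination
    from airbyte_cdk.models import DestinationCatalog, DestinationOperation


    class MyDestination(Destination):
        # (the abstract write() method is omitted for brevity)

        def discover(self) -> DestinationCatalog:
            # Advertise the objects that can be written to in this destination.
            # DestinationOperation field names are assumed, not confirmed.
            return DestinationCatalog(
                operations=[
                    DestinationOperation(
                        object_name="users",
                        sync_mode="append",
                        json_schema={"type": "object", "properties": {}},
                    )
                ]
            )

run_cmd then wraps the returned catalog in an AirbyteMessage of type DESTINATION_CATALOG, and the command is reachable as "discover --config config.json" per the new parser.
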
6 changes: 3 additions & 3 deletions airbyte_cdk/logger.py
@@ -8,14 +8,14 @@
from typing import Any, Callable, Mapping, Optional, Tuple

import orjson

from airbyte_cdk.models import (
from airbyte_protocol_dataclasses.models import (
AirbyteLogMessage,
AirbyteMessage,
AirbyteMessageSerializer,
Level,
Type,
)

from airbyte_cdk.models import AirbyteMessageSerializer
from airbyte_cdk.utils import PrintBuffer
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets

2 changes: 2 additions & 0 deletions airbyte_cdk/models/__init__.py
@@ -35,6 +35,8 @@
ConfiguredAirbyteCatalog,
ConfiguredAirbyteStream,
ConnectorSpecification,
DestinationCatalog,
DestinationOperation,
DestinationSyncMode,
EstimateType,
FailureType,
5 changes: 5 additions & 0 deletions airbyte_cdk/models/airbyte_protocol.py
@@ -2,6 +2,10 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

"""
This file is necessary because the `AirbyteStateBlob` implementation in the protocol lib is incomplete and given we use the incomplete implementation, we will get `TypeError: AirbyteStateBlob.__init__() takes 1 positional argument but 2 were given`. Hence, we need to redefine all the classes that could serialize AirbyteStateBlob to use the CDK implementation, not the protocol lib one.
"""

from dataclasses import InitVar, dataclass
from typing import Annotated, Any, Dict, List, Mapping, Optional, Union

@@ -86,3 +90,4 @@ class AirbyteMessage:
state: Optional[AirbyteStateMessage] = None
trace: Optional[AirbyteTraceMessage] = None # type: ignore [name-defined]
control: Optional[AirbyteControlMessage] = None # type: ignore [name-defined]
destination_catalog: Optional[DestinationCatalog] = None # type: ignore [name-defined]
61 changes: 49 additions & 12 deletions airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py
@@ -1,11 +1,10 @@
#
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
#


from abc import ABC, abstractmethod
from copy import deepcopy
from dataclasses import InitVar, dataclass, field
from typing import Any, List, Mapping, MutableMapping, Optional, Union
from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Union

import dpath
from typing_extensions import deprecated
@@ -16,7 +15,7 @@
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
from airbyte_cdk.sources.source import ExperimentalClassWarning
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
from airbyte_cdk.sources.types import Config

AIRBYTE_DATA_TYPES: Mapping[str, MutableMapping[str, Any]] = {
"string": {"type": ["null", "string"]},
@@ -114,6 +113,38 @@ def _update_pointer(
)


@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
class AdditionalPropertyFieldsInferrer(ABC):
"""
Infers additional fields to be added to each property. For example, if this inferrer returns {"toto": "tata"}, a property that would have looked like this:
```
"properties": {
"Id": {
"type": ["null", "string"],
},
<...>
}
```
... will look like this:
```
"properties": {
"Id": {
"type": ["null", "string"],
"toto": "tata"
},
<...>
}
```
"""

@abstractmethod
def infer(self, property_definition: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
"""
Infers additional property fields from the given property definition.
"""
pass

Comment on lines +116 to +146
Contributor:

🛠️ Refactor suggestion

Consider making the inferrer contract read-only

infer() receives a mutable property_definition. Accidental in-place edits by an inferrer could leak into the final schema.
Would switching the parameter type from MutableMapping to Mapping (and passing a copy) help protect against that?

-    def infer(self, property_definition: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
+    def infer(self, property_definition: Mapping[str, Any]) -> Mapping[str, Any]:

That way implementers must return a new dict, reducing side-effects – wdyt?
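
As a usage sketch of that read-only contract, a concrete inferrer (hypothetical name and annotation, assuming the Mapping-based signature suggested above) would build a new mapping rather than editing its input:

    from typing import Any, Mapping


    class StaticHintInferrer(AdditionalPropertyFieldsInferrer):
        """Hypothetical inferrer adding a fixed annotation to every property."""

        def infer(self, property_definition: Mapping[str, Any]) -> Mapping[str, Any]:
            # Return a fresh dict and leave the input untouched.
            return {"x-discovered": True}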

@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass
class DynamicSchemaLoader(SchemaLoader):
@@ -126,6 +157,8 @@ class DynamicSchemaLoader(SchemaLoader):
parameters: InitVar[Mapping[str, Any]]
schema_type_identifier: SchemaTypeIdentifier
schema_transformations: List[RecordTransformation] = field(default_factory=lambda: [])
additional_property_fields_inferrer: Optional[AdditionalPropertyFieldsInferrer] = None
allow_additional_properties: bool = True

def get_json_schema(self) -> Mapping[str, Any]:
"""
@@ -149,22 +182,26 @@ def get_json_schema(self) -> Mapping[str, Any]:
property_definition,
self.schema_type_identifier.type_pointer,
)

value.update(
self.additional_property_fields_inferrer.infer(property_definition)
if self.additional_property_fields_inferrer
else {}
)
properties[key] = value

transformed_properties = self._transform(properties, {})
transformed_properties = self._transform(properties)

return {
"$schema": "https://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": True,
Contributor:
💎 Nice. Appreciate that we're following JSON Schema standards for communicating this. 👍

"additionalProperties": self.allow_additional_properties,
"properties": transformed_properties,
}

def _transform(
self,
properties: Mapping[str, Any],
stream_state: StreamState,
stream_slice: Optional[StreamSlice] = None,
) -> Mapping[str, Any]:
for transformation in self.schema_transformations:
transformation.transform(
@@ -190,7 +227,7 @@ def _get_type(
self,
raw_schema: MutableMapping[str, Any],
field_type_path: Optional[List[Union[InterpolatedString, str]]],
) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]:
) -> Dict[str, Any]:
"""
Determines the JSON Schema type for a field, supporting nullable and combined types.
"""
@@ -220,7 +257,7 @@
f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
)

def _resolve_complex_type(self, complex_type: ComplexFieldType) -> Mapping[str, Any]:
def _resolve_complex_type(self, complex_type: ComplexFieldType) -> Dict[str, Any]:
if not complex_type.items:
return self._get_airbyte_type(complex_type.field_type)

@@ -255,14 +292,14 @@ def _replace_type_if_not_valid(
return field_type

@staticmethod
def _get_airbyte_type(field_type: str) -> MutableMapping[str, Any]:
def _get_airbyte_type(field_type: str) -> Dict[str, Any]:
"""
Maps a field type to its corresponding Airbyte type definition.
"""
if field_type not in AIRBYTE_DATA_TYPES:
raise ValueError(f"Invalid Airbyte data type: {field_type}")

return deepcopy(AIRBYTE_DATA_TYPES[field_type])
return deepcopy(AIRBYTE_DATA_TYPES[field_type]) # type: ignore # a copy of a dict should be a dict, not a MutableMapping

def _extract_data(
self,
16 changes: 14 additions & 2 deletions airbyte_cdk/test/catalog_builder.py
@@ -2,6 +2,8 @@

from typing import Any, Dict, List, Union, overload

from airbyte_protocol_dataclasses.models import DestinationSyncMode

from airbyte_cdk.models import (
ConfiguredAirbyteCatalog,
ConfiguredAirbyteStream,
@@ -19,7 +21,7 @@ def __init__(self) -> None:
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_primary_key": [["id"]],
},
"primary_key": [["id"]],
"primary_key": None,
"sync_mode": "full_refresh",
"destination_sync_mode": "overwrite",
}
@@ -32,6 +34,16 @@ def with_sync_mode(self, sync_mode: SyncMode) -> "ConfiguredAirbyteStreamBuilder":
self._stream["sync_mode"] = sync_mode.name
return self

def with_destination_sync_mode(
self, sync_mode: DestinationSyncMode
) -> "ConfiguredAirbyteStreamBuilder":
self._stream["destination_sync_mode"] = sync_mode.name
return self

def with_destination_object_name(self, name: str) -> "ConfiguredAirbyteStreamBuilder":
self._stream["destination_object_name"] = name
return self

def with_primary_key(self, pk: List[List[str]]) -> "ConfiguredAirbyteStreamBuilder":
self._stream["primary_key"] = pk
self._stream["stream"]["source_defined_primary_key"] = pk # type: ignore # we assume that self._stream["stream"] is a Dict[str, Any]
@@ -58,7 +70,7 @@ def with_stream(self, name: str, sync_mode: SyncMode) -> "CatalogBuilder": ...
def with_stream(
self,
name: Union[str, ConfiguredAirbyteStreamBuilder],
sync_mode: Union[SyncMode, None] = None,
sync_mode: SyncMode = SyncMode.full_refresh,
) -> "CatalogBuilder":
# As we are introducing a fully fledge ConfiguredAirbyteStreamBuilder, we would like to deprecate the previous interface
# with_stream(str, SyncMode)
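For reference, a usage sketch of the new builder methods (stream and object names are made up; with_name is assumed from the existing builder helpers):

    from airbyte_cdk.models import DestinationSyncMode, SyncMode
    from airbyte_cdk.test.catalog_builder import CatalogBuilder, ConfiguredAirbyteStreamBuilder

    catalog = (
        CatalogBuilder()
        .with_stream(
            ConfiguredAirbyteStreamBuilder()
            .with_name("users")
            .with_sync_mode(SyncMode.full_refresh)
            .with_destination_sync_mode(DestinationSyncMode.append)
            .with_destination_object_name("users_table")
        )
        .build()
    )
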
6 changes: 5 additions & 1 deletion airbyte_cdk/test/mock_http/request.py
@@ -72,7 +72,11 @@ def _to_mapping(
elif isinstance(body, bytes):
return json.loads(body.decode()) # type: ignore # assumes return type of Mapping[str, Any]
elif isinstance(body, str):
return json.loads(body) # type: ignore # assumes return type of Mapping[str, Any]
try:
Contributor Author:
Without this addition, depending on the order of evaluation, the test test_given_on_match_is_mapping_but_not_input_when_matches_then_return_false would fail.

return json.loads(body) # type: ignore # assumes return type of Mapping[str, Any]
except json.JSONDecodeError:
# one of the bodies is a mapping while the other isn't, so the comparison should fail anyway
return None
return None

@staticmethod