File tree 3 files changed +54
-3
lines changed
airbyte_cdk/sources/declarative
unit_tests/sources/declarative/incremental
3 files changed +54
-3
lines changed Original file line number Diff line number Diff line change 19
19
from airbyte_cdk .sources .declarative .extractors .record_filter import (
20
20
ClientSideIncrementalRecordFilterDecorator ,
21
21
)
22
- from airbyte_cdk .sources .declarative .incremental import ConcurrentPerPartitionCursor
22
+ from airbyte_cdk .sources .declarative .incremental import (
23
+ ConcurrentPerPartitionCursor ,
24
+ GlobalSubstreamCursor ,
25
+ )
23
26
from airbyte_cdk .sources .declarative .incremental .datetime_based_cursor import DatetimeBasedCursor
24
27
from airbyte_cdk .sources .declarative .incremental .per_partition_with_global import (
25
28
PerPartitionWithGlobalCursor ,
@@ -361,7 +364,8 @@ def _group_streams(
361
364
== DatetimeBasedCursorModel .__name__
362
365
and hasattr (declarative_stream .retriever , "stream_slicer" )
363
366
and isinstance (
364
- declarative_stream .retriever .stream_slicer , PerPartitionWithGlobalCursor
367
+ declarative_stream .retriever .stream_slicer ,
368
+ (GlobalSubstreamCursor , PerPartitionWithGlobalCursor ),
365
369
)
366
370
):
367
371
stream_state = self ._connector_state_manager .get_stream_state (
Original file line number Diff line number Diff line change @@ -1439,7 +1439,9 @@ def create_concurrent_cursor_from_perpartition_cursor(
1439
1439
stream_state = self .apply_stream_state_migrations (stream_state_migrations , stream_state )
1440
1440
1441
1441
# Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state; an explicit `global_substream_cursor` setting in the component definition forces it as well
1442
- use_global_cursor = isinstance (partition_router , GroupingPartitionRouter )
1442
+ use_global_cursor = isinstance (
1443
+ partition_router , GroupingPartitionRouter
1444
+ ) or component_definition .get ("global_substream_cursor" , False )
1443
1445
1444
1446
# Return the concurrent cursor and state converter
1445
1447
return ConcurrentPerPartitionCursor (
Original file line number Diff line number Diff line change @@ -3449,3 +3449,48 @@ def test_semaphore_cleanup():
3449
3449
assert '{"id":"2"}' not in cursor ._semaphore_per_partition
3450
3450
assert len (cursor ._partition_parent_state_map ) == 0 # All parent states should be popped
3451
3451
assert cursor ._parent_state == {"parent" : {"state" : "state2" }} # Last parent state
3452
+
3453
+
3454
+ def test_given_global_state_when_read_then_state_is_not_per_partition () -> None :
3455
+ manifest = deepcopy (SUBSTREAM_MANIFEST )
3456
+ manifest ["definitions" ]["post_comments_stream" ]["incremental_sync" ][
3457
+ "global_substream_cursor"
3458
+ ] = True
3459
+ manifest ["streams" ].remove ({"$ref" : "#/definitions/post_comment_votes_stream" })
3460
+ record = {
3461
+ "id" : 9 ,
3462
+ "post_id" : 1 ,
3463
+ "updated_at" : COMMENT_10_UPDATED_AT ,
3464
+ }
3465
+ mock_requests = [
3466
+ (
3467
+ f"https://api.example.com/community/posts?per_page=100&start_time={ START_DATE } " ,
3468
+ {
3469
+ "posts" : [
3470
+ {"id" : 1 , "updated_at" : POST_1_UPDATED_AT },
3471
+ ],
3472
+ },
3473
+ ),
3474
+ # Fetch the first page of comments for post 1
3475
+ (
3476
+ "https://api.example.com/community/posts/1/comments?per_page=100" ,
3477
+ {
3478
+ "comments" : [record ],
3479
+ },
3480
+ ),
3481
+ ]
3482
+
3483
+ run_mocked_test (
3484
+ mock_requests ,
3485
+ manifest ,
3486
+ CONFIG ,
3487
+ "post_comments" ,
3488
+ {},
3489
+ [record ],
3490
+ {
3491
+ "lookback_window" : 1 ,
3492
+ "parent_state" : {"posts" : {"updated_at" : "2024-01-30T00:00:00Z" }},
3493
+ "state" : {"updated_at" : "2024-01-25T00:00:00Z" },
3494
+ "use_global_cursor" : True , # ensures that it is running the Concurrent CDK version as this is not populated in the declarative implementation
3495
+ }, # this state does not have per-partition entries, which would be under `states`
3496
+ )
You can’t perform that action at this time.
0 commit comments