@@ -70,6 +70,8 @@ def has_pending_requests(self: ApifyScheduler) -> bool:
     def enqueue_request(self: ApifyScheduler, request: Request) -> bool:
         """Add a request to the scheduler.

+        This could be called from either a spider or a downloader middleware (e.g. redirect, retry, ...).
+
         Args:
             request: The request to add to the scheduler.

@@ -94,7 +96,7 @@ def enqueue_request(self: ApifyScheduler, request: Request) -> bool:
             traceback.print_exc()
             raise

-        Actor.log.debug(f'[{call_id}]: apify_request was added to the RQ (apify_request={apify_request})')
+        Actor.log.debug(f'[{call_id}]: rq.add_request.result={result}...')
         return bool(result['wasAlreadyPresent'])

     def next_request(self: ApifyScheduler) -> Request | None:
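Note on the pattern used in the hunks above: Scrapy's scheduler hooks (`enqueue_request`, `next_request`) are synchronous, while the Apify `RequestQueue` client is async, so every queue call is driven through `nested_event_loop.run_until_complete(...)` and failures are printed and re-raised. A minimal sketch of that bridge, assuming a dedicated module-level loop named like the one in the diff; the `run_coro` helper is hypothetical and not part of the SDK:

```python
import asyncio
import traceback
from typing import Any, Coroutine

# Dedicated loop for driving async Apify SDK calls from Scrapy's synchronous
# scheduler methods; mirrors the `nested_event_loop` used in the diff (assumption).
nested_event_loop: asyncio.AbstractEventLoop = asyncio.new_event_loop()


def run_coro(coro: Coroutine) -> Any:
    """Run a coroutine to completion from sync code, logging and re-raising on failure."""
    try:
        return nested_event_loop.run_until_complete(coro)
    except BaseException:
        # Same error-handling style as the scheduler methods: print the traceback, then re-raise.
        traceback.print_exc()
        raise
```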
@@ -109,6 +111,7 @@ def next_request(self: ApifyScheduler) -> Request | None:
         if not isinstance(self._rq, RequestQueue):
             raise TypeError('self._rq must be an instance of the RequestQueue class')

+        # Fetch the next request from the Request Queue
         try:
             apify_request = nested_event_loop.run_until_complete(self._rq.fetch_next_request())
         except BaseException:
@@ -123,6 +126,14 @@ def next_request(self: ApifyScheduler) -> Request | None:
         if not isinstance(self.spider, Spider):
             raise TypeError('self.spider must be an instance of the Spider class')

+        # Let the Request Queue know that the request is being handled. Every request should be marked as handled;
+        # retrying is handled by Scrapy's RetryMiddleware.
+        try:
+            nested_event_loop.run_until_complete(self._rq.mark_request_as_handled(apify_request))
+        except BaseException:
+            traceback.print_exc()
+            raise
+
         scrapy_request = to_scrapy_request(apify_request, spider=self.spider)
         Actor.log.debug(
             f'[{call_id}]: apify_request was transformed to the scrapy_request which is gonna be returned (scrapy_request={scrapy_request})',
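Taken together, the `next_request` hunks implement a fetch-then-mark-handled flow: each request pulled from the Request Queue is marked as handled immediately, and a retry decided by Scrapy's RetryMiddleware comes back to the scheduler through `enqueue_request` as a fresh entry (which is also why the new docstring line mentions downloader middlewares). A rough, illustrative sketch of that flow, reusing `nested_event_loop` from the sketch above and `to_scrapy_request` from the module under change; this is not the actual method body:

```python
def next_request_flow(rq, spider):
    """Illustrative only: fetch the next request, mark it handled, convert it for Scrapy."""
    # Pull the next request from the Apify Request Queue (async call driven on the dedicated loop).
    apify_request = nested_event_loop.run_until_complete(rq.fetch_next_request())
    if apify_request is None:
        return None  # nothing available to schedule right now

    # Mark it as handled straight away; retrying is delegated to Scrapy's RetryMiddleware,
    # which re-schedules failed requests through the scheduler's enqueue_request().
    nested_event_loop.run_until_complete(rq.mark_request_as_handled(apify_request))

    # Convert the Apify request into a Scrapy Request for the engine.
    return to_scrapy_request(apify_request, spider=spider)
```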
0 commit comments