23
23
* Called on hook 'wprss_fetch_single_feed_hook'.
24
24
*
25
25
* @since 3.2
26
+ *
27
+ * @throws Exception
26
28
*/
27
29
function wprss_fetch_insert_single_feed_items ( $ feed_ID ) {
28
30
set_transient ('wpra/feeds/importing/ ' . $ feed_ID , true , 0 );
@@ -104,56 +106,72 @@ function wprss_fetch_insert_single_feed_items( $feed_ID ) {
104
106
? wprss_get_general_setting ('unique_titles ' )
105
107
: $ unique_titles_only ;
106
108
$ unique_titles_only = filter_var ($ unique_titles_only , FILTER_VALIDATE_BOOLEAN );
107
- // Gather the titles of the items that are imported
108
- // The import process will check not only the titles in the DB but the titles currently in the feed
109
- $ existing_titles = [];
110
109
111
110
// Gather the existing feed item IDs for this feed source
112
111
$ useGuids = get_post_meta ($ feed_ID , 'wprss_use_guids ' , true );
113
112
$ useGuids = filter_var ($ useGuids , FILTER_VALIDATE_BOOLEAN );
114
- $ existingIds = $ useGuids
115
- ? wprss_get_existing_guids ()
116
- : wprss_get_existing_permalinks ();
113
+
114
+ // Gather the IDs and titles of the items that are imported
115
+ // The import process will not only check the IDs and titles against the DB, but also against the feed
116
+ // itself. This prevents duplicate items in the feed from importing duplicates.
117
+ $ existingIds = [];
118
+ $ existingTitles = [];
117
119
118
120
// Generate a list of items fetched, that are not already in the DB
119
121
$ new_items = array ();
120
122
foreach ( $ items_to_insert as $ item ) {
121
- $ item_title = $ item ->get_title ();
123
+ $ itemTitle = $ item ->get_title ();
122
124
$ guid = $ item ->get_id ();
123
125
$ permalink = $ item ->get_permalink ();
124
126
$ permalink = wprss_normalize_permalink ( $ permalink , $ item , $ feed_ID );
125
127
126
128
// Check if blacklisted
127
129
if (wprss_is_blacklisted ($ permalink )) {
128
- $ logger ->debug ('Item "{0}" is blacklisted ' , [$ item_title ]);
129
-
130
+ $ logger ->debug ('Item "{0}" is blacklisted ' , [$ itemTitle ]);
130
131
continue ;
131
132
}
132
133
133
- // Check if already imported
134
- $ idToCheck = $ useGuids ? $ guid : $ permalink ;
135
- if (array_key_exists ($ idToCheck , $ existingIds )) {
136
- $ logger ->debug ('Item "{0}" already exists in the database ' , [$ item_title ]);
134
+ $ itemId = $ useGuids ? $ guid : $ permalink ;
135
+
136
+ // Check if already imported in database
137
+ //-----------------------------------------
138
+ $ itemIdExists = $ useGuids ? wprss_guid_exists ($ guid ) : wprss_permalink_exists ($ permalink );
139
+ $ itemsTitleExists = $ unique_titles_only && wprss_item_title_exists ($ item ->get_title ());
140
+
141
+ if ($ itemIdExists || $ itemsTitleExists ) {
142
+ $ reason = $ itemIdExists
143
+ ? ($ useGuids ? 'GUID ' : 'permalink ' )
144
+ : 'Non-unique title ' ;
145
+
146
+ $ logger ->debug ('Item "{title}" already exists in the database. Reason: {reason} ' , [
147
+ 'title ' => $ itemTitle ,
148
+ 'reason ' => $ reason
149
+ ]);
137
150
138
151
continue ;
139
152
}
140
153
141
- // Check if title exists (if the option is enabled)
142
- if ($ unique_titles_only ) {
143
- $ title_exists_db = wprss_item_title_exists ($ item ->get_title ());
144
- $ title_exists_feed = array_key_exists ($ item_title , $ existing_titles );
145
- $ title_exists = $ title_exists_db || $ title_exists_feed ;
146
- // Add this item's title to the list to check against
147
- $ existing_titles [$ item_title ] = 1 ;
154
+ // Check if item is duplicated in the feed
155
+ //-----------------------------------------
156
+ $ itemIdIsDuped = array_key_exists ($ itemId , $ existingIds );
157
+ $ itemTitleIsDuped = $ unique_titles_only && array_key_exists ($ itemTitle , $ existingTitles );
148
158
149
- if ($ title_exists ) {
150
- $ logger ->debug ('An item with the title "{0}" already exists ' , [$ item_title ]);
159
+ if ($ itemIdIsDuped || $ itemTitleIsDuped ) {
160
+ $ reason = $ itemIdIsDuped
161
+ ? ($ useGuids ? 'GUID ' : 'permalink ' )
162
+ : 'Non-unique title ' ;
151
163
152
- continue ;
153
- }
164
+ $ logger ->debug ('Item "{title}" is duplicated in the feed. Reason: {reason} ' , [
165
+ 'title ' => $ itemTitle ,
166
+ 'reason ' => $ reason ,
167
+ ]);
168
+
169
+ continue ;
170
+ } else {
171
+ $ existingIds [$ itemId ] = 1 ;
172
+ $ existingTitles [$ itemTitle ] = 1 ;
154
173
}
155
174
156
- $ existingIds [$ idToCheck ] = 1 ;
157
175
$ new_items [] = $ item ;
158
176
}
159
177
@@ -184,14 +202,14 @@ function wprss_fetch_insert_single_feed_items( $feed_ID ) {
184
202
? 0
185
203
: $ num_new_items - $ num_can_insert ;
186
204
187
- // Get an array with the DB feed items in reverse order (oldest first)
205
+ // Get an array with the DB feed items in reverse order (the oldest first)
188
206
$ db_feed_items_reversed = array_reverse ( $ db_feed_items ->posts );
189
207
// Cut the array to get only the first few that are to be deleted ( equal to $num_feed_items_to_delete )
190
208
$ feed_items_to_delete = array_slice ( $ db_feed_items_reversed , 0 , $ num_feed_items_to_delete );
191
209
192
210
// Iterate the feed items and delete them
193
211
$ num_items_deleted = 0 ;
194
- foreach ( $ feed_items_to_delete as $ key => $ post ) {
212
+ foreach ( $ feed_items_to_delete as $ post ) {
195
213
wp_delete_post ( $ post ->ID , TRUE );
196
214
$ num_items_deleted ++;
197
215
}
@@ -201,7 +219,7 @@ function wprss_fetch_insert_single_feed_items( $feed_ID ) {
201
219
}
202
220
}
203
221
204
- update_post_meta ( $ feed_ID , 'wprss_last_update ' , $ last_update_time = time () );
222
+ update_post_meta ( $ feed_ID , 'wprss_last_update ' , time () );
205
223
update_post_meta ( $ feed_ID , 'wprss_last_update_items ' , 0 );
206
224
207
225
// Insert the items into the db
@@ -270,34 +288,31 @@ function wprss_get_feed_items( $feed_url, $source, $force_feed = FALSE ) {
270
288
}
271
289
272
290
if (defined ('WP_DEBUG ' ) && WP_DEBUG ) {
273
- add_action ('cron_request ' , 'wprss_cron_add_xdebug_cookie ' , 10 );
274
- }
275
-
276
- /**
277
- * Allow debugging of wp_cron jobs using xDebug.
278
- *
279
- * This is done by taking the XDEBUG cookie received from the browser (which enables an xDebug session) and passing it
280
- * to WP Cron. That way, code initiated from a cron job will be debuggable.
281
- *
282
- * @param array $cronRequest
283
- *
284
- * @return array $cron_request_array with the current XDEBUG_SESSION cookie added if set
285
- */
286
- function wprss_cron_add_xdebug_cookie ($ cronRequest )
287
- {
288
- if (empty ($ _COOKIE ['XDEBUG_SESSION ' ])) {
289
- return ($ cronRequest );
290
- }
291
+ /**
292
+ * Allow debugging of wp_cron jobs using xDebug.
293
+ *
294
+ * This is done by taking the XDEBUG cookie received from the browser (which enables an xDebug session) and passing it
295
+ * to WP Cron. That way, code initiated from a cron job will be debuggable.
296
+ *
297
+ * @param array $cronRequest
298
+ *
299
+ * @return array $cron_request_array with the current XDEBUG_SESSION cookie added if set
300
+ */
301
+ add_action ('cron_request ' , function ($ cronRequest ) {
302
+ if (empty ($ _COOKIE ['XDEBUG_SESSION ' ])) {
303
+ return ($ cronRequest );
304
+ }
291
305
292
- $ cookie = filter_var ($ _COOKIE ['XDEBUG_SESSION ' ], FILTER_SANITIZE_STRING );
306
+ $ cookie = filter_var ($ _COOKIE ['XDEBUG_SESSION ' ], FILTER_SANITIZE_STRING );
293
307
294
- if (empty ($ cronRequest ['args ' ]['cookies ' ])) {
295
- $ cronRequest ['args ' ]['cookies ' ] = [];
296
- }
308
+ if (empty ($ cronRequest ['args ' ]['cookies ' ])) {
309
+ $ cronRequest ['args ' ]['cookies ' ] = [];
310
+ }
297
311
298
- $ cronRequest ['args ' ]['cookies ' ]['XDEBUG_SESSION ' ] = $ cookie ;
312
+ $ cronRequest ['args ' ]['cookies ' ]['XDEBUG_SESSION ' ] = $ cookie ;
299
313
300
- return $ cronRequest ;
314
+ return $ cronRequest ;
315
+ });
301
316
}
302
317
303
318
/**
@@ -364,7 +379,7 @@ function wprss_fetch_feed($url, $source = null, $param_force_feed = false)
364
379
365
380
// If a feed source was passed
366
381
if ($ source !== null || $ param_force_feed ) {
367
- // Get the force feed option for the feed source
382
+ // Get the force- feed option for the feed source
368
383
$ force_feed = get_post_meta ($ source , 'wprss_force_feed ' , true );
369
384
// If turned on, force the feed
370
385
if ($ force_feed == 'true ' || $ param_force_feed ) {
0 commit comments