@@ -143,11 +143,56 @@ public function saveJsonFromApi($endpoint, $site, $sort) {
143
143
], 60 );
144
144
//Log::debug($responseJson);
145
145
$ filename = $ this ->clean ($ site ['site_name ' ]) . '/ ' . $ endpoint . '/page_ ' . str_pad ($ page , 4 , '0 ' , STR_PAD_LEFT ) . '.json ' ;
146
- $ this ->saveToStorage ($ filename , json_decode ($ responseJson , true ));
147
146
$ responseArray = json_decode ($ responseJson , true );
147
+ $ this ->saveToStorage ($ filename , $ responseArray );
148
+ foreach ($ responseArray ['items ' ] as $ item ){
149
+ $ url ='' ;
150
+ if (!isset ($ item ['question_id ' ])){
151
+ if ($ endpoint =="comments " ){
152
+ $ url =$ site ["site_url " ].'/a/ ' .$ item ['post_id ' ];
153
+ #$this->writeToOuput($endpoint.": ".$url);
154
+ } else if ($ endpoint =="mentioned " ){
155
+ # mentions are not needed, cause they are usually replies to own q,a or comments
156
+ # $this->writeToOuput($endpoint.": ".$site["site_url"].'/a/'.$item['post_id']);
157
+ } else {
158
+ #$this->writeToOuput("$endpoint has no question id and no post_id\n"; var_dump($filename));;die
159
+ }
160
+ }else {
161
+ $ url =$ site ["site_url " ].'/questions/ ' .$ item ['question_id ' ];
162
+ # $this->writeToOuput($endpoint.": ".$url);
163
+ }
164
+ if ($ url ){
165
+ $ decodedURL =$ this ->doShortURLDecode ($ url );
166
+ $ this ->appendToStorage ("urls.html " , '<a href=" ' .$ decodedURL .'"> ' .$ url .'</a> ' );
167
+ echo '. ' ;
168
+ sleep (1 ); // otherwise, you get rate limited on SE
169
+ // $this->writeToOuput('saved '.$decodedURL);
170
+ }
171
+ }
148
172
} while ($ responseArray ['has_more ' ]); //https://api.stackexchange.com/docs/paging
149
173
}
150
174
175
/**
 * Resolves the target of the first HTTP redirect issued for a URL.
 *
 * Sends a body-less request without following redirects and asks cURL for
 * the redirect target it found. cURL reports that target as an absolute
 * URL, so relative and absolute Location headers are both handled, the
 * header name is matched regardless of case, and query strings and
 * fragments are kept intact.
 *
 * Prints '-' and returns the input unchanged when the request fails or the
 * response carries no redirect.
 *
 * @param string $url short URL
 * @return string absolute redirect target, or $url when no redirect was found
 */
public function doShortURLDecode($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        echo '-';
        return $url;
    }

    // Only the response headers matter, so skip the body.
    curl_setopt($ch, CURLOPT_NOBODY, true);
    // Redirects must not be followed: CURLINFO_REDIRECT_URL is only
    // populated while CURLOPT_FOLLOWLOCATION is disabled.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
    // Keep cURL from printing the response to the output stream.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $redirectUrl = false;
    if (curl_exec($ch) !== false) {
        $redirectUrl = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
    }

    if (!is_string($redirectUrl) || $redirectUrl === '') {
        echo '-';
        return $url;
    }

    return $redirectUrl;
}
151
196
/**
152
197
* Each of these methods operates on a single site at a time, identified by the site parameter. This parameter can be the full domain name (ie. "stackoverflow.com"), or a short form identified by api_site_parameter on the site object.
153
198
*
@@ -168,7 +213,8 @@ public function saveToStorage($filename, $data) {
168
213
Log::debug ('saveToStorage ' . $ filename );
169
214
$ filename = $ this ->filename_prefix . $ filename ;
170
215
Storage::disk ('local ' )->put ($ filename , json_encode ($ data ));
171
- $ this ->writeToOuput ('Saved to ' . $ filename );
216
+ $ this ->writeToOuput ("\nSaved to " . $ filename );
217
+ echo "resolving URLS " ;
172
218
try {
173
219
Storage::disk ('s3 ' )->put ($ filename , json_encode ($ data ));
174
220
$ this ->writeToOuput ('Saved to AWS S3 ' . $ filename );
@@ -179,6 +225,17 @@ public function saveToStorage($filename, $data) {
179
225
}
180
226
}
181
227
228
/**
 * Appends a piece of plain text to a file on the local storage disk.
 *
 * The configured filename prefix is put in front of the given name before
 * the data is handed to the disk's append operation.
 *
 * @param string $filename file name, without the filename prefix
 * @param string $data     text to append
 */
public function appendToStorage($filename, $data) {
    $localDisk = Storage::disk('local');
    $localDisk->append($this->filename_prefix . $filename, $data);
}
238
+
182
239
/**
183
240
*
184
241
* @param string $siteName
0 commit comments