@@ -143,11 +143,52 @@ public function saveJsonFromApi($endpoint, $site, $sort) {
143
143
], 60 );
144
144
//Log::debug($responseJson);
145
145
$ filename = $ this ->clean ($ site ['site_name ' ]) . '/ ' . $ endpoint . '/page_ ' . str_pad ($ page , 4 , '0 ' , STR_PAD_LEFT ) . '.json ' ;
146
- $ this ->saveToStorage ($ filename , json_decode ($ responseJson , true ));
147
146
$ responseArray = json_decode ($ responseJson , true );
147
+ $ this ->saveToStorage ($ filename , $ responseArray );
148
+ foreach ($ responseArray ['items ' ] as $ item ){
149
+ $ url ='' ;
150
+ if (!isset ($ item ['question_id ' ])){
151
+ if ($ endpoint =="comments " ){
152
+ $ url =$ site ["site_url " ].'/a/ ' .$ item ['post_id ' ];
153
+ #$this->writeToOuput($endpoint.": ".$url);
154
+ } else if ($ endpoint =="mentioned " ){
155
+ # mentions are not needed, cause they are usually replies to own q,a or comments
156
+ # $this->writeToOuput($endpoint.": ".$site["site_url"].'/a/'.$item['post_id']);
157
+ } else {
158
+ #$this->writeToOuput("$endpoint has no question id and no post_id\n"; var_dump($filename));;die
159
+ }
160
+ }else {
161
+ $ url =$ site ["site_url " ].'/questions/ ' .$ item ['question_id ' ];
162
+ # $this->writeToOuput($endpoint.": ".$url);
163
+ }
164
+ if ($ url ){
165
+ $ decodedURL =$ this ->doShortURLDecode ($ url );
166
+ $ this ->appendToStorage ("urls.html " , '<a href=" ' .$ decodedURL .'"> ' .$ url .'</a> ' );
167
+ echo '. ' ;
168
+ // $this->writeToOuput('saved '.$decodedURL);
169
+ }
170
+ }
148
171
} while ($ responseArray ['has_more ' ]); //https://api.stackexchange.com/docs/paging
149
172
}
150
173
174
/**
 * Resolves the redirect target of a URL without following the redirect.
 *
 * Issues one header-only request (no body is downloaded). If the response
 * announces a redirect, the target is returned as reported by cURL, i.e.
 * with query string and fragment intact. Only a single hop is resolved;
 * redirect chains are not followed.
 *
 * @param string $url short URL
 * @return string redirect target, or $url unchanged when the request fails
 *                or the response is not a redirect
 */
public function doShortURLDecode($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        // cURL could not create a handle; fall back to the input URL.
        return $url;
    }

    curl_setopt_array($ch, [
        CURLOPT_NOBODY => true,
        // Must stay disabled: CURLINFO_REDIRECT_URL is only populated
        // when cURL does not follow the redirect itself.
        CURLOPT_FOLLOWLOCATION => false,
        // Keep curl_exec() from printing the response to the output.
        CURLOPT_RETURNTRANSFER => true,
    ]);

    if (curl_exec($ch) === false) {
        // Transport error (DNS, connection, TLS, ...): nothing to resolve.
        return $url;
    }

    // Let cURL parse the Location header instead of scraping it with a
    // regex: header-name case, "\r\n" line endings, absolute vs. relative
    // targets and characters such as "?", "&", "%" and "#" are preserved.
    $target = curl_getinfo($ch, CURLINFO_REDIRECT_URL);

    // Depending on the PHP version "no redirect" is reported as false or ''.
    if (!is_string($target) || $target === '') {
        return $url;
    }

    return $target;
}
151
192
/**
152
193
* Each of these methods operates on a single site at a time, identified by the site parameter. This parameter can be the full domain name (ie. "stackoverflow.com"), or a short form identified by api_site_parameter on the site object.
153
194
*
@@ -168,7 +209,8 @@ public function saveToStorage($filename, $data) {
168
209
Log::debug ('saveToStorage ' . $ filename );
169
210
$ filename = $ this ->filename_prefix . $ filename ;
170
211
Storage::disk ('local ' )->put ($ filename , json_encode ($ data ));
171
- $ this ->writeToOuput ('Saved to ' . $ filename );
212
+ $ this ->writeToOuput ("\nSaved to " . $ filename );
213
+ echo "resolving URLS " ;
172
214
try {
173
215
Storage::disk ('s3 ' )->put ($ filename , json_encode ($ data ));
174
216
$ this ->writeToOuput ('Saved to AWS S3 ' . $ filename );
@@ -179,6 +221,17 @@ public function saveToStorage($filename, $data) {
179
221
}
180
222
}
181
223
224
/**
 * Appends text to a file on the "local" storage disk.
 *
 * The target path is $this->filename_prefix followed by $filename.
 *
 * @param string $filename file name, placed after the configured prefix
 * @param string $data     text to append
 */
public function appendToStorage($filename, $data) {
    $prefixedPath = $this->filename_prefix . $filename;
    $localDisk = Storage::disk('local');
    $localDisk->append($prefixedPath, $data);
}
234
+
182
235
/**
183
236
*
184
237
* @param string $siteName
0 commit comments