Skip to content

Commit c287f33

Browse files
committed
Store all links to all related questions in file
1 parent 1b02f0e commit c287f33

File tree

1 file changed

+59
-2
lines changed

1 file changed

+59
-2
lines changed

src/ExportStackExchangeHelper.php

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,56 @@ public function saveJsonFromApi($endpoint, $site, $sort) {
143143
], 60);
144144
//Log::debug($responseJson);
145145
$filename = $this->clean($site['site_name']) . '/' . $endpoint . '/page_' . str_pad($page, 4, '0', STR_PAD_LEFT) . '.json';
146-
$this->saveToStorage($filename, json_decode($responseJson, true));
147146
$responseArray = json_decode($responseJson, true);
147+
$this->saveToStorage($filename, $responseArray);
148+
foreach($responseArray['items'] as $item){
149+
$url='';
150+
if(!isset($item['question_id'])){
151+
if($endpoint=="comments"){
152+
$url=$site["site_url"].'/a/'.$item['post_id'];
153+
#$this->writeToOuput($endpoint.": ".$url);
154+
} else if($endpoint=="mentioned"){
155+
# mentions are not needed, cause they are usually replies to own q,a or comments
156+
# $this->writeToOuput($endpoint.": ".$site["site_url"].'/a/'.$item['post_id']);
157+
} else {
158+
#$this->writeToOuput("$endpoint has no question id and no post_id\n"; var_dump($filename));;die
159+
}
160+
}else{
161+
$url=$site["site_url"].'/questions/'.$item['question_id'];
162+
# $this->writeToOuput($endpoint.": ".$url);
163+
}
164+
if($url){
165+
$decodedURL=$this->doShortURLDecode($url);
166+
$this->appendToStorage("urls.html", '<a href="'.$decodedURL.'">'.$url.'</a>');
167+
echo '.';
168+
sleep(1); // otherwise, you get rate limited on SE
169+
// $this->writeToOuput('saved '.$decodedURL);
170+
}
171+
}
148172
} while ($responseArray['has_more']); //https://api.stackexchange.com/docs/paging
149173
}
150174

175+
/**
 * Resolves the redirect target of a (short) URL without following it.
 *
 * Issues a body-less request and asks cURL for the redirect URL it found.
 * cURL resolves a relative Location header against the requested URL, so
 * the returned value is always absolute and keeps its query string and
 * fragment intact.
 *
 * Prints '-' as a progress marker and returns the input unchanged when the
 * request fails or the response does not redirect.
 *
 * @param string $url short URL
 * @return string final URL
 */
public function doShortURLDecode($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        echo '-';
        return $url;
    }

    curl_setopt_array($ch, [
        CURLOPT_NOBODY => true,          // headers are enough to learn the redirect target
        CURLOPT_FOLLOWLOCATION => false, // only the first hop is wanted
        CURLOPT_RETURNTRANSFER => true,  // keep the response out of the output stream
    ]);

    // Reading the redirect target from cURL avoids parsing raw headers by hand,
    // which depends on header-name casing and on the Location value being relative.
    $redirectUrl = false;
    if (curl_exec($ch) !== false) {
        $redirectUrl = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
    }

    if (!is_string($redirectUrl) || $redirectUrl === '') {
        echo '-';
        return $url;
    }

    return $redirectUrl;
}
151196
/**
152197
* Each of these methods operates on a single site at a time, identified by the site parameter. This parameter can be the full domain name (ie. "stackoverflow.com"), or a short form identified by api_site_parameter on the site object.
153198
*
@@ -168,7 +213,8 @@ public function saveToStorage($filename, $data) {
168213
Log::debug('saveToStorage ' . $filename);
169214
$filename = $this->filename_prefix . $filename;
170215
Storage::disk('local')->put($filename, json_encode($data));
171-
$this->writeToOuput('Saved to ' . $filename);
216+
$this->writeToOuput("\nSaved to " . $filename);
217+
echo "resolving URLS";
172218
try {
173219
Storage::disk('s3')->put($filename, json_encode($data));
174220
$this->writeToOuput('Saved to AWS S3' . $filename);
@@ -179,6 +225,17 @@ public function saveToStorage($filename, $data) {
179225
}
180226
}
181227

228+
/**
 * Appends plain data to a file on the local storage disk.
 *
 * The helper's filename prefix is prepended to the given name before writing.
 *
 * @param string $filename file name, relative to the filename prefix
 * @param string $data     text to append
 */
public function appendToStorage($filename, $data) {
    $localDisk = Storage::disk('local');
    $localDisk->append($this->filename_prefix . $filename, $data);
}
238+
182239
/**
183240
*
184241
* @param string $siteName

0 commit comments

Comments
 (0)