Skip to content

Commit 6fce886

Browse files
committed
Store all links to all related questions in file
1 parent 1b02f0e commit 6fce886

File tree

1 file changed

+56
-2
lines changed

1 file changed

+56
-2
lines changed

src/ExportStackExchangeHelper.php

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,53 @@ public function saveJsonFromApi($endpoint, $site, $sort) {
143143
], 60);
144144
//Log::debug($responseJson);
145145
$filename = $this->clean($site['site_name']) . '/' . $endpoint . '/page_' . str_pad($page, 4, '0', STR_PAD_LEFT) . '.json';
146-
$this->saveToStorage($filename, json_decode($responseJson, true));
147146
$responseArray = json_decode($responseJson, true);
147+
$this->saveToStorage($filename, $responseArray);
148+
foreach($responseArray['items'] as $item){
149+
$url='';
150+
if(!isset($item['question_id'])){
151+
if($endpoint=="comments"){
152+
$url=$site["site_url"].'/a/'.$item['post_id'];
153+
#$this->writeToOuput($endpoint.": ".$url);
154+
} else if($endpoint=="mentioned"){
155+
# mentions are not needed, cause they are usually replies to own q,a or comments
156+
# $this->writeToOuput($endpoint.": ".$site["site_url"].'/a/'.$item['post_id']);
157+
} else {
158+
#$this->writeToOuput("$endpoint has no question id and no post_id\n"; var_dump($filename));;die
159+
}
160+
}else{
161+
$url=$site["site_url"].'/questions/'.$item['question_id'];
162+
# $this->writeToOuput($endpoint.": ".$url);
163+
}
164+
if($url){
165+
$decodedURL=$this->doShortURLDecode($url);
166+
$this->appendToStorage("urls.html", '<a href="'.$decodedURL.'">'.$url.'</a>');
167+
echo '.';
168+
sleep(1); // otherwise, you get rate limited on SE
169+
// $this->writeToOuput('saved '.$decodedURL);
170+
}
171+
}
148172
} while ($responseArray['has_more']); //https://api.stackexchange.com/docs/paging
149173
}
150174

175+
/**
 * Resolves the redirect target of a (short) URL.
 *
 * Issues one body-less request for $url with automatic redirect following
 * disabled and asks cURL for the URL the server redirected to. Only the
 * first redirect hop is resolved; the target itself is never requested.
 *
 * @param string $url short URL
 * @return string final URL; $url is returned unchanged when the request
 *                fails or the response carries no redirect
 */
public function doShortURLDecode($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        // No handle could be created; fall back to the unresolved URL.
        return $url;
    }

    curl_setopt($ch, CURLOPT_NOBODY, true);          // headers are enough, skip the body
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); // inspect the redirect instead of following it
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);  // keep the response out of the script output

    if (curl_exec($ch) === false) {
        // Transfer failed (DNS, connect, TLS, ...): keep the original URL.
        return $url;
    }

    // With CURLOPT_FOLLOWLOCATION disabled, cURL reports the absolute URL the
    // Location header points to. Relative targets are resolved against $url and
    // fragments / query strings are kept intact, which hand-parsing the raw
    // header text after stripping "special" characters did not guarantee.
    $redirectUrl = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
    if (!is_string($redirectUrl) || $redirectUrl === '') {
        return $url;
    }

    return $redirectUrl;
}
151193
/**
152194
* Each of these methods operates on a single site at a time, identified by the site parameter. This parameter can be the full domain name (ie. "stackoverflow.com"), or a short form identified by api_site_parameter on the site object.
153195
*
@@ -168,7 +210,8 @@ public function saveToStorage($filename, $data) {
168210
Log::debug('saveToStorage ' . $filename);
169211
$filename = $this->filename_prefix . $filename;
170212
Storage::disk('local')->put($filename, json_encode($data));
171-
$this->writeToOuput('Saved to ' . $filename);
213+
$this->writeToOuput("\nSaved to " . $filename);
214+
echo "resolving URLS";
172215
try {
173216
Storage::disk('s3')->put($filename, json_encode($data));
174217
$this->writeToOuput('Saved to AWS S3' . $filename);
@@ -179,6 +222,17 @@ public function saveToStorage($filename, $data) {
179222
}
180223
}
181224

225+
/**
 * Appends plain data to a file on the local storage disk.
 *
 * The configured filename prefix is put in front of $filename before writing.
 *
 * @param string $filename file name relative to the filename prefix
 * @param string $data     content to append
 */
public function appendToStorage($filename, $data) {
    $prefixedPath = $this->filename_prefix . $filename;
    $localDisk = Storage::disk('local');
    $localDisk->append($prefixedPath, $data);
}
235+
182236
/**
183237
*
184238
* @param string $siteName

0 commit comments

Comments
 (0)