Skip to content

Commit 32abf06

Browse files
committed
Store all links to all related questions in file
1 parent 1b02f0e commit 32abf06

File tree

1 file changed

+55
-2
lines changed

1 file changed

+55
-2
lines changed

src/ExportStackExchangeHelper.php

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,52 @@ public function saveJsonFromApi($endpoint, $site, $sort) {
143143
], 60);
144144
//Log::debug($responseJson);
145145
$filename = $this->clean($site['site_name']) . '/' . $endpoint . '/page_' . str_pad($page, 4, '0', STR_PAD_LEFT) . '.json';
146-
$this->saveToStorage($filename, json_decode($responseJson, true));
147146
$responseArray = json_decode($responseJson, true);
147+
$this->saveToStorage($filename, $responseArray);
148+
foreach($responseArray['items'] as $item){
149+
$url='';
150+
if(!isset($item['question_id'])){
151+
if($endpoint=="comments"){
152+
$url=$site["site_url"].'/a/'.$item['post_id'];
153+
#$this->writeToOuput($endpoint.": ".$url);
154+
} else if($endpoint=="mentioned"){
155+
# Mentions are not needed, because they are usually replies to one's own questions, answers or comments.
156+
# $this->writeToOuput($endpoint.": ".$site["site_url"].'/a/'.$item['post_id']);
157+
} else {
158+
#$this->writeToOuput("$endpoint has no question id and no post_id\n"; var_dump($filename));;die
159+
}
160+
}else{
161+
$url=$site["site_url"].'/questions/'.$item['question_id'];
162+
# $this->writeToOuput($endpoint.": ".$url);
163+
}
164+
if($url){
165+
$decodedURL=$this->doShortURLDecode($url);
166+
$this->appendToStorage("urls.html", '<a href="'.$decodedURL.'">'.$url.'</a>');
167+
echo '.';
168+
// $this->writeToOuput('saved '.$decodedURL);
169+
}
170+
}
148171
} while ($responseArray['has_more']); //https://api.stackexchange.com/docs/paging
149172
}
150173

174+
/**
 * Resolves one level of HTTP redirection for a URL.
 *
 * Sends a body-less request without following redirects and returns the
 * redirect target that cURL reports for it. When the request fails or the
 * server does not redirect, the given URL is returned unchanged.
 *
 * @param string $url short URL
 * @return string redirect target as an absolute URL, or $url when there is none
 */
public function doShortURLDecode($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        // No usable handle could be created: fall back to the input.
        return $url;
    }

    curl_setopt_array($ch, [
        CURLOPT_NOBODY => true,          // only the response headers are needed
        CURLOPT_FOLLOWLOCATION => false, // inspect the redirect instead of following it
        CURLOPT_RETURNTRANSFER => true,  // keep the response out of the output stream
        CURLOPT_TIMEOUT => 30,           // a stalled host must not block the caller forever
    ]);

    if (curl_exec($ch) === false) {
        return $url;
    }

    // Let cURL resolve the Location header instead of parsing raw headers:
    // this keeps query strings and fragments (?, =, &, #, %) intact, works
    // with lower-case HTTP/2 header names, and handles both relative and
    // absolute redirect targets.
    $redirectUrl = curl_getinfo($ch, CURLINFO_REDIRECT_URL);

    return (is_string($redirectUrl) && $redirectUrl !== '') ? $redirectUrl : $url;
}
151192
/**
152193
* Each of these methods operates on a single site at a time, identified by the site parameter. This parameter can be the full domain name (ie. "stackoverflow.com"), or a short form identified by api_site_parameter on the site object.
153194
*
@@ -168,7 +209,8 @@ public function saveToStorage($filename, $data) {
168209
Log::debug('saveToStorage ' . $filename);
169210
$filename = $this->filename_prefix . $filename;
170211
Storage::disk('local')->put($filename, json_encode($data));
171-
$this->writeToOuput('Saved to ' . $filename);
212+
$this->writeToOuput("\nSaved to " . $filename);
213+
echo "resolving URLS";
172214
try {
173215
Storage::disk('s3')->put($filename, json_encode($data));
174216
$this->writeToOuput('Saved to AWS S3' . $filename);
@@ -179,6 +221,17 @@ public function saveToStorage($filename, $data) {
179221
}
180222
}
181223

224+
/**
 * Appends plain string data to a file on the "local" storage disk.
 *
 * The instance's filename prefix is prepended to the given file name
 * before the data is appended.
 *
 * @param string $filename file name, without the prefix
 * @param string $data     text to append
 */
public function appendToStorage($filename, $data) {
    $localDisk = Storage::disk('local');
    $localDisk->append($this->filename_prefix . $filename, $data);
}
234+
182235
/**
183236
*
184237
* @param string $siteName

0 commit comments

Comments
 (0)