Skip to content

Commit 9806302

Browse files
committed
Modifications to the file-download logic, trying to get complete files when a thread has a lot of resources
1 parent 0d4e5bb commit 9806302

File tree

4 files changed

+42
-79
lines changed

4 files changed

+42
-79
lines changed

.gitignore

+4-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,7 @@ npm-debug.log*
33

44
# Dependency directory
55
# https://docs.npmjs.com/misc/faq#should-i-check-my-node-modules-folder-into-git
6-
node_modules
6+
node_modules
7+
8+
# Downloaded files
9+
files

routes.js

+4-3
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@ module.exports = function (app, fourChanService) {
22

33
app.get('/', function (req, res) {
44

5-
fourChanService.getCategories(function (data) {
5+
res.render('index.jade');
6+
/*fourChanService.getCategories(function (data) {
67
78
res.render('index.jade', { categories : data });
8-
});
9+
}); */
910

1011
});
1112

@@ -25,7 +26,7 @@ module.exports = function (app, fourChanService) {
2526
var surl = req.params.surl;
2627
fourChanService.getThread(section, id, surl, function (data) {
2728

28-
res.render('thread.jade');
29+
res.render('thread.jade', { threadName : surl, resources : data });
2930
});
3031
});
3132
};

services/fourChanService.js

+30-74
Original file line numberDiff line numberDiff line change
@@ -21,68 +21,12 @@ var fourChanService = {
2121
categories.splice(-23, 23);
2222
callback(categories);
2323
});
24-
25-
26-
/*var noodle = require('noodlejs');
27-
noodle.query({
28-
url: 'http://www.4chan.org/',
29-
type: 'html',
30-
selector: 'a.boardlink',
31-
extract: 'href'
32-
})
33-
.then(function (data) {
34-
console.log(typeof(data.results));
35-
//data.results.splice(3 , 5, 'hola' );
36-
console.log('========');
37-
console.log(data.results);
38-
console.log('========');
39-
var keys = Object.keys(data.results);
40-
console.log(keys);
41-
});*/
42-
43-
44-
/*var Xray = require('x-ray');
45-
var x = Xray();
46-
47-
x('https://4chan.org/', {title:'.boardlink'})
48-
(function (err, result) {
49-
console.log(result)
50-
});*/
51-
52-
/*[{
53-
'title': '.boardlink a@title',
54-
'href': '.boardlink a@href',
55-
}]*/
56-
57-
58-
/* var request = require("request");
59-
var cheerio = require("cheerio");
60-
var url = 'http://4chan.org/';
61-
62-
request(url, function (error, response, body) {
63-
if (!error) {
64-
var $ = cheerio.load(body),
65-
temperature = $('.boardlink').html();
66-
67-
console.log("It’s " + temperature + " degrees Fahrenheit.");
68-
} else {
69-
console.log("We’ve encountered an error: " + error);
70-
}
71-
});*/
72-
73-
/*console.log('this.categories');
74-
console.log(this.categories);
75-
var data = {
76-
categories : ['uno', 'dos']
77-
};
78-
return data;*/
7924
},
8025

8126
getBoard : function (boardName, callback) {
8227

8328
// Process to scrape
8429
var osmosis = require('osmosis');
85-
console.log('http://boards.4chan.org/' + boardName + '/catalog');
8630
osmosis.get('http://boards.4chan.org/' + boardName + '/catalog')
8731
.set({
8832
'threads': ['script']
@@ -127,6 +71,7 @@ var fourChanService = {
12771
getThread : function(boardName, id, surl, callback) {
12872

12973
var self = this; // Saving the reference to the main object
74+
13075
// Process to scrape
13176
var osmosis = require('osmosis');
13277
var url = 'http://boards.4chan.org/' + boardName + '/thread/' + id + '/' + surl;
@@ -137,6 +82,7 @@ var fourChanService = {
13782
.data(function (results) {
13883

13984
var resources = results.resources;
85+
console.log('There going to be downloaded ' + resources.length + ' resources.' );
14086
for(var cont = 0; cont < resources.length; cont++)
14187
{
14288
var url = 'http:' + resources[cont];
@@ -148,17 +94,11 @@ var fourChanService = {
14894
// We set the full path './files/boardName/semanticURL/filename'
14995
var path = './files/' + boardName + '/' + surl + '/' +filename;
15096

151-
console.log(url + '::' + path);
152-
15397
// We download the file
98+
var fileCounter = cont + 1;
15499
self.dowloadResource(url, boardName, surl, path, function () {
155-
console.log('File downloaded');
100+
console.log(fileCounter + '::File ' + path + ' downloaded');
156101
});
157-
158-
/*
159-
self.dowloadResource(url, path, function () {
160-
console.log('Dowload completed');
161-
});*/
162102
}
163103
// After all, we call the callback
164104
callback(resources);
@@ -178,22 +118,38 @@ var fourChanService = {
178118
// We check for the directories and create them if they don't exist
179119
var boardDirectory = './files/' + boardName
180120
var surlDirectory = './files/' + boardName + '/' + surl;
181-
if ( !fs.existsSync(boardDirectory) ) {
182-
121+
if ( !fs.existsSync(boardDirectory) ) {
122+
183123
fs.mkdirSync(boardDirectory);
124+
}
125+
else {
126+
184127
if( !fs.existsSync(surlDirectory) ) {
185-
186128
fs.mkdirSync(surlDirectory);
187129
}
188130
}
189131

190-
// We download the file
191-
console.log('=== Dowloading::' + uri );
192-
request.head(uri, function(err, res, body) {
193-
// console.log('content-type:', res.headers['content-type']);
194-
// console.log('content-length:', res.headers['content-length']);
195-
request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
196-
});
132+
if( !fs.existsSync(filename) )
133+
{
134+
// We download the file
135+
console.log('=== Dowloading::' + uri );
136+
/*request.head(uri, function(err, res, body) {
137+
// console.log('content-type:', res.headers['content-type']);
138+
// console.log('content-length:', res.headers['content-length']);
139+
request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
140+
});*/
141+
142+
request(uri)
143+
.pipe(fs.createWriteStream(filename))
144+
.on('close', function () {
145+
console.log(filename + ' downloaded.');
146+
})
147+
.on('error', function (err) {
148+
console.log(err)
149+
});
150+
151+
}
152+
197153
}
198154

199155

views/thread.jade

+4-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,7 @@ extends layouts/default
33
block content
44
div.row
55
div.col-md-12
6-
h2 hola!
6+
h2 #{threadName}
7+
ul
8+
each resource in resources
9+
li #{resource}

0 commit comments

Comments
 (0)