Skip to content

Commit 39fec49

Browse files
committed
Python integration on the system
1 parent 9806302 commit 39fec49

File tree

6 files changed

+105
-7
lines changed

6 files changed

+105
-7
lines changed

README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,7 @@ The app will be pooling the selected threads, receiving the information comparin
2020
9. Develop a way to ask for and store the new data
2121

2222
## To do's
23-
Update this README with the steps to scrape the site
23+
Update this README with the steps to scrape the site
24+
25+
### Palette
26+
http://paletton.com/#uid=1000u0kllllaFw0g0qFqFg0w0aF

python/scrapper.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import sys
2+
print "4Chan Scrapper"
3+
4+
for arg in sys.argv:
5+
print arg

routes.js

+21
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,25 @@ module.exports = function (app, fourChanService) {
2929
res.render('thread.jade', { threadName : surl, resources : data });
3030
});
3131
});
32+
33+
app.get('/test', function (req, res) {
34+
35+
var spawn = require('child_process').spawn,
36+
ls = spawn('python', ['./python/scrapper.py', 'cadena', 1, 'otraCadena']);
37+
38+
ls.stdout.on('data', function (data) {
39+
console.log('stdout: ' + data);
40+
});
41+
42+
ls.stderr.on('data', function (data) {
43+
console.log('stderr: ' + data);
44+
});
45+
46+
ls.on('close', function (code) {
47+
console.log('child process exited with code ' + code);
48+
});
49+
50+
res.render('test.jade');
51+
52+
});
3253
};

services/fourChanService.js

+68-3
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ var fourChanService = {
8383

8484
var resources = results.resources;
8585
console.log('There going to be downloaded ' + resources.length + ' resources.' );
86-
for(var cont = 0; cont < resources.length; cont++)
86+
for(var cont = 0; cont < resources.length && cont < 50; cont++)
8787
{
8888
var url = 'http:' + resources[cont];
8989

@@ -100,6 +100,63 @@ var fourChanService = {
100100
console.log(fileCounter + '::File ' + path + ' downloaded');
101101
});
102102
}
103+
104+
setTimeout(function () { console.log('Second batch...') }, 2000);
105+
for(; cont < resources.length && cont < 100; cont++)
106+
{
107+
url = 'http:' + resources[cont];
108+
109+
// We set the name for file
110+
filename = resources[cont].split('/');
111+
filename = filename[filename.length - 1];
112+
113+
// We set the full path '.files/boardName/semanticURL/filename'
114+
path = './files/' + boardName + '/' + surl + '/' +filename;
115+
116+
// We download the file
117+
fileCounter = cont + 1;
118+
self.dowloadResource(url, boardName, surl, path, function () {
119+
console.log(fileCounter + '::File ' + path + ' downloaded');
120+
});
121+
}
122+
123+
setTimeout(function () { console.log('Third batch...') }, 2000);
124+
for(; cont < resources.length && cont < 150; cont++)
125+
{
126+
url = 'http:' + resources[cont];
127+
128+
// We set the name for file
129+
filename = resources[cont].split('/');
130+
filename = filename[filename.length - 1];
131+
132+
// We set the full path '.files/boardName/semanticURL/filename'
133+
path = './files/' + boardName + '/' + surl + '/' +filename;
134+
135+
// We download the file
136+
fileCounter = cont + 1;
137+
self.dowloadResource(url, boardName, surl, path, function () {
138+
console.log(fileCounter + '::File ' + path + ' downloaded');
139+
});
140+
}
141+
142+
setTimeout(function () { console.log('Forth batch...') }, 2000);
143+
for(; cont < resources.length && cont < 200; cont++)
144+
{
145+
url = 'http:' + resources[cont];
146+
147+
// We set the name for file
148+
filename = resources[cont].split('/');
149+
filename = filename[filename.length - 1];
150+
151+
// We set the full path '.files/boardName/semanticURL/filename'
152+
path = './files/' + boardName + '/' + surl + '/' +filename;
153+
154+
// We download the file
155+
fileCounter = cont + 1;
156+
self.dowloadResource(url, boardName, surl, path, function () {
157+
console.log(fileCounter + '::File ' + path + ' downloaded');
158+
});
159+
}
103160
// After all, we call the callback
104161
callback(resources);
105162
});
@@ -132,7 +189,7 @@ var fourChanService = {
132189
if( !fs.existsSync(filename) )
133190
{
134191
// We download the file
135-
console.log('=== Dowloading::' + uri );
192+
//console.log('=== Dowloading::' + uri );
136193
/*request.head(uri, function(err, res, body) {
137194
// console.log('content-type:', res.headers['content-type']);
138195
// console.log('content-length:', res.headers['content-length']);
@@ -146,7 +203,15 @@ var fourChanService = {
146203
})
147204
.on('error', function (err) {
148205
console.log(err)
149-
});
206+
})
207+
.on('response', function(response) {
208+
// unmodified http.IncomingMessage object
209+
response.on('data', function(data) {
210+
// compressed data as it is received
211+
console.log('Received ' + data.length + ' bytes of compressed data')
212+
});
213+
});
214+
150215

151216
}
152217

views/board.jade

+6-3
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,11 @@ block content
1616
a(href='/thread/#{name}/#{thread.id}/#{thread.semantic_url}')
1717
img(src='http://i.4cdn.org/#{name}/#{thread.imgurl}s.jpg' class="img-responsive")
1818
div.description
19-
if thread.sub.length > 0
20-
small.sub !{thread.sub} :
21-
small.teaser !{thread.teaser}
19+
p
20+
if thread.sub.length > 0
21+
small.sub !{thread.sub} :
22+
small.teaser !{thread.teaser}
23+
p
24+
small Images: #{thread.i}
2225

2326

views/test.jade

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
h1 TEST!

0 commit comments

Comments
 (0)