@@ -70,99 +70,31 @@ var fourChanService = {
70
70
71
71
getThread : function ( boardName , id , surl , callback ) {
72
72
73
- var self = this ; // Saving the reference to the main object
74
-
75
- // Proccess to scrap
76
- var osmosis = require ( 'osmosis' ) ;
73
+ // URL of the thread to download
77
74
var url = 'http://boards.4chan.org/' + boardName + '/thread/' + id + '/' + surl ;
78
- osmosis . get ( url )
79
- . set ( {
80
- 'resources' : [ 'a.fileThumb @href' ]
81
- } )
82
- . data ( function ( results ) {
75
+ // We prepare us to call python
76
+ var spawn = require ( 'child_process' ) . spawn ;
77
+ var command = spawn ( 'python' , [ './python/scrapper.py' , url ] ) ;
78
+ var output = '' ;
79
+
80
+ //Listening for the python information
81
+ command . stdout . on ( 'data' , function ( data ) {
82
+ console . log ( data ) ;
83
+ output += data ;
84
+ } ) ;
83
85
84
- var resources = results . resources ;
85
- console . log ( 'There going to be downloaded ' + resources . length + ' resources.' ) ;
86
- for ( var cont = 0 ; cont < resources . length && cont < 50 ; cont ++ )
87
- {
88
- var url = 'http:' + resources [ cont ] ;
89
-
90
- // We set the name for file
91
- var filename = resources [ cont ] . split ( '/' ) ;
92
- filename = filename [ filename . length - 1 ] ;
93
-
94
- // We set the full path '.files/boardName/semanticURL/filename'
95
- var path = './files/' + boardName + '/' + surl + '/' + filename ;
96
-
97
- // We download the file
98
- var fileCounter = cont + 1 ;
99
- self . dowloadResource ( url , boardName , surl , path , function ( ) {
100
- console . log ( fileCounter + '::File ' + path + ' downloaded' ) ;
101
- } ) ;
102
- }
103
-
104
- setTimeout ( function ( ) { console . log ( 'Second batch...' ) } , 2000 ) ;
105
- for ( ; cont < resources . length && cont < 100 ; cont ++ )
106
- {
107
- url = 'http:' + resources [ cont ] ;
108
-
109
- // We set the name for file
110
- filename = resources [ cont ] . split ( '/' ) ;
111
- filename = filename [ filename . length - 1 ] ;
112
-
113
- // We set the full path '.files/boardName/semanticURL/filename'
114
- path = './files/' + boardName + '/' + surl + '/' + filename ;
115
-
116
- // We download the file
117
- fileCounter = cont + 1 ;
118
- self . dowloadResource ( url , boardName , surl , path , function ( ) {
119
- console . log ( fileCounter + '::File ' + path + ' downloaded' ) ;
120
- } ) ;
121
- }
122
-
123
- setTimeout ( function ( ) { console . log ( 'Third batch...' ) } , 2000 ) ;
124
- for ( ; cont < resources . length && cont < 150 ; cont ++ )
125
- {
126
- url = 'http:' + resources [ cont ] ;
127
-
128
- // We set the name for file
129
- filename = resources [ cont ] . split ( '/' ) ;
130
- filename = filename [ filename . length - 1 ] ;
131
-
132
- // We set the full path '.files/boardName/semanticURL/filename'
133
- path = './files/' + boardName + '/' + surl + '/' + filename ;
134
-
135
- // We download the file
136
- fileCounter = cont + 1 ;
137
- self . dowloadResource ( url , boardName , surl , path , function ( ) {
138
- console . log ( fileCounter + '::File ' + path + ' downloaded' ) ;
139
- } ) ;
140
- }
141
-
142
- setTimeout ( function ( ) { console . log ( 'Forth batch...' ) } , 2000 ) ;
143
- for ( ; cont < resources . length && cont < 200 ; cont ++ )
144
- {
145
- url = 'http:' + resources [ cont ] ;
146
-
147
- // We set the name for file
148
- filename = resources [ cont ] . split ( '/' ) ;
149
- filename = filename [ filename . length - 1 ] ;
150
-
151
- // We set the full path '.files/boardName/semanticURL/filename'
152
- path = './files/' + boardName + '/' + surl + '/' + filename ;
153
-
154
- // We download the file
155
- fileCounter = cont + 1 ;
156
- self . dowloadResource ( url , boardName , surl , path , function ( ) {
157
- console . log ( fileCounter + '::File ' + path + ' downloaded' ) ;
158
- } ) ;
159
- }
160
- // After all, we call the callback
161
- callback ( resources ) ;
86
+ command . stderr . on ( 'data' , function ( data ) {
87
+ console . log ( 'stderr: ' + data ) ;
88
+ output += data ;
89
+ } ) ;
90
+
91
+ command . on ( 'close' , function ( code ) {
92
+ console . log ( 'Child process exited with code ' + code ) ;
162
93
} ) ;
163
94
164
-
165
95
96
+ // After all, we call the callback
97
+ callback ( output ) ;
166
98
} ,
167
99
168
100
dowloadResource : function ( uri , boardName , surl , filename , callback ) {
0 commit comments