-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRUSKY.pl
51 lines (51 loc) · 1.41 KB
/
RUSKY.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/local/bin/perl
use strict; use warnings;
use Proc::Daemon;
use LWP::UserAgent;
use WWW::Mechanize;
use HTTP::Cookies;
use MKRX::XS;
####################### SUMMONS #
# RUSKY - scrape golibgen.io pdfs
# (__!__) ---skrp of MKRX
# SETUP ###############################
# File and directory names used by the script, one declaration per line.
# (The original ran two declarations together without a ';' in two places,
# which does not compile.)
my $work     = 'MINION/';   # handed to Proc::Daemon as work_dir
my $dump     = 'dump';      # directory prefix for the saved pages
my $state    = 'STATE';     # not referenced in the visible script
my $debug    = 'DEBUG';     # handed to Proc::Daemon as child_STDERR
my $log      = 'LOG';       # handed to Proc::Daemon as child_STDOUT
my $pid      = 'PID';       # handed to Proc::Daemon as pid_file
my $que      = 'QUE';       # not referenced in the visible script
my $clean    = 'CLEAN';     # not referenced in the visible script
my $pause    = 'PAUSE';     # not referenced in the visible script
my $shutdown = 'SHUT';      # not referenced in the visible script
# DAEMONIZE ##########################
# Fork into the background via Proc::Daemon. The open-mode prefix for STDERR
# must be part of the path string; a bare +>> in front of the variable is a
# syntax error.
my $daemon = Proc::Daemon->new(
    work_dir     => $work,
    child_STDOUT => $log,
    child_STDERR => "+>>$debug",
    pid_file     => $pid,
);
# When called on an object, Init() returns the daemon's PID to the launching
# process and 0 to the daemon itself. The launcher must stop here, otherwise
# both processes would run the scraping loop below.
my $daemon_pid = $daemon->Init();
exit 0 if $daemon_pid;
# INITIALIZE ####################
# Read the countdown value that drives the scraping loop from the first line
# of the counter file.
# NOTE(review): $initial is not declared anywhere in this script; it has to be
# declared (or imported) before this point -- confirm the intended file name.
# The original spelled it "$inital" here, which did not match the "$initial"
# written back inside the loop.
open(my $ifh, '<', $initial) or die "cant open $initial: $!";
my $ttl = readline $ifh;
close $ifh;
# An empty file or a non-numeric first line would otherwise only show up as
# "uninitialized" / "isn't numeric" warnings in the loop condition.
defined $ttl or die "no counter value in $initial";
chomp $ttl;
$ttl =~ /\A\s*-?\d+\s*\z/ or die "bad counter value '$ttl' in $initial";
# USER AGENT ####################
# Create the file-backed cookie jar first, then pass it and the agent string
# to the LWP::UserAgent constructor instead of setting them afterwards.
my $cookies = HTTP::Cookies->new(file => "cookies.txt", autosave => 1);
my $ua = LWP::UserAgent->new(
    agent      => "Windows IE 7",
    cookie_jar => $cookies,
);
# MECH ##########################
# Count down from $ttl to 1. For each value: fetch $base.$ttl, click the
# 'submit' button, save the response as "$dump/<value>", call XS(), then write
# the decremented counter back to $initial.
# NOTE(review): $base, $pool, $g and $initial are not declared in this script;
# they must be declared (or imported) before the loop -- confirm their values.
while ($ttl > 0) {
    my $iter = $ttl;
    # WWW::Mechanize->new takes key/value options, not a user-agent object.
    # Passing $ua positionally left its agent string and cookie jar unused,
    # so hand both over explicitly.
    my $mech = WWW::Mechanize->new(
        agent      => $ua->agent,
        cookie_jar => $ua->cookie_jar,
    );
    my $url = $base . $iter;
    print "scraping $url\n";
    $mech->get($url);
    $mech->click('submit');
    $mech->save_content("$dump/$iter");
    # NOTE(review): XS() is presumably provided by MKRX::XS; the meaning of
    # its arguments is not visible here -- only the missing commas were added.
    XS($dump, $pool, $g) or die "can't XS $dump/$iter";
    $ttl--;
    # Persist the remaining count after every page. Buffered write errors
    # only surface at close, so check that too.
    open(my $finfh, '>', $initial) or die "can't reopen $initial: $!";
    print {$finfh} "$ttl\n";
    close $finfh or die "can't close $initial: $!";
    print "stored $url\n";
}