-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
135 lines (107 loc) · 4.29 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
const chalk = require('chalk');
const puppeteer = require('puppeteer');
/*
  Script entry point: opens Stack Overflow in a headless browser, searches for
  a keyword, scrapes the first page of results and prints them to the console.
  The browser is always closed, even when a step fails, and any failure is
  reported with a non-zero exit code instead of an unhandled rejection.
*/
(async () => {
  // Plain timer used for the cosmetic pauses between steps. A local helper is
  // used instead of page.waitFor, which is deprecated in later Puppeteer releases.
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Chromium browser instance
  const browser = await puppeteer.launch();
  try {
    // very first tab of the browser
    const page = (await browser.pages())[0];
    // stackoverflow will be used to fetch from
    const url = 'https://stackoverflow.com';
    // we will look around for this keyword
    const keyword = 'javascript';

    await logIntro(url, keyword);
    await pause(1500);

    await logTask(`so, let's go to the page`);
    await page.goto(url, {
      waitUntil: 'domcontentloaded'
    });
    await pause(1500);
    await logStatus('reached Stackoverflow');
    await pause(1500);

    // find the searchbox; page.$ resolves to null when nothing matches
    const searchBox = await page.$('#search input[name="q"]');
    if (searchBox === null) {
      throw new Error(`search box not found on ${url}`);
    }
    await searchBox.type(keyword);

    await logTask(`searching for ${keyword}, please wait ....`);
    // Start waiting for the navigation BEFORE pressing Enter: if the wait is
    // registered afterwards, a fast navigation can finish first and the wait
    // would then run into its timeout.
    await Promise.all([
      page.waitForNavigation({
        waitUntil: 'domcontentloaded'
      }),
      page.keyboard.press('Enter')
    ]);
    await pause(1000);
    await logStatus('search complete');

    // how many search results?
    const rawCount = await page.$eval('div[data-controller="se-uql"] > div > div', (c) => c.textContent);
    const count = rawCount.replace(/\s+/g, ' ');
    await logStatus(`found ${count}`);
    await pause(1500);

    // fetch results from first page
    await logTask('will fetch results from first page, only');
    const results = await page.evaluate(() => {
      // This callback runs inside the page, so it must be self-contained.
      // Returns the whitespace-collapsed text of the first match under `root`,
      // or '' when the element is missing, so one odd row cannot abort the scrape.
      const textOf = (root, selector) => {
        const node = root.querySelector(selector);
        return node === null ? '' : node.textContent.replace(/\s+/g, ' ');
      };

      const rows = [];
      // one .question-summary container per search result
      for (const summary of document.querySelectorAll('.question-summary')) {
        rows.push({
          title: textOf(summary, '.summary h3'),
          votes: textOf(summary, '.statscontainer .stats .votes .vote-count-post'),
          answers: textOf(summary, '.statscontainer .stats .status strong'),
          views: textOf(summary, '.statscontainer .views')
        });
      }
      return rows;
    });
    await pause(1000);

    await logStatus('fetched, showing result:');
    for (const result of results) {
      await logResult(result);
    }
    await pause(2000);

    await logTheEnd();
    await pause(3000);
  } finally {
    // release the Chromium process no matter how the run ended
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/*
colorful console.log
*/
/**
 * Prints the intro banner followed by the target URL and the search keyword.
 *
 * @param {string} url - address shown on the "Fetch from" line
 * @param {string} keyword - search term shown on the "Keyword" line
 * @returns {Promise<void>} resolves once every line has been written
 */
async function logIntro(url, keyword) {
  console.log(chalk.bgYellow(` *** ***************** INTRO ***************** ***`));
  console.log(chalk.bgGreen(` *** Scrapper ft. Puppeteer, powered by NodeJS ***`));
  console.log('');
  console.log(chalk.cyan(` Fetch from: ${url}`));
  console.log(chalk.cyan(` Keyword: ${keyword}`));
  console.log('');
  console.log(chalk.bgGreen(` *** ----------------------------------------- ***`));
  console.log('');
}
/**
 * Announces the next task: a blank spacer line, then the message in blue
 * behind a " :> " prompt marker.
 *
 * @param {string} msg - description of the task about to start
 * @returns {Promise<void>} resolves once both lines have been written
 */
async function logTask(msg) {
  const prompt = ` :> ${msg}`;
  const output = ['', chalk.blue(prompt)];
  for (const line of output) {
    console.log(line);
  }
}
/**
 * Reports a status update as a single green line prefixed with " - ".
 *
 * @param {string} msg - status text to print
 * @returns {Promise<void>} resolves once the line has been written
 */
async function logStatus(msg) {
  const bullet = ` - ${msg}`;
  const painted = chalk.green(bullet);
  console.log(painted);
}
/**
 * Prints one scraped question: a blank spacer line, one "Label: value" line
 * per field (labels in cyan), then a cyan separator rule.
 *
 * @param {{title: *, votes: *, answers: *, views: *}} res - scraped record
 * @returns {Promise<void>} resolves once all lines have been written
 */
async function logResult(res) {
  const fields = [
    ['Title', res.title],
    ['Votes', res.votes],
    ['Answers', res.answers],
    ['Views', res.views],
  ];

  console.log('');
  fields.forEach(([label, value]) => {
    console.log(` ${chalk.cyan(label)}: ${value}`);
  });
  console.log(chalk.cyan(' -------------- '));
}
/**
 * Prints the closing banner: two blank spacer lines followed by three
 * background-coloured rows (red, green, red).
 *
 * @returns {Promise<void>} resolves once the banner has been written
 */
async function logTheEnd() {
  const banner = [
    '',
    '',
    chalk.bgRed(' *********************** THE END ***********************'),
    chalk.bgGreen(' *************** Thanks for checking out... ***************'),
    chalk.bgRed(' ****************** ****************** ******************'),
  ];
  for (const row of banner) {
    console.log(row);
  }
}