1
1
import puppeteer from 'puppeteer' ;
2
2
import fs from 'fs' ;
3
- import cheerio from 'cheerio' ;
4
3
import _ from 'underscore' ;
5
- import getHrefs from 'get-hrefs' ;
6
4
import path from 'path' ;
7
5
import { fileURLToPath } from 'url' ;
8
6
//import PDFMerge from 'pdf-merge';
@@ -17,17 +15,18 @@ import {exec} from 'child_process';
17
15
18
16
const readFile = promisify ( fs . readFile ) ;
19
17
const writeFile = promisify ( fs . writeFile ) ;
18
+ const dirname = path . dirname ( fileURLToPath ( import . meta. url ) ) ;
19
+ const puppeteer_options = { headless : "new" , env : { LANGUAGE : "en-US" } }
20
20
21
21
async function saveNavLinks ( options ) {
22
- const url = `https://cloud.google.com/${ options . product } /docs/ ${ options . subProduct } ` ;
22
+ const url = `https://cloud.google.com/${ options . product } /docs` ;
23
23
let browser ;
24
24
let product = {
25
25
'name' : options . product
26
26
} ;
27
- const dirname = path . dirname ( fileURLToPath ( import . meta. url ) ) ;
28
27
const pathToOutputJson = path . join ( dirname , '/../output/products.json' ) ;
29
28
30
- browser = await puppeteer . launch ( ) ;
29
+ browser = await puppeteer . launch ( puppeteer_options ) ;
31
30
const page = await browser . newPage ( ) ;
32
31
const response = await page . goto ( url ) ;
33
32
if ( ! response . ok ( ) ) {
@@ -37,12 +36,16 @@ async function saveNavLinks(options) {
37
36
const buffer = await response . buffer ( ) ;
38
37
const html = buffer . toString ( 'utf8' ) ;
39
38
40
- let $ = cheerio . load ( html ) ;
41
- let links = _ . uniq ( _ . reject ( getHrefs ( $ ( 'ul.devsite-nav-expandable' ) . html ( ) ) , ( o ) => {
42
- return o . indexOf ( 'cloud.google.com' ) < 0 ||
43
- o . indexOf ( 'ref' ) > 0 ;
44
- } ) ) ;
39
+ const element = await page . $ ( '[track-metadata-position="nav - guides"]' ) ;
40
+ const href = await page . evaluate ( el => el . getAttribute ( 'href' ) , element ) ;
41
+ await page . goto ( href ) ;
42
+ console . log ( `${ chalk . gray . bold ( 'DEBUG' ) } Navigated to ${ href } ` ) ;
43
+
44
+ const all_hrefs = await page . $$eval ( '.devsite-mobile-nav-bottom a.devsite-nav-title' , links => links . map ( link => link . href ) ) ;
45
+ const links = Array . from ( new Set ( all_hrefs . filter ( href => href . includes ( url ) ) ) ) ;
46
+
45
47
console . log ( `${ chalk . yellow . bold ( 'INFO' ) } Found ${ links . length } links` ) ;
48
+ console . log ( links )
46
49
47
50
product . links = links ;
48
51
product . count = links . length ;
@@ -66,7 +69,7 @@ async function saveNavLinks(options) {
66
69
67
70
async function downloadAndMergePdf ( options ) {
68
71
let browser ;
69
- const pathToOutputJson = path . join ( __dirname , '/../output/products.json' ) ;
72
+ const pathToOutputJson = path . join ( dirname , '/../output/products.json' ) ;
70
73
let data = await readFile ( pathToOutputJson , 'utf8' ) ;
71
74
let jsonData = JSON . parse ( data ) ;
72
75
let existing = _ . find ( jsonData . products , ( a ) => {
@@ -81,7 +84,7 @@ async function downloadAndMergePdf(options) {
81
84
fs . mkdirSync ( `${ options . pathToSave } /${ options . product } ` ) ;
82
85
}
83
86
84
- browser = await puppeteer . launch ( ) ;
87
+ browser = await puppeteer . launch ( puppeteer_options ) ;
85
88
let files = [ ] ,
86
89
indx = 0 ;
87
90
0 commit comments