File tree 1 file changed +22
-7
lines changed
1 file changed +22
-7
lines changed Original file line number Diff line number Diff line change 5
5
"colab" : {
6
6
"private_outputs" : true ,
7
7
"provenance" : [],
8
- "authorship_tag" : " ABX9TyNVJFk3jg83ovhaMDcJHOSo " ,
8
+ "authorship_tag" : " ABX9TyMK76muNMFRt0JTXy2fMPCy " ,
9
9
"include_colab_link" : true
10
10
},
11
11
"kernelspec" : {
290
290
"execution_count" : null ,
291
291
"outputs" : []
292
292
},
293
+ {
294
+ "cell_type" : " markdown" ,
295
+ "source" : [
296
+ " # Crawl Launch"
297
+ ],
298
+ "metadata" : {
299
+ "id" : " lTFC4NkOqxUd"
300
+ }
301
+ },
293
302
{
294
303
"cell_type" : " code" ,
295
304
"source" : [
296
- " # @title Crawl Launch\n " ,
297
- " \n " ,
298
305
" import requests\n " ,
299
306
" from bs4 import BeautifulSoup\n " ,
300
307
" from urllib.parse import urljoin, urlparse\n " ,
421
428
"id" : " ce2N8HasNylT"
422
429
}
423
430
},
431
+ {
432
+ "cell_type" : " markdown" ,
433
+ "source" : [
434
+ " # Scraping Options"
435
+ ],
436
+ "metadata" : {
437
+ "id" : " HZau_9Hsq1Jg"
438
+ }
439
+ },
424
440
{
425
441
"cell_type" : " code" ,
426
442
"source" : [
427
- " # @title Scraping Options\n " ,
428
- " \n " ,
429
443
" # Create a dropdown for scrape options\n " ,
430
444
" scrape_option = \" Specific number of pages\" # @param [\" All pages\" , \" Specific number of pages\" ]\n " ,
431
445
" \n " ,
486
500
},
487
501
{
488
502
"cell_type" : " markdown" ,
489
- "source" : [],
503
+ "source" : [
504
+ " # Rate Limiting and Retry Parameters\n "
505
+ ],
490
506
"metadata" : {
491
507
"id" : " 2fqRWptDfhPc"
492
508
}
493
509
},
494
510
{
495
511
"cell_type" : " code" ,
496
512
"source" : [
497
- " # @title Rate Limiting and Retry Parameters\n " ,
498
513
" # @markdown Set the rate limiting and retry parameters for the web scraper:\n " ,
499
514
" \n " ,
500
515
" pages_per_minute = 9 # @param {type:\" integer\" }\n " ,
You can’t perform that action at this time.
0 commit comments