Skip to content

Commit

Permalink
use chromium driver
Browse files Browse the repository at this point in the history
  • Loading branch information
MarlikAlmighty committed Jul 2, 2023
1 parent 3c4102c commit c10f0fa
Show file tree
Hide file tree
Showing 17 changed files with 802 additions and 766 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
.idea/
bin/
config.json
data.db
1 change: 0 additions & 1 deletion Procfile

This file was deleted.

9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# Analyze

### A simple news parser for personal use.

### The parser needs the Chromium driver to work. To install it:

```sh
sudo apt -y install chromium-chromedriver
```



38 changes: 27 additions & 11 deletions cmd/main.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"github.com/MarlikAlmighty/analyze-it/internal/botapi"
"log"
"os"
"os/signal"
Expand All @@ -13,24 +14,39 @@ import (

func main() {

cnf := config.New()
if err := cnf.GetEnv(); err != nil {
log.Fatalf("get environment keys: %v\n", err)
// got config
cnf, err := config.LoadConfig()
if err != nil {
log.Fatalf("error config: %v\n", err)
}

s := store.New()
r, err := s.Connect(cnf.RedisUrl)
if err != nil {
log.Fatalf("connect: %v\n", err)
// connect to store
var r *store.Wrapper
if r, err = r.New("posts", "ttl"); err != nil {
log.Fatalf("error store: %v\n", err)
}
s.Client = r

stopApp := make(chan os.Signal, 1)
signal.Notify(stopApp, syscall.SIGKILL, syscall.SIGINT, syscall.SIGTERM)
// init bot api
var api *botapi.TgAPI
if api, err = botapi.New(cnf, r); err != nil {
log.Fatalf("error botAPI: %s\n", err)
}

core := app.New(cnf, s)
// start bot
go func() {
if err = api.Run(); err != nil {
log.Fatalf("error run botAPI: %s\n", err)
return
}
}()

// init and run core
core := app.New(cnf, r)
go core.Run()

stopApp := make(chan os.Signal, 1)
signal.Notify(stopApp, syscall.SIGKILL, syscall.SIGINT, syscall.SIGTERM)

sig := <-stopApp
log.Printf("Catch signal %s, exit app...", sig)
core.Stop()
Expand Down
12 changes: 0 additions & 12 deletions docker-compose.yml

This file was deleted.

42 changes: 11 additions & 31 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,46 +1,26 @@
module github.com/MarlikAlmighty/analyze-it

go 1.18
go 1.20

require (
github.com/PuerkitoBio/goquery v1.8.0
github.com/chromedp/chromedp v0.8.1
github.com/go-openapi/errors v0.20.2
github.com/go-openapi/strfmt v0.21.2
github.com/go-openapi/swag v0.21.1
github.com/go-redis/redis/v8 v8.11.5
github.com/gorilla/mux v1.8.0
github.com/kelseyhightower/envconfig v1.4.0
gopkg.in/telegram-bot-api.v4 v4.6.4
github.com/PuerkitoBio/goquery v1.8.1
github.com/boltdb/bolt v1.3.1
github.com/chromedp/chromedp v0.9.1
github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1
github.com/sclevine/agouti v3.0.0+incompatible
)

require (
github.com/MarlikAlmighty/heroku-binary-buildpack v0.0.0-20200123225823-2ecea2ccf1dd // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/chromedp/cdproto v0.0.0-20220515234810-83d799542a04 // indirect
github.com/chromedp/cdproto v0.0.0-20230220211738-2b1ec77315c9 // indirect
github.com/chromedp/sysutil v1.0.0 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/go-stack/stack v1.8.1 // indirect
github.com/gobwas/httphead v0.1.0 // indirect
github.com/gobwas/pool v0.2.1 // indirect
github.com/gobwas/ws v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/stretchr/objx v0.2.0 // indirect
github.com/technoweenie/multipartstreamer v1.0.1 // indirect
go.mongodb.org/mongo-driver v1.9.1 // indirect
go.opentelemetry.io/otel v1.7.0 // indirect
go.opentelemetry.io/otel/metric v0.30.0 // indirect
go.opentelemetry.io/otel/trace v1.7.0 // indirect
golang.org/x/exp v0.0.0-20201203231725-fa01524bc59d // indirect
golang.org/x/net v0.0.0-20220516155154-20f960328961 // indirect
golang.org/x/sys v0.0.0-20220513210249-45d2b4557a2a // indirect
google.golang.org/genproto v0.0.0-20191009194640-548a555dbc03 // indirect
google.golang.org/grpc v1.31.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
github.com/onsi/ginkgo v1.16.5 // indirect
github.com/onsi/gomega v1.27.8 // indirect
golang.org/x/net v0.10.0 // indirect
golang.org/x/sys v0.8.0 // indirect
)
366 changes: 57 additions & 309 deletions go.sum

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions internal/app/.browser
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package app

import (
"context"
"github.com/chromedp/chromedp"
"log"
"time"
)

// browser loads url through chromedp and returns the outer HTML of the
// element matched by the "html" query selector.
//
// opts are handed unchanged to chromedp.NewExecAllocator. The whole run
// (navigation plus HTML extraction) is bounded by a two-minute timeout, and
// every context created here is cancelled before the function returns.
// On failure the returned string is empty and the chromedp error is passed
// through as-is.
func (core *Core) browser(opts []chromedp.ExecAllocatorOption, url string) (string, error) {
	const pageTimeout = 2 * time.Minute

	execCtx, stopExec := chromedp.NewExecAllocator(context.Background(), opts...)
	defer stopExec()

	// chromedp's formatted log output is routed to the standard logger.
	tabCtx, stopTab := chromedp.NewContext(execCtx, chromedp.WithLogf(log.Printf))
	defer stopTab()

	runCtx, stopRun := context.WithTimeout(tabCtx, pageTimeout)
	defer stopRun()

	var page string
	steps := []chromedp.Action{
		chromedp.Navigate(url),
		chromedp.OuterHTML("html", &page, chromedp.ByQuery),
	}

	err := chromedp.Run(runCtx, steps...)
	if err != nil {
		return "", err
	}
	return page, nil
}
54 changes: 54 additions & 0 deletions internal/app/.opts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package app

import "github.com/chromedp/chromedp"

// rznOpts returns the exec-allocator options used when fetching pages from
// rzn.info: chromedp's default options followed by the site-specific flags
// listed below.
//
// NOTE(review): several flag names here (authority, accept, cookie, referer,
// sec-*) look like HTTP request header names rather than browser command-line
// switches — confirm they actually influence the requests being made.
// NOTE(review): the cookie value appears to embed session/XSRF tokens —
// confirm these are safe to keep in source control.
func rznOpts() []chromedp.ExecAllocatorOption {
	const (
		acceptTypes = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
		cookies     = "XSRF-TOKEN=88pBJ1hLsp5p5EOgfXXvxjqT3XihMen3L8QkFi0J; rzninfo_session=1eFyjo1eTarnc0QKwdsr8q3aumvC2ULXd8H3lhV5; tmr_lvid=7a376f8fd4623efc3db273922d835687; tmr_lvidTS=1674769950132; _gid=GA1.2.1845806057.1674769950; _ym_uid=1674769950833812709; _ym_d=1674769950; _grf_vis=1; _grf_uid=1534284970; _grf_cm=1; _ym_isad=2; chash=XPIbNMM7Vg; adtech_uid=7560a927-96a5-4e47-bdb5-d2cb249b7943%3Arzn.info; top100_id=t1.7627570.382619791.1674769970893; _ohmybid_cmf=2; popupCookie=submited; _gat=1; _gat_newTracker=1; _ga_FLHB1SLDEJ=GS1.1.1674838850.8.1.1674838864.0.0.0; _ga=GA1.2.565078563.1674769950; cto_bundle=HphqcF94U3A2Qmc1QmV0bzZEb1JoYmxjcXRYaWZhNmY5VjVtRkY0ZGptczR3M1RjV0EyRXdWUEwlMkZqaUNicUV0VlA2QTZydkRFekolMkJpaWlWNlp1dlZDcmxIMENYJTJGVVdhcSUyRkJPZUY3anVwZDNoUFJ2Z3RPcTVxeSUyRklNMFJkNjUwT3A5WjF1elV3RlNuaEUzSll4aXhTRWhCNEtBJTNEJTNE; tmr_detect=0%7C1674838869485; last_visit=1674828084250%3A%3A1674838884250; t3_sid_7627570=s1.1586239044.1674838870070.1674838898452.2.5; XSRF-TOKEN=88pBJ1hLsp5p5EOgfXXvxjqT3XihMen3L8QkFi0J; rzninfo_session=1eFyjo1eTarnc0QKwdsr8q3aumvC2ULXd8H3lhV5"
		clientHints = `"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"`
		userAgent   = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
	)

	siteFlags := []chromedp.ExecAllocatorOption{
		chromedp.NoDefaultBrowserCheck,
		chromedp.Flag("headless", true),
		chromedp.Flag("blink-settings", "imagesEnabled=false"),
		chromedp.Flag("ignore-certificate-errors", true),
		chromedp.Flag("authority", "rzn.info"),
		chromedp.Flag("accept", acceptTypes),
		chromedp.Flag("accept-language", "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"),
		chromedp.Flag("cache-control", "no-cache"),
		chromedp.Flag("cookie", cookies),
		chromedp.Flag("pragma", "no-cache"),
		chromedp.Flag("referer", "https://rzn.info/"),
		chromedp.Flag("sec-ch-ua", clientHints),
		chromedp.Flag("sec-ch-ua-mobile", "?0"),
		chromedp.Flag("sec-ch-ua-platform", `"Linux"`),
		chromedp.Flag("sec-fetch-dest", "document"),
		chromedp.Flag("sec-fetch-mode", "navigate"),
		chromedp.Flag("sec-fetch-site", "same-origin"),
		chromedp.Flag("sec-fetch-user", "?1"),
		chromedp.Flag("upgrade-insecure-requests", "1"),
		chromedp.Flag("user-agent", userAgent),
	}

	// Slicing the package-level defaults and appending yields the defaults
	// first, then the site-specific flags, in the order listed above.
	return append(chromedp.DefaultExecAllocatorOptions[:], siteFlags...)
}

// yaOpts returns the exec-allocator options used when fetching pages from
// ya62.ru: chromedp's default options followed by the site-specific flags
// listed below.
//
// NOTE(review): several flag names here (authority, accept, cookie, referer,
// sec-*) look like HTTP request header names rather than browser command-line
// switches — confirm they actually influence the requests being made.
// NOTE(review): the cookie value appears to embed session identifiers —
// confirm these are safe to keep in source control.
func yaOpts() []chromedp.ExecAllocatorOption {
	const (
		acceptTypes = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
		cookies     = "__ddg1_=jkrGO1Hiz68Rq5lxScJL; BX_USER_ID=0ab3e778aebb24667da42ad9960c47f6; PHPSESSID=edRgvpmygNqUM2J3nwYxSdRRdObStciq; BITRIX_SM_BANNERS=1_31_6_04022023%2C1_32_3_04022023%2C1_40_6_04022023%2C1_45_6_04022023%2C1_88_6_04022023; BITRIX_CONVERSION_CONTEXT_s1=%7B%22ID%22%3A34%2C%22EXPIRE%22%3A1674939540%2C%22UNIQUE%22%3A%5B%22conversion_visit_day%22%5D%7D; _ym_uid=1674866119533751577; _ym_d=1674866119; _grf_vis=1; _grf_ref=www.google.com; _ym_visorc=b; _ym_isad=1; _grf_uid=1534284970; _grf_cm=1; __ddgid_=prb9Wi3vlKiBApNe; __ddgmark_=h3fkI2rETTPCjfHO; __ddg5_=MiLjHDSqNysLd4p9; __ddg2_=fCTm8wy0LbH6Up49; __ddg3=673g5BeOeQ9eoN8Q"
		clientHints = `"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"`
		userAgent   = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
	)

	siteFlags := []chromedp.ExecAllocatorOption{
		chromedp.NoDefaultBrowserCheck,
		chromedp.Flag("headless", true),
		chromedp.Flag("blink-settings", "imagesEnabled=false"),
		chromedp.Flag("ignore-certificate-errors", true),
		chromedp.Flag("authority", "ya62.ru"),
		chromedp.Flag("accept", acceptTypes),
		chromedp.Flag("accept-language", "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"),
		chromedp.Flag("cache-control", "max-age=0"),
		chromedp.Flag("cookie", cookies),
		chromedp.Flag("referer", "https://ya62.ru/news/incidents/"),
		chromedp.Flag("sec-ch-ua", clientHints),
		chromedp.Flag("sec-ch-ua-mobile", "?0"),
		chromedp.Flag("sec-ch-ua-platform", `"Linux"`),
		chromedp.Flag("sec-fetch-dest", "document"),
		chromedp.Flag("sec-fetch-mode", "navigate"),
		chromedp.Flag("sec-fetch-site", "same-origin"),
		chromedp.Flag("sec-fetch-user", "?1"),
		chromedp.Flag("upgrade-insecure-requests", "1"),
		chromedp.Flag("user-agent", userAgent),
	}

	// Slicing the package-level defaults and appending yields the defaults
	// first, then the site-specific flags, in the order listed above.
	return append(chromedp.DefaultExecAllocatorOptions[:], siteFlags...)
}
Loading

0 comments on commit c10f0fa

Please sign in to comment.