Skip to content

Commit

Permalink
Skip proxy when scraping from remote scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
Wikidepia committed Jul 25, 2024
1 parent 7b067c9 commit cc27c52
Showing 1 changed file with 15 additions and 9 deletions.
24 changes: 15 additions & 9 deletions handlers/scraper/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ import (
)

var (
timeout = 10 * time.Second
ErrNotFound = errors.New("post not found")
transport = gzhttp.Transport(http.DefaultTransport, gzhttp.TransportAlwaysDecompress(true))
RemoteScraperAddr string
ErrNotFound = errors.New("post not found")
timeout = 5 * time.Second
transport *http.Transport
transportNoProxy *http.Transport
sflightScraper singleflight.Group
)

var RemoteScraperAddr string
var sflightScraper singleflight.Group

//go:embed dictionary.bin
var zstdDict []byte

Expand All @@ -50,6 +50,12 @@ type InstaData struct {
Medias []Media
}

func init() {
transport = gzhttp.Transport(http.DefaultTransport, gzhttp.TransportAlwaysDecompress(true)).(*http.Transport)
transportNoProxy = http.DefaultTransport.(*http.Transport).Clone()
transportNoProxy.Proxy = nil // Skip any proxy
}

func GetData(postID string) (*InstaData, error) {
if len(postID) == 0 || postID[0] != 'C' {
return nil, errors.New("postID is not a valid Instagram post ID")
Expand Down Expand Up @@ -134,17 +140,16 @@ func GetData(postID string) (*InstaData, error) {
}

func (i *InstaData) ScrapeData() error {
client := http.Client{Transport: transport, Timeout: timeout}

// Scrape from remote scraper if available
if len(RemoteScraperAddr) > 0 {
var err error
remoteClient := http.Client{Transport: transportNoProxy, Timeout: timeout}
req, err := http.NewRequest("GET", RemoteScraperAddr+"/scrape/"+i.PostID, nil)
if err != nil {
return err
}
req.Header.Set("Accept-Encoding", "zstd.dict")
res, err := client.Do(req)
res, err := remoteClient.Do(req)
if res != nil && res.StatusCode == 200 {
defer res.Body.Close()
zstdReader, err := zstd.NewReader(nil, zstd.WithDecoderLowmem(true), zstd.WithDecoderDicts(zstdDict))
Expand All @@ -166,6 +171,7 @@ func (i *InstaData) ScrapeData() error {
}
}

client := http.Client{Transport: transport, Timeout: timeout}
req, err := http.NewRequest("GET", "https://www.instagram.com/p/"+i.PostID+"/embed/captioned/", nil)
if err != nil {
return err
Expand Down

0 comments on commit cc27c52

Please sign in to comment.