Skip to content

Commit

Permalink
Remove fasthttp
Browse files Browse the repository at this point in the history
  • Loading branch information
Wikidepia committed Jul 15, 2024
1 parent 51ed2dd commit 3f10ea2
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 82 deletions.
3 changes: 0 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,13 @@ require (
github.com/rs/zerolog v1.33.0
github.com/tdewolff/parse/v2 v2.7.15
github.com/tidwall/gjson v1.17.1
github.com/valyala/fasthttp v1.55.0
golang.org/x/image v0.18.0
golang.org/x/net v0.26.0
golang.org/x/sync v0.7.0
)

require (
github.com/DataDog/zstd v1.5.5 // indirect
github.com/andybalholm/brotli v1.1.0 // indirect
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cockroachdb/errors v1.11.3 // indirect
Expand All @@ -47,7 +45,6 @@ require (
github.com/rogpeppe/go-internal v1.12.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
Expand Down
6 changes: 0 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ github.com/PurpleSec/escape v1.0.0 h1:25crJCsGmePlY6sTggm+qTg0xABcb7A0nUFgNas+N+
github.com/PurpleSec/escape v1.0.0/go.mod h1:y7jqOGecytNh1ROko233Z91ER9NHNuepiLgZtkrDMME=
github.com/RyanCarrier/dijkstra/v2 v2.0.2 h1:DIOg/a7XDR+KmlDkNSX9ggDY6sNLrG+EBGvZUjfgi+A=
github.com/RyanCarrier/dijkstra/v2 v2.0.2/go.mod h1:XwpYN7nC1LPwL3HkaavzB+VGaHRndSsZy/whsFy1AEI=
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
Expand Down Expand Up @@ -95,10 +93,6 @@ github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JT
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8=
github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
Expand Down
4 changes: 2 additions & 2 deletions handlers/embed.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func Embed(w http.ResponseWriter, r *http.Request) {
viewsData.Title = "InstaFix"
viewsData.URL = "https://instagram.com" + r.URL.Path
if !utils.IsBot(r.Header.Get("User-Agent")) {
w.Header().Set("Location", viewsData.URL)
http.Redirect(w, r, viewsData.URL, http.StatusFound)
return
}

Expand Down Expand Up @@ -125,7 +125,7 @@ func Embed(w http.ResponseWriter, r *http.Request) {
}

if isDirect {
w.Header().Set("Location", sb.String())
http.Redirect(w, r, sb.String(), http.StatusFound)
return
}

Expand Down
2 changes: 1 addition & 1 deletion handlers/grid.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ func Grid(w http.ResponseWriter, r *http.Request) {
}

if len(item.Medias) == 1 || len(mediaURLs) == 1 {
w.Header().Set("Location", "/images/"+postID+"/1")
http.Redirect(w, r, "/images/"+postID+"/1", http.StatusFound)
return
}

Expand Down
2 changes: 1 addition & 1 deletion handlers/images.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ func Images(w http.ResponseWriter, r *http.Request) {
return
}
imageURL := item.Medias[max(1, mediaNum)-1].URL
w.Header().Set("Location", imageURL)
http.Redirect(w, r, imageURL, http.StatusFound)
return
}
137 changes: 70 additions & 67 deletions handlers/scraper/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"bytes"
"errors"
"instafix/utils"
"io"
"net/http"
"net/url"
"strconv"
"strings"
Expand All @@ -17,18 +19,10 @@ import (
"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/js"
"github.com/tidwall/gjson"
"github.com/valyala/fasthttp"
"github.com/valyala/fasthttp/fasthttpproxy"
"golang.org/x/net/html"
"golang.org/x/sync/singleflight"
)

var client = &fasthttp.Client{
Dial: fasthttpproxy.FasthttpProxyHTTPDialerTimeout(5 * time.Second),
ReadBufferSize: 16 * 1024,
MaxConnsPerHost: 1024,
MaxConnWaitTimeout: 5 * time.Second,
}
var timeout = 10 * time.Second

var (
Expand Down Expand Up @@ -128,56 +122,64 @@ func GetData(postID string) (*InstaData, error) {
func (i *InstaData) ScrapeData() error {
var gqlData gjson.Result

req, res := fasthttp.AcquireRequest(), fasthttp.AcquireResponse()
defer func() {
fasthttp.ReleaseRequest(req)
fasthttp.ReleaseResponse(res)
}()
client := http.Client{Timeout: timeout}

// Scrape from remote scraper if available
if len(RemoteScraperAddr) > 0 {
var err error
req.Header.SetMethod("GET")
req, err := http.NewRequest("GET", RemoteScraperAddr+"/scrape/"+i.PostID, nil)
if err != nil {
return err
}
req.Header.Set("Accept-Encoding", "gzip, deflate, br")
req.SetRequestURI(RemoteScraperAddr + "/scrape/" + i.PostID)
if err = client.DoTimeout(req, res, timeout); err == nil && res.StatusCode() == fasthttp.StatusOK {
iDataGunzip, err := res.BodyGunzip()
if res, err := client.Do(req); err == nil {
defer res.Body.Close()
iDataGunzip, err := io.ReadAll(req.Body)
if err == nil {
if err = binary.Unmarshal(iDataGunzip, i); err == nil {
log.Info().Str("postID", i.PostID).Msg("Data parsed from remote scraper")
return nil
}
}
log.Error().Str("postID", i.PostID).Int("status", res.StatusCode).Err(err).Msg("Failed to scrape data from remote scraper")
}
log.Error().Str("postID", i.PostID).Int("status", res.StatusCode()).Err(err).Msg("Failed to scrape data from remote scraper")
}

req.Reset()
res.Reset()

// Embed scraper
req.Header.SetMethod("GET")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
req.Header.Set("Connection", "close")
req.Header.Set("Sec-Fetch-Mode", "navigate")
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36")
req.SetRequestURI("https://www.instagram.com/p/" + i.PostID + "/embed/captioned/")

var err error
// req.Header.SetMethod("GET")
// req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
// req.Header.Set("Accept-Language", "en-US,en;q=0.9")
// req.Header.Set("Connection", "close")
// req.Header.Set("Sec-Fetch-Mode", "navigate")
// req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36")
// req.SetRequestURI("https://www.instagram.com/p/" + i.PostID + "/embed/captioned/")

req, err := http.NewRequest("GET", "https://www.instagram.com/p/"+i.PostID+"/embed/captioned/", nil)
if err != nil {
return err
}

var body []byte
for retries := 0; retries < 3; retries++ {
err := client.DoTimeout(req, res, timeout)
if err == nil && len(res.Body()) > 0 {
break
var res *http.Response
if res, err = client.Do(req); err == nil {
defer res.Body.Close()
body, err = io.ReadAll(res.Body)
if err == nil && len(body) > 0 {
break
}
}
}
if err != nil {
return err
}

// Pattern matching using LDE
l := &Line{}

// TimeSliceImpl
ldeMatch := false
for _, line := range bytes.Split(res.Body(), []byte("\n")) {
for _, line := range bytes.Split(body, []byte("\n")) {
// Check if line contains TimeSliceImpl
ldeMatch, _ = l.Extract(line)
}
Expand Down Expand Up @@ -205,7 +207,7 @@ func (i *InstaData) ScrapeData() error {
}

// Scrape from embed HTML
embedHTML, err := scrapeFromEmbedHTML(res.Body())
embedHTML, err := scrapeFromEmbedHTML(body)
if err != nil {
log.Error().Str("postID", i.PostID).Err(err).Msg("Failed to parse data from scrapeFromEmbedHTML")
return err
Expand All @@ -219,7 +221,7 @@ func (i *InstaData) ScrapeData() error {

// Scrape from GraphQL API
if videoBlocked || len(username) == 0 {
gqlValue, err := scrapeFromGQL(i.PostID, req, res)
gqlValue, err := scrapeFromGQL(i.PostID)
if err != nil {
log.Error().Str("postID", i.PostID).Err(err).Msg("Failed to scrape data from scrapeFromGQL")
return err
Expand Down Expand Up @@ -351,33 +353,7 @@ func scrapeFromEmbedHTML(embedHTML []byte) (string, error) {
}`, nil
}

func scrapeFromGQL(postID string, req *fasthttp.Request, res *fasthttp.Response) ([]byte, error) {
req.Reset()
res.Reset()

req.Header.SetMethod("POST")
req.Header.Set("accept", "*/*")
req.Header.Set("accept-language", "en-US,en;q=0.9")
req.Header.Set("content-type", "application/x-www-form-urlencoded")
req.Header.Set("origin", "https://www.instagram.com")
req.Header.Set("priority", "u=1, i")
req.Header.Set("sec-ch-prefers-color-scheme", "dark")
req.Header.Set("sec-ch-ua", `"Google Chrome";v="125", "Chromium";v="125", "Not.A/Brand";v="24"`)
req.Header.Set("sec-ch-ua-full-version-list", `"Google Chrome";v="125.0.6422.142", "Chromium";v="125.0.6422.142", "Not.A/Brand";v="24.0.0.0"`)
req.Header.Set("sec-ch-ua-mobile", "?0")
req.Header.Set("sec-ch-ua-model", `""`)
req.Header.Set("sec-ch-ua-platform", `"macOS"`)
req.Header.Set("sec-ch-ua-platform-version", `"12.7.4"`)
req.Header.Set("sec-fetch-dest", "empty")
req.Header.Set("sec-fetch-mode", "cors")
req.Header.Set("sec-fetch-site", "same-origin")
req.Header.Set("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36")
req.Header.Set("x-asbd-id", "129477")
req.Header.Set("x-bloks-version-id", "e2004666934296f275a5c6b2c9477b63c80977c7cc0fd4b9867cb37e36092b68")
req.Header.Set("x-fb-friendly-name", "PolarisPostActionLoadPostQueryQuery")
req.Header.Set("x-ig-app-id", "936619743392459")

req.SetRequestURI("https://www.instagram.com/graphql/query/")
func scrapeFromGQL(postID string) ([]byte, error) {
gqlParams := url.Values{
"av": {"0"},
"__d": {"www"},
Expand All @@ -404,10 +380,37 @@ func scrapeFromGQL(postID string, req *fasthttp.Request, res *fasthttp.Response)
"server_timestamps": {"true"},
"doc_id": {"25531498899829322"},
}
req.SetBodyString(gqlParams.Encode())

if err := client.DoTimeout(req, res, timeout); err != nil {
req, err := http.NewRequest("POST", "https://www.instagram.com/graphql/query/", strings.NewReader(gqlParams.Encode()))
if err != nil {
return nil, err
}
return res.Body(), nil
req.Header = http.Header{
"Accept": {"*/*"},
"Accept-Language": {"en-US,en;q=0.9"},
"Content-Type": {"application/x-www-form-urlencoded"},
"Origin": {"https://www.instagram.com"},
"Priority": {"u=1, i"},
"Sec-Ch-Prefers-Color-Scheme": {"dark"},
"Sec-Ch-Ua": {`"Google Chrome";v="125", "Chromium";v="125", "Not.A/Brand";v="24"`},
"Sec-Ch-Ua-Full-Version-List": {`"Google Chrome";v="125.0.6422.142", "Chromium";v="125.0.6422.142", "Not.A/Brand";v="24.0.0.0"`},
"Sec-Ch-Ua-Mobile": {"?0"},
"Sec-Ch-Ua-Model": {`""`},
"Sec-Ch-Ua-Platform": {`"macOS"`},
"Sec-Ch-Ua-Platform-Version": {`"12.7.4"`},
"Sec-Fetch-Dest": {"empty"},
"Sec-Fetch-Mode": {"cors"},
"Sec-Fetch-Site": {"same-origin"},
"User-Agent": {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"},
"X-Asbd-Id": {"129477"},
"X-Bloks-Version-Id": {"e2004666934296f275a5c6b2c9477b63c80977c7cc0fd4b9867cb37e36092b68"},
"X-Fb-Friendly-Name": {"PolarisPostActionLoadPostQueryQuery"},
"X-Ig-App-Id": {"936619743392459"},
}

client := http.Client{Timeout: timeout}
if res, err := client.Do(req); err == nil {
defer res.Body.Close()
return io.ReadAll(res.Body)
}
return nil, err
}
4 changes: 2 additions & 2 deletions handlers/videos.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ func Videos(w http.ResponseWriter, r *http.Request) {

// Redirect to proxy if not TelegramBot in User-Agent
if strings.Contains(r.Header.Get("User-Agent"), "TelegramBot") {
w.Header().Set("Location", videoURL)
http.Redirect(w, r, videoURL, http.StatusFound)
return
}
w.Header().Set("Location", "https://envoy.lol/"+videoURL)
http.Redirect(w, r, "https://envoy.lol/"+videoURL, http.StatusFound)
return
}

0 comments on commit 3f10ea2

Please sign in to comment.