Skip to content

Commit

Permalink
Fix scrape embed from html
Browse files (browse the repository at this point in the history)
  • Loading branch information
Wikidepia committed Jul 15, 2024
1 parent b6f7690 commit 33ae615
Showing 1 changed file with 2 additions and 14 deletions.
16 changes: 2 additions & 14 deletions handlers/scraper/data.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -145,15 +145,6 @@ func (i *InstaData) ScrapeData() error {
}
}

-// Embed scraper
-// req.Header.SetMethod("GET")
-// req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
-// req.Header.Set("Accept-Language", "en-US,en;q=0.9")
-// req.Header.Set("Connection", "close")
-// req.Header.Set("Sec-Fetch-Mode", "navigate")
-// req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36")
-// req.SetRequestURI("https://www.instagram.com/p/" + i.PostID + "/embed/captioned/")
-
req, err := http.NewRequest("GET", "https://www.instagram.com/p/"+i.PostID+"/embed/captioned/", nil)
if err != nil {
return err
Expand All @@ -173,7 +164,6 @@ func (i *InstaData) ScrapeData() error {
if err != nil {
return err
}

// Pattern matching using LDE
l := &Line{}

Expand Down Expand Up @@ -212,10 +202,8 @@ func (i *InstaData) ScrapeData() error {
log.Error().Str("postID", i.PostID).Err(err).Msg("Failed to parse data from scrapeFromEmbedHTML")
return err
}
-
-embedHTMLData := gjson.Parse(embedHTML)
-
-smedia := embedHTMLData.Get("shortcode_media")
+gqlData = gjson.Parse(embedHTML)
+smedia := gqlData.Get("shortcode_media")
videoBlocked := smedia.Get("video_blocked").Bool()
username := smedia.Get("owner.username").String()

Expand Down

0 comments on commit 33ae615

Please sign in to comment.