From a8328379b0dd4f4941a1453f1643efbccee12a52 Mon Sep 17 00:00:00 2001 From: Akmal <72781956+Wikidepia@users.noreply.github.com> Date: Wed, 29 Jan 2025 09:19:43 +0700 Subject: [PATCH] Scrape new /share/ remotely --- handlers/embed.go | 16 ++++++++-------- handlers/scraper/data.go | 11 ++++------- handlers/scraper/remote.go | 4 ++++ 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/handlers/embed.go b/handlers/embed.go index ab17a5d..788f45a 100644 --- a/handlers/embed.go +++ b/handlers/embed.go @@ -30,7 +30,7 @@ func mediaidToCode(mediaID int) string { } func getSharePostID(postID string) (string, error) { - req, err := http.NewRequest("HEAD", "https://www.instagram.com/share/reel/"+postID+"/", nil) + req, err := http.NewRequest("HEAD", "https://www.instagram.com/share/p/"+postID+"/", nil) if err != nil { return postID, err } @@ -43,11 +43,11 @@ func getSharePostID(postID string) (string, error) { if err != nil { return postID, err } - postID = path.Base(redirURL.Path) - if postID == "login" { + postIDTemp := path.Base(redirURL.Path) + if postIDTemp == "login" { return postID, errors.New("not logged in") } - return postID, nil + return postIDTemp, nil } func Embed(w http.ResponseWriter, r *http.Request) { @@ -98,7 +98,7 @@ func Embed(w http.ResponseWriter, r *http.Request) { postID = mediaidToCode(mediaID) } else if strings.Contains(r.URL.Path, "/share/") { postID, err = getSharePostID(postID) - if err != nil { + if err != nil && scraper.GetRemoteSessCount() == 0 { slog.Error("Failed to get new postID from share URL", "postID", postID, "err", err) viewsData.Description = "Failed to get new postID from share URL" views.Embed(viewsData, w) @@ -148,19 +148,19 @@ func Embed(w http.ResponseWriter, r *http.Request) { case mediaNum == 0 && isImage && len(item.Medias) > 1: viewsData.Card = "summary_large_image" sb.WriteString("/grid/") - sb.WriteString(postID) + sb.WriteString(item.PostID) viewsData.ImageURL = sb.String() case isImage: viewsData.Card = "summary_large_image" sb.WriteString("/images/") - sb.WriteString(postID) + sb.WriteString(item.PostID) sb.WriteString("/") sb.WriteString(strconv.Itoa(max(1, mediaNum))) viewsData.ImageURL = sb.String() default: viewsData.Card = "player" sb.WriteString("/videos/") - sb.WriteString(postID) + sb.WriteString(item.PostID) sb.WriteString("/") sb.WriteString(strconv.Itoa(max(1, mediaNum))) viewsData.VideoURL = sb.String() diff --git a/handlers/scraper/data.go b/handlers/scraper/data.go index 307e3a2..60fa162 100644 --- a/handlers/scraper/data.go +++ b/handlers/scraper/data.go @@ -25,11 +25,10 @@ import ( ) var ( - ErrNotFound = errors.New("post not found") - timeout = 5 * time.Second - transport http.RoundTripper - transportNoProxy *http.Transport - sflightScraper singleflight.Group + ErrNotFound = errors.New("post not found") + timeout = 5 * time.Second + transport http.RoundTripper + sflightScraper singleflight.Group ) //go:embed dictionary.bin @@ -49,8 +48,6 @@ type InstaData struct { func init() { transport = gzhttp.Transport(http.DefaultTransport, gzhttp.TransportAlwaysDecompress(true)) - transportNoProxy = http.DefaultTransport.(*http.Transport).Clone() - transportNoProxy.Proxy = nil // Skip any proxy } func GetData(postID string) (*InstaData, error) { diff --git a/handlers/scraper/remote.go b/handlers/scraper/remote.go index aabe445..a0ab0d6 100644 --- a/handlers/scraper/remote.go +++ b/handlers/scraper/remote.go @@ -148,3 +148,7 @@ func ScrapeRemote(i *InstaData) error { return errors.New("failed to get data from remote scraper") } } + +func GetRemoteSessCount() int { + return int(sessCount.Load()) +}