Skip to content

Commit

Permalink
Merge pull request #326 from CSUSTers/dev
Browse files Browse the repository at this point in the history
feat(inline): remove tracing parameters from Bilibili URL
  • Loading branch information
hugefiver authored Jan 5, 2024
2 parents 0f22d3e + da301d8 commit 68610f3
Show file tree
Hide file tree
Showing 20 changed files with 2,892 additions and 34 deletions.
3 changes: 2 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
"eamodio.gitlens",
"esbenp.prettier-vscode",
"ms-python.python",
"mutantdino.resourcemonitor"
"mutantdino.resourcemonitor",
"ms-vscode.makefile-tools"
]
}
}
Expand Down
2 changes: 1 addition & 1 deletion .golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ linters-settings:

nestif:
# minimal complexity of if statements to report, 5 by default
min-complexity: 5
min-complexity: 12

# nolintlint:
# # Disable to ensure that all nolint directives actually have an effect. Default is false.
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ build: get
go build -o $(OUTPUT) .

test:
go test -v -race -covermode=atomic ./...
go test -v -race -covermode=atomic -test.short ./...

fmt:
gofmt -l -w .
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ require (
github.com/prometheus/common v0.45.0
github.com/quic-go/quic-go v0.40.1
github.com/redis/go-redis/v9 v9.3.1
github.com/sashabaranov/go-openai v1.17.9
github.com/sashabaranov/go-openai v1.17.10
github.com/spf13/viper v1.18.2
github.com/stretchr/testify v1.8.4
go.uber.org/zap v1.26.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,8 @@ github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6ke
github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4=
github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=
github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
github.com/sashabaranov/go-openai v1.17.9 h1:QEoBiGKWW68W79YIfXWEFZ7l5cEgZBV4/Ow3uy+5hNY=
github.com/sashabaranov/go-openai v1.17.9/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.17.10 h1:ybvWN+d/rgEK/64U6dsjnOQ9AUya2wBoJKj3Wuaonqo=
github.com/sashabaranov/go-openai v1.17.10/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
Expand Down
170 changes: 170 additions & 0 deletions inline/bili_url.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
package inline

import (
	"bytes"
	"context"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"time"

	"csust-got/util/urlx"
)

var (
	// urlPathPatt matches one "/segment" of a URL path at a time; the named
	// group "fragment" holds the segment text without its leading slash.
	urlPathPatt = regexp.MustCompile(`(?i)(?:/)(?P<fragment>[^/\s]*)`)

	// biliVideoIdPatt matches a whole string that is a Bilibili video id:
	// "av" or "ep" followed by digits, or "bv" followed by alphanumerics
	// (all case-insensitive).
	biliVideoIdPatt = regexp.MustCompile(`(?i)^((?:av|ep)(?:\d+)|bv(?:[a-zA-Z0-9]+))$`)

	// biliDomains lists the lower-case host names that are handled as
	// Bilibili links.
	biliDomains = []string{
		"b23.tv",
		"bilibili.com",
		"www.bilibili.com",
		"space.bilibili.com",
		"m.bilibili.com",
		"t.bilibili.com",
		"live.bilibili.com",
	}

	// biliRetainQueryParams lists the only query parameters that are kept
	// when a Bilibili URL is cleaned; every other parameter is dropped.
	biliRetainQueryParams = []string{
		"p",
		"t",
		"tab",
	}
)

// clearBiliUrlQuery rewrites u.Query in place so that only the retained
// Bilibili query parameters remain.
//
// If the query cannot be parsed the error is returned and u is left untouched.
func clearBiliUrlQuery(u *urlx.ExtraUrl) error {
	cleaned, err := removeBiliTracingParramFromQuery(u.Query)
	if err == nil {
		u.Query = cleaned
	}
	return err
}

// removeBiliTracingParramFromQuery strips every query parameter that is not
// listed in biliRetainQueryParams.
//
// query may start with a single leading "?". The result is either the empty
// string (nothing left to keep) or "?" followed by the retained parameters
// encoded by url.Values.Encode, i.e. sorted by key. A query that cannot be
// parsed yields an empty string and the parse error.
func removeBiliTracingParramFromQuery(query string) (string, error) {
	if query == "" {
		return "", nil
	}

	raw := query
	if raw[0] == '?' {
		raw = raw[1:]
	}

	parsed, err := url.ParseQuery(raw)
	if err != nil {
		return "", err
	}

	kept := url.Values{}
	for _, name := range biliRetainQueryParams {
		values, present := parsed[name]
		if !present {
			continue
		}
		kept[name] = values
	}

	encoded := kept.Encode()
	if encoded == "" {
		return "", nil
	}
	return "?" + encoded, nil
}

// writeBiliUrl appends a cleaned form of the Bilibili URL u to buf.
//
// URLs on the b23.tv domain are handled by processB23Url. For every other
// domain only the query string is reduced (see clearBiliUrlQuery) and the URL
// is rendered with u.StringByFields.
//
// Processing of a b23.tv URL is best effort: if it fails, the original URL
// text is written unchanged and nil is returned, so the link is never dropped
// from the output. An error is returned only when the query of a non-b23.tv
// URL cannot be cleaned; nothing is written to buf in that case.
func writeBiliUrl(buf *bytes.Buffer, u *urlx.ExtraUrl) error {
	if strings.EqualFold(u.Domain, "b23.tv") {
		to, err := processB23Url(context.TODO(), u)
		if err != nil {
			// Previously the error was swallowed AND nothing was written,
			// which silently removed the link from the message. Fall back
			// to the text exactly as the user typed it.
			to = u.Text
		}
		buf.WriteString(to)
		return nil
	}

	if err := clearBiliUrlQuery(u); err != nil {
		return err
	}
	buf.WriteString(u.StringByFields())
	return nil
}

// processB23Url returns the cleaned form of a b23.tv URL.
//
// Three shapes are distinguished by the path of u:
//   - no path segment at all: the original text is returned when there is no
//     query, otherwise only the query is cleaned;
//   - the first segment is a video id (see biliVideoIdPatt): the path is cut
//     down to that single segment and the query is cleaned;
//   - anything else is treated as a shortened link and handed to
//     processBiliShortenUrl.
//
// u may be modified in place (Path and Query).
func processB23Url(ctx context.Context, u *urlx.ExtraUrl) (string, error) {
	fragments := spliteUrlPath(u.Path)

	switch {
	case len(fragments) == 0 && u.Query == "":
		// Nothing to clean at all.
		return u.Text, nil
	case len(fragments) == 0:
		// Only the query needs cleaning; handled below.
	case biliVideoIdPatt.MatchString(fragments[0]):
		// Origin video URL: keep just the id segment.
		u.Path = "/" + fragments[0]
	default:
		// Shortened URL: needs a lookup.
		return processBiliShortenUrl(ctx, u)
	}

	if err := clearBiliUrlQuery(u); err != nil {
		return "", err
	}
	return u.StringByFields(), nil
}

// biliShortenUrlTimeout bounds a single short-link lookup so that a slow or
// unreachable server cannot block the caller indefinitely.
const biliShortenUrlTimeout = 10 * time.Second

// biliNoRedirectClient is shared by all short-link lookups. It never follows
// redirects: the 3xx response itself is returned so that its Location header
// can be inspected.
var biliNoRedirectClient = &http.Client{
	Timeout: biliShortenUrlTimeout,
	CheckRedirect: func(*http.Request, []*http.Request) error {
		return http.ErrUseLastResponse
	},
}

// processBiliShortenUrl resolves a shortened Bilibili URL by requesting
// u.Text and reading the redirect target, then returns the cleaned target.
//
// If the response is not a redirect (status outside 300-399) the original
// text u.Text is returned unchanged. If the target path starts with "/video/"
// and carries no meaningful "p" (empty or "1") and no "t" parameter, the
// result is rewritten to the compact "b23.tv/<id>" form without any query;
// otherwise the target keeps its own domain and only its query is cleaned.
//
// An error is returned when the request cannot be built or performed, when
// the redirect has no usable Location header, or when the target query
// cannot be parsed.
func processBiliShortenUrl(ctx context.Context, u *urlx.ExtraUrl) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.Text, nil)
	if err != nil {
		return "", err
	}

	resp, err := biliNoRedirectClient.Do(req)
	if err != nil {
		return "", err
	}
	// The body is never read, but it must still be closed so the underlying
	// connection is released instead of leaked.
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode < 300 || resp.StatusCode >= 400 {
		return u.Text, nil
	}

	// get origin URL from a shorten URL
	to, err := resp.Location()
	if err != nil {
		return "", err
	}
	e := urlx.UrlToExtraUrl(to)

	// video URL without `p` and `t` query params
	// use `b23.tv` domain for shorten URL
	if strings.HasPrefix(e.Path, "/video/") {
		q := to.Query()
		pQ, tQ := q.Get("p"), q.Get("t")
		paths := spliteUrlPath(e.Path)
		if len(paths) >= 2 && (pQ == "" || pQ == "1") && tQ == "" {
			e.Path = "/" + paths[1]
			e.Domain = "b23.tv"
			e.Query = ""
		}
	}

	if err := clearBiliUrlQuery(e); err != nil {
		return "", err
	}
	return e.StringByFields(), nil
}

// spliteUrlPath splits a URL path into its "/"-separated segments using
// urlPathPatt and returns one entry per match, in order.
//
// The result is always non-nil; it is empty when path contains no "/".
// Each entry comes from urlx.SubmatchGroupStringByName for the "fragment"
// group (presumably the text of that named group; confirm against urlx).
func spliteUrlPath(path string) []string {
	locs := urlPathPatt.FindAllStringSubmatchIndex(path, -1)

	fragments := make([]string, len(locs))
	for i, loc := range locs {
		fragments[i] = urlx.SubmatchGroupStringByName(urlPathPatt, path, loc, "fragment")
	}
	return fragments
}
35 changes: 35 additions & 0 deletions inline/bili_url_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package inline

import (
"bytes"
"csust-got/util/urlx"
"testing"

"github.com/stretchr/testify/assert"
)

// Test_writeBiliUrl is a table-driven test for writeBiliUrl: each case
// extracts the first URL from its input, runs writeBiliUrl on it and compares
// either the error expectation or the text written to the buffer.
func Test_writeBiliUrl(t *testing.T) {
	type testCase struct {
		name    string
		url     string
		want    string
		wantErr bool
	}
	tests := []testCase{}

	// One buffer is reused across cases and emptied before each of them.
	var buf bytes.Buffer
	for _, tt := range tests {
		buf.Reset()
		t.Run(tt.name, func(t *testing.T) {
			extracted := urlx.ExtractStr(tt.url)
			err := writeBiliUrl(&buf, extracted[0].Url)

			gotErr := err != nil
			if gotErr != tt.wantErr {
				t.Errorf("writeBiliUrl() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if gotErr {
				// Output is only checked for successful calls.
				return
			}
			assert.Equal(t, tt.want, buf.String())
		})
	}
}
97 changes: 97 additions & 0 deletions inline/inline.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package inline

import (
"bytes"
"csust-got/config"
"csust-got/log"
"csust-got/util"
"csust-got/util/urlx"
"errors"
"regexp"
"slices"
"strings"

"go.uber.org/zap"
tb "gopkg.in/telebot.v3"
)

// biliUrlRegex is the source of biliPatt. It matches, case-insensitively, a
// URL on b23.tv or bilibili.com with an optional http(s) scheme and at most
// one sub-domain label, and exposes the named groups schema, host,
// sub_domain, main_domain, path, query and hash.
//
//nolint:revive // It's too long.
var biliUrlRegex = `(?i)((?P<schema>https?://)?(?P<host>(?P<sub_domain>[\w\d\-]+\.)?(?P<main_domain>b23\.tv|bilibili\.com))(?P<path>(?:/[^\s\?#]*)*)?(?P<query>\?[^\s#]*)?(?P<hash>#[\S]*)?)`

// biliPatt is the compiled form of biliUrlRegex.
var biliPatt = regexp.MustCompile(biliUrlRegex)

var (
// ErrContextCanceled is returned when context is canceled
ErrContextCanceled = errors.New("context canceled")
)

// init switches biliPatt to leftmost-longest matching before any use.
func init() {
biliPatt.Longest()
}

// RegisterInlineHandler registers the inline-mode query handler on bot.
// The handler is built from conf and bound to the tb.OnQuery endpoint.
func RegisterInlineHandler(bot *tb.Bot, conf *config.Config) {
	onQuery := handler(conf)
	bot.Handle(tb.OnQuery, onQuery)
}

// handler builds the inline-query callback.
//
// For each query the callback extracts the URLs from the query text, rewrites
// them via writeAll and answers with a single article result whose message
// body is the rewritten text (escaped for MarkdownV2) and whose description
// is the unescaped rewritten text.
//
// A failure while rewriting is logged and returned. A failure while answering
// is only logged; the callback still returns nil in that case.
func handler(conf *config.Config) func(ctx tb.Context) error {
	return func(ctx tb.Context) error {
		origin := ctx.Query().Text

		extras := urlx.ExtractStr(origin)
		log.Debug("extracted urls", zap.String("origin", origin), zap.Any("urls", extras))

		var out bytes.Buffer
		if err := writeAll(&out, extras); err != nil {
			log.Error("write all error", zap.Error(err))
			return err
		}

		replaced := out.String()
		log.Debug("replaced text", zap.String("origin", origin), zap.String("replaced", replaced))

		article := &tb.ArticleResult{
			ResultBase:  tb.ResultBase{ParseMode: tb.ModeMarkdownV2},
			Title:       "发送",
			Description: replaced,
			Text:        util.EscapeTelegramReservedChars(replaced),
		}
		answer := &tb.QueryResponse{Results: tb.Results{article}}
		if err := ctx.Answer(answer); err != nil {
			log.Error("inline mode answer error", zap.Error(err))
		}
		return nil
	}
}

// writeAll writes every extracted piece of exs to buf in order: URL pieces go
// through writeUrl, every other piece is copied verbatim.
//
// Processing stops at the first error from writeUrl, which is returned;
// whatever was written before that point stays in buf.
func writeAll(buf *bytes.Buffer, exs []*urlx.Extra) error {
	for _, piece := range exs {
		if piece.Type != urlx.TypeUrl {
			buf.WriteString(piece.Text)
			continue
		}
		if err := writeUrl(buf, piece); err != nil {
			return err
		}
	}
	return nil
}

// writeUrl writes the URL piece e to buf. URLs whose lower-cased domain is
// listed in biliDomains are rewritten by writeBiliUrl; all other URLs are
// written exactly as they appeared in the input.
func writeUrl(buf *bytes.Buffer, e *urlx.Extra) error {
	link := e.Url

	domain := strings.ToLower(link.Domain)
	if !slices.Contains(biliDomains, domain) {
		buf.WriteString(link.Text)
		return nil
	}
	return writeBiliUrl(buf, link)
}
Loading

0 comments on commit 68610f3

Please sign in to comment.