Skip to content

Commit 6c1f58e

Browse files
committed
Add headers field in ruleset. Enable Google Cache.
1 parent d3c995d commit 6c1f58e

File tree

6 files changed

+133
-58
lines changed

6 files changed

+133
-58
lines changed

README.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,11 @@ See in [ruleset.yaml](ruleset.yaml) for an example.
124124
domains: # Additional domains to apply the rule
125125
- www.example.com
126126
- www.beispiel.de
127+
headers:
128+
x-forwarded-for: none # override the X-Forwarded-For header, or set to none to omit it
129+
referer: none # override the Referer header, or set to none to omit it
130+
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
131+
cookie: privacy=1
127132
regexRules:
128133
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
129134
replace: <script $1 script="/https://www.example.com/$3"
@@ -138,7 +143,7 @@ See in [ruleset.yaml](ruleset.yaml) for an example.
138143
- domain: www.anotherdomain.com # Domain where the rule applies
139144
paths: # Paths where the rule applies
140145
- /article
141-
googleCache: false # Search also in Google Cache
146+
googleCache: false # Use Google Cache to fetch the content
142147
regexRules: # Regex rules to apply
143148
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
144149
replace: <script $1 script="/https://www.example.com/$3"

cmd/main.go

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ func main() {
4747
app := fiber.New(
4848
fiber.Config{
4949
Prefork: *prefork,
50+
GETOnly: true,
5051
},
5152
)
5253

handlers/proxy.go

+78-55
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,45 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
6060
log.Println(u.String() + urlQuery)
6161
}
6262

63+
rule := fetchRule(u.Host, u.Path)
64+
65+
if rule.GoogleCache {
66+
u, err = url.Parse("https://webcache.googleusercontent.com/search?q=cache:" + u.String())
67+
if err != nil {
68+
return "", nil, nil, err
69+
}
70+
}
71+
6372
// Fetch the site
6473
client := &http.Client{}
6574
req, _ := http.NewRequest("GET", u.String()+urlQuery, nil)
66-
req.Header.Set("User-Agent", UserAgent)
67-
req.Header.Set("X-Forwarded-For", ForwardedFor)
68-
req.Header.Set("Referer", u.String())
69-
req.Header.Set("Host", u.Host)
75+
76+
if rule.Headers.UserAgent != "" {
77+
req.Header.Set("User-Agent", rule.Headers.UserAgent)
78+
} else {
79+
req.Header.Set("User-Agent", UserAgent)
80+
}
81+
82+
if rule.Headers.XForwardedFor != "" {
83+
if rule.Headers.XForwardedFor != "none" {
84+
req.Header.Set("X-Forwarded-For", rule.Headers.XForwardedFor)
85+
}
86+
} else {
87+
req.Header.Set("X-Forwarded-For", ForwardedFor)
88+
}
89+
90+
if rule.Headers.Referer != "" {
91+
if rule.Headers.Referer != "none" {
92+
req.Header.Set("Referer", rule.Headers.Referer)
93+
}
94+
} else {
95+
req.Header.Set("Referer", u.String())
96+
}
97+
98+
if rule.Headers.Cookie != "" {
99+
req.Header.Set("Cookie", rule.Headers.Cookie)
100+
}
101+
70102
resp, err := client.Do(req)
71103

72104
if err != nil {
@@ -79,11 +111,12 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
79111
return "", nil, nil, err
80112
}
81113

82-
body := rewriteHtml(bodyB, u)
114+
log.Print("rule", rule)
115+
body := rewriteHtml(bodyB, u, rule)
83116
return body, req, resp, nil
84117
}
85118

86-
func rewriteHtml(bodyB []byte, u *url.URL) string {
119+
func rewriteHtml(bodyB []byte, u *url.URL, rule Rule) string {
87120
// Rewrite the HTML
88121
body := string(bodyB)
89122

@@ -104,7 +137,7 @@ func rewriteHtml(bodyB []byte, u *url.URL) string {
104137
body = strings.ReplaceAll(body, "href=\"https://"+u.Host, "href=\"/https://"+u.Host+"/")
105138

106139
if os.Getenv("RULESET") != "" {
107-
body = applyRules(u.Host, u.Path, body)
140+
body = applyRules(body, rule)
108141
}
109142
return body
110143
}
@@ -169,67 +202,57 @@ func loadRules() RuleSet {
169202
return ruleSet
170203
}
171204

172-
func applyRules(domain string, path string, body string) string {
205+
func fetchRule(domain string, path string) Rule {
173206
if len(rulesSet) == 0 {
174-
return body
207+
return Rule{}
175208
}
176-
209+
rule := Rule{}
177210
for _, rule := range rulesSet {
178211
domains := rule.Domains
179212
domains = append(domains, rule.Domain)
180213
for _, ruleDomain := range domains {
181-
if ruleDomain != domain {
182-
continue
183-
}
184-
if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) {
185-
continue
186-
}
187-
for _, regexRule := range rule.RegexRules {
188-
re := regexp.MustCompile(regexRule.Match)
189-
body = re.ReplaceAllString(body, regexRule.Replace)
190-
}
191-
for _, injection := range rule.Injections {
192-
doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
193-
if err != nil {
194-
log.Fatal(err)
195-
}
196-
if injection.Replace != "" {
197-
doc.Find(injection.Position).ReplaceWithHtml(injection.Replace)
198-
}
199-
if injection.Append != "" {
200-
doc.Find(injection.Position).AppendHtml(injection.Append)
201-
}
202-
if injection.Prepend != "" {
203-
doc.Find(injection.Position).PrependHtml(injection.Prepend)
204-
}
205-
body, err = doc.Html()
206-
if err != nil {
207-
log.Fatal(err)
214+
if ruleDomain == domain {
215+
if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) {
216+
continue
208217
}
218+
// return first match
219+
return rule
209220
}
210221
}
211222
}
212-
213-
return body
223+
return rule
214224
}
215225

216-
type Rule struct {
217-
Match string `yaml:"match"`
218-
Replace string `yaml:"replace"`
219-
}
226+
func applyRules(body string, rule Rule) string {
227+
if len(rulesSet) == 0 {
228+
return body
229+
}
220230

221-
type RuleSet []struct {
222-
Domain string `yaml:"domain"`
223-
Domains []string `yaml:"domains,omitempty"`
224-
Paths []string `yaml:"paths,omitempty"`
225-
GoogleCache bool `yaml:"googleCache,omitempty"`
226-
RegexRules []Rule `yaml:"regexRules"`
227-
Injections []struct {
228-
Position string `yaml:"position"`
229-
Append string `yaml:"append"`
230-
Prepend string `yaml:"prepend"`
231-
Replace string `yaml:"replace"`
232-
} `yaml:"injections"`
231+
for _, regexRule := range rule.RegexRules {
232+
re := regexp.MustCompile(regexRule.Match)
233+
body = re.ReplaceAllString(body, regexRule.Replace)
234+
}
235+
for _, injection := range rule.Injections {
236+
doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
237+
if err != nil {
238+
log.Fatal(err)
239+
}
240+
if injection.Replace != "" {
241+
doc.Find(injection.Position).ReplaceWithHtml(injection.Replace)
242+
}
243+
if injection.Append != "" {
244+
doc.Find(injection.Position).AppendHtml(injection.Append)
245+
}
246+
if injection.Prepend != "" {
247+
doc.Find(injection.Position).PrependHtml(injection.Prepend)
248+
}
249+
body, err = doc.Html()
250+
if err != nil {
251+
log.Fatal(err)
252+
}
253+
}
254+
255+
return body
233256
}
234257

235258
func StringInSlice(s string, list []string) bool {

handlers/proxy.test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ func TestRewriteHtml(t *testing.T) {
5151
</html>
5252
`
5353

54-
actual := rewriteHtml(bodyB, u)
54+
actual := rewriteHtml(bodyB, u, Rule{})
5555
assert.Equal(t, expected, actual)
5656
}
5757

handlers/types.go

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package handlers
2+
3+
type Regex struct {
4+
Match string `yaml:"match"`
5+
Replace string `yaml:"replace"`
6+
}
7+
8+
type RuleSet []Rule
9+
10+
type Rule struct {
11+
Domain string `yaml:"domain"`
12+
Domains []string `yaml:"domains,omitempty"`
13+
Paths []string `yaml:"paths,omitempty"`
14+
Headers struct {
15+
UserAgent string `yaml:"user-agent,omitempty"`
16+
XForwardedFor string `yaml:"x-forwarded-for,omitempty"`
17+
Referer string `yaml:"referer,omitempty"`
18+
Cookie string `yaml:"cookie,omitempty"`
19+
} `yaml:"headers,omitempty"`
20+
GoogleCache bool `yaml:"googleCache,omitempty"`
21+
RegexRules []Regex `yaml:"regexRules"`
22+
Injections []struct {
23+
Position string `yaml:"position"`
24+
Append string `yaml:"append"`
25+
Prepend string `yaml:"prepend"`
26+
Replace string `yaml:"replace"`
27+
} `yaml:"injections"`
28+
}

ruleset.yaml

+19-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
- domain: www.example.com
22
domains:
3-
- www.beispiel.com
3+
- www.beispiel.de
4+
googleCache: true
5+
headers:
6+
x-forwarded-for: none
7+
referer: none
8+
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
9+
cookie: privacy=1
410
regexRules:
511
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
612
replace: <script $1 script="/https://www.example.com/$3"
@@ -77,6 +83,10 @@
7783
- domains:
7884
- www.nytimes.com
7985
- www.time.com
86+
headers:
87+
user-agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
88+
cookie: nyt-a=; nyt-gdpr=0; nyt-geo=DE; nyt-privacy=1
89+
referer: https://www.google.com/
8090
injections:
8191
- position: head
8292
append: |
@@ -146,3 +156,11 @@
146156
headimage.forEach(image => { image.style.filter = ''; });
147157
});
148158
</script>
159+
- domain: www.medium.com
160+
domains:
161+
- medium.com
162+
headers:
163+
referer: https://t.co/x?amp=1
164+
x-forwarded-for: none
165+
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
166+
cookie:

0 commit comments

Comments
 (0)