-
Notifications
You must be signed in to change notification settings - Fork 865
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added probe to identify copyright year #1955
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
package httpx | ||
|
||
import (
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"
)
|
||
// crreYear matches a four-digit year, optionally preceded and/or followed by a
// copyright indicator. Capture group 1 holds the year. The accepted year range
// runs from 1990 up to the calendar year at process start, so newly dated
// pages keep matching without editing the pattern.
var crreYear = compileCopyrightYearRegex(time.Now().Year())

// compileCopyrightYearRegex builds the copyright-year expression for the years
// 1990 through maxYear inclusive. maxYear is raised to 2024 when it is lower,
// so a badly set clock never shrinks the range below the previously
// hard-coded one.
func compileCopyrightYearRegex(maxYear int) *regexp.Regexp {
	const (
		minYear      = 1990
		floorMaxYear = 2024
		// Literal sign plus its HTML entity spellings, in case the text was
		// not entity-decoded before matching.
		indicators = `copyright|Copyright|COPYRIGHT|\(C\)|\(c\)|©|&copy;|&#169;`
	)
	if maxYear < floorMaxYear {
		maxYear = floorMaxYear
	}

	years := make([]string, 0, maxYear-minYear+1)
	for y := minYear; y <= maxYear; y++ {
		years = append(years, strconv.Itoa(y))
	}
	yearAlt := strings.Join(years, "|")

	return regexp.MustCompile(
		`(?:` + indicators + `)?\s*(?:[a-zA-Z0-9 ,-]+\s*)?[\s,]*(` + yearAlt + `)[\s,<-]+(?:` + indicators + `|` + yearAlt + `)?`,
	)
}
|
||
|
||
// cleanText normalizes raw page text before year matching. It strips
// <span> tags and HTML comment delimiters, turns non-breaking spaces into
// plain spaces, and rewrites the copyright sign and en dash (including their
// HTML entity spellings) to "©" and "-".
//
// Replacements are applied one after another in the listed order, so the
// output of an earlier replacement can be consumed by a later one.
func cleanText(text string) string {
	replacements := [...][2]string{
		{"<span>", ""},
		{"</span>", ""},
		{"\u00a0", " "},
		{"&nbsp;", " "},
		// Entity spellings of the copyright sign; replacing "©" with itself
		// would leave these untouched.
		{"&copy;", "©"},
		{"&#169;", "©"},
		{"–", "-"},
		{"&ndash;", "-"},
		{"&#8211;", "-"},
		{"-->", ""},
		{"<!--", ""},
	}
	for _, r := range replacements {
		text = strings.ReplaceAll(text, r[0], r[1])
	}
	return text
}
|
||
// ExtractCopyright extracts all copyright dates or years from the raw response body and returns them as a space-delimited string | ||
func ExtractCopyright(resp *Response) string { | ||
var years []string // To store all matched years | ||
var copyrightyears []string // To store any bonafide copyrights | ||
var copyrightresults string // Declare variables outside the blocks | ||
var yearresults string | ||
|
||
// Convert response data to string and clean it | ||
textContent := string(resp.Data) | ||
textContent = cleanText(textContent) | ||
|
||
|
||
// Apply regex to extract the years and check for indicators | ||
matches := crreYear.FindAllStringSubmatch(textContent, -1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we expect multiple copyright text in pages? If not, we should rethink post-processing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The regex will match strings like Copyright 2024, as well as Copyright 1995-2001, in which case it will display both dates. |
||
for _, match := range matches { | ||
year := strings.TrimSpace(match[1]) | ||
|
||
// Check if the year has a copyright indicator around it | ||
if strings.Contains(match[0], "copyright") || strings.Contains(match[0], "Copyright") || strings.Contains(match[0], "COPYRIGHT") || strings.Contains(match[0], "(C)") || strings.Contains(match[0], "(c)") || strings.Contains(match[0], "©") || strings.Contains(match[0], "©") || strings.Contains(match[0], "©") { | ||
copyrightyears = append(copyrightyears, year) | ||
} | ||
|
||
years = append(years, year) | ||
} | ||
|
||
// If we have any copyrights found, craft our string | ||
if len(copyrightyears) > 0 { | ||
// Sort, unique, and flatten our array | ||
sort.Strings(copyrightyears) | ||
|
||
// Make the years list unique | ||
uniqueCopyrightYears := make([]string, 0, len(copyrightyears)) | ||
seen := make(map[string]bool) | ||
for _, copyrightyear := range copyrightyears { | ||
if !seen[copyrightyear] { | ||
uniqueCopyrightYears = append(uniqueCopyrightYears, copyrightyear) | ||
seen[copyrightyear] = true | ||
} | ||
} | ||
|
||
green := "\033[32m" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We're using |
||
reset := "\033[0m" | ||
copyrightresults = "Copyright: " + green + strings.Join(uniqueCopyrightYears, " ") + reset | ||
return copyrightresults | ||
} | ||
|
||
if len(years) > 0 { | ||
sort.Strings(years) | ||
|
||
// Make the years list unique | ||
uniqueYears := make([]string, 0, len(years)) | ||
seen := make(map[string]bool) | ||
for _, year := range years { | ||
if !seen[year] { | ||
uniqueYears = append(uniqueYears, year) | ||
seen[year] = true | ||
} | ||
} | ||
yearresults = "Possible Years: " + strings.Join(uniqueYears, " ") | ||
return yearresults | ||
} | ||
|
||
return "" | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -245,6 +245,7 @@ func New(options *Options) (*Runner, error) { | |
runner.options.protocol = httpx.HTTPorHTTPS | ||
scanopts.VHost = options.VHost | ||
scanopts.OutputTitle = options.ExtractTitle | ||
scanopts.OutputCopyright = options.ExtractCopyright | ||
scanopts.OutputStatusCode = options.StatusCode | ||
scanopts.OutputLocation = options.Location | ||
scanopts.OutputContentLength = options.ContentLength | ||
|
@@ -1800,6 +1801,21 @@ retry: | |
builder.WriteRune(']') | ||
} | ||
|
||
var copyright string | ||
if httpx.CanHaveTitleTag(resp.GetHeaderPart("Content-Type", ";")) { | ||
copyright = httpx.ExtractCopyright(resp) // This will return a space-delimited string of years | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we extract copyright text here and not under There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I basically just copied the same functions that exist for Title extraction, but changed them to copyright instead. If there is a better way, or if I mis-copied that format from Title extraction, happy to mod. |
||
} | ||
|
||
if scanopts.OutputCopyright && copyright != "" { | ||
builder.WriteString(" [") | ||
if !scanopts.OutputWithNoColor { | ||
builder.WriteString(aurora.Cyan(copyright).String()) | ||
} else { | ||
builder.WriteString(copyright) | ||
} | ||
builder.WriteRune(']') | ||
} | ||
|
||
var bodyPreview string | ||
if r.options.ResponseBodyPreviewSize > 0 && resp != nil { | ||
bodyPreview = string(resp.Data) | ||
|
@@ -2243,6 +2259,7 @@ retry: | |
Location: resp.GetHeaderPart("Location", ";"), | ||
ContentType: resp.GetHeaderPart("Content-Type", ";"), | ||
Title: title, | ||
Copyright: copyright, | ||
str: builder.String(), | ||
VHost: isvhost, | ||
WebServer: serverHeader, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're limiting the year statically with 2024. This must be dynamic. For example, it will not detect `© 2025 Dummy Media Group. All Rights Reserved.`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That makes sense, I'll extend it through 2029 if that is acceptable. I am trying to avoid false positives so trying to keep it to a realistic range.