Skip to content

Commit 7965700

Browse files
authored May 27, 2022
fix/misc fixes (csirtgadgets#152)
* support `location` field to fix cifv3 geo gatherer
  * https://github.com/csirtgadgets/bearded-avenger/blob/master/cif/gatherer/geo.py#L112
* strip tabs and newlines from str fields
* tweak fqdn regex to align more closely with DNS records, a la RFC 2181
  * ref https://regex101.com/r/FLA9Bv/59
1 parent 2d5bbaf commit 7965700

File tree

3 files changed: +43 -8 lines changed

3 files changed: +43 -8 lines changed
 

‎csirtg_indicator/constants.py

+3 -7
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@
4343
]
4444

4545
FIELDS_GEO = [
46-
'cc', 'latitude', 'timezone', 'longitude', 'city', 'region'
46+
'cc', 'latitude', 'timezone', 'longitude', 'city', 'region', 'location'
4747
]
4848

4949
FIELDS_IP = [
@@ -59,12 +59,8 @@
5959
# http://stackoverflow.com/a/17871737
6060
RE_IPV6 = re.compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')
6161

62-
# http://goo.gl/Cztyn2 -- probably needs more work
63-
# http://stackoverflow.com/a/26987741/7205341
64-
# ^((xn--)?(--)?[a-zA-Z0-9-_@]+(-[a-zA-Z0-9]+)*\.)+[a-zA-Z]{2,}(--p1ai)?$
65-
#RE_FQDN = re.compile('^((?!-))(xn--)?[a-z0-9][a-z0-9-_\.]{0,61}[a-z0-9]{0,1}\.(xn--)?([a-z0-9\-]{1,61}|[a-z0-9-]{1,30}\.[a-z]{2,})$')
66-
# http://stackoverflow.com/questions/14402407/maximum-length-of-a-domain-name-without-the-http-www-com-parts
67-
RE_FQDN = re.compile(r'^((?!-))(xn--)?[a-z0-9][a-z0-9-_\.]{0,245}[a-z0-9]{0,1}\.(xn--)?([a-z0-9\-]{1,61}|[a-z0-9-]{1,30}\.[a-z]{2,})\.?$')
62+
# https://regex101.com/r/FLA9Bv/59
63+
RE_FQDN = re.compile(r'^(?!(?:[\w]+?\.)?\-[\w\.\-]*?)(?![\w\.]+?\-\.(?:[\w\.\-]+?))(?=[\w\.\-]*?[\w\.\-]*?)(?![\w\.\-]{254})(?!(?:\.?[\w\-\.]*?[\w\-]{64,}\.)+?)(?:[\w\-]+?\.)+?(?![^a-zA-Z])[A-Za-z0-9\-]{2,64}(?<!\-)\.?$')
6864
#RE_URI_SCHEMES = re.compile(r'^(https?|ftp)://')
6965
RE_URI_SCHEMES = re.compile(r'^(https?|ftp)$')
7066
RE_EMAIL = re.compile(r"^[-\w+.!#$%&'*\/=?^_`{|}~;]+@[-.0-9a-zA-Z][-.0-9a-zA-Z]*[a-zA-Z]{2,}$")

‎csirtg_indicator/indicator.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
import textwrap
1616
import json
1717
import sys
18+
import re
1819
if sys.version_info > (3,):
1920
from urllib.parse import urlparse
2021
basestring = (str, bytes)
@@ -57,7 +58,7 @@ def __init__(self, indicator=None, version=PROTOCOL_VERSION, **kwargs):
5758

5859
if isinstance(kwargs[k], basestring):
5960
# always strip whitespace
60-
kwargs[k] = kwargs[k].strip()
61+
kwargs[k] = re.sub(r'\r|\t|\n', ' ', kwargs[k]).strip()
6162

6263
if self._lowercase is True and k != 'reference': # don't lower reference which may be a url
6364
kwargs[k] = kwargs[k].lower()

‎test/test_fqdn.py

+38
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
from csirtg_indicator import Indicator
2+
from csirtg_indicator.exceptions import InvalidIndicator
23
from faker import Faker
4+
import pytest
35
fake = Faker()
46

57

@@ -13,6 +15,37 @@
1315
'laser-retargeting-server-production.us-east-1-prod-core-edge-public.spongecell.net',
1416
'example.org.',
1517
'an0ther.exAmple.orG.',
18+
'under_score.com',
19+
'_dc-mx.test.someotherdom.com.',
20+
]
21+
22+
BAD = [
23+
'dot.',
24+
'this.is-bad-',
25+
'this-too.is_bad-.',
26+
'this.is.bad-.com',
27+
'space com',
28+
'a.12E.',
29+
'192.168.1.13F',
30+
'underscore.c_om',
31+
'-dash.com',
32+
'dash-.com',
33+
'sub.-dash.com',
34+
'sub-.dash.com',
35+
'-.com',
36+
'-com',
37+
'.com',
38+
'com',
39+
'mkyong.t.t.c',
40+
'mkyong,com',
41+
'mkyong.com/users',
42+
'slash.com/',
43+
'a.123',
44+
'b.123.',
45+
'x.XN--VERMGENSBERATUNG-PWBBJALKJSDFWHATLADKDALKJSDFJWHATLEIUDWLAIFU',
46+
'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkk.com',
47+
'www.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkk.co.uk',
48+
'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcde.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.comm',
1649
]
1750

1851

@@ -45,6 +78,11 @@ def test_fqdn_ok():
4578
d = d.rstrip('.')
4679
assert e.indicator == d.lower()
4780

81+
def test_fqdn_not_ok():
82+
for d in BAD:
83+
with pytest.raises(InvalidIndicator):
84+
e = Indicator(d)
85+
4886

4987
def test_fqdn_subdomain():
5088
data = [

0 commit comments

Comments
 (0)
Please sign in to comment.