-
Notifications
You must be signed in to change notification settings - Fork 311
TAXII Collector bot and STIX Parser bot #2611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
316af6b
873bef8
fd8ed84
38d323c
a9b07cc
c43b644
2382721
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# SPDX-FileCopyrightText: 2025 Ladislav Baco | ||
# SPDX-License-Identifier: AGPL-3.0-or-later | ||
|
||
taxii2-client>=2.3.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
""" | ||
SPDX-FileCopyrightText: 2025 Ladislav Baco | ||
SPDX-License-Identifier: AGPL-3.0-or-later | ||
|
||
Get indicator objects from TAXII server | ||
|
||
Configuration parameters: taxii collection (feed) url, username and password. | ||
""" | ||
|
||
import datetime | ||
import json | ||
from requests.exceptions import HTTPError | ||
|
||
from intelmq.lib.bot import CollectorBot | ||
from intelmq.lib.exceptions import MissingDependencyError | ||
|
||
try: | ||
import taxii2client.v21 as taxii2 | ||
except ImportError: | ||
taxii2 = None | ||
|
||
|
||
class TaxiiCollectorBot(CollectorBot):
    """Collect STIX indicator objects from a single TAXII collection.

    Configuration parameters:
        collection: URL of the TAXII collection (feed) to poll.
        username: user name used to authenticate against the TAXII server.
        password: password used to authenticate against the TAXII server.
        rate_limit: value declared here as the default for the bot's rate limit parameter, in seconds.
        time_delta: look-back window in seconds; only objects added to the
            collection after ``now - time_delta`` are requested.

    One report is sent per returned object, with the JSON-encoded object as ``raw``.
    """
    collection: str = None
    username: str = None
    password: str = None
    rate_limit: int = 3600
    time_delta: int = 3600

    def init(self):
        """Validate the configuration and create the TAXII collection client.

        Raises:
            MissingDependencyError: if the ``taxii2-client`` package could not be imported.
            ValueError: if the collection URL, the username or the password is not configured.
        """
        if taxii2 is None:
            raise MissingDependencyError('taxii2-client')

        if self.collection is None:
            raise ValueError('No TAXII collection URL provided.')
        if self.username is None:
            raise ValueError('No TAXII username provided.')
        if self.password is None:
            raise ValueError('No TAXII password provided.')

        # Use a timezone-aware UTC timestamp: taxii2-client interprets naive datetimes as UTC,
        # so a naive local-time value would shift the ``added_after`` filter by the UTC offset
        # of the host.
        now_utc = datetime.datetime.now(tz=datetime.timezone.utc)
        # NOTE(review): the start of the window is fixed once here, so every later run of
        # process() requests the same, ever-growing time span again - confirm this is intended.
        self._date_after = now_utc - datetime.timedelta(seconds=int(self.time_delta))

        self._taxii_collection = taxii2.Collection(self.collection, user=self.username, password=self.password)

    def process(self):
        """Fetch the indicator objects from the collection and send one report per object.

        HTTP errors raised while talking to the server are logged and otherwise ignored.
        """
        try:
            title = self._taxii_collection.title
            self.logger.info('Collection title: %r.', title)

            # get the indicator objects
            envelope = self._taxii_collection.get_objects(added_after=self._date_after, type='indicator')
            for obj in envelope.get('objects', []):
                report = self.new_report()
                report.add('raw', json.dumps(obj))
                report.add('feed.url', self.collection)
                report.add('feed.code', title)
                self.send_message(report)

        except HTTPError as e:
            self.logger.error('Connection error: %r!', e)
|
||
|
||
BOT = TaxiiCollectorBot |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# SPDX-FileCopyrightText: 2025 Ladislav Baco | ||
# SPDX-License-Identifier: AGPL-3.0-or-later | ||
|
||
stix2-patterns>=2.0.0 |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,149 @@ | ||||||
""" | ||||||
SPDX-FileCopyrightText: 2025 Ladislav Baco | ||||||
SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
|
||||||
Parse indicators objects in STIX format received from TAXII collector | ||||||
""" | ||||||
|
||||||
import json | ||||||
|
||||||
|
||||||
from intelmq.lib.bot import ParserBot | ||||||
from intelmq.lib.exceptions import MissingDependencyError | ||||||
|
||||||
try: | ||||||
import stix2patterns.v21.pattern as stix2_pattern | ||||||
except ImportError: | ||||||
stix2_pattern = None | ||||||
|
||||||
|
||||||
class StixParserBot(ParserBot):
    """Parse STIX indicator objects into IntelMQ events.

    The report is read as a JSON stream. Only objects of type ``indicator`` whose
    ``pattern_type`` is ``stix`` are converted; every other object type or pattern type
    is logged with a warning and skipped.
    """
    parse = ParserBot.parse_json_stream
    recover_line = ParserBot.recover_line_json_stream

    # Object path components accepted on the left side of a comparison expression.
    # Kept as an immutable class-level constant instead of being rebuilt on every call.
    _SUPPORTED_PROPERTY_NAMES = (
        ('value',),
        ('hashes', 'MD5'),
        ('hashes', 'SHA-1'),
        ('hashes', 'SHA-256'),
        # Based on 10.7 Hashing Algorithm Vocabulary, these keys are not valid, but they are used in some feeds (e.g. ETI)
        # https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_ths0b11wzxv3
        ('hashes', 'SHA1'),
        ('hashes', 'SHA256'),
    )

    def init(self):
        """Ensure the optional ``stix2-patterns`` dependency is available.

        Raises:
            MissingDependencyError: if ``stix2-patterns`` could not be imported.
        """
        if stix2_pattern is None:
            raise MissingDependencyError('stix2-patterns')

    def parse_line(self, line, report):
        """Parse one STIX object of indicator type.

        Parameters:
            line: the decoded STIX object (a dict).
            report: the report the object was received in.

        Yields:
            One event per indicator extracted from the STIX pattern of the object.
        """
        object_type = line.get('type', '')
        if object_type != 'indicator':
            # NOTE(review): all non-indicator objects are skipped; a reviewer asked for this
            # to be mentioned in the bot documentation.
            self.logger.warning('Unexpected type of STIX object: %r', object_type)
            return

        pattern = line.get('pattern', '')
        # possible pattern types: stix, pcre, sigma, snort, suricata, yara
        pattern_type = line.get('pattern_type', '')
        if pattern_type != 'stix':
            # NOTE(review): the same documentation remark applies to skipped pattern types.
            self.logger.warning('Unexpected type of pattern expression: %r, pattern: %r', pattern_type, pattern)
            return

        for indicator_type, indicator_value in StixParserBot.parse_stix_pattern(pattern, self.logger):
            event = self.new_event(report)
            # NOTE(review): a reviewer suggested reusing the unparsed input line here instead
            # of encoding the parsed object again.
            event.add('raw', json.dumps(line))
            event.add('comment', line.get('description', ''))
            event.add('extra.labels', line.get('labels', None))
            event.add('time.source', line.get('valid_from', '1970-01-01T00:00:00Z'))

            # An IP address may be passed in domain feeds, or a domain in URL feeds.
            # This violates the STIX format, but it happens in some sources (e.g. in ETI).
            # Such events are dropped quietly: raise_failure=False avoids the cost of raising
            # and handling an exception, and the False result marks the value as invalid.
            if event.add(indicator_type, indicator_value, raise_failure=False) is False:
                continue

            # classification can be overridden by vendor-specific parser below
            event.add('classification.type', 'undetermined')
            self.parse_vendor_specific(event, line, report)
            yield event

    def parse_vendor_specific(self, event, line, report):
        """
        Parse vendor specific details from the STIX 2.1 Indicator object.
        This method by default does nothing and it is called just before IntelMQ event is yielded.
        If we need vendor-specific STIX parser, we can inherit from this class and override this one method.
        """
        return

    @staticmethod
    def _get_value_from_comparison_expression(comparison, logger=None):
        """
        Return the unquoted value of a supported STIX comparison expression.

        STIX Comparison Expressions:
        https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_boiciucr9smf

        comparison is a tuple obtained from stix2patterns.v21.pattern.Pattern(pattern).inspect().comparisons,
        e.g. (['value'], '=', "'http://example.org'"), (['value'], '=', "'127.0.0.1/32'")

        Only expressions using the "=" operator, a supported property name and a
        single-quoted string value are accepted.

        Parameters:
            comparison: the (property path, operator, value) tuple.
            logger: optional logger for reporting rejected expressions.

        Returns:
            The value without the surrounding single quotes, or None if the
            expression is malformed or not supported.
        """
        if len(comparison) != 3:
            if logger:
                logger.warning('Unexpected Comparison Expressions. Expression: %s', comparison)
            return None

        property_name, operator, value = comparison
        supported = (
            # tuple() lets both list and tuple property paths match the constant
            tuple(property_name) in StixParserBot._SUPPORTED_PROPERTY_NAMES
            and operator == '='
            and isinstance(value, str)
            and value.startswith("'")
            and value.endswith("'")
        )
        if not supported:
            if logger:
                logger.info('Unsupported Comparison Expression. Only Comparison Expressions with "equal" comparison operator and "value" or "hashes" property are supported. Expression: %s', comparison)
            return None

        # remove single quotes from returned value
        return value[1:-1]

    @staticmethod
    def parse_stix_pattern(pattern, logger=None):
        """
        Extract IntelMQ indicators from a STIX pattern.

        STIX Patterning:
        https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_e8slinrhxcc9

        Parameters:
            pattern: the STIX pattern string of an indicator object.
            logger: optional logger for reporting unsupported expressions.

        Returns:
            A list of (IntelMQ field name, value) tuples.
        """
        indicators = []
        comparisons = stix2_pattern.Pattern(pattern).inspect().comparisons
        for key, comparison_expressions in comparisons.items():
            for comparison in comparison_expressions:
                value = StixParserBot._get_value_from_comparison_expression(comparison, logger)
                if not value:
                    # Unsupported or empty expression: skip it, otherwise None would be
                    # processed below as if it were a string.
                    continue

                if key == 'url':
                    indicators.append(('source.url', value))
                elif key == 'domain-name':
                    indicators.append(('source.fqdn', value))
                elif key == 'ipv4-addr':
                    # remove port, sometimes the port is present in ETI
                    # NOTE(review): the port is discarded; a reviewer asked whether it could be used.
                    value = value.split(':')[0]
                    # strip CIDR if IPv4 network contains single host only
                    if value.endswith('/32'):
                        value = value[:-3]
                    # check if pattern is in CIDR notation
                    field = 'source.network' if '/' in value else 'source.ip'
                    indicators.append((field, value))
                elif key == 'ipv6-addr':
                    # strip CIDR if IPv6 network contains single host only
                    if value.endswith('/128'):
                        value = value[:-4]
                    # check if pattern is in CIDR notation
                    field = 'source.network' if '/' in value else 'source.ip'
                    indicators.append((field, value))
                elif key == 'file':
                    property_path = comparison[0]
                    if len(property_path) == 2 and property_path[0] == 'hashes':
                        # converts MD5, SHA-1, SHA1, SHA-256, SHA256 to md5, sha1, sha256 used in IntelMQ;
                        # other algorithms never reach this point because their expressions
                        # are rejected as unsupported above
                        hash_algo = property_path[1].lower().replace('-', '')
                        indicators.append(('malware.hash.' + hash_algo, value))
                else:
                    if logger:
                        logger.warning('Unsupported Object Type "%s" in Pattern Expression. Pattern: %s', key, pattern)

        return indicators
|
||||||
|
||||||
BOT = StixParserBot |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is the fallback value of an empty string necessary?