Skip to content

Commit 2d39d0b

Browse files
FIX/ENH: HttpMixin refactored and various fixes
**General**
- Removed the 'requests' MissingDependencyError, because requests is a core library of intelmq
- Removed HTTP variables from the Bot class in favor of HttpMixin
- Removed the attempt to import requests in pipeline; it's a core library of intelmq
- Added additional configuration variables to HttpMixin (from the Bot class)

**Bots**
- GitHub API is now using HttpMixin
- MS Azure Collector is now using HttpMixin
- DO-Portal Expert is now using HttpMixin
- GeoHash is now using MissingDependencyError instead of ValueError (consistency)
- HttpContentExpert is now using HttpMixin
- HttpStatusExpert is now using HttpMixin
- NationalCERTContactCertATExpert is now using HttpMixin
- RDAPExpert is now using HttpMixin
- RIPEExpert is now using HttpMixin
- SplunkSavedSearchExpert is now using HttpMixin
- TuencyExpert is now using HttpMixin
- RestAPIOutput is now using HttpMixin

**Bot tests**
- GitHub API Collector is now using requests_mock instead of MagicMock (consistency)
- RestAPI Output is now using correct headers

Fixes #2150
Fixes #2137

Signed-off-by: Sebastian Waldbauer <[email protected]>
1 parent 8d1c926 commit 2d39d0b

File tree

23 files changed

+101
-245
lines changed

23 files changed

+101
-245
lines changed

intelmq/bots/collectors/github_api/REQUIREMENTS.txt

Lines changed: 0 additions & 4 deletions
This file was deleted.

intelmq/bots/collectors/github_api/_collector_github_api.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,10 @@
77
GITHUB API Collector bot
88
"""
99
import base64
10+
from requests import exceptions
1011

1112
from intelmq.lib.bot import CollectorBot
12-
13-
try:
14-
import requests
15-
except ImportError:
16-
requests = None
13+
from intelmq.lib.mixins import HttpMixin
1714

1815
static_params = {
1916
'headers': {
@@ -22,14 +19,11 @@
2219
}
2320

2421

25-
class GithubAPICollectorBot(CollectorBot):
22+
class GithubAPICollectorBot(CollectorBot, HttpMixin):
2623
basic_auth_username = None
2724
basic_auth_password = None
2825

2926
def init(self):
30-
if requests is None:
31-
raise ValueError('Could not import requests. Please install it.')
32-
3327
self.__user_headers = static_params['headers']
3428
if self.basic_auth_username is not None and self.basic_auth_password is not None:
3529
self.__user_headers.update(self.__produce_auth_header(self.basic_auth_username, self.basic_auth_password))
@@ -47,13 +41,13 @@ def process_request(self):
4741

4842
def github_api(self, api_path: str, **kwargs) -> dict:
4943
try:
50-
response = requests.get(f"{api_path}", params=kwargs, headers=self.__user_headers)
44+
response = self.http_get(api_path, headers=self.__user_headers, params=kwargs)
5145
if response.status_code == 401:
5246
# bad credentials
5347
raise ValueError(response.json()['message'])
5448
else:
5549
return response.json()
56-
except requests.RequestException:
50+
except exceptions.RequestException:
5751
raise ValueError(f"Unknown repository {api_path!r}.")
5852

5953
@staticmethod

intelmq/bots/collectors/github_api/collector_github_contents_api.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,14 @@
1414
'regex': file regex (DEFAULT = '*.json')
1515
"""
1616
import re
17+
from requests import exceptions
1718

1819
from intelmq.lib.exceptions import InvalidArgument
1920
from intelmq.bots.collectors.github_api._collector_github_api import GithubAPICollectorBot
21+
from intelmq.lib.mixins import HttpMixin
2022

21-
try:
22-
import requests
23-
except ImportError:
24-
requests = None
2523

26-
27-
class GithubContentsAPICollectorBot(GithubAPICollectorBot):
24+
class GithubContentsAPICollectorBot(GithubAPICollectorBot, HttpMixin):
2825
"Collect files from a GitHub repository via the API. Optionally with GitHub credentials."
2926
regex: str = None # TODO: could be re
3027
repository: str = None
@@ -62,7 +59,7 @@ def process_request(self):
6259
if item['extra'] != {}:
6360
report.add('extra.file_metadata', item['extra'])
6461
self.send_message(report)
65-
except requests.RequestException as e:
62+
except exceptions.RequestException as e:
6663
raise ConnectionError(e)
6764

6865
def __recurse_repository_files(self, base_api_url: str, extracted_github_files: list = None) -> list:
@@ -75,7 +72,7 @@ def __recurse_repository_files(self, base_api_url: str, extracted_github_files:
7572
elif github_file['type'] == 'file' and bool(re.search(self.regex, github_file['name'])):
7673
extracted_github_file_data = {
7774
'download_url': github_file['download_url'],
78-
'content': requests.get(github_file['download_url']).content,
75+
'content': self.http_get(github_file['download_url']).content,
7976
'extra': {}
8077
}
8178
for field_name in self.__extra_fields:

intelmq/bots/collectors/mail/collector_mail_url.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
"""
99
import io
1010
import re
11+
from requests import exceptions
1112

1213
from intelmq.lib.mixins import HttpMixin
1314
from intelmq.lib.splitreports import generate_reports
@@ -50,7 +51,7 @@ def process_message(self, uid, message):
5051
self.logger.info("Downloading report from %r.", url)
5152
try:
5253
resp = self.http_get(url)
53-
except requests.exceptions.Timeout:
54+
except exceptions.Timeout:
5455
self.logger.error("Request timed out %i times in a row." %
5556
self.http_timeout_max_tries)
5657
erroneous = True

intelmq/bots/collectors/microsoft/collector_azure.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from intelmq.lib.bot import CollectorBot
1313
from intelmq.lib.exceptions import MissingDependencyError
14-
from intelmq.lib.mixins import CacheMixin
14+
from intelmq.lib.mixins import CacheMixin, HttpMixin
1515

1616
try:
1717
from azure.storage.blob import ContainerClient
@@ -23,7 +23,7 @@
2323
create_configuration = None # noqa
2424

2525

26-
class MicrosoftAzureCollectorBot(CollectorBot, CacheMixin):
26+
class MicrosoftAzureCollectorBot(CollectorBot, CacheMixin, HttpMixin):
2727
"Fetch data blobs from a Microsoft Azure container"
2828
connection_string: str = "<insert your connection string here>"
2929
container_name: str = "<insert the container name>"

intelmq/bots/collectors/shodan/collector_stream.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from typing import List
2020

2121
from intelmq.lib.bot import CollectorBot
22+
from intelmq.lib.mixins import HttpMixin
2223

2324
try:
2425
import shodan
@@ -27,7 +28,7 @@
2728
shodan = None
2829

2930

30-
class ShodanStreamCollectorBot(CollectorBot):
31+
class ShodanStreamCollectorBot(CollectorBot, HttpMixin):
3132
"Collect the Shodan stream from the Shodan API"
3233
api_key: str = "<INSERT your API key>"
3334
countries: List[str] = []
@@ -36,7 +37,7 @@ def init(self):
3637
if shodan is None:
3738
raise ValueError("Library 'shodan' is needed but not installed.")
3839

39-
self.set_request_parameters()
40+
self.setup()
4041
if tuple(int(v) for v in pkg_resources.get_distribution("shodan").version.split('.')) <= (1, 8, 1):
4142
if self.proxy:
4243
raise ValueError('Proxies are given but shodan-python > 1.8.1 is needed for proxy support.')

intelmq/bots/experts/do_portal/expert.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,40 +8,26 @@
88
a "502 Bad Gateway" status code is treated the same as a timeout,
99
i.e. will be retried instead of a fail.
1010
"""
11-
try:
12-
import requests
13-
except ImportError:
14-
requests = None
15-
11+
from intelmq.lib.mixins import HttpMixin
1612
import intelmq.lib.utils as utils
1713
from intelmq.lib.bot import ExpertBot
1814

1915

20-
class DoPortalExpertBot(ExpertBot):
16+
class DoPortalExpertBot(ExpertBot, HttpMixin):
2117
"""Retrieve abuse contact information for the source IP address from a do-portal instance"""
2218
mode: str = "append"
2319
portal_api_key: str = None
2420
portal_url: str = None
2521

2622
def init(self):
27-
if requests is None:
28-
raise ValueError("Library 'requests' could not be loaded. Please install it.")
29-
30-
self.set_request_parameters()
31-
3223
self.url = self.portal_url + '/api/1.0/ripe/contact?cidr=%s'
3324
self.http_header.update({
3425
"Content-Type": "application/json",
3526
"Accept": "application/json",
3627
"API-Authorization": self.portal_api_key
3728
})
3829

39-
self.session = utils.create_request_session(self)
40-
retries = requests.urllib3.Retry.from_int(self.http_timeout_max_tries)
41-
retries.status_forcelist = [502]
42-
adapter = requests.adapters.HTTPAdapter(max_retries=retries)
43-
self.session.mount('http://', adapter)
44-
self.session.mount('https://', adapter)
30+
self.session = self.http_session()
4531

4632
def process(self):
4733
event = self.receive_message()

intelmq/bots/experts/geohash/expert.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
https://github.com/joyanujoy/geolib
1010
'''
1111
from intelmq.lib.bot import ExpertBot
12+
from intelmq.lib.exceptions import MissingDependencyError
1213

1314
try:
1415
from geolib import geohash
@@ -23,7 +24,7 @@ class GeohashExpertBot(ExpertBot):
2324

2425
def init(self):
2526
if not geohash:
26-
raise ValueError("Library 'geolib' is required, please install it.")
27+
raise MissingDependencyError("geolib")
2728

2829
def process(self):
2930
event = self.receive_message()

intelmq/bots/experts/http/expert_content.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
from typing import List
88

99
from intelmq.lib.bot import ExpertBot
10-
from intelmq.lib.utils import create_request_session
10+
from intelmq.lib.mixins import HttpMixin
1111

1212

13-
class HttpContentExpertBot(ExpertBot):
13+
class HttpContentExpertBot(ExpertBot, HttpMixin):
1414
"""
1515
Test if a given string is part of the content for a given URL
1616
@@ -29,8 +29,7 @@ class HttpContentExpertBot(ExpertBot):
2929
__session = None
3030

3131
def init(self):
32-
self.set_request_parameters()
33-
self.__session = create_request_session(self)
32+
self.__session = self.http_session()
3433

3534
def process(self):
3635
event = self.receive_message()

intelmq/bots/experts/http/expert_status.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@
88

99
from intelmq.lib.bot import ExpertBot
1010
from intelmq.lib.utils import create_request_session
11+
from intelmq.lib.mixins import HttpMixin
1112

1213

13-
class HttpStatusExpertBot(ExpertBot):
14+
class HttpStatusExpertBot(ExpertBot, HttpMixin):
1415
"""
1516
Fetch the HTTP Status for a given URL
1617
@@ -31,8 +32,7 @@ def process(self):
3132
event = self.receive_message()
3233

3334
if self.field in event:
34-
self.set_request_parameters()
35-
session = create_request_session(self)
35+
session = self.http_session()
3636

3737
try:
3838
response = session.get(event[self.field])

intelmq/bots/experts/national_cert_contact_certat/expert.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,22 @@
2020
"""
2121

2222
from intelmq.lib.bot import ExpertBot
23+
from intelmq.lib.mixins import HttpMixin
2324
from intelmq.lib.utils import create_request_session
2425
from intelmq.lib.exceptions import MissingDependencyError
2526

26-
try:
27-
import requests
28-
except ImportError:
29-
requests = None
30-
3127

3228
URL = 'https://contacts.cert.at/cgi-bin/abuse-nationalcert.pl'
3329

3430

35-
class NationalCERTContactCertATExpertBot(ExpertBot):
31+
class NationalCERTContactCertATExpertBot(ExpertBot, HttpMixin):
3632
"""Add country and abuse contact information from the CERT.at national CERT Contact Database. Set filter to true if you want to filter out events for Austria. Set overwrite_cc to true if you want to overwrite an existing country code value"""
3733
filter: bool = False
3834
http_verify_cert: bool = True
3935
overwrite_cc: bool = False
4036

4137
def init(self):
42-
if requests is None:
43-
raise MissingDependencyError("requests")
44-
45-
self.set_request_parameters()
46-
self.session = create_request_session(self)
38+
self.session = self.http_session()
4739

4840
def process(self):
4941
event = self.receive_message()

intelmq/bots/experts/rdap/expert.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,13 @@
33
# SPDX-License-Identifier: AGPL-3.0-or-later
44

55
# -*- coding: utf-8 -*-
6+
import requests
67
from intelmq.lib.bot import ExpertBot
78
from intelmq.lib.utils import create_request_session
8-
from intelmq.lib.exceptions import MissingDependencyError
9-
from intelmq.lib.mixins import CacheMixin
9+
from intelmq.lib.mixins import CacheMixin, HttpMixin
1010

11-
try:
12-
import requests
13-
except ImportError:
14-
requests = None
1511

16-
17-
class RDAPExpertBot(ExpertBot, CacheMixin):
12+
class RDAPExpertBot(ExpertBot, CacheMixin, HttpMixin):
1813
""" Get RDAP data"""
1914
rdap_order: list = ['abuse', 'technical', 'administrative', 'registrant', 'registrar']
2015
rdap_bootstrapped_servers: dict = {}
@@ -30,11 +25,7 @@ class RDAPExpertBot(ExpertBot, CacheMixin):
3025
__session: requests.Session
3126

3227
def init(self):
33-
if requests is None:
34-
raise MissingDependencyError("requests")
35-
36-
self.set_request_parameters()
37-
self.__session = create_request_session(self)
28+
self.__session = self.http_session()
3829

3930
# get overall rdap data from iana
4031
resp = self.__session.get('https://data.iana.org/rdap/dns.json')
@@ -73,7 +64,7 @@ def process(self):
7364
if result:
7465
event.add('source.abuse_contact', result, overwrite=self.overwrite)
7566
else:
76-
self.__session = create_request_session(self)
67+
self.__session = self.http_session()
7768
domain_parts = url.split('.')
7869
domain_suffix = None
7970
while domain_suffix is None:

intelmq/bots/experts/ripe/expert.py

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,7 @@
1515
import intelmq.lib.utils as utils
1616
from intelmq.lib.bot import ExpertBot
1717
from intelmq.lib.exceptions import MissingDependencyError
18-
from intelmq.lib.mixins import CacheMixin
19-
20-
try:
21-
import requests
22-
except ImportError:
23-
requests = None
18+
from intelmq.lib.mixins import CacheMixin, HttpMixin
2419

2520

2621
STATUS_CODE_ERROR = 'HTTP status code was {}. Possible problem at the connection endpoint or network issue.'
@@ -41,7 +36,7 @@ def clean_geo(geo_data):
4136
return geo_data
4237

4338

44-
class RIPEExpertBot(ExpertBot, CacheMixin):
39+
class RIPEExpertBot(ExpertBot, CacheMixin, HttpMixin):
4540
"""Fetch abuse contact and/or geolocation information for the source and/or destination IP addresses and/or ASNs of the events"""
4641
mode: str = "append"
4742
query_ripe_db_asn: bool = True
@@ -77,22 +72,14 @@ class RIPEExpertBot(ExpertBot, CacheMixin):
7772
}
7873

7974
def init(self):
80-
if requests is None:
81-
raise MissingDependencyError("requests")
82-
8375
self.__query = {
8476
"db_asn": self.query_ripe_db_asn,
8577
"db_ip": self.query_ripe_db_ip,
8678
"stat_asn": self.query_ripe_stat_asn,
8779
"stat_ip": self.query_ripe_stat_ip,
8880
"stat_geo": self.query_ripe_stat_geolocation,
8981
}
90-
91-
self.__initialize_http_session()
92-
93-
def __initialize_http_session(self):
94-
self.set_request_parameters()
95-
self.http_session = utils.create_request_session(self)
82+
self.session = self.http_session()
9683

9784
def process(self):
9885
event = self.receive_message()
@@ -134,8 +121,8 @@ def __perform_cached_query(self, type, resource):
134121
else:
135122
return json.loads(cached_value)
136123
else:
137-
response = self.http_session.get(self.QUERY[type].format(resource),
138-
data="", timeout=self.http_timeout_sec)
124+
response = self.session.get(self.QUERY[type].format(resource),
125+
data="", timeout=self.http_timeout_sec)
139126

140127
if response.status_code != 200:
141128
if type == 'db_asn' and response.status_code == 404:

0 commit comments

Comments
 (0)