Skip to content

Commit 39ee476

Browse files
kevin-bates authored and lresende committed
Honor http policy configured in yarn-site xml (#41)
To give clients a way to use HTTPS with their resource manager, this change enables that behavior ONLY when no resource manager address is provided. In that case, the module first checks whether the HTTP policy (yarn.http.policy) is HTTPS_ONLY and, if so, constructs the appropriate HTTPS endpoint. Note that the changes are minimally invasive so that clients do not have to change their code.
1 parent 6602471 commit 39ee476

File tree

5 files changed

+78
-18
lines changed

5 files changed

+78
-18
lines changed

tests/test_hadoop_conf.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@
2828
<name>yarn.resourcemanager.webapp.address</name>
2929
<value>localhost:8022</value>
3030
</property>
31+
<property>
32+
<name>yarn.resourcemanager.webapp.https.address</name>
33+
<value>localhost:8024</value>
34+
</property>
35+
<property>
36+
<name>yarn.http.policy</name>
37+
<value>HTTPS_ONLY</value>
38+
</property>
3139
</configuration>
3240
""".encode('latin1')
3341

@@ -42,6 +50,14 @@ def test_parse(self):
4250
value = hadoop_conf.parse(f.name, key)
4351
self.assertEqual('localhost:8022', value)
4452

53+
key = 'yarn.resourcemanager.webapp.https.address'
54+
value = hadoop_conf.parse(f.name, key)
55+
self.assertEqual('localhost:8024', value)
56+
57+
key = 'yarn.http.policy'
58+
value = hadoop_conf.parse(f.name, key)
59+
self.assertEqual('HTTPS_ONLY', value)
60+
4561
with NamedTemporaryFile() as f:
4662
f.write(empty_config)
4763
f.flush()
@@ -50,6 +66,14 @@ def test_parse(self):
5066
value = hadoop_conf.parse(f.name, key)
5167
self.assertEqual(None, value)
5268

69+
key = 'yarn.resourcemanager.webapp.https.address'
70+
value = hadoop_conf.parse(f.name, key)
71+
self.assertEqual(None, value)
72+
73+
key = 'yarn.http.policy'
74+
value = hadoop_conf.parse(f.name, key)
75+
self.assertEqual(None, value)
76+
5377
def test_get_resource_host_port(self):
5478
with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
5579
with patch('yarn_api_client.hadoop_conf._get_rm_ids') as get_rm_ids_mock:
@@ -100,9 +124,10 @@ def test_get_rm_ids(self):
100124
rm_list = hadoop_conf._get_rm_ids(hadoop_conf.CONF_DIR)
101125
self.assertIsNone(rm_list)
102126

127+
@mock.patch('yarn_api_client.hadoop_conf._is_https_only')
103128
@mock.patch(_http_request_method)
104129
@mock.patch(_http_getresponse_method)
105-
def test_check_is_active_rm(self, http_getresponse_mock, http_conn_request_mock):
130+
def test_check_is_active_rm(self, http_getresponse_mock, http_conn_request_mock, is_https_only_mock):
106131
class ResponseMock():
107132
def __init__(self, status, header_dict):
108133
self.status = status
@@ -114,6 +139,7 @@ def getheader(self, header_key, default_return):
114139
else:
115140
return default_return
116141

142+
is_https_only_mock.return_value = False
117143
http_conn_request_mock.return_value = None
118144
http_getresponse_mock.return_value = ResponseMock(OK, {})
119145
self.assertTrue(hadoop_conf.check_is_active_rm('example2', '8022'))

tests/test_resource_manager.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,27 @@
44

55
from yarn_api_client.resource_manager import ResourceManager
66
from yarn_api_client.errors import IllegalArgumentError
7+
from yarn_api_client.hadoop_conf import _is_https_only
78

89

910
@patch('yarn_api_client.resource_manager.ResourceManager.request')
1011
class ResourceManagerTestCase(TestCase):
1112
def setUp(self):
1213
self.rm = ResourceManager('localhost')
1314

15+
@patch('yarn_api_client.resource_manager._is_https_only')
1416
@patch('yarn_api_client.resource_manager.get_resource_manager_host_port')
15-
def test__init__(self, get_config_mock, request_mock):
16-
get_config_mock.return_value = (None, None)
17-
ResourceManager()
17+
def test__init__(self, get_config_mock, is_https_only_mock, request_mock):
18+
get_config_mock.return_value = ('example', '8024')
19+
is_https_only_mock.return_value = True
20+
21+
rm = ResourceManager()
22+
1823
get_config_mock.assert_called_with()
24+
self.assertEqual(rm.address, 'example')
25+
self.assertEqual(rm.port, '8024')
26+
is_https_only_mock.assert_called_with()
27+
self.assertEqual(rm.is_https, True)
1928

2029
def test_cluster_information(self, request_mock):
2130
self.rm.cluster_information()

yarn_api_client/base.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ class BaseYarnAPI(object):
1616
__logger = None
1717
response_class = Response
1818

19-
def __init__(self, address=None, port=None, timeout=None, kerberos_enabled=None):
20-
self.address, self.port, self.timeout, self.kerberos_enabled = address, port, timeout, kerberos_enabled
19+
def __init__(self, address=None, port=None, timeout=None, kerberos_enabled=None, is_https=False):
20+
self.address, self.port, self.timeout, self.kerberos_enabled, self.is_https = \
21+
address, port, timeout, kerberos_enabled, is_https
2122

2223
def _validate_configuration(self):
2324
if self.address is None:
@@ -26,7 +27,8 @@ def _validate_configuration(self):
2627
raise ConfigurationError('API port is not set')
2728

2829
def request(self, api_path, method='GET', **kwargs):
29-
api_endpoint = 'http://{}:{}{}'.format(self.address, self.port, api_path)
30+
scheme = 'https' if self.is_https else 'http'
31+
api_endpoint = '{}://{}:{}{}'.format(scheme, self.address, self.port, api_path)
3032

3133
self.logger.info('API Endpoint {}'.format(api_endpoint))
3234

yarn_api_client/hadoop_conf.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
import os
33
import xml.etree.ElementTree as ET
44
try:
5-
from httplib import HTTPConnection, OK
5+
from httplib import HTTPConnection, HTTPSConnection, OK
66
except ImportError:
7-
from http.client import HTTPConnection, OK
7+
from http.client import HTTPConnection, HTTPSConnection, OK
88

99
CONF_DIR = os.getenv('HADOOP_CONF_DIR', '/etc/hadoop/conf')
1010

@@ -15,25 +15,45 @@ def _get_rm_ids(hadoop_conf_path):
1515
rm_ids = rm_ids.split(',')
1616
return rm_ids
1717

18+
1819
def _get_maximum_container_memory(hadoop_conf_path):
1920
container_memory = int(parse(os.path.join(hadoop_conf_path,'yarn-site.xml'), 'yarn.nodemanager.resource.memory-mb'))
2021
return container_memory
2122

23+
24+
def _is_https_only():
25+
# determine if HTTPS_ONLY is the configured policy, else use http
26+
hadoop_conf_path = CONF_DIR
27+
http_policy = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), 'yarn.http.policy')
28+
if http_policy == 'HTTPS_ONLY':
29+
return True
30+
return False
31+
32+
2233
def _get_resource_manager(hadoop_conf_path, rm_id=None):
23-
prop_name = 'yarn.resourcemanager.webapp.address'
24-
if rm_id is not None:
25-
rm_webapp_address = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), '%s.%s' % (prop_name, rm_id))
34+
# compose property name based on policy (and rm_id)
35+
if _is_https_only():
36+
prop_name = 'yarn.resourcemanager.webapp.https.address'
2637
else:
27-
rm_webapp_address = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), prop_name)
38+
prop_name = 'yarn.resourcemanager.webapp.address'
39+
40+
# Adjust prop_name if rm_id is set
41+
if rm_id:
42+
prop_name = "{name}.{rm_id}".format(name=prop_name, rm_id=rm_id)
43+
44+
rm_webapp_address = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), prop_name)
2845
if rm_webapp_address is not None:
2946
[host, port] = rm_webapp_address.split(':')
30-
return (host, port)
47+
return host, port
3148
else:
3249
return None
3350

3451

3552
def check_is_active_rm(rm_web_host, rm_web_port):
36-
conn = HTTPConnection(rm_web_host, rm_web_port)
53+
if _is_https_only():
54+
conn = HTTPSConnection(rm_web_host, rm_web_port)
55+
else:
56+
conn = HTTPConnection(rm_web_host, rm_web_port)
3757
try:
3858
conn.request('GET', '/cluster')
3959
except:

yarn_api_client/resource_manager.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
from .constants import YarnApplicationState, FinalApplicationStatus
55
from .errors import IllegalArgumentError
66
from .hadoop_conf import get_resource_manager_host_port,\
7-
check_is_active_rm, _get_maximum_container_memory, CONF_DIR
7+
check_is_active_rm, _get_maximum_container_memory, CONF_DIR, \
8+
_is_https_only
89
from collections import deque
910

11+
1012
class ResourceManager(BaseYarnAPI):
1113
"""
1214
The ResourceManager REST API's allow the user to get information about the
@@ -30,14 +32,15 @@ def __init__(self, address=None, port=8088, alt_address=None, alt_port=8088, tim
3032
if address is None:
3133
self.logger.debug('Get configuration from hadoop conf dir: {conf_dir}'.format(conf_dir=CONF_DIR))
3234
address, port = get_resource_manager_host_port()
35+
is_https = _is_https_only()
3336
else:
37+
is_https = False
3438
if alt_address: # Determine active RM
3539
if not check_is_active_rm(address, port):
3640
# Default is not active, check alternate
3741
if check_is_active_rm(alt_address, alt_port):
3842
address, port = alt_address, alt_port
39-
40-
super(ResourceManager, self).__init__(address, port, timeout, kerberos_enabled)
43+
super(ResourceManager, self).__init__(address, port, timeout, kerberos_enabled, is_https)
4144

4245
def get_active_host_port(self):
4346
"""

0 commit comments

Comments
 (0)