Skip to content

Commit 5e7334d

Browse files
d-w-moorealanking
authored andcommitted
[#626][#627] client redirection is no longer active by default.
When first implemented, the client redirect feature was on by default, meaning that an open() call on a data object always favored a connection to the server hosting the replica indicated by the resource hierarchy resolution outcome, and then subsequent data movements would involve the new server host. This proved problematic in some scenarios, notably whenever the client was unable to connect using the server hostname attached to the storage resource in question - DNS often being a factor. As of now, therefore, the client redirects will no longer happen without explicit "approval" by way of assigning the value of True to either the open() method's allow_redirect parameter or (more globally) to a new configuration setting, named irods.client_configuration.data_objects.allow_redirect
1 parent bde6ce2 commit 5e7334d

File tree

4 files changed

+164
-91
lines changed

4 files changed

+164
-91
lines changed

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,18 @@ The current value of the setting is global in scope (i.e. applies to all
414414
sessions, whenever created) and is always consulted for the creation of
415415
any data object handle to govern that handle's cleanup behavior.
416416

417+
Also, alternatively, the client may opt into a special "redirect" behavior
418+
in which data objects' open() method forge a connection directly to whichever
419+
iRODS server is found to host the selected replica. Data reads and
420+
writes will therefore happen on that alternate network route, instead of
421+
through the originally-connected server. Though not the client's default
422+
behavior, this approach can turn out to be more efficient, particularly
423+
if several concurrent data uploads ("puts") and downloads ("gets") are
424+
happening which might increase traffic on the client's main communication
425+
route with the server. (See, in [Python iRODS Client Settings File](#python-irods-client-settings-file),
426+
the client configuration setting `data_objects.allow_redirect`, which may be
427+
set to True for the aforementioned opt-in.)
428+
417429
Python iRODS Client Settings File
418430
---------------------------------
419431

@@ -470,6 +482,12 @@ variables serving as overrides:
470482
- Default Value: `False`
471483
- Environment Variable Override: `PYTHON_IRODSCLIENT_CONFIG__DATA_OBJECTS__AUTO_CLOSE`
472484

485+
- Setting: Let a call to data object open() redirect to the server whose storage resource hosts the given object's preferred replica.
486+
- Dotted Name: `data_objects.allow_redirect`
487+
- Type: `bool`
488+
- Default Value: `False`
489+
- Environment Variable Override: `PYTHON_IRODSCLIENT_CONFIG__DATA_OBJECTS__ALLOW_REDIRECT`
490+
473491
- Setting: Number of hours to request for the new password entry's TTL (Time To Live) when auto-renewing PAM-authenticated sessions.
474492
- Dotted Name: `legacy_auth.pam.time_to_live_in_hours`
475493
- Type: `int`

irods/client_configuration/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ def xml_parser_default(self,str_value):
5757
# (irods.client_configuration.data_objects is one such category):
5858

5959
class DataObjects(iRODSConfiguration):
60-
__slots__ = ('auto_close',)
60+
__slots__ = ('auto_close',
61+
'allow_redirect',)
6162

6263
def __init__(self):
6364

@@ -71,6 +72,7 @@ def __init__(self):
7172
# >>> irods.client_configuration.data_objects.auto_close = True
7273

7374
self.auto_close = False
75+
self.allow_redirect = False
7476

7577
# #############################################################################
7678
#

irods/manager/data_object_manager.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ def open(self, path, mode,
422422
auto_close = client_config.getter('data_objects','auto_close'), # The default value will be a lambda returning the
423423
# global setting. Use True or False as an override.
424424
returned_values = None, # Used to update session reference, for forging more conns to same host, in irods.parallel.io_main
425-
allow_redirect = True, # This may be set to False to disallow the client redirect-to-resource.
425+
allow_redirect = client_config.getter('data_objects','allow_redirect'),
426426
**options):
427427
_raw_fd_holder = options.get('_raw_fd_holder',[])
428428
# If no keywords are used that would influence the server as to the choice of a storage resource,
@@ -475,6 +475,9 @@ def make_FileOpenRequest(**extra_opts):
475475

476476
use_get_rescinfo_apis = False
477477

478+
if callable(allow_redirect):
479+
allow_redirect = allow_redirect()
480+
478481
if allow_redirect and conn.server_version >= (4,3,1):
479482
key = 'CREATE' if mode[0] in ('w','a') else 'OPEN'
480483
message = iRODSMessage('RODS_API_REQ',

irods/test/data_obj_test.py

Lines changed: 139 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -834,59 +834,62 @@ def do_test_redirect_in_data_object_put_and_get__issue_452(self, content, data_c
834834
self.skipTest('Expects iRODS server version 4.3.1')
835835
LOCAL_FILE = mktemp()
836836
filename = ''
837-
try:
838-
with self.create_simple_resc(hostname = 'localhost') as rescName:
839-
with NamedTemporaryFile(delete = False) as f:
840-
filename = f.name
841-
f.write(content)
842-
data_ctx['initialize']()
843-
sess = data_ctx['session']
844-
remote_name = data_ctx['path']
845-
PUT_LOG = self.In_Memory_Stream(always_unicode = True)
846-
with helpers.enableLogging(logging.getLogger('irods.manager.data_object_manager'),
847-
logging.StreamHandler, (PUT_LOG,), level_ = logging.DEBUG),\
848-
helpers.enableLogging(logging.getLogger('irods.parallel'),
849-
logging.StreamHandler, (PUT_LOG,), level_ = logging.DEBUG):
850-
sess.data_objects.put(filename, remote_name, **{kw.DEST_RESC_NAME_KW: rescName})
851-
def srch(BUF):
852-
nthr = 0
853-
search_text = BUF.getvalue()
854-
find_iterator = itertools.chain( re.finditer(u'redirect_to_host = (\S+)', search_text),
855-
re.finditer(u'target_host = (\S+)', search_text) )
856-
for match in find_iterator:
857-
nthr += 1
858-
self.assertEqual(match.group(1), u'localhost')
859-
occur_threshold = (1 if len(content) <= 32*MEBI else 2)
860-
self.assertGreaterEqual(nthr, occur_threshold)
861-
srch(PUT_LOG)
862-
generator = None
863-
# Activate a read ticket on a new session if necessary, and attempt a GET
864-
if data_ctx['ticket_access']:
865-
for access in iRODSAccess('own',remote_name,self.sess.username), \
866-
iRODSAccess('null',remote_name,sess.username):
867-
self.sess.acls.set(access, admin = True)
868-
generator = self._data_object_and_associated_ticket(data_name=remote_name, auto_delete_data = False, ticket_access='read')
869-
# Emulate the 'with'-block construction for the read ticket:
870-
data_ctx_get = next(generator)
871-
data_ctx_get['initialize']()
872-
sess = data_ctx_get['session']
873-
GET_LOG = self.In_Memory_Stream(always_unicode = True)
874-
with helpers.enableLogging(logging.getLogger('irods.manager.data_object_manager'),
875-
logging.StreamHandler, (GET_LOG,), level_ = logging.DEBUG),\
876-
helpers.enableLogging(logging.getLogger('irods.parallel'),
877-
logging.StreamHandler, (GET_LOG,), level_ = logging.DEBUG):
878-
sess.data_objects.get(remote_name,LOCAL_FILE)
879-
srch(GET_LOG)
880-
with open(LOCAL_FILE,'rb') as get_result:
881-
self.assertTrue(content, get_result.read())
882-
# Finalize the emulated 'with'-block construction for the read ticket, if active:
883-
del generator
884-
data_ctx['finalize']()
885-
finally:
886-
if os.path.isfile(LOCAL_FILE):
887-
os.unlink(LOCAL_FILE)
888-
if filename:
889-
os.unlink(filename)
837+
with config.loadlines(entries=[dict(setting='data_objects.allow_redirect',value=True)]):
838+
try:
839+
with self.create_simple_resc(hostname = 'localhost') as rescName:
840+
with NamedTemporaryFile(delete = False) as f:
841+
filename = f.name
842+
f.write(content)
843+
data_ctx['initialize']()
844+
sess = data_ctx['session']
845+
remote_name = data_ctx['path']
846+
PUT_LOG = self.In_Memory_Stream(always_unicode = True)
847+
with helpers.enableLogging(logging.getLogger('irods.manager.data_object_manager'),
848+
logging.StreamHandler, (PUT_LOG,), level_ = logging.DEBUG),\
849+
helpers.enableLogging(logging.getLogger('irods.parallel'),
850+
logging.StreamHandler, (PUT_LOG,), level_ = logging.DEBUG):
851+
sess.data_objects.put(filename, remote_name, **{kw.DEST_RESC_NAME_KW: rescName})
852+
# Within a buffer 'BUF' (is expected to be an io.StringIO object) assert the presence of certain
853+
# log text that will indicate a redirection was performed.
854+
def assert_expected_redirection_logging(BUF):
855+
nthr = 0
856+
search_text = BUF.getvalue()
857+
find_iterator = itertools.chain( re.finditer(u'redirect_to_host = (\S+)', search_text),
858+
re.finditer(u'target_host = (\S+)', search_text) )
859+
for match in find_iterator:
860+
nthr += 1
861+
self.assertEqual(match.group(1), u'localhost')
862+
occur_threshold = (1 if len(content) <= 32*MEBI else 2)
863+
self.assertGreaterEqual(nthr, occur_threshold)
864+
assert_expected_redirection_logging(PUT_LOG)
865+
generator = None
866+
# Activate a read ticket on a new session if necessary, and attempt a GET
867+
if data_ctx['ticket_access']:
868+
for access in iRODSAccess('own',remote_name,self.sess.username), \
869+
iRODSAccess('null',remote_name,sess.username):
870+
self.sess.acls.set(access, admin = True)
871+
generator = self._data_object_and_associated_ticket(data_name=remote_name, auto_delete_data = False, ticket_access='read')
872+
# Emulate the 'with'-block construction for the read ticket:
873+
data_ctx_get = next(generator)
874+
data_ctx_get['initialize']()
875+
sess = data_ctx_get['session']
876+
GET_LOG = self.In_Memory_Stream(always_unicode = True)
877+
with helpers.enableLogging(logging.getLogger('irods.manager.data_object_manager'),
878+
logging.StreamHandler, (GET_LOG,), level_ = logging.DEBUG),\
879+
helpers.enableLogging(logging.getLogger('irods.parallel'),
880+
logging.StreamHandler, (GET_LOG,), level_ = logging.DEBUG):
881+
sess.data_objects.get(remote_name,LOCAL_FILE)
882+
assert_expected_redirection_logging(GET_LOG)
883+
with open(LOCAL_FILE,'rb') as get_result:
884+
self.assertTrue(content, get_result.read())
885+
# Finalize the emulated 'with'-block construction for the read ticket, if active:
886+
del generator
887+
data_ctx['finalize']()
888+
finally:
889+
if os.path.isfile(LOCAL_FILE):
890+
os.unlink(LOCAL_FILE)
891+
if filename:
892+
os.unlink(filename)
890893

891894
def test_redirect_in_data_object_open__issue_452(self):
892895
self._skip_unless_connected_to_local_computer_by_other_than_localhost_synonym()
@@ -895,18 +898,19 @@ def test_redirect_in_data_object_open__issue_452(self):
895898
sess = self.sess
896899
home = helpers.home_collection(sess)
897900

898-
with self.create_simple_resc(hostname = 'localhost') as rescName:
899-
try:
900-
test_path = home + '/data_open_452'
901-
desc = sess.data_objects.open(test_path, 'w', **{kw.RESC_NAME_KW: rescName})
902-
self.assertEqual('localhost', desc.raw.session.host)
903-
desc.close()
904-
desc = sess.data_objects.open(test_path, 'r')
905-
self.assertEqual('localhost', desc.raw.session.host)
906-
desc.close()
907-
finally:
908-
if sess.data_objects.exists(test_path):
909-
sess.data_objects.unlink(test_path, force=True)
901+
with config.loadlines(entries=[dict(setting='data_objects.allow_redirect',value=True)]):
902+
with self.create_simple_resc(hostname = 'localhost') as rescName:
903+
try:
904+
test_path = home + '/data_open_452'
905+
desc = sess.data_objects.open(test_path, 'w', **{kw.RESC_NAME_KW: rescName})
906+
self.assertEqual('localhost', desc.raw.session.host)
907+
desc.close()
908+
desc = sess.data_objects.open(test_path, 'r')
909+
self.assertEqual('localhost', desc.raw.session.host)
910+
desc.close()
911+
finally:
912+
if sess.data_objects.exists(test_path):
913+
sess.data_objects.unlink(test_path, force=True)
910914

911915

912916
def test_create_with_checksum(self):
@@ -2174,35 +2178,53 @@ def test_touch_operation_does_not_work_when_given_a_collection__525(self):
21742178
with self.assertRaises(ex.InvalidInputArgument):
21752179
user_session.data_objects.touch(home_collection_path)
21762180

2181+
def assert_redirect_happens_on_open(self, open_opts):
2182+
name = 'redirect_happens_' + unique_name (my_function_name(), datetime.now())
2183+
data_path = '{self.coll_path}/{name}'.format(**locals())
2184+
try:
2185+
PUT_LOG = self.In_Memory_Stream(always_unicode = True)
2186+
with helpers.enableLogging(logging.getLogger('irods.manager.data_object_manager'),
2187+
logging.StreamHandler, (PUT_LOG,), level_ = logging.DEBUG):
2188+
with self.sess.data_objects.open(data_path,'w',**open_opts):
2189+
pass
2190+
log_text = PUT_LOG.getvalue()
2191+
self.assertIn('redirect_to_host',log_text)
2192+
finally:
2193+
if self.sess.data_objects.exists(data_path):
2194+
self.sess.data_objects.unlink(data_path, force = True)
2195+
21772196
@unittest.skipIf(six.PY2, "Python2 won't destruct an out-of-scope iRODSSession due to lazy GC ref-cycle detection.")
21782197
def test_client_redirect_lets_go_of_connections__issue_562(self):
21792198
self._skip_unless_connected_to_local_computer_by_other_than_localhost_synonym()
21802199
# Force data object connections to redirect by enforcing a non-equivalent hostname for their resource
21812200
total_conns = lambda session: len(session.pool.idle | session.pool.active)
2182-
with self.create_simple_resc(hostname = 'localhost') as resc_name:
2183-
# A reasonable number of data objects to create without eliciting problems.
2184-
# (But before resolution of #562, a NetworkException was eventually thrown from
2185-
# this test loop if a session cleanup() did not intervene between open() calls.)
2186-
REPS_TO_REPRODUCE_CONNECT_ERROR = 100
2187-
paths=[]
2188-
prev_conns = None
2189-
try:
2190-
# Try to exhaust connections
2191-
for n in range(REPS_TO_REPRODUCE_CONNECT_ERROR):
2192-
data_path = '{self.coll_path}/issue_562_test_obj_{n:03d}.dat'.format(**locals())
2193-
paths.append(data_path)
2194-
with self.sess.data_objects.open(data_path, 'w', **{kw.DEST_RESC_NAME_KW: resc_name}) as f:
2195-
pass
2196-
# Assert number of connections does not increase
2197-
current_conns = total_conns(self.sess)
2198-
if isinstance(prev_conns,int):
2199-
self.assertLessEqual(current_conns, prev_conns)
2200-
prev_conns = current_conns
2201-
finally:
2202-
# Clean up data objects before resource is deleted.
2203-
for data_path in paths:
2204-
if self.sess.data_objects.exists(data_path):
2205-
self.sess.data_objects.unlink(data_path, force = True)
2201+
with config.loadlines(entries=[dict(setting='data_objects.allow_redirect',value=True)]):
2202+
with self.create_simple_resc(hostname = 'localhost') as resc_name:
2203+
self.assert_redirect_happens_on_open( {kw.DEST_RESC_NAME_KW: resc_name} )
2204+
# A reasonable number of data objects to create without eliciting problems.
2205+
# (But before resolution of #562, a NetworkException was eventually thrown from
2206+
# this test loop if a session cleanup() did not intervene between open() calls.)
2207+
REPS_TO_REPRODUCE_CONNECT_ERROR = 100
2208+
paths=[]
2209+
prev_conns = None
2210+
try:
2211+
# Try to exhaust connections
2212+
for n in range(REPS_TO_REPRODUCE_CONNECT_ERROR):
2213+
data_path = '{self.coll_path}/issue_562_test_obj_{n:03d}.dat'.format(**locals())
2214+
paths.append(data_path)
2215+
with self.sess.data_objects.open(data_path, 'w', **{kw.DEST_RESC_NAME_KW: resc_name}) as f:
2216+
pass
2217+
# Assert number of connections does not increase
2218+
current_conns = total_conns(self.sess)
2219+
if isinstance(prev_conns,int):
2220+
self.assertLessEqual(current_conns, prev_conns)
2221+
prev_conns = current_conns
2222+
finally:
2223+
# Clean up data objects before resource is deleted.
2224+
for data_path in paths:
2225+
if self.sess.data_objects.exists(data_path):
2226+
self.sess.data_objects.unlink(data_path, force = True)
2227+
22062228

22072229
@unittest.skipIf(progressbar is None, "progressbar is not installed")
22082230
def test_progressbar_style_of_pbar_without_registering__issue_574(self):
@@ -2397,6 +2419,34 @@ def test_replica_truncate_related_errors__issue_534(self):
23972419
if data_objs.exists(data_path):
23982420
data_objs.unlink(data_path, force = True)
23992421

2422+
def test_allow_redirect_configuration_setting__issue_627(self):
2423+
2424+
self._skip_unless_connected_to_local_computer_by_other_than_localhost_synonym()
2425+
2426+
logical_paths = ['{}/issue_627_{}_{}'.format(self.coll_path, n, unique_name(my_function_name(), datetime.now())) for n in range(2)]
2427+
2428+
with self.create_simple_resc(hostname = 'localhost') as newResc1,\
2429+
self.create_simple_resc() as newResc2:
2430+
2431+
if self.sess.resources.get(newResc1).location == self.sess.resources.get(newResc2).location:
2432+
self.skipTest('test runs only if host locations differ between experimental and control resource')
2433+
2434+
for use_redirect in (True,False):
2435+
2436+
with config.loadlines(entries = [dict(setting = 'data_objects.allow_redirect',value = use_redirect)]):
2437+
2438+
try:
2439+
with self.sess.data_objects.open(logical_paths[0], 'w', **{kw.RESC_NAME_KW:newResc1}) as d1,\
2440+
self.sess.data_objects.open(logical_paths[1], 'w', **{kw.RESC_NAME_KW:newResc2}) as d2:
2441+
2442+
hostname_inequality_relation = (d1.raw.session.host != d2.raw.session.host)
2443+
self.assertEqual(use_redirect, hostname_inequality_relation)
2444+
finally:
2445+
for path in logical_paths:
2446+
if self.sess.data_objects.exists(path):
2447+
self.sess.data_objects.unlink(path, force = True)
2448+
2449+
24002450
def test_replica_truncate__issue_534(self):
24012451
sess = self.sess
24022452
data_objs = self.sess.data_objects

0 commit comments

Comments
 (0)