Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improve behavior of HTTP redirects #182

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"files": "package-lock.json|^.secrets.baseline$",
"lines": null
},
"generated_at": "2023-11-10T17:28:14Z",
"generated_at": "2023-11-22T18:03:02Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
Expand Down Expand Up @@ -416,15 +416,15 @@
"hashed_secret": "2863fa4b5510c46afc2bd2998dfbc0cf3d6df032",
"is_secret": false,
"is_verified": false,
"line_number": 528,
"line_number": 527,
"type": "Secret Keyword",
"verified_result": null
},
{
"hashed_secret": "b9cad336062c0dc3bb30145b1a6697fccfe755a6",
"is_secret": false,
"is_verified": false,
"line_number": 589,
"line_number": 588,
"type": "Secret Keyword",
"verified_result": null
}
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ matrix:
- python: 3.10
- python: 3.11

cache: pip3
cache: pip

before_install:
- npm install npm@latest -g
Expand Down
51 changes: 47 additions & 4 deletions ibm_cloud_sdk_core/base_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@
from http.cookiejar import CookieJar
from os.path import basename
from typing import Dict, List, Optional, Tuple, Union
from urllib3.util.retry import Retry
from urllib.parse import urlparse

import requests
from requests.structures import CaseInsensitiveDict
from requests.exceptions import JSONDecodeError
from urllib3.exceptions import MaxRetryError
from urllib3.util.retry import Retry

from ibm_cloud_sdk_core.authenticators import Authenticator
from .api_exception import ApiException
Expand All @@ -52,6 +54,10 @@
logger = logging.getLogger(__name__)


MAX_REDIRECTS = 10
SAFE_HEADERS = ['authorization', 'www-authenticate', 'cookie', 'cookie2']


# pylint: disable=too-many-instance-attributes
# pylint: disable=too-many-locals
class BaseService:
Expand Down Expand Up @@ -96,7 +102,7 @@ def __init__(
service_url: str = None,
authenticator: Authenticator = None,
disable_ssl_verification: bool = False,
enable_gzip_compression: bool = False
enable_gzip_compression: bool = False,
) -> None:
self.set_service_url(service_url)
self.http_client = requests.Session()
Expand Down Expand Up @@ -280,6 +286,7 @@ def set_default_headers(self, headers: Dict[str, str]) -> None:
else:
raise TypeError("headers parameter must be a dictionary")

# pylint: disable=too-many-branches
def send(self, request: requests.Request, **kwargs) -> DetailedResponse:
"""Send a request and wrap the response in a DetailedResponse or APIException.

Expand All @@ -294,7 +301,9 @@ def send(self, request: requests.Request, **kwargs) -> DetailedResponse:
"""
# Use a one minute timeout when our caller doesn't give a timeout.
# http://docs.python-requests.org/en/master/user/quickstart/#timeouts
kwargs = dict({"timeout": 60}, **kwargs)
# We also disable the default redirection, to have more granular control
# over the headers sent in each request.
kwargs = dict({'timeout': 60, 'allow_redirects': False}, **kwargs)
kwargs = dict(kwargs, **self.http_config)

if self.disable_ssl_verification:
Expand All @@ -314,6 +323,40 @@ def send(self, request: requests.Request, **kwargs) -> DetailedResponse:
try:
response = self.http_client.request(**request, cookies=self.jar, **kwargs)

# Handle HTTP redirects.
redirects_count = 0
# Check if the response is a redirect to another host.
while response.is_redirect and response.next is not None:
redirects_count += 1

if redirects_count > MAX_REDIRECTS:
# Raise an error if the maximum number of redirects has been reached.
raise MaxRetryError(
None, response.url, reason=f'reached the maximum number of redirects: {MAX_REDIRECTS}'
)

# The `requests` package has already prepared a request that can almost be used as-is.
next_request = response.next

from_host = urlparse(response.request.url).hostname
to_host = urlparse(next_request.url).hostname
same_host = from_host == to_host
safe_domain = from_host.endswith('.cloud.ibm.com') and to_host.endswith('.cloud.ibm.com')

# If both the original and the redirected URL are under the `.cloud.ibm.com` domain,
# copy the safe headers that are used for authentication purposes,
if same_host or safe_domain:
original_headers = request.get('headers')
for header, value in original_headers.items():
if header.lower() in SAFE_HEADERS:
next_request.headers[header] = value
# otherwise remove them manually, because `urllib3` doesn't strip all of them.
else:
for header in SAFE_HEADERS:
next_request.headers.pop(header, None)

response = self.http_client.send(next_request, **kwargs)

# Process a "success" response.
if 200 <= response.status_code <= 299:
if response.status_code == 204 or request['method'] == 'HEAD':
Expand Down Expand Up @@ -362,7 +405,7 @@ def prepare_request(
params: Optional[dict] = None,
data: Optional[Union[str, dict]] = None,
files: Optional[Union[Dict[str, Tuple[str]], List[Tuple[str, Tuple[str, ...]]]]] = None,
**kwargs
**kwargs,
) -> dict:
"""Build a dict that represents an HTTP service request.

Expand Down
134 changes: 133 additions & 1 deletion test/test_base_service.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# coding=utf-8
# pylint: disable=missing-docstring,protected-access,too-few-public-methods
# pylint: disable=missing-docstring,protected-access,too-few-public-methods,too-many-lines
import gzip
import json
import os
import ssl
import tempfile
import time
from collections import namedtuple
from shutil import copyfile
from typing import Optional
from urllib3.exceptions import ConnectTimeoutError, MaxRetryError
Expand Down Expand Up @@ -788,6 +789,137 @@ def test_retry_config_external():
assert retry_err.value.reason == error


class TestRedirect:
safe_headers = {
'Authorization': 'foo',
'WWW-Authenticate': 'bar',
'Cookie': 'baz',
'Cookie2': 'baz2',
}

url_cloud_1 = 'https://region1.cloud.ibm.com'
url_cloud_2 = 'https://region2.cloud.ibm.com'
url_notcloud_1 = 'https://region1.notcloud.ibm.com'
url_notcloud_2 = 'https://region2.notcloud.ibm.com'

# pylint: disable=too-many-locals
def run_test(self, url_from_base: str, url_to_base: str, safe_headers_included: bool):
paths = [
# 1. port, 1. path, 2. port with path
['', '/', '/'],
[':3000', '/', '/'],
[':3000', '/', ':3333/'],
['', '/a/very/long/path/with?some=query&params=the_end', '/'],
[':3000', '/a/very/long/path/with?some=query&params=the_end', '/'],
[':3000', '/a/very/long/path/with?some=query&params=the_end', '/api/v1'],
[':3000', '/a/very/long/path/with?some=query&params=the_end', ':3000/api/v1'],
]

# Different test cases to make sure different status codes handled correctly.
TestCase = namedtuple(
'TestCase', ['status_1', 'status_2', 'method_1', 'method_2', 'method_expected', 'body_returned']
)
test_matrix = [
TestCase(301, 200, responses.GET, responses.GET, responses.GET, False),
TestCase(301, 200, responses.POST, responses.GET, responses.GET, False),
TestCase(302, 200, responses.GET, responses.GET, responses.GET, False),
TestCase(302, 200, responses.POST, responses.GET, responses.GET, False),
TestCase(303, 200, responses.GET, responses.GET, responses.GET, False),
TestCase(303, 200, responses.POST, responses.GET, responses.GET, False),
TestCase(307, 200, responses.GET, responses.GET, responses.GET, True),
TestCase(307, 200, responses.POST, responses.POST, responses.POST, True),
TestCase(308, 200, responses.GET, responses.GET, responses.GET, True),
TestCase(308, 200, responses.POST, responses.POST, responses.POST, True),
]

for path in paths:
url_from = url_from_base + path[0] + path[1]
url_to = url_to_base + path[2]

for test_case in test_matrix:
# Make sure we start with a clean "env".
responses.reset()

# Add our mock responses.
responses.add(
test_case.method_1,
url_from,
status=test_case.status_1,
adding_headers={'Location': url_to},
body='just about to redirect',
)
responses.add(test_case.method_2, url_to, status=test_case.status_2, body='successfully redirected')

# Create the service, prepare the request and call it.
service = BaseService(service_url=url_from_base + path[0], authenticator=NoAuthAuthenticator())
prepped = service.prepare_request(test_case.method_1, path[1], headers=self.safe_headers)
response = service.send(prepped)
result = response.get_result()

# Check the status code, URL, body and the method of the last request (redirected).
assert result.status_code == test_case.status_2
assert result.url == url_to
assert result.text == 'successfully redirected'
assert result.request.method == test_case.method_expected

# Check each headers based on the kind of the current test.
redirected_request = responses.calls[1].request
for key in self.safe_headers:
if safe_headers_included:
assert key in redirected_request.headers
else:
assert key not in redirected_request.headers

# We don't always want to see a body in the last response.
if not test_case.body_returned:
assert redirected_request.body is None

@responses.activate
def test_redirect_ibm_to_ibm(self):
self.run_test(self.url_cloud_1, self.url_cloud_2, True)

@responses.activate
def test_redirect_not_ibm_to_ibm(self):
self.run_test(self.url_notcloud_1, self.url_cloud_2, False)

@responses.activate
def test_redirect_ibm_to_not_ibm(self):
self.run_test(self.url_cloud_1, self.url_notcloud_2, False)

@responses.activate
def test_redirect_not_ibm_to_not_ibm(self):
self.run_test(self.url_notcloud_1, self.url_notcloud_2, False)

@responses.activate
def test_redirect_ibm_same_host(self):
self.run_test(self.url_cloud_1, self.url_cloud_1, True)

@responses.activate
def test_redirect_not_ibm_same_host(self):
self.run_test(self.url_notcloud_1, self.url_notcloud_1, True)

@responses.activate
def test_redirect_ibm_to_ibm_exhausted(self):
redirects = 11

for i in range(redirects):
responses.add(
responses.GET,
f'https://region{i+1}.cloud.ibm.com/',
status=302,
adding_headers={'Location': f'https://region{i+2}.cloud.ibm.com/'},
body='just about to redirect',
)

service = BaseService(service_url='https://region1.cloud.ibm.com/', authenticator=NoAuthAuthenticator())

with pytest.raises(MaxRetryError) as ex:
prepped = service.prepare_request('GET', '', headers=self.safe_headers)
service.send(prepped)

assert ex.value.reason == 'reached the maximum number of redirects: 10'


@responses.activate
def test_user_agent_header():
service = AnyServiceV1('2018-11-20', authenticator=NoAuthAuthenticator())
Expand Down