Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Different rate limits depending on HTTP method (#5555) #5869

Open
wants to merge 7 commits into
base: develop
Choose a base branch
from
14 changes: 0 additions & 14 deletions deployments/prod/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1476,19 +1476,5 @@ def env() -> Mapping[str, Optional[str]]:

'AZUL_ENABLE_REPLICAS': '1',

# HCA allocates a daily budget for file downloads. To avoid exceeding
# that budget, we limit the download rate as follows:
#
# r = b/d/f/24/60*w
#
# where `r` is the rate limit (downloads/window), `b` is the daily
# download budget (dollars/day), `d` is the download cost (dollars/
# gibibyte/download), `f` is the average file size (gibibytes), and `w`
# is the evaluation window (minutes) (=10). The value for `d` varies by
# region, so a weighted average is calculated based on the observed
# number of daily downloads per region.
#
# 'azul_waf_download_rate_limit': '59/600@2.9'

'AZUL_ENABLE_VERBATIM_RELATIONS': '0',
}
16 changes: 0 additions & 16 deletions environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,22 +942,6 @@ def env() -> Mapping[str, Optional[str]]:
#
'azul_it_flags': None,

# A global rate limit on file downloads across all regions and IP
# addresses, enforced by AWS WAF.
#
# The syntax is `<limit>/<window>@<concurrency>` where `<limit>` is the
# maximum allowed number of download requests made every `<window>`
# seconds, and `<concurrency>` is the expected number of distinct IPs
# making at least one download request during that time. The concurrency
# does not need to be an integer. See
#
# https://docs.aws.amazon.com/waf/latest/developerguide/waf-rule-statement-type-rate-based-high-level-settings.html
#
# for restrictions on the supported values for `<limit>` ("Rate limit")
# and `<window>` ("Evaluation window").
#
'azul_waf_download_rate_limit': None,

# Whether to enable bot control in AWS WAF. Setting this to 1 will enable
# two rules aimed at blocking requests from suspected and verified bots.
# As of January 2024, this will incur a monthly cost of $10 per ACL plus
Expand Down
72 changes: 37 additions & 35 deletions src/azul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
)

import attr
import attrs
from furl import (
furl,
)
Expand Down Expand Up @@ -1713,45 +1712,48 @@ def docker_image_gists_path(self) -> Path:

allowed_v4_ips_term = 'allowed_v4_ips'

waf_rate_rule_name = 'RateRule'

waf_rate_alarm_rule_name = 'RateAlarmRule'

waf_rate_rule_period = 300 # seconds; this value is fixed by AWS

waf_rate_rule_retry_after = 30 # seconds

waf_rate_rule_limit = 1000
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
class RateLimit:
name: str
value: int # number of requests per evaluation period
period: int # evaluation window in seconds
retry_after: int # in seconds

@attrs.frozen(auto_attribs=True, kw_only=True)
class FileDownloadLimit:
rate_limit: int
evaluation_window: int
assumed_request_concurrency: float
def __attrs_post_init__(self):
assert 10 <= self.value <= 2_000_000_000, R(
'Rate limit out of range', self)

@classmethod
def parse(cls, s: str) -> Self:
rate, s = s.split('/')
window, concurrency = s.split('@')
return cls(rate_limit=int(rate),
evaluation_window=int(window),
assumed_request_concurrency=float(concurrency))
#: The rate limit per IP before WAF starts rejecting requests
#:
waf_rate_limit = RateLimit(name='rate_limit',
value=1000,
period=5 * 60,
retry_after=30)

@property
def retry_after(self) -> int:
return round(self.evaluation_window /
self.rate_limit *
self.assumed_request_concurrency)
#: The rate limit per IP before a CloudWatch alarm is raised
#:
waf_rate_limit_alarm = attr.evolve(waf_rate_limit,
name='rate_limit_alarm',
value=waf_rate_limit.value * 2)

@property
def waf_file_download_limit(self) -> FileDownloadLimit | None:
value = self.environ.get('azul_waf_download_rate_limit')
if value is None:
return None
else:
return self.FileDownloadLimit.parse(value)
#: The rate limit per IP for requests that trigger a manifest generation
#:
waf_rate_limit_manifests = RateLimit(name='rate_limit_manifests',
value=10,
period=10 * 60,
retry_after=30)

assert 100 <= waf_rate_rule_limit <= 2_000_000_000 # mandated by AWS
#: The rate limit for file download requests
#:
#: We aim for a global limit of 60 file downloads per 10 minutes. Based on
#: an observed average of 2.9 distinct IPs concurrently downloading files
#: in any 10-minute window, the maximum per-IP request rate we can allow is
#: 20/10min, or 10/5min.
#:
waf_rate_limit_files = RateLimit(name='rate_limit_files',
value=10,
period=5 * 60,
retry_after=30)

@property
def waf_bot_control(self) -> bool:
Expand Down
3 changes: 2 additions & 1 deletion src/azul/service/manifest_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -1395,13 +1395,14 @@ def command_lines(cls,
'--location',
'--fail',
]
rate_limit = config.waf_rate_limit
file_options = [
'--fail-early', # Exit curl with error on the first failure encountered
'--continue-at -', # Resume partially downloaded files
# We want curl to make enough retries so that it waits a total of
# one and a half times the evaluation period of the WAF rate rule,
# long enough for the tripped rule to clear.
f'--retry {ceil(config.waf_rate_rule_period * 1.5 / config.waf_rate_rule_retry_after)}',
f'--retry {ceil(rate_limit.period * 1.5 / rate_limit.retry_after)}',
# Curl will respect the 'Retry-After' header if given in a response,
# like the one returned when the WAF rate rule is tripped. Otherwise,
# curl will wait for the number of seconds specified here.
Expand Down
Loading