Skip to content

Commit

Permalink
Download rate limit (#404)
Browse files Browse the repository at this point in the history
* download rate limit

* update

* optimize code

* update

* update

* update2

* update ccnet_db

* optimize code

* update download limit

* update

* update threshold

* update

---------

Co-authored-by: 孙永强 <[email protected]>
Co-authored-by: r350178982 <[email protected]>
  • Loading branch information
3 people authored Dec 11, 2024
1 parent e862fcb commit 8b198ed
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 5 deletions.
3 changes: 3 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
METADATA_SERVER_URL = getattr(seahub_settings, 'METADATA_SERVER_URL', '')
ENABLE_METADATA_MANAGEMENT = getattr(seahub_settings, 'ENABLE_METADATA_MANAGEMENT', False)
METADATA_FILE_TYPES = getattr(seahub_settings, 'METADATA_FILE_TYPES', {})
DOWNLOAD_LIMIT_WHEN_THROTTLE = getattr(seahub_settings, 'DOWNLOAD_LIMIT_WHEN_THROTTLE', '1k')
ENABLED_ROLE_PERMISSIONS = getattr(seahub_settings, 'ENABLED_ROLE_PERMISSIONS', {})

except ImportError:
logger.critical("Can not import seahub settings.")
raise RuntimeError("Can not import seahub settings.")
Expand Down
119 changes: 114 additions & 5 deletions statistics/counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@
from datetime import datetime
from sqlalchemy import func, select, update, null
from sqlalchemy.sql import text

from .models import FileOpsStat, TotalStorageStat, UserTraffic, SysTraffic,\
MonthlyUserTraffic, MonthlySysTraffic
from seafevents.events.models import FileUpdate
from seafevents.events.models import FileAudit
from seafevents.db import SeafBase, init_db_session_class
from seaserv import seafile_api
from seafevents.utils.seafile_db import SeafileDB
from seafevents.utils.ccnet_db import CcnetDB
from seafevents.utils import get_quota_from_string
from seafevents.app.config import DOWNLOAD_LIMIT_WHEN_THROTTLE, ENABLED_ROLE_PERMISSIONS
from .db import get_org_id

# This is a throwaway variable to deal with a python bug
Expand All @@ -18,6 +24,28 @@
login_records = {}
traffic_info = {}


# Process-local memo of users / organizations that have already been throttled
# this month: an entry is set to True right after the download rate limit is
# applied, and is checked beforehand so the monthly-traffic query is not
# repeated. Both dicts are cleared when the monthly reset runs.
rate_limit_users = {}
rate_limit_orgs = {}
# First-of-month dates for which the rate-limit tables were already reset;
# consulted so the reset is performed only once per first day of a month.
reset_rate_limit_dates = []

# Role names. DEFAULT_USER is the fallback used when a user's stored role is
# empty. GUEST_USER is not referenced in the visible part of this module.
DEFAULT_USER = 'default'
GUEST_USER = 'guest'
# Keys looked up inside each role's entry of ENABLED_ROLE_PERMISSIONS.
MONTHLY_RATE_LIMIT_PER_USER = 'monthly_rate_limit_per_user'
MONTHLY_RATE_LIMIT = 'monthly_rate_limit'


def get_org_user_count(local_traffic_info, date_str):
    """Return ``{org_id: user_count}`` for every organization seen on a day.

    ``local_traffic_info[date_str]`` is iterated and the first element of
    each item is taken as the organization id. Only ids greater than zero
    are looked up, and each distinct id is queried exactly once through
    ``CcnetDB.get_org_user_count``.
    """
    counts_by_org = {}
    for record_key in local_traffic_info[date_str]:
        org_id = record_key[0]
        # Skip non-organization traffic and organizations already counted.
        if not org_id > 0 or org_id in counts_by_org:
            continue
        # A fresh CcnetDB is opened for every lookup, as in the original flow.
        with CcnetDB() as ccnet_db:
            counts_by_org[org_id] = ccnet_db.get_org_user_count(org_id)
    return counts_by_org


def update_hash_record(session, login_name, login_time, org_id):
time_str = login_time.strftime('%Y-%m-%d 00:00:00')
time_by_day = datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S')
Expand All @@ -33,6 +61,23 @@ def save_traffic_info(session, timestamp, user_name, repo_id, oper, size):
traffic_info[time_str][(org_id, user_name, oper)] = size
else:
traffic_info[time_str][(org_id, user_name, oper)] += size

def get_role_download_rate_limit_info():
    """Build the per-role monthly download limits from ENABLED_ROLE_PERMISSIONS.

    Returns ``None`` when ENABLED_ROLE_PERMISSIONS is empty. Otherwise returns
    a dict mapping every role to a dict that holds, for each of the
    ``monthly_rate_limit`` / ``monthly_rate_limit_per_user`` keys present in
    that role's settings, the value converted by ``get_quota_from_string``.
    Roles that define neither key map to an empty dict.
    """
    if not ENABLED_ROLE_PERMISSIONS:
        return None

    limit_keys = (MONTHLY_RATE_LIMIT, MONTHLY_RATE_LIMIT_PER_USER)
    limits_by_role = {}
    for role, permissions in ENABLED_ROLE_PERMISSIONS.items():
        limits_by_role[role] = {
            key: get_quota_from_string(permissions[key])
            for key in limit_keys
            if key in permissions
        }
    return limits_by_role



class FileOpsCounter(object):
def __init__(self, config):
Expand Down Expand Up @@ -188,6 +233,7 @@ def start_count(self):
class TrafficInfoCounter(object):
def __init__(self, config):
    """Open an events-DB session and record which operations are downloads.

    ``config`` is passed to ``init_db_session_class``; the session class it
    returns is instantiated once and kept on ``self.edb_session``.
    """
    session_class = init_db_session_class(config)
    self.edb_session = session_class()
    # Operation types that count towards the download rate limit.
    self.download_type_list = [
        'web-file-download',
        'link-file-download',
        'sync-file-download',
    ]

def start_count(self):
time_start = time.time()
Expand Down Expand Up @@ -230,6 +276,14 @@ def update_record(self, local_traffic_info, date, date_str):
org_delta = {}

trans_count = 0
first_day_of_month = datetime(datetime.now().year, datetime.now().month, 1)
traffic_info_dict = None
org_user_count_dict = get_org_user_count(local_traffic_info, date_str)
try:
# list role traffic info
traffic_info_dict = get_role_download_rate_limit_info()
except Exception as e:
logging.warning('Failed get download rate limit info: %s.', e)
# Update UserTraffic
for row in local_traffic_info[date_str]:
trans_count += 1
Expand All @@ -239,12 +293,42 @@ def update_record(self, local_traffic_info, date, date_str):
size = local_traffic_info[date_str][row]
if size == 0:
continue
if (org_id, oper) not in org_delta:
org_delta[(org_id, oper)] = size

traffic_threshold = None
if traffic_info_dict and oper in self.download_type_list:
with CcnetDB() as ccnet_db:
user_role = ccnet_db.get_user_role(user)
role = DEFAULT_USER if (user_role == '' or user_role == DEFAULT_USER) else user_role
traffic_threshold = traffic_info_dict[role].get(MONTHLY_RATE_LIMIT) or None
if org_id > 0:
monthly_rate_limit_per_user = traffic_info_dict[role].get(MONTHLY_RATE_LIMIT_PER_USER) or None
traffic_threshold = monthly_rate_limit_per_user * org_user_count_dict[org_id] if monthly_rate_limit_per_user else None
if (org_id, oper) not in org_delta:
org_delta[(org_id, oper, traffic_threshold)] = size
else:
org_delta[(org_id, oper, traffic_threshold)] += size
else:
org_delta[(org_id, oper)] += size

if (org_id, oper) not in org_delta:
org_delta[(org_id, oper)] = size
else:
org_delta[(org_id, oper)] += size
try:
# Check the download traffic for the current month.
if traffic_threshold and (org_id < 0 and oper in self.download_type_list
and not rate_limit_users.get(user, False)):
stmt2 = select(func.sum(UserTraffic.size).label("size")).where(
UserTraffic.timestamp.between(first_day_of_month, date),
UserTraffic.user == user,
UserTraffic.org_id == org_id,
UserTraffic.op_type.in_(self.download_type_list)
)
user_monthly_traffic_size = self.edb_session.scalars(stmt2).first()
# not org user rate limit
if user_monthly_traffic_size and user_monthly_traffic_size > traffic_threshold:
download_limit_format = get_quota_from_string(DOWNLOAD_LIMIT_WHEN_THROTTLE)
seafile_api.set_user_download_rate_limit(user, download_limit_format)
rate_limit_users[user] = True

stmt = select(UserTraffic.size).where(
UserTraffic.timestamp == date,
UserTraffic.user == user,
Expand Down Expand Up @@ -276,11 +360,27 @@ def update_record(self, local_traffic_info, date, date_str):
oper = row[1]
size = org_delta[row]
try:
# Check org download traffic for current month.
if traffic_info_dict and org_id > 0 and oper in self.download_type_list and not rate_limit_orgs.get(org_id):
traffic_threshold = row[2]
stmt2 = select(func.sum(SysTraffic.size).label("size")).where(
SysTraffic.timestamp.between(first_day_of_month, date),
SysTraffic.org_id == org_id,
SysTraffic.op_type.in_(self.download_type_list)
)
org_monthly_traffic_size = self.edb_session.scalars(stmt2).first()
# org rate limit
if org_monthly_traffic_size and traffic_threshold and org_monthly_traffic_size > traffic_threshold:
download_limit_format = get_quota_from_string(DOWNLOAD_LIMIT_WHEN_THROTTLE)
seafile_api.org_set_download_rate_limit(org_id, download_limit_format)
rate_limit_orgs[org_id] = True

stmt = select(SysTraffic.size).where(
SysTraffic.timestamp == date,
SysTraffic.org_id == org_id,
SysTraffic.op_type == oper).limit(1)
result = self.edb_session.scalars(stmt).first()

if result is not None:
size_in_db = result
stmt = update(SysTraffic).where(SysTraffic.timestamp == date,
Expand All @@ -307,10 +407,19 @@ def start_count(self):
today = dt.date()
delta = timedelta(days=dt.day - 1)
first_day = today - delta

self.user_item_count = 0
self.sys_item_count = 0

# reset rate limit
if today == first_day and first_day not in reset_rate_limit_dates:
if len(reset_rate_limit_dates) > 2:
reset_rate_limit_dates.pop(0)
with SeafileDB() as seafile_db:
seafile_db.reset_download_rate_limit()
reset_rate_limit_dates.append(first_day)
rate_limit_orgs.clear()
rate_limit_users.clear()

try:
# Get raw data from UserTraffic, then update MonthlyUserTraffic and MonthlySysTraffic.
stmt = select(UserTraffic.user, UserTraffic.org_id,
Expand Down
51 changes: 51 additions & 0 deletions utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,3 +230,54 @@ def timestamp_to_isoformat_timestr(timestamp):
except Exception as e:
logger.error(e)
return ''


UNIT_KB = 'kb'
UNIT_MB = 'mb'
UNIT_GB = 'gb'
UNIT_TB = 'tb'
UNIT_PB = 'pb'

UNIT_KIB = 'kib'
UNIT_MIB = 'mib'
UNIT_GIB = 'gib'
UNIT_TIB = 'tib'
UNIT_PIB = 'pib'

# Size in bytes of one unit, keyed by lower-case unit name. Built once at
# import time instead of on every call.
_FILE_SIZE_UNITS = {
    # decimal
    UNIT_KB: 10 ** 3,
    UNIT_MB: 10 ** 6,
    UNIT_GB: 10 ** 9,
    UNIT_TB: 10 ** 12,
    UNIT_PB: 10 ** 15,
    # binary
    UNIT_KIB: 1 << 10,
    UNIT_MIB: 1 << 20,
    UNIT_GIB: 1 << 30,
    UNIT_TIB: 1 << 40,
    UNIT_PIB: 1 << 50,
}


def get_file_size_unit(unit_type):
    """Return the number of bytes in one ``unit_type``.

    File size unit according to https://en.wikipedia.org/wiki/Kibibyte.

    ``unit_type`` is matched case-insensitively against the decimal units
    (kb, mb, gb, tb, pb) and the binary units (kib, mib, gib, tib, pib).

    Raises:
        TypeError: if ``unit_type`` is not one of the supported unit names.
    """
    unit_name = unit_type.lower()
    # Direct dict membership; no intermediate list of keys is needed.
    if unit_name not in _FILE_SIZE_UNITS:
        raise TypeError('Invalid unit type')

    return _FILE_SIZE_UNITS[unit_name]

def get_quota_from_string(quota_str):
    """Convert a quota string such as ``'10g'``, ``'5m'`` or ``'1k'`` to bytes.

    The trailing letter (case-insensitive) selects the unit passed to
    ``get_file_size_unit``: ``g`` -> ``'gb'``, ``m`` -> ``'mb'``,
    ``k`` -> ``'kb'``. Everything before that letter is parsed with
    ``int``, so a non-integer amount raises ``ValueError``.

    Returns ``None`` when the string does not end in one of those letters.
    """
    unit_for_suffix = {'g': 'gb', 'm': 'mb', 'k': 'kb'}

    lowered = quota_str.lower()
    unit_name = unit_for_suffix.get(lowered[-1:])
    if unit_name is None:
        return None

    # Parse the amount first so a malformed number fails before the unit lookup.
    amount = int(lowered[:-1])
    return amount * get_file_size_unit(unit_name)
20 changes: 20 additions & 0 deletions utils/ccnet_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,23 @@ def get_groups_by_ids(self, group_ids):
groups_map[item[0]] = self.get_group_info(item)

return groups_map

def get_org_user_count(self, org_id):
    """Return the number of rows in ``OrgUser`` that belong to ``org_id``.

    The organization id is sent as a bound query parameter instead of being
    formatted into the SQL text.
    NOTE(review): assumes the cursor comes from a DB-API driver using the
    ``%s`` ("format") paramstyle, e.g. PyMySQL / MySQLdb — confirm.
    """
    # Only the schema name is interpolated; it is an identifier and cannot
    # be passed as a bound parameter.
    sql = f"""
    SELECT COUNT(1) FROM `{self.db_name}`.`OrgUser` WHERE org_id=%s
    """
    with self.ccnet_db_cursor as cursor:
        cursor.execute(sql, (org_id,))

        return cursor.fetchone()[0]

def get_user_role(self, email):
    """Return the role stored for ``email`` in ``UserRole``.

    Returns ``'default'`` when no row exists for the address.

    The address is sent as a bound query parameter rather than being
    formatted into the SQL text, so quotes inside ``email`` cannot alter
    the statement.
    NOTE(review): assumes the cursor comes from a DB-API driver using the
    ``%s`` ("format") paramstyle, e.g. PyMySQL / MySQLdb — confirm.
    """
    # Only the schema name is interpolated; it is an identifier and cannot
    # be passed as a bound parameter.
    sql = f"""
    SELECT role FROM `{self.db_name}`.`UserRole`
    WHERE email=%s
    """
    with self.ccnet_db_cursor as cursor:
        cursor.execute(sql, (email,))
        result = cursor.fetchone()

    return result[0] if result else 'default'
11 changes: 11 additions & 0 deletions utils/seafile_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,14 @@ def get_repo_info_by_ids(self, repo_ids):
repos_map[row[0]] = self.repo_info(row)

return repos_map

def reset_download_rate_limit(self):
    """Empty the user and organization download rate-limit tables.

    Issues one ``TRUNCATE TABLE`` per table, the user table first and the
    organization table second, on the instance's Seafile DB cursor.
    """
    truncate_user_limits = f"""
    TRUNCATE TABLE `{self.db_name}`.`UserDownloadRateLimit`;
    """
    truncate_org_limits = f"""
    TRUNCATE TABLE `{self.db_name}`.`OrgDownloadRateLimit`
    """
    with self.seafile_db_cursor as cursor:
        for statement in (truncate_user_limits, truncate_org_limits):
            cursor.execute(statement)

0 comments on commit 8b198ed

Please sign in to comment.