diff --git a/content_scanner/models.py b/content_scanner/models.py
index e7065077..d0a36745 100644
--- a/content_scanner/models.py
+++ b/content_scanner/models.py
@@ -6,7 +6,7 @@ class ContentScanRecord(Base):
-    __tablename__ = 'ContentScanRecord'
+    __tablename__ = 'CONTENTSCANRECORD'
 
     id = mapped_column(Integer, primary_key=True, autoincrement=True)
     repo_id = mapped_column(String(length=36), nullable=False, index=True)
@@ -21,7 +21,7 @@ def __init__(self, repo_id, commit_id, timestamp):
 
 class ContentScanResult(Base):
-    __tablename__ = 'ContentScanResult'
+    __tablename__ = 'CONTENTSCANRESULT'
 
     id = mapped_column(Integer, primary_key=True, autoincrement=True)
     repo_id = mapped_column(String(length=36), nullable=False, index=True)
diff --git a/db.py b/db.py
index 2e42ae52..b9e135a5 100644
--- a/db.py
+++ b/db.py
@@ -81,6 +81,23 @@ def create_engine_from_conf(config, db='seafevent'):
         db_url = "oracle://%s:%s@%s:%s/%s" % (username, quote_plus(passwd), host, port, service_name)
+    elif backend == 'dm':
+        if config.has_option(db_sec, 'host'):
+            host = config.get(db_sec, 'host').lower()
+        else:
+            host = 'localhost'
+
+        if config.has_option(db_sec, 'port'):
+            port = config.getint(db_sec, 'port')
+        else:
+            port = 5236
+        username = config.get(db_sec, 'user')
+        passwd = config.get(db_sec, 'password')
+        service_name = config.get(db_sec, 'db_name')
+        if db == 'seafile':
+            # under DM the seafile schema is named after the database user
+            service_name = config.get(db_sec, 'user')
+
+        db_url = "dm+dmPython://%s:%s@%s:%s/?schema=%s" % (username, quote_plus(passwd), host, port, service_name)
     else:
         logger.error("Unknown database backend: %s" % backend)
         raise RuntimeError("Unknown database backend: %s" % backend)
@@ -104,7 +121,7 @@ def init_db_session_class(config, db='seafevent'):
     try:
         engine = create_engine_from_conf(config, db)
     except (configparser.NoOptionError, configparser.NoSectionError) as e:
-        logger.error(e)
+        logger.exception(e)
         raise RuntimeError("create db engine error: %s" % e)
 
     Session = sessionmaker(bind=engine)
@@ -134,6 +151,7 @@ def prepare_db_tables(seafile_config):
         logger.error(e)
         raise RuntimeError("create db engine error: %s" % e)
 
+    # SeafBase.prepare(autoload_with=engine, schema='SYSDBA')
     SeafBase.prepare(autoload_with=engine)
 
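The dm branch above assembles the connection URL with % formatting plus quote_plus. A minimal alternative sketch, assuming the same dm+dmPython dialect, that lets SQLAlchemy's URL builder do the escaping (the helper name and defaults are illustrative, not part of this patch):

    from sqlalchemy.engine import URL

    def build_dm_url(username, password, host='localhost', port=5236, schema=None):
        # URL.create percent-encodes the password itself, so quote_plus is not needed
        return URL.create('dm+dmPython', username=username, password=password,
                          host=host, port=port,
                          query={'schema': schema} if schema else {})

The returned URL object can be passed directly to create_engine().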
diff --git a/events/db.py b/events/db.py
index da6a0b9b..ac80cc9b 100644
--- a/events/db.py
+++ b/events/db.py
@@ -61,7 +61,7 @@ def _get_user_activities(session, username, start, limit):
     if limit <= 0:
         logger.error('limit must be positive')
         raise RuntimeError('limit must be positive')
-    
+
     sub_query = (
         select(UserActivity.activity_id)
        .where(UserActivity.username == username)
@@ -85,7 +85,7 @@ def _get_user_activities_by_op_user(session, username, op_user, start, limit):
     if limit <= 0:
         logger.error('limit must be positive')
         raise RuntimeError('limit must be positive')
-    
+
     sub_query = (
         select(UserActivity.activity_id)
        .where(UserActivity.username == username)
@@ -199,9 +199,10 @@ def get_file_history_by_day(session, repo_id, path, start, limit, to_tz, history
     repo_id_path_md5 = hashlib.md5((repo_id + path).encode('utf8')).hexdigest()
     current_item = session.scalars(select(FileHistory).where(FileHistory.repo_id_path_md5 == repo_id_path_md5).
                                    order_by(desc(FileHistory.id)).limit(1)).first()
-    
+
     new_events = []
     if current_item:
+        # convert_tz needs to be changed, but this is an sdoc API, so test it after sdoc is configured
         query_stmt = select(
             FileHistory.id, FileHistory.op_type, FileHistory.op_user, FileHistory.timestamp, FileHistory.repo_id,
             FileHistory.commit_id, FileHistory.file_id, FileHistory.file_uuid, FileHistory.path, FileHistory.repo_id_path_md5, FileHistory.size, FileHistory.old_path,
@@ -244,8 +245,9 @@ def get_file_daily_history_detail(session, repo_id, path, start_time, end_time,
     repo_id_path_md5 = hashlib.md5((repo_id + path).encode('utf8')).hexdigest()
     current_item = session.scalars(select(FileHistory).where(FileHistory.repo_id_path_md5 == repo_id_path_md5).
                                    order_by(desc(FileHistory.id)).limit(1)).first()
-    
+
     details = list()
+    # convert_tz needs to be changed, but this is an sdoc API, so test it after sdoc is configured
     try:
         q = select(FileHistory.id, FileHistory.op_type, FileHistory.op_user, FileHistory.timestamp, FileHistory.repo_id,
                    FileHistory.commit_id, FileHistory.file_id, FileHistory.file_uuid, FileHistory.path, FileHistory.repo_id_path_md5, FileHistory.size, FileHistory.old_path,
@@ -271,8 +273,20 @@ def save_user_activity(session, record):
     session.commit()
 
 def save_repo_trash(session, record):
-    repo_trash = FileTrash(record)
-    session.add(repo_trash)
+    user = record['op_user']
+    obj_type = record['obj_type']
+    obj_id = record.get('obj_id', "")
+    obj_name = record['obj_name']
+    delete_time = record['timestamp']
+    repo_id = record['repo_id']
+    path = record['path']
+    commit_id = record.get('commit_id', '')
+    size = record.get('size', 0)
+
+    sql = f"""INSERT INTO FileTrash ("user", obj_type, obj_id, obj_name, delete_time, repo_id, commit_id, path, "size")
+              VALUES ('{user}', '{obj_type}', '{obj_id}', '{obj_name}', '{delete_time}', '{repo_id}', '{commit_id}', '{path}', {size})"""
+
+    session.execute(text(sql))
     session.commit()
 
 def restore_repo_trash(session, record):
@@ -291,7 +305,7 @@ def clean_up_repo_trash(session, repo_id, keep_days):
     stmt = delete(FileTrash).where(FileTrash.repo_id == repo_id, FileTrash.delete_time < _timestamp)
     session.execute(stmt)
     session.commit()
-    
+
 def clean_up_all_repo_trash(session, keep_days):
     if keep_days == 0:
         stmt = delete(FileTrash)
@@ -314,10 +328,12 @@ def update_user_activity_timestamp(session, activity_id, record):
     session.commit()
 
 def update_file_history_record(session, history_id, record):
-    stmt = update(FileHistory).where(FileHistory.id == history_id).\
-        values(timestamp=record["timestamp"], file_id=record["obj_id"],
-               commit_id=record["commit_id"], size=record["size"])
-    session.execute(stmt)
+    timestamp = record["timestamp"]
+    file_id = record["obj_id"]
+    commit_id = record["commit_id"]
+    size = record["size"]
+    sql = f"""UPDATE FileHistory SET timestamp='{timestamp}', file_id='{file_id}', commit_id='{commit_id}', "size"={size} WHERE id='{history_id}'"""
+    session.execute(text(sql))
     session.commit()
 
 def query_prev_record(session, record):
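The raw INSERT and UPDATE statements above interpolate values straight into the SQL, so any value containing a single quote (a file path, for instance) breaks the statement. A hedged sketch of the same FileTrash insert with bound parameters, which SQLAlchemy translates to the driver's parameter style (the helper name is illustrative):

    from sqlalchemy import text

    def save_repo_trash_with_params(session, record):
        sql = text(
            'INSERT INTO FileTrash ("user", obj_type, obj_id, obj_name, delete_time, '
            'repo_id, commit_id, path, "size") '
            'VALUES (:user, :obj_type, :obj_id, :obj_name, :delete_time, '
            ':repo_id, :commit_id, :path, :size)')
        session.execute(sql, {
            'user': record['op_user'],
            'obj_type': record['obj_type'],
            'obj_id': record.get('obj_id', ''),
            'obj_name': record['obj_name'],
            'delete_time': record['timestamp'],
            'repo_id': record['repo_id'],
            'commit_id': record.get('commit_id', ''),
            'path': record['path'],
            'size': record.get('size', 0),
        })
        session.commit()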
@@ -366,8 +382,22 @@ def save_filehistory(session, fh_threshold, record):
         file_uuid = uuid.uuid4().__str__()
         record['file_uuid'] = file_uuid
 
-    filehistory = FileHistory(record)
-    session.add(filehistory)
+    op_type = record['op_type']
+    op_user = record['op_user']
+    timestamp = record['timestamp']
+    repo_id = record['repo_id']
+    commit_id = record.get('commit_id', '')
+    file_id = record.get('obj_id')
+    file_uuid = record.get('file_uuid')
+    path = record['path']
+    repo_id_path_md5 = hashlib.md5((repo_id + path).encode('utf8')).hexdigest()
+    size = record.get('size')
+    old_path = record.get('old_path', '')
+
+    sql = f"""INSERT INTO FileHistory (op_type, op_user, timestamp, repo_id, commit_id, file_id, file_uuid, path, repo_id_path_md5, "size", old_path) VALUES
+              ('{op_type}', '{op_user}', '{timestamp}', '{repo_id}', '{commit_id}', '{file_id}', '{file_uuid}', '{path}', '{repo_id_path_md5}', {size}, '{old_path}')"""
+
+    session.execute(text(sql))
     session.commit()
 
@@ -376,10 +406,12 @@ def save_file_update_event(session, timestamp, user, org_id, repo_id,
     if timestamp is None:
         timestamp = datetime.datetime.utcnow()
 
-    event = FileUpdate(timestamp, user, org_id, repo_id, commit_id, file_oper)
-    session.add(event)
+    sql = f"""INSERT INTO FileUpdate (timestamp, "user", org_id, repo_id, commit_id, file_oper) VALUES
+              ('{timestamp}', '{user}', {org_id}, '{repo_id}', '{commit_id}', '{file_oper}')"""
+    session.execute(text(sql))
     session.commit()
 
+
 def get_events(session, obj, username, org_id, repo_id, file_path, start, limit):
     if start < 0:
         logger.error('start must be non-negative')
@@ -428,10 +460,10 @@ def save_file_audit_event(session, timestamp, etype, user, ip, device,
     if timestamp is None:
         timestamp = datetime.datetime.utcnow()
 
-    file_audit = FileAudit(timestamp, etype, user, ip, device, org_id,
-                           repo_id, file_path)
-
-    session.add(file_audit)
+    sql = f"""INSERT INTO FileAudit (timestamp, etype, "user", ip, device, org_id,
+              repo_id, file_path) VALUES
+              ('{timestamp}', '{etype}', '{user}', '{ip}', '{device}', {org_id}, '{repo_id}', '{file_path}')"""
+    session.execute(text(sql))
     session.commit()
 
 def save_perm_audit_event(session, timestamp, etype, from_user, to,
@@ -439,10 +471,10 @@ def save_perm_audit_event(session, timestamp, etype, from_user, to,
     if timestamp is None:
         timestamp = datetime.datetime.utcnow()
 
-    perm_audit = PermAudit(timestamp, etype, from_user, to, org_id,
-                           repo_id, file_path, perm)
-
-    session.add(perm_audit)
+    sql = f"""INSERT INTO PermAudit (timestamp, etype, from_user, "to", org_id,
+              repo_id, file_path, permission) VALUES
+              ('{timestamp}', '{etype}', '{from_user}', '{to}', {org_id}, '{repo_id}', '{file_path}', '{perm}')"""
+    session.execute(text(sql))
     session.commit()
 
 def get_perm_audit_events(session, from_user, org_id, repo_id, start, limit):
diff --git a/events/models.py b/events/models.py
index 35815192..3d096ea2 100644
--- a/events/models.py
+++ b/events/models.py
@@ -12,7 +12,7 @@ class Activity(Base):
     """
     """
-    __tablename__ = 'Activity'
+    __tablename__ = 'ACTIVITY'
 
     id = mapped_column(BigInteger, primary_key=True, autoincrement=True)
     op_type = mapped_column(String(length=128), nullable=False)
@@ -51,7 +51,7 @@ def __str__(self):
 
 class UserActivity(Base):
     """
     """
-    __tablename__ = 'UserActivity'
+    __tablename__ = 'USERACTIVITY'
 
     id = mapped_column(BigInteger, primary_key=True, autoincrement=True)
     username = mapped_column(String(length=255), nullable=False)
@@ -73,7 +73,7 @@ def __str__(self):
 
 class FileHistory(Base):
-    __tablename__ = 'FileHistory'
+    __tablename__ = 'FILEHISTORY'
 
     id = mapped_column(BigInteger, primary_key=True, autoincrement=True)
     op_type = mapped_column(String(length=128), nullable=False)
@@ -105,7 +105,7 @@ def __init__(self, record):
 
 class FileAudit(Base):
-    __tablename__ = 'FileAudit'
+    __tablename__ = 'FILEAUDIT'
 
     eid = mapped_column(BigInteger, primary_key=True, autoincrement=True)
     timestamp = mapped_column(DateTime, nullable=False, index=True)
@@ -149,7 +149,7 @@ def __str__(self):
 
 class FileUpdate(Base):
-    __tablename__ = 'FileUpdate'
+    __tablename__ = 'FILEUPDATE'
 
     eid = mapped_column(BigInteger, primary_key=True, autoincrement=True)
     timestamp = mapped_column(DateTime, nullable=False, index=True)
@@ -186,7 +186,7 @@ def __str__(self):
 
 class PermAudit(Base):
-    __tablename__ = 'PermAudit'
+    __tablename__ = 'PERMAUDIT'
 
     eid = mapped_column(BigInteger, primary_key=True, autoincrement=True)
     timestamp = mapped_column(DateTime, nullable=False)
@@ -246,7 +246,7 @@ def __init__(self, login_date, username, login_ip, login_success):
         self.login_success = login_success
 
 class FileTrash(Base):
-    __tablename__ = 'FileTrash'
+    __tablename__ = 'FILETRASH'
 
     id = mapped_column(Integer, primary_key=True, autoincrement=True)
     user = mapped_column(String(length=255), nullable=False)
diff --git a/main.py b/main.py
index 40ecd073..17c40aad 100644
--- a/main.py
+++ b/main.py
@@ -37,7 +37,7 @@ def main(background_tasks_only=False):
     seafile_config = get_config(seafile_conf_path)
     config = get_config(args.config_file)
     try:
-        create_db_tables(config)
+        # create_db_tables(config)
         prepare_db_tables(seafile_config)
     except Exception as e:
         logging.error('Failed create tables, error: %s' % e)
diff --git a/repo_data/db.py b/repo_data/db.py
index 2c766c4d..1ddec672 100644
--- a/repo_data/db.py
+++ b/repo_data/db.py
@@ -36,6 +36,20 @@ def create_engine_from_conf(config_file):
         db_url = "mysql+pymysql://%s:%s@%s:%s/%s?charset=utf8" % \
             (db_username, quote_plus(db_passwd), db_server, db_port, db_name)
+
+    elif backend == 'dm':
+        db_server = 'localhost'
+        db_port = 5236
+
+        if seaf_conf.has_option('database', 'host'):
+            db_server = seaf_conf.get('database', 'host')
+        if seaf_conf.has_option('database', 'port'):
+            db_port = seaf_conf.getint('database', 'port')
+        db_username = seaf_conf.get('database', 'user')
+        db_passwd = seaf_conf.get('database', 'password')
+        # under DM the schema is named after the database user
+        db_name = seaf_conf.get('database', 'user')
+        db_url = "dm+dmPython://%s:%s@%s:%s?schema=%s" % (db_username, quote_plus(db_passwd),
+                                                          db_server, db_port, db_name)
     else:
         logger.critical("Unknown Database backend: %s" % backend)
         raise RuntimeError("Unknown Database backend: %s" % backend)
@@ -57,7 +71,7 @@ def init_db_session_class(config_file):
     except (configparser.NoOptionError, configparser.NoSectionError) as e:
         logger.error(e)
         raise RuntimeError("invalid config file %s", config_file)
-    
+
     Session = sessionmaker(bind=engine)
     return Session
 
@@ -74,6 +88,6 @@ def ping_connection(dbapi_connection, connection_record, connection_proxy):  # pylint: disable=unused-argument
     except:
         logger.info('fail to ping database server, disposing all cached connections')
         connection_proxy._pool.dispose()  # pylint: disable=protected-access
-    
+
         # Raise DisconnectionError so the pool would create a new connection
         raise DisconnectionError()
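The ping_connection listener above recycles dead pooled connections by hand. SQLAlchemy can perform the same check itself via pool_pre_ping; a sketch, assuming the DM dialect from this patch is installed (the URL is a placeholder):

    from sqlalchemy import create_engine

    db_url = 'dm+dmPython://seafile:secret@localhost:5236/?schema=SEAFILE'  # placeholder
    # pool_pre_ping issues a cheap test statement on checkout and transparently
    # replaces connections that the server has dropped
    engine = create_engine(db_url, pool_pre_ping=True)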
diff --git a/statistics/counter.py b/statistics/counter.py
index 61b9853b..29c7e47c 100644
--- a/statistics/counter.py
+++ b/statistics/counter.py
@@ -61,7 +61,7 @@ def save_traffic_info(session, timestamp, user_name, repo_id, oper, size):
         traffic_info[time_str][(org_id, user_name, oper)] = size
     else:
         traffic_info[time_str][(org_id, user_name, oper)] += size
-    
+
 def get_role_download_rate_limit_info():
     if not ENABLED_ROLE_PERMISSIONS:
         return None
@@ -76,8 +76,8 @@ def get_role_download_rate_limit_info():
         rate_limit[MONTHLY_RATE_LIMIT_PER_USER] = monthly_rate_limit_per_user
         traffic_info_dict[role] = rate_limit
     return traffic_info_dict
-    
-    
+
+
 class FileOpsCounter(object):
 
     def __init__(self, config):
@@ -151,20 +151,32 @@ def start_count(self):
             return
 
         for k, v in org_added.items():
-            new_record = FileOpsStat(k, s_timestamp, 'Added', v)
-            self.edb_session.add(new_record)
+            sql = f"""INSERT INTO FileOpsStat (timestamp, op_type, "number", org_id) VALUES
+                      ('{s_timestamp}', 'Added', '{v}', '{k}')"""
+
+            self.edb_session.execute(text(sql))
+            self.edb_session.commit()
 
         for k, v in org_deleted.items():
-            new_record = FileOpsStat(k, s_timestamp, 'Deleted', v)
-            self.edb_session.add(new_record)
+            sql = f"""INSERT INTO FileOpsStat (timestamp, op_type, "number", org_id) VALUES
+                      ('{s_timestamp}', 'Deleted', '{v}', '{k}')"""
+
+            self.edb_session.execute(text(sql))
+            self.edb_session.commit()
 
         for k, v in org_visited.items():
-            new_record = FileOpsStat(k, s_timestamp, 'Visited', v)
-            self.edb_session.add(new_record)
+            sql = f"""INSERT INTO FileOpsStat (timestamp, op_type, "number", org_id) VALUES
+                      ('{s_timestamp}', 'Visited', '{v}', '{k}')"""
+
+            self.edb_session.execute(text(sql))
+            self.edb_session.commit()
 
         for k, v in org_modified.items():
-            new_record = FileOpsStat(k, s_timestamp, 'Modified', v)
-            self.edb_session.add(new_record)
+            sql = f"""INSERT INTO FileOpsStat (timestamp, op_type, "number", org_id) VALUES
+                      ('{s_timestamp}', 'Modified', '{v}', '{k}')"""
+
+            self.edb_session.execute(text(sql))
+            self.edb_session.commit()
 
         logging.info('[FileOpsCounter] Finish counting file operations in %s seconds, %d added, %d deleted, %d visited,'
                      ' %d modified',
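Each loop above issues one INSERT plus one commit per row. A sketch of the same writes batched into a single executemany call with one commit at the end (the counters layout is assumed for illustration):

    from sqlalchemy import text

    def insert_file_ops(session, s_timestamp, counters):
        # counters: {'Added': {org_id: n, ...}, 'Deleted': {...}, ...}
        rows = [{'ts': s_timestamp, 'op': op_type, 'num': n, 'org': org_id}
                for op_type, per_org in counters.items()
                for org_id, n in per_org.items()]
        if rows:
            # a list of parameter dicts makes SQLAlchemy use executemany
            session.execute(
                text('INSERT INTO FileOpsStat (timestamp, op_type, "number", org_id) '
                     'VALUES (:ts, :op, :num, :org)'),
                rows)
            session.commit()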
@@ -182,11 +194,11 @@ def start_count(self):
         logging.info('Start counting total storage..')
         time_start = time.time()
         try:
-            RepoSize = SeafBase.classes.RepoSize
-            VirtualRepo = SeafBase.classes.VirtualRepo
-            OrgRepo = SeafBase.classes.OrgRepo
+            RepoSize = SeafBase.classes.reposize
+            VirtualRepo = SeafBase.classes.virtualrepo
+            OrgRepo = SeafBase.classes.orgrepo
 
-            stmt = select(func.sum(RepoSize.size).label("size"), OrgRepo.org_id).outerjoin(
+            stmt = select(func.sum(RepoSize.SIZE).label("size"), OrgRepo.org_id).outerjoin(
                 VirtualRepo, RepoSize.repo_id == VirtualRepo.repo_id).outerjoin(
                 OrgRepo, RepoSize.repo_id == OrgRepo.repo_id).where(
                 VirtualRepo.repo_id == null()).group_by(OrgRepo.org_id)
@@ -293,7 +305,7 @@ def update_record(self, local_traffic_info, date, date_str):
             size = local_traffic_info[date_str][row]
             if size == 0:
                 continue
-            
+
             traffic_threshold = None
             if traffic_info_dict and oper in self.download_type_list:
                 with CcnetDB() as ccnet_db:
diff --git a/statistics/db.py b/statistics/db.py
index 0135e19a..52038d12 100644
--- a/statistics/db.py
+++ b/statistics/db.py
@@ -1,5 +1,5 @@
 import logging
-from sqlalchemy import desc, func, distinct, select
+from sqlalchemy import desc, func, distinct, select, text
 from datetime import datetime
 
 from .models import UserActivityStat, UserTraffic, SysTraffic, \
@@ -10,6 +10,22 @@
 repo_org = {}
 is_org = -1
 
+import pytz
+from seafevents.app.config import TIME_ZONE
+
+def convert_timezone(dt, from_tz, to_tz):
+    if not isinstance(dt, datetime):
+        raise TypeError('Expected a datetime object')
+
+    if from_tz is None:
+        from_tz = pytz.timezone('UTC')
+    if to_tz is None:
+        to_tz = pytz.timezone('UTC')
+
+    aware_datetime = from_tz.normalize(dt.astimezone(pytz.UTC))
+    return aware_datetime.astimezone(to_tz)
+
+
 def get_org_id(repo_id):
     global is_org
     if is_org == -1:
@@ -41,12 +57,18 @@ def get_user_activity_stats_by_day(session, start, end, offset='+00:00'):
     # offset is not supported for now
     offset='+00:00'
 
-    stmt = select(func.date(func.convert_tz(UserActivityStat.timestamp, '+00:00', offset)).label("timestamp"),
+    # stmt = select(func.date(func.convert_tz(UserActivityStat.timestamp, '+00:00', offset)).label("timestamp"),
+    #               func.count(distinct(UserActivityStat.username)).label("number")).where(
+    #                   UserActivityStat.timestamp.between(
+    #                       func.convert_tz(start_at_0, offset, '+00:00'),
+    #                       func.convert_tz(end_at_23, offset, '+00:00'))).group_by(
+    #                   func.date(func.convert_tz(UserActivityStat.timestamp, '+00:00', offset))).order_by("timestamp")
+
+    stmt = select(func.TO_DATE(UserActivityStat.timestamp).label("timestamp"),
                   func.count(distinct(UserActivityStat.username)).label("number")).where(
-                      UserActivityStat.timestamp.between(
-                          func.convert_tz(start_at_0, offset, '+00:00'),
-                          func.convert_tz(end_at_23, offset, '+00:00'))).group_by(
-                      func.date(func.convert_tz(UserActivityStat.timestamp, '+00:00', offset))).order_by("timestamp")
+                      UserActivityStat.timestamp.between(start_at_0, end_at_23)).group_by(
+                      func.TO_DATE(UserActivityStat.timestamp)).order_by("timestamp")
+
     rows = session.execute(stmt).all()
     ret = []
 
@@ -79,20 +101,31 @@ def get_org_user_activity_stats_by_day(session, org_id, start, end):
 
 def _get_total_storage_stats(session, start, end, offset='+00:00', org_id=0):
     ret = []
+
+    off_hour = int(offset[0:3])
     try:
-        stmt = select(func.convert_tz(TotalStorageStat.timestamp, '+00:00', offset).label("timestamp"),
-                      func.sum(TotalStorageStat.total_size).label("total_size"))
-        if org_id == 0:
-            stmt = stmt.where(TotalStorageStat.timestamp.between(
-                func.convert_tz(start, offset, '+00:00'),
-                func.convert_tz(end, offset, '+00:00')))
-        else:
-            stmt = stmt.where(TotalStorageStat.timestamp.between(
-                func.convert_tz(start, offset, '+00:00'),
-                func.convert_tz(end, offset, '+00:00')),
-                TotalStorageStat.org_id == org_id)
-        stmt = stmt.group_by("timestamp").order_by("timestamp")
-        rows = session.execute(stmt).all()
+        # stmt = select(func.convert_tz(TotalStorageStat.timestamp, '+00:00', offset).label("timestamp"),
+        #               func.sum(TotalStorageStat.total_size).label("total_size"))
+        # if org_id == 0:
+        #     stmt = stmt.where(TotalStorageStat.timestamp.between(
+        #         func.convert_tz(start, offset, '+00:00'),
+        #         func.convert_tz(end, offset, '+00:00')))
+        # else:
+        #     stmt = stmt.where(TotalStorageStat.timestamp.between(
+        #         func.convert_tz(start, offset, '+00:00'),
+        #         func.convert_tz(end, offset, '+00:00')),
+        #         TotalStorageStat.org_id == org_id)
+        # stmt = stmt.group_by("timestamp").order_by("timestamp")
+
+        sql = f"""SELECT DATEADD(HH, {off_hour}, timestamp) as timestamp, sum(total_size) as total_size FROM TotalStorageStat
+                  WHERE timestamp between DATEADD(HH, {-off_hour}, '{start}') AND DATEADD(HH, {-off_hour}, '{end}')
+               """
+
+        if org_id != 0:
+            sql += f" AND org_id={org_id}"
+
+        sql += " GROUP BY timestamp ORDER BY timestamp"
+        rows = session.execute(text(sql)).all()
 
         for row in rows:
             ret.append((row[0], row[1]))
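off_hour = int(offset[0:3]) keeps only whole hours, so an offset such as '+05:30' silently loses its minutes. A small sketch of a stricter parser, in case fractional-hour offsets ever need to be supported:

    def parse_utc_offset(offset):
        """Return a UTC offset string like '+08:00' as signed minutes."""
        sign = -1 if offset.startswith('-') else 1
        hours, _, minutes = offset.lstrip('+-').partition(':')
        return sign * (int(hours) * 60 + int(minutes or 0))

    assert parse_utc_offset('+08:00') == 480
    assert parse_utc_offset('-05:30') == -330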
@@ -156,27 +189,127 @@ def get_org_storage_stats_by_day(session, org_id, start, end, offset='+00:00'):
     return ret
 
+
 def get_file_ops_stats_by_day(session, start, end, offset='+00:00'):
     start_str = start.strftime('%Y-%m-%d 00:00:00')
     end_str = end.strftime('%Y-%m-%d 23:59:59')
     start_at_0 = datetime.strptime(start_str, '%Y-%m-%d %H:%M:%S')
     end_at_23 = datetime.strptime(end_str, '%Y-%m-%d %H:%M:%S')
 
-    stmt = select(func.date(func.convert_tz(FileOpsStat.timestamp, '+00:00', offset)).label("timestamp"),
-                  func.sum(FileOpsStat.number).label("number"),
-                  FileOpsStat.op_type).where(FileOpsStat.timestamp.between(
-                      func.convert_tz(start_at_0, offset, '+00:00'),
-                      func.convert_tz(end_at_23, offset, '+00:00'))).group_by(
-                  func.date(func.convert_tz(FileOpsStat.timestamp, '+00:00', offset)),
-                  FileOpsStat.op_type).order_by("timestamp")
-
-    rows = session.execute(stmt).all()
+    # # the func.convert_tz syntax is not supported here, so this needs adjusting
+    # stmt = select(func.date(func.convert_tz(FileOpsStat.timestamp, '+00:00', offset)).label("timestamp"),
+    #               func.sum(FileOpsStat.number).label("number"),
+    #               FileOpsStat.op_type).where(FileOpsStat.timestamp.between(
+    #                   func.convert_tz(start_at_0, offset, '+00:00'),
+    #                   func.convert_tz(end_at_23, offset, '+00:00'))).group_by(
+    #               func.date(func.convert_tz(FileOpsStat.timestamp, '+00:00', offset)),
+    #               FileOpsStat.op_type).order_by("timestamp")
+    #
+
+    # With the variant below, no timezone conversion would be needed in Python:
+    # from_tz = pytz.timezone(TIME_ZONE)
+    # end_at_23 = convert_timezone(end_at_23, from_tz=from_tz, to_tz=pytz.UTC)
+    # start_at_0 = convert_timezone(start_at_0, from_tz=from_tz, to_tz=pytz.UTC)
+    # off_hour = int(offset[0:3])
+    # sql = f"""SELECT TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS') as timestamp, sum("number") as "number", op_type
+    #           FROM FileOpsStat WHERE timestamp between '{start_at_0}' AND '{end_at_23}'
+    #           GROUP BY YEAR(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')),
+    #                    MONTH(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')),
+    #                    DAY(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), op_type """
+
+    off_hour = int(offset[0:3])
+    sql = f"""SELECT TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS') as timestamp, sum("number") as "number", op_type
+              FROM FileOpsStat WHERE timestamp between DATEADD(HH, {-off_hour}, '{start_at_0}') AND DATEADD(HH, {-off_hour}, '{end_at_23}')
+              GROUP BY YEAR(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')),
+                       MONTH(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')),
+                       DAY(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), op_type """
+
+    rows = session.execute(text(sql)).fetchall()
     ret = []
-
     for row in rows:
-        ret.append((datetime.strptime(str(row[0]), '%Y-%m-%d'), row[2], int(row[1])))
+        ret.append((datetime.strptime(str(datetime.date(row[0])), '%Y-%m-%d'), row[2], int(row[1])))
+
     return ret
 
 def get_org_file_ops_stats_by_day(session, org_id, start, end, offset='+00:00'):
     start_str = start.strftime('%Y-%m-%d 00:00:00')
     end_str = end.strftime('%Y-%m-%d 23:59:59')
@@ -185,16 +318,26 @@ def get_org_file_ops_stats_by_day(session, org_id, start, end, offset='+00:00'):
     start_at_0 = datetime.strptime(start_str, '%Y-%m-%d %H:%M:%S')
     end_at_23 = datetime.strptime(end_str, '%Y-%m-%d %H:%M:%S')
 
     ret = []
     try:
-        stmt = select(func.date(func.convert_tz(FileOpsStat.timestamp, '+00:00', offset)).label("timestamp"),
-                      func.sum(FileOpsStat.number).label("number"), FileOpsStat.op_type).where(
-                          FileOpsStat.timestamp.between(
-                              func.convert_tz(start_at_0, offset, '+00:00'),
-                              func.convert_tz(end_at_23, offset, '+00:00')),
-                          FileOpsStat.org_id == org_id).group_by(
-                      func.date(func.convert_tz(FileOpsStat.timestamp, '+00:00', offset)),
-                      FileOpsStat.op_type).order_by("timestamp")
-
-        rows = session.execute(stmt).all()
+        # stmt = select(func.date(func.convert_tz(FileOpsStat.timestamp, '+00:00', offset)).label("timestamp"),
+        #               func.sum(FileOpsStat.number).label("number"), FileOpsStat.op_type).where(
+        #                   FileOpsStat.timestamp.between(
+        #                       func.convert_tz(start_at_0, offset, '+00:00'),
+        #                       func.convert_tz(end_at_23, offset, '+00:00')),
+        #                   FileOpsStat.org_id == org_id).group_by(
+        #               func.date(func.convert_tz(FileOpsStat.timestamp, '+00:00', offset)),
+        #               FileOpsStat.op_type).order_by("timestamp")
+        #
+        # rows = session.execute(stmt).all()
+
+        off_hour = int(offset[0:3])
+        sql = f"""SELECT TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS') as timestamp, sum("number") as "number", op_type
+                  FROM FileOpsStat WHERE timestamp between DATEADD(HH, {-off_hour}, '{start_at_0}') AND DATEADD(HH, {-off_hour}, '{end_at_23}') AND org_id={org_id}
+                  GROUP BY YEAR(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')),
+                           MONTH(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')),
+                           DAY(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), op_type
+                  ORDER BY timestamp"""
+
+        rows = session.execute(text(sql)).fetchall()
 
         for row in rows:
             timestamp = datetime.strptime(str(row[0]), '%Y-%m-%d')
@@ -294,33 +437,52 @@ def get_org_traffic_by_day(session, org_id, start, end, offset='+00:00', op_type='all'):
     # offset is not supported for now
     offset='+00:00'
 
+    off_hour = int(offset[0:3])
     if op_type == 'web-file-upload' or op_type == 'web-file-download' or op_type == 'sync-file-download' \
             or op_type == 'sync-file-upload' or op_type == 'link-file-upload' or op_type == 'link-file-download':
-        stmt = select(func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)).label("timestamp"),
-                      func.sum(SysTraffic.size).label("size"),
-                      SysTraffic.op_type).where(SysTraffic.timestamp.between(
-                          func.convert_tz(start_at_0, offset, '+00:00'),
-                          func.convert_tz(end_at_23, offset, '+00:00')),
-                          SysTraffic.org_id == org_id,
-                          SysTraffic.op_type == op_type).group_by(
-                      SysTraffic.org_id,
-                      func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)),
-                      SysTraffic.op_type).order_by("timestamp")
+        # stmt = select(func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)).label("timestamp"),
+        #               func.sum(SysTraffic.size).label("size"),
+        #               SysTraffic.op_type).where(SysTraffic.timestamp.between(
+        #                   func.convert_tz(start_at_0, offset, '+00:00'),
+        #                   func.convert_tz(end_at_23, offset, '+00:00')),
+        #                   SysTraffic.org_id == org_id,
+        #                   SysTraffic.op_type == op_type).group_by(
+        #               SysTraffic.org_id,
+        #               func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)),
+        #               SysTraffic.op_type).order_by("timestamp")
+
+        sql = f"""SELECT TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS') as timestamp, sum("size") as "size", op_type
+                  FROM SysTraffic WHERE timestamp between DATEADD(HH, {-off_hour}, '{start_at_0}') AND DATEADD(HH, {-off_hour}, '{end_at_23}')
+                  AND org_id={org_id} AND op_type='{op_type}'
+                  GROUP BY org_id, YEAR(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')),
+                           MONTH(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')),
+                           DAY(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), op_type
+                  ORDER BY timestamp """
+
     elif op_type == 'all':
-        stmt = select(func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)).label("timestamp"),
-                      func.sum(SysTraffic.size).label("size"),
-                      SysTraffic.op_type).where(SysTraffic.timestamp.between(
-                          func.convert_tz(start_at_0, offset, '+00:00'),
-                          func.convert_tz(end_at_23, offset, '+00:00')),
-                          SysTraffic.org_id == org_id).group_by(
-                      SysTraffic.org_id,
-                      func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)),
-                      SysTraffic.op_type).order_by("timestamp")
+        # stmt = select(func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)).label("timestamp"),
+        #               func.sum(SysTraffic.size).label("size"),
+        #               SysTraffic.op_type).where(SysTraffic.timestamp.between(
+        #                   func.convert_tz(start_at_0, offset, '+00:00'),
+        #                   func.convert_tz(end_at_23, offset, '+00:00')),
+        #                   SysTraffic.org_id == org_id).group_by(
+        #               SysTraffic.org_id,
+        #               func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)),
+        #               SysTraffic.op_type).order_by("timestamp")
+
+        sql = f"""SELECT TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS') as timestamp, sum("size") as "size", op_type
+                  FROM SysTraffic WHERE timestamp between DATEADD(HH, {-off_hour}, '{start_at_0}') AND DATEADD(HH, {-off_hour}, '{end_at_23}')
+                  AND org_id={org_id}
+                  GROUP BY org_id, YEAR(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')),
+                           MONTH(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')),
+                           DAY(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), op_type
+                  ORDER BY timestamp """
     else:
         return []
 
-    rows = session.execute(stmt).all()
+    # rows = session.execute(stmt).all()
+    rows = session.execute(text(sql)).fetchall()
 
     ret = []
     for row in rows:
sum("size") as "size", op_type + FROM SysTraffic WHERE timestamp between DATEADD(HH, {-off_hour}, '{start_at_0}') AND DATEADD(HH, {-off_hour}, '{end_at_23}') + AND org_id={org_id} + GROUP BY org_id, YEAR(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), + MONTH(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), + DAY(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), op_type + ORDER BY timestamp """ else: return [] - rows = session.execute(stmt).all() + # rows = session.execute(stmt).all() + rows = session.execute(text(sql)).fetchall() ret = [] for row in rows: @@ -335,29 +497,45 @@ def get_system_traffic_by_day(session, start, end, offset='+00:00', op_type='all # offset is not supported for now offset='+00:00' + off_hour = int(offset[0:3]) if op_type == 'web-file-upload' or op_type == 'web-file-download' or op_type == 'sync-file-download' \ or op_type == 'sync-file-upload' or op_type == 'link-file-upload' or op_type == 'link-file-download': - stmt = select(func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)).label("timestamp"), - func.sum(SysTraffic.size).label("size"), - SysTraffic.op_type).where(SysTraffic.timestamp.between( - func.convert_tz(start_at_0, offset, '+00:00'), - func.convert_tz(end_at_23, offset, '+00:00')), - SysTraffic.op_type == op_type).group_by( - func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)), - SysTraffic.op_type).order_by("timestamp") + # stmt = select(func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)).label("timestamp"), + # func.sum(SysTraffic.size).label("size"), + # SysTraffic.op_type).where(SysTraffic.timestamp.between( + # func.convert_tz(start_at_0, offset, '+00:00'), + # func.convert_tz(end_at_23, offset, '+00:00')), + # SysTraffic.op_type == op_type).group_by( + # func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)), + # SysTraffic.op_type).order_by("timestamp") + sql = f"""SELECT TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS') as timestamp, sum("size") as "size", op_type + FROM SysTraffic WHERE timestamp between DATEADD(HH, {-off_hour}, '{start_at_0}') AND DATEADD(HH, {-off_hour}, '{end_at_23}') + AND op_type={op_type} + GROUP BY YEAR(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), + MONTH(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), + DAY(TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS')), op_type + ORDER BY timestamp """ elif op_type == 'all': - stmt = select(func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)).label("timestamp"), - func.sum(SysTraffic.size).label("size"), - SysTraffic.op_type).where(SysTraffic.timestamp.between( - func.convert_tz(start_at_0, offset, '+00:00'), - func.convert_tz(end_at_23, offset, '+00:00'))).group_by( - func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)), - SysTraffic.op_type).order_by("timestamp") + # stmt = select(func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)).label("timestamp"), + # func.sum(SysTraffic.size).label("size"), + # SysTraffic.op_type).where(SysTraffic.timestamp.between( + # func.convert_tz(start_at_0, offset, '+00:00'), + # func.convert_tz(end_at_23, offset, '+00:00'))).group_by( + # func.date(func.convert_tz(SysTraffic.timestamp, '+00:00', offset)), + # SysTraffic.op_type).order_by("timestamp") + + sql = f"""SELECT TO_DATE(DATEADD(HH, {off_hour}, timestamp), 'YYYY-MM-DD HH24:MI:SS') as timestamp, sum("size") as "size", op_type + FROM SysTraffic 
diff --git a/statistics/models.py b/statistics/models.py
index bf056066..185c18cc 100644
--- a/statistics/models.py
+++ b/statistics/models.py
@@ -6,7 +6,7 @@ class TotalStorageStat(Base):
-    __tablename__ = 'TotalStorageStat'
+    __tablename__ = 'TOTALSTORAGESTAT'
 
     id = mapped_column(Integer, primary_key=True, autoincrement=True)
     timestamp = mapped_column(DateTime, nullable=False)
@@ -23,14 +23,14 @@ def __init__(self, org_id, timestamp, total_size):
 
 class FileOpsStat(Base):
-    __tablename__ = 'FileOpsStat'
+    __tablename__ = 'FILEOPSSTAT'
 
     id = mapped_column(Integer, primary_key=True, autoincrement=True)
     timestamp = mapped_column(DateTime, nullable=False)
     op_type = mapped_column(String(length=16), nullable=False)
     number = mapped_column(Integer, nullable=False)
     org_id = mapped_column(Integer, nullable=False)
-    
+
     __table_args__ = (Index('idx_file_ops_time_org', 'timestamp', 'org_id'), )
 
     def __init__(self, org_id, timestamp, op_type, number):
@@ -42,7 +42,7 @@ def __init__(self, org_id, timestamp, op_type, number):
 
 class UserActivityStat(Base):
-    __tablename__ = 'UserActivityStat'
+    __tablename__ = 'USERACTIVITYSTAT'
 
     id = mapped_column(Integer, primary_key=True, autoincrement=True)
     name_time_md5 = mapped_column(String(length=32), unique=True)
@@ -61,7 +61,7 @@ def __init__(self, name_time_md5, org_id, username, timestamp):
 
 class UserTraffic(Base):
-    __tablename__ = 'UserTraffic'
+    __tablename__ = 'USERTRAFFIC'
 
     id = mapped_column(Integer, primary_key=True, autoincrement=True)
     user = mapped_column(String(length=255), nullable=False)
@@ -82,7 +82,7 @@ def __init__(self, user, timestamp, op_type, size, org_id):
 
 class SysTraffic(Base):
-    __tablename__ = 'SysTraffic'
+    __tablename__ = 'SYSTRAFFIC'
 
     id = mapped_column(Integer, primary_key=True, autoincrement=True)
     org_id = mapped_column(Integer, index=True)
@@ -101,7 +101,7 @@ def __init__(self, timestamp, op_type, size, org_id):
 
 class MonthlyUserTraffic(Base):
-    __tablename__ = 'MonthlyUserTraffic'
+    __tablename__ = 'MONTHLYUSERTRAFFIC'
 
     id = mapped_column(Integer, primary_key=True, autoincrement=True)
     user = mapped_column(String(length=255), nullable=False)
@@ -130,7 +130,7 @@ def __init__(self, user, org_id, timestamp, size_dict):
 
 class MonthlySysTraffic(Base):
-    __tablename__ = 'MonthlySysTraffic'
+    __tablename__ = 'MONTHLYSYSTRAFFIC'
 
     id = mapped_column(Integer, primary_key=True, autoincrement=True)
     org_id = mapped_column(Integer)
diff --git a/utils/ccnet_db.py b/utils/ccnet_db.py
index c7fdcc42..bd91b817 100644
--- a/utils/ccnet_db.py
+++ b/utils/ccnet_db.py
@@ -2,23 +2,22 @@
 import configparser
 import logging
 from seafevents.app.config import get_config
+from seafevents.db import init_db_session_class
+from sqlalchemy.sql import text
 
 logger = logging.getLogger('seafevents')
 
 
 def get_ccnet_db_name():
-    return os.environ.get('SEAFILE_MYSQL_DB_CCNET_DB_NAME', '') or 'ccnet_db'
+    return os.environ.get('SEAFILE_MYSQL_DB_CCNET_DB_NAME', '') or 'SYSDBA'
 
 
 class CcnetDB(object):
 
     def __init__(self):
-        self.ccnet_db_conn = None
-        self.ccnet_db_cursor = None
+        self.db_session = None
         self.init_ccnet_db()
         self.db_name = get_ccnet_db_name()
-        if self.ccnet_db_cursor is None:
-            raise RuntimeError('Failed to init ccnet db.')
 
     def __enter__(self):
         return self
@@ -27,13 +26,6 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         self.close_ccnet_db()
 
     def init_ccnet_db(self):
-        try:
-            import pymysql
-            pymysql.install_as_MySQLdb()
-        except ImportError as e:
-            logger.warning('Failed to init ccnet db: %s.' % e)
-            return
-
         seafile_conf_dir = os.environ.get('SEAFILE_CENTRAL_CONF_DIR') or os.environ.get('SEAFILE_CONF_DIR')
         if not seafile_conf_dir:
             logging.warning('Environment variable seafile_conf_dir is not define')
@@ -46,31 +38,10 @@ def init_ccnet_db(self):
             logger.warning('Failed to init ccnet db, can not find db info in seafile.conf.')
             return
 
-        if seafile_config.get('database', 'type') != 'mysql':
-            logger.warning('Failed to init ccnet db, only mysql db supported.')
-            return
-
-        db_name = os.environ.get('SEAFILE_MYSQL_DB_CCNET_DB_NAME', '') or 'ccnet_db'
-        db_host = seafile_config.get('database', 'host', fallback='127.0.0.1')
-        db_port = seafile_config.getint('database', 'port', fallback=3306)
-        db_user = seafile_config.get('database', 'user')
-        db_passwd = seafile_config.get('database', 'password')
-
-        try:
-            self.ccnet_db_conn = pymysql.connect(host=db_host, port=db_port, user=db_user,
-                                                 passwd=db_passwd, db=db_name, charset='utf8')
-            self.ccnet_db_conn.autocommit(True)
-            self.ccnet_db_cursor = self.ccnet_db_conn.cursor()
-        except Exception as e:
-            self.cursor = None
-            logger.warning('Failed to init ccnet db: %s.' % e)
-            return
+        self.db_session = init_db_session_class(seafile_config, 'seafile')()
 
     def close_ccnet_db(self):
-        if self.ccnet_db_cursor:
-            self.ccnet_db_cursor.close()
-        if self.ccnet_db_conn:
-            self.ccnet_db_conn.close()
+        self.db_session.close()
 
     def get_group_info(self, group):
         info = {
@@ -88,37 +59,30 @@ def get_groups_by_ids(self, group_ids):
         sql = f"""
         SELECT *
         FROM
-            `{self.db_name}`.`Group`
+            {self.db_name}."Group"
         WHERE
             group_id IN ({group_ids_str})
         """
-        with self.ccnet_db_cursor as cursor:
-            if not group_ids:
-                return {}
-            cursor.execute(sql)
-            groups_map = {}
-            for item in cursor.fetchall():
-                groups_map[item[0]] = self.get_group_info(item)
+        if not group_ids:
+            return {}
+        cursor = self.db_session.execute(text(sql))
+        groups_map = {}
+        for item in cursor.fetchall():
+            groups_map[item[0]] = self.get_group_info(item)
 
-            return groups_map
+        return groups_map
 
     def get_org_user_count(self, org_id):
         sql = f"""
-        SELECT COUNT(1) FROM `{self.db_name}`.`OrgUser` WHERE org_id={org_id}
+        SELECT COUNT(1) FROM {self.db_name}.OrgUser WHERE org_id={org_id}
         """
-        with self.ccnet_db_cursor as cursor:
-            cursor.execute(sql)
-
-            return cursor.fetchone()[0]
+        return self.db_session.execute(text(sql)).fetchone()[0]
 
     def get_user_role(self, email):
         sql = f"""
-        SELECT role FROM `{self.db_name}`.`UserRole`
-        WHERE email="{email}"
+        SELECT role FROM {self.db_name}.UserRole
+        WHERE email='{email}'
         """
-        with self.ccnet_db_cursor as cursor:
-            cursor.execute(sql)
-            result = cursor.fetchone()
-
-            return result[0] if result else 'default'
+        result = self.db_session.execute(text(sql)).fetchone()
+        return result[0] if result else 'default'
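A usage sketch for the session-backed CcnetDB above; the import path and arguments are illustrative (counter.py already uses the same with-statement pattern):

    from seafevents.utils.ccnet_db import CcnetDB  # assumed import path

    with CcnetDB() as ccnet_db:  # __exit__ closes the underlying session
        groups_map = ccnet_db.get_groups_by_ids([1, 2, 3])
        role = ccnet_db.get_user_role('user@example.com')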
diff --git a/utils/seafile_db.py b/utils/seafile_db.py
index b1f61e73..9aec2630 100644
--- a/utils/seafile_db.py
+++ b/utils/seafile_db.py
@@ -2,7 +2,8 @@
 import configparser
 import logging
 from seafevents.app.config import get_config
-
+from seafevents.db import init_db_session_class
+from sqlalchemy.sql import text
 
 logger = logging.getLogger('seafevents')
 
@@ -18,24 +19,24 @@ def get_seafile_db_name():
     config.read(seafile_conf_path)
 
     if config.has_section('database'):
-        db_name = config.get('database', 'db_name', fallback='seafile')
+        # Temporary adjustment:
+        # db_name = config.get('database', 'db_name', fallback='seafile')
+        db_name = config.get('database', 'user', fallback='seafile')
     else:
-        db_name = 'seafile'
+        db_name = 'sysdba'
 
-    if config.get('database', 'type') != 'mysql':
-        error_msg = 'Failed to init seafile db, only mysql db supported.'
-        return None, error_msg
+    # if config.get('database', 'type') != 'mysql':
+    #     error_msg = 'Failed to init seafile db, only mysql db supported.'
+    #     print(error_msg)
+    #     # return None, error_msg
 
     return db_name, None
 
 
 class SeafileDB(object):
 
     def __init__(self):
-        self.seafile_db_conn = None
-        self.seafile_db_cursor = None
+        self.db_session = None
         self.init_seafile_db()
         self.db_name = get_seafile_db_name()[0]
-        if self.seafile_db_cursor is None:
-            raise RuntimeError('Failed to init seafile db.')
 
     def __enter__(self):
         return self
@@ -44,13 +45,6 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         self.close_seafile_db()
 
     def init_seafile_db(self):
-        try:
-            import pymysql
-            pymysql.install_as_MySQLdb()
-        except ImportError as e:
-            logger.warning('Failed to init seafile db: %s.' % e)
-            return
-
         seafile_conf_dir = os.environ.get('SEAFILE_CENTRAL_CONF_DIR') or os.environ.get('SEAFILE_CONF_DIR')
         if not seafile_conf_dir:
             logging.warning('Environment variable seafile_conf_dir is not define')
@@ -59,35 +53,10 @@ def init_seafile_db(self):
         seafile_conf_path = os.path.join(seafile_conf_dir, 'seafile.conf')
         seafile_config = get_config(seafile_conf_path)
 
-        if not seafile_config.has_section('database'):
-            logger.warning('Failed to init seafile db, can not find db info in seafile.conf.')
-            return
-
-        if seafile_config.get('database', 'type') != 'mysql':
-            logger.warning('Failed to init seafile db, only mysql db supported.')
-            return
-
-        db_name = seafile_config.get('database', 'db_name', fallback='seafile')
-        db_host = seafile_config.get('database', 'host', fallback='127.0.0.1')
-        db_port = seafile_config.getint('database', 'port', fallback=3306)
-        db_user = seafile_config.get('database', 'user')
-        db_passwd = seafile_config.get('database', 'password')
-
-        try:
-            self.seafile_db_conn = pymysql.connect(host=db_host, port=db_port, user=db_user,
-                                                   passwd=db_passwd, db=db_name, charset='utf8')
-            self.seafile_db_conn.autocommit(True)
-            self.seafile_db_cursor = self.seafile_db_conn.cursor()
-        except Exception as e:
-            self.cursor = None
-            logger.warning('Failed to init seafile db: %s.' % e)
-            return
+        self.db_session = init_db_session_class(seafile_config, 'seafile')()
 
     def close_seafile_db(self):
-        if self.seafile_db_cursor:
-            self.seafile_db_cursor.close()
-        if self.seafile_db_conn:
-            self.seafile_db_conn.close()
+        self.db_session.close()
 
     def repo_info(self, item):
         info = {
@@ -97,43 +66,42 @@ def repo_info(self, item):
         return info
 
     def get_repo_info_by_ids(self, repo_ids):
+        if not repo_ids:
+            return {}
         repo_ids_str = ','.join(["'%s'" % str(repo_id) for repo_id in repo_ids])
         sql1 = f"""
         SELECT r.repo_id, name, owner_id
-        FROM `{self.db_name}`.`RepoInfo` r
-        LEFT JOIN `{self.db_name}`.`RepoOwner` o
+        FROM {self.db_name}.RepoInfo r
+        LEFT JOIN {self.db_name}.RepoOwner o
         ON o.repo_id = r.repo_id
         WHERE r.repo_id IN ({repo_ids_str})
         """
         sql2 = f"""
         SELECT r.repo_id, name, user
-        FROM `{self.db_name}`.`RepoInfo` r
-        LEFT JOIN `{self.db_name}`.`OrgRepo` o
+        FROM {self.db_name}.RepoInfo r
+        LEFT JOIN {self.db_name}.OrgRepo o
         ON o.repo_id = r.repo_id
         WHERE r.repo_id IN ({repo_ids_str})
         """
-        with self.seafile_db_cursor as cursor:
-            if not repo_ids:
-                return {}
-            cursor.execute(sql1)
-            rows1 = cursor.fetchall()
-            cursor.execute(sql2)
-            rows2 = cursor.fetchall()
-            rows = rows1 + rows2
-            repos_map = {}
-            for row in rows:
-                if row[0] not in repos_map or repos_map[row[0]]['owner'] is None:
-                    repos_map[row[0]] = self.repo_info(row)
-
-            return repos_map
+        cursor = self.db_session.execute(text(sql1))
+        rows1 = cursor.fetchall()
+        cursor = self.db_session.execute(text(sql2))
+        rows2 = cursor.fetchall()
+        rows = rows1 + rows2
+        repos_map = {}
+        for row in rows:
+            if row[0] not in repos_map or repos_map[row[0]]['owner'] is None:
+                repos_map[row[0]] = self.repo_info(row)
+
+        return repos_map
 
     def reset_download_rate_limit(self):
         sql1 = f"""
-        TRUNCATE TABLE `{self.db_name}`.`UserDownloadRateLimit`;
+        TRUNCATE TABLE {self.db_name}.UserDownloadRateLimit
        """
         sql2 = f"""
-        TRUNCATE TABLE `{self.db_name}`.`OrgDownloadRateLimit`
+        TRUNCATE TABLE {self.db_name}.OrgDownloadRateLimit
         """
-        with self.seafile_db_cursor as cursor:
-            cursor.execute(sql1)
-            cursor.execute(sql2)
+
+        self.db_session.execute(text(sql1))
+        self.db_session.execute(text(sql2))
diff --git a/virus_scanner/models.py b/virus_scanner/models.py
index e2dfae40..22a032be 100644
--- a/virus_scanner/models.py
+++ b/virus_scanner/models.py
@@ -5,7 +5,7 @@ class VirusScanRecord(Base):
-    __tablename__ = 'VirusScanRecord'
+    __tablename__ = 'VIRUSSCANRECORD'
 
     repo_id = mapped_column(String(length=36), nullable=False, primary_key=True)
     scan_commit_id = mapped_column(String(length=40), nullable=False)
@@ -18,7 +18,7 @@ def __init__(self, repo_id, scan_commit_id):
 
 class VirusFile(Base):
-    __tablename__ = 'VirusFile'
+    __tablename__ = 'VIRUSFILE'
 
     vid = mapped_column(Integer, primary_key=True, autoincrement=True)
     repo_id = mapped_column(String(length=36), nullable=False, index=True)