Users and privileges overhaul (#339)
stijn-uva authored Jul 3, 2023
1 parent 0f8c28b commit ba3a675
Showing 252 changed files with 28,432 additions and 2,325 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker_pr_test.yml
@@ -32,7 +32,7 @@ jobs:
       - name: Print log on failure
         if: failure()
         run: |
-          docker cp 4cat_backend:/4cat/data/logs/backend_4cat.log ./backend_4cat.log
+          docker cp 4cat_backend:/usr/src/app/logs/backend_4cat.log ./backend_4cat.log
           echo "::group::Backend logs"
           cat backend_4cat.log
           echo "::endgroup::"
1 change: 1 addition & 0 deletions .gitignore
@@ -17,6 +17,7 @@
 
 # actual files that are part of 4CAT but should not be included
 config.py
+module_config.bin
 .current-version
 deploy.sh
 module_cache.pb
2 changes: 1 addition & 1 deletion 4cat-daemon.py
@@ -59,7 +59,7 @@
 # we can only import this here, because the version check above needs to be
 # done first, as it may detect that the user needs to migrate first before
 # the config manager can be run properly
-import common.config_manager as config
+from common.config_manager import config
 from common.lib.helpers import call_api
 # ---------------------------------------------
 # Check validity of configuration file
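
The same import change recurs throughout this commit: the config manager is now consumed as a shared instance exported by `common.config_manager` rather than as a bare module, which lets it carry state such as a database connection (see `config.with_db()` in backend/bootstrap.py below). A minimal sketch of the difference at a call site; the setting key is one of the defaults removed from backend/database.sql further down:

    # before: the module itself was the interface
    # import common.config_manager as config

    # after: a single shared object is imported from the module
    from common.config_manager import config

    # reads look the same either way
    fourcat_name = config.get("4cat.name")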
2 changes: 1 addition & 1 deletion VERSION
@@ -1,4 +1,4 @@
-1.33
+1.34
 
 This file should not be modified. It is used by 4CAT to determine whether it
 needs to run migration scripts to e.g. update the database structure to a more
9 changes: 7 additions & 2 deletions backend/bootstrap.py
@@ -11,7 +11,7 @@
 from backend.lib.manager import WorkerManager
 from common.lib.logger import Logger
 
-import common.config_manager as config
+from common.config_manager import config
 
 def run(as_daemon=True):
     pidfile = Path(config.get('PATH_ROOT'), config.get('PATH_LOCKFILE'), "4cat.pid")
@@ -54,13 +54,18 @@ def run(as_daemon=True):
     log = Logger(output=not as_daemon)
 
     log.info("4CAT Backend started, logger initialised")
-    db = Database(logger=log, appname="main")
+    db = Database(logger=log, appname="main",
+                  dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST, port=config.DB_PORT)
     queue = JobQueue(logger=log, database=db)
 
     # clean up after ourselves
     db.commit()
     queue.release_all()
 
+    # ensure database consistency for settings table
+    config.with_db(db)
+    config.ensure_database()
+
     # make it happen
     # this is blocking until the back-end is shut down
     WorkerManager(logger=log, database=db, queue=queue, as_daemon=as_daemon)
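
The two calls added at the end resolve an ordering problem: most settings now live in the database, but the connection parameters themselves (the `config.DB_NAME`-style attributes) must still come from a local file, so the config manager only becomes fully usable once it is handed a live connection via `with_db()`. The body of `ensure_database()` is not part of this diff; the following is a hypothetical sketch of what it might do, assuming it back-fills missing rows from the defaults in `common/lib/config_definitions` (the removed TODO in backend/database.sql below suggests as much):

    import json

    # hypothetical stand-in for config.ensure_database(); the real
    # implementation lives in common/config_manager.py, not shown here
    def ensure_database(db):
        # assumption: config_definition maps setting names to dicts that
        # carry a "default" value
        from common.lib.config_definitions import config_definition

        for name, definition in config_definition.items():
            # the unique index on settings (name, tag) makes this idempotent
            db.execute(
                "INSERT INTO settings (name, value, tag) VALUES (%s, %s, %s) "
                "ON CONFLICT DO NOTHING",
                (name, json.dumps(definition.get("default")), ""))
        db.commit()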
67 changes: 31 additions & 36 deletions backend/database.sql
@@ -5,10 +5,16 @@
 
 -- 4CAT settings table
 CREATE TABLE IF NOT EXISTS settings (
-    name TEXT UNIQUE PRIMARY KEY,
-    value TEXT DEFAULT '{}'
+    name TEXT DEFAULT '' NOT NULL,
+    value TEXT DEFAULT '{}' NOT NULL,
+    tag TEXT DEFAULT '' NOT NULL
 );
 
+CREATE UNIQUE INDEX IF NOT EXISTS unique_setting
+    ON settings (
+        name, tag
+    );
+
 -- jobs table
 CREATE TABLE IF NOT EXISTS jobs (
     id SERIAL PRIMARY KEY,
@@ -38,7 +44,6 @@ CREATE TABLE IF NOT EXISTS datasets (
     key text,
     type text DEFAULT 'search',
     key_parent text DEFAULT '',
-    owner VARCHAR DEFAULT 'anonymous',
     query text,
     job integer DEFAULT 0,
     parameters text,
@@ -54,6 +59,15 @@ CREATE TABLE IF NOT EXISTS datasets (
     annotation_fields text DEFAULT ''
 );
 
+CREATE TABLE datasets_owners (
+    "name" text DEFAULT 'anonymous'::text,
+    key text NOT NULL,
+    role TEXT DEFAULT 'owner'
+);
+
+CREATE UNIQUE INDEX datasets_owners_user_key_idx ON datasets_owners("name" text_ops, key text_ops);
+
+
 -- annotations
 CREATE TABLE IF NOT EXISTS annotations (
     key text UNIQUE PRIMARY KEY,
@@ -75,10 +89,12 @@ CREATE TABLE IF NOT EXISTS users (
     password TEXT,
     is_admin BOOLEAN DEFAULT FALSE,
     register_token TEXT DEFAULT '',
+    timestamp_created INTEGER DEFAULT 0,
     timestamp_token INTEGER DEFAULT 0,
     timestamp_seen INTEGER DEFAULT 0,
     userdata TEXT DEFAULT '{}',
-    is_deactivated BOOLEAN DEFAULT FALSE
+    is_deactivated BOOLEAN DEFAULT FALSE,
+    tags JSONB DEFAULT '[]'
 );
 
 INSERT INTO users
@@ -136,35 +152,14 @@ CREATE FUNCTION count_estimate(query text) RETURNS bigint AS $$
   END;
 $$ LANGUAGE plpgsql VOLATILE STRICT;
 
-
--- fourcat settings insert default settings
--- TODO SHOULD BE ABLE TO REMOVE; all these should have corresponding values in common/lib/config_definitions given defaults
-INSERT INTO settings
-    (name, value)
-    Values
-        ('4cat.datasources', '["bitchute", "custom", "douban", "customimport", "reddit", "telegram", "twitterv2", "tiktok", "instagram", "9gag", "imgur", "linkedin", "parler", "douyin", "twitter-import"]'),
-        ('4cat.name', '"4CAT"'),
-        ('4cat.name_long', '"4CAT: Capture and Analysis Toolkit"'),
-        ('4cat.github_url', '"https://github.com/digitalmethodsinitiative/4cat"'),
-        ('4cat.phone_home_url', '"https://ping.4cat.nl"'),
-        ('path.versionfile', '".git-checked-out"'),
-        ('expire.timeout', '0'),
-        ('expire.allow_optout', 'true'),
-        ('expire.datasources', '{"tumblr": {"timeout": 259200, "allow_optout": false}}'),
-        ('logging.slack.level', '"WARNING"'),
-        ('logging.slack.webhook', 'null'),
-        ('mail.admin_email', 'null'),
-        ('mail.ssl', 'false'),
-        ('mail.username', 'null'),
-        ('mail.password', 'null'),
-        ('mail.noreply', '"noreply@localhost"'),
-        ('fourchan.image_interval', '3600'),
-        ('explorer.max_posts', '100000'),
-        ('flask.flask_app', '"webtool/fourcat"'),
-        ('flask.secret_key', concat('"', substr(md5(random()::text), 0, 25), '"')),
-        ('flask.https', 'false'),
-        ('flask.server_name', '"localhost"'),
-        ('flask.autologin.name', '"Automatic login"'),
-        ('flask.autologin.hostnames', '["localhost"]'),
-        ('flask.autologin.api', '["localhost"]')
-    ON CONFLICT DO NOTHING;
+-- default admin privileges
+INSERT INTO settings (name, value, tag) VALUES
+    ('privileges.admin.can_view_status', 'true', 'admin'),
+    ('privileges.admin.can_manage_users', 'true', 'admin'),
+    ('privileges.admin.can_manage_settings', 'true', 'admin'),
+    ('privileges.admin.can_manage_datasources', 'true', 'admin'),
+    ('privileges.admin.can_manage_notifications', 'true', 'admin'),
+    ('privileges.admin.can_manage_tags', 'true', 'admin'),
+    ('privileges.admin.can_restart', 'true', 'admin'),
+    ('privileges.can_view_all_datasets', 'true', 'admin'),
+    ('privileges.can_view_private_datasets', 'true', 'admin');
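
Taken together, the schema changes carry the overhaul: settings are now unique per (name, tag) pair instead of per name, users gain a JSONB list of tags, and dataset ownership moves from a column on `datasets` into the `datasets_owners` join table, where one dataset can have several owners with different roles. The actual resolution logic lives in the config manager rather than in SQL, but a hypothetical tag-aware lookup against this schema could work as below (`db.fetchone` stands in for 4CAT's database wrapper; parameter style is psycopg2's):

    # sketch: a row tagged with one of the user's tags beats the untagged
    # (global) row for the same setting name
    row = db.fetchone(
        "SELECT value FROM settings "
        "WHERE name = %s AND tag IN (%s, '') "
        "ORDER BY (tag = %s) DESC LIMIT 1",
        ("privileges.can_view_all_datasets", "admin", "admin"))

A user whose `users.tags` value contains "admin" would thereby pick up the privilege rows inserted above, while everyone else falls through to the untagged defaults.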
2 changes: 0 additions & 2 deletions backend/lib/database_mysql.py
@@ -4,8 +4,6 @@
 import pymysql.connections as mysqlconnections
 import pymysql
 
-import common.config_manager as config
-
 class MySQLDatabase:
     """
     Simple database handler for MySQL connections
10 changes: 8 additions & 2 deletions backend/lib/manager.py
@@ -47,7 +47,14 @@ def __init__(self, queue, database, logger, as_daemon=True):
             if hasattr(worker, "ensure_job"):
                 self.queue.add_job(jobtype=worker_name, **worker.ensure_job)
 
-        self.log.info('4CAT Started')
+        self.log.info("4CAT Started")
+
+        # flush module collector log buffer
+        # the logger is not available when this initialises
+        # but it is now!
+        if all_modules.log_buffer:
+            self.log.warning(all_modules.log_buffer)
+            all_modules.log_buffer = ""
 
         # it's time
         self.loop()
@@ -87,7 +94,6 @@ def delegate(self):
                 # worker slots, start a new worker to run it
                 if len(self.worker_pool[jobtype]) < worker_class.max_workers:
                     try:
-                        self.log.debug("Starting new worker for job %s" % jobtype)
                         job.claim()
                         worker = worker_class(logger=self.log, manager=self, job=job, modules=all_modules)
                         worker.start()
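
The buffer flush added to `__init__` papers over a bootstrapping gap: the module collector scans for workers before any logger exists, so its warnings are parked in a plain string until something can emit them. The pattern in isolation, as a sketch (4CAT's actual collector lives in its module loader, which is not part of this excerpt):

    class ModuleCollector:
        """Finds worker modules before logging is available (sketch)"""

        def __init__(self):
            self.log_buffer = ""  # warnings accumulate here as plain text

        def warn(self, message):
            # no logger yet, so keep the message for later
            self.log_buffer += message + "\n"

Once a logger exists, the buffer is emitted and reset exactly as in the hunk above.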
2 changes: 1 addition & 1 deletion backend/abstract/preset.py → backend/lib/preset.py
@@ -2,7 +2,7 @@
 Queue a series of processors at once via a preset
 """
 import abc
-from backend.abstract.processor import BasicProcessor
+from backend.lib.processor import BasicProcessor
 
 from common.lib.dataset import DataSet
 
69 changes: 23 additions & 46 deletions backend/abstract/processor.py → backend/lib/processor.py
@@ -12,12 +12,12 @@
 
 from pathlib import Path, PurePath
 
-import backend
-from backend.abstract.worker import BasicWorker
+from backend.lib.worker import BasicWorker
 from common.lib.dataset import DataSet
 from common.lib.fourcat_module import FourcatModule
 from common.lib.helpers import get_software_version, remove_nuls
 from common.lib.exceptions import WorkerInterruptedException, ProcessorInterruptedException, ProcessorException
+from common.config_manager import config, ConfigWrapper
 
 csv.field_size_limit(1024 * 1024 * 1024)
 
@@ -34,9 +34,8 @@ class BasicProcessor(FourcatModule, BasicWorker, metaclass=abc.ABCMeta):
     To determine whether a processor can process a given dataset, you can
     define a `is_compatible_with(module: FourcatModule = None) -> bool` class
-    method which takes a dataset *or* processor as argument and returns a bool
-    that determines if this processor is considered compatible with that
-    dataset or processor. For example:
+    method which takes a dataset as argument and returns a bool that determines
+    if this processor is considered compatible with that dataset. For example:
 
     .. code-block:: python
@@ -56,6 +55,9 @@ def is_compatible_with(cls, module=None):
     #: The dataset object that the processor is *creating*.
     dataset = None
 
+    #: Owner (username) of the dataset
+    owner = None
+
     #: The dataset object that the processor is *processing*.
     source_dataset = None
 
@@ -74,6 +76,9 @@ def is_compatible_with(cls, module=None):
     #: Configurable options for this processor
     options = {}
 
+    #: 4CAT settings from the perspective of the dataset's owner
+    config = None
+
     #: Values for the processor's options, populated by user input
     parameters = {}
 
@@ -93,13 +98,22 @@ def work(self):
         up.
         """
         try:
+            # a dataset can have multiple owners, but the creator is the user
+            # that actually queued the processor, so their config is relevant
             self.dataset = DataSet(key=self.job.data["remote_id"], db=self.db)
-        except TypeError:
+            self.owner = self.dataset.creator
+        except TypeError as e:
             # query has been deleted in the meantime. finish without error,
             # as deleting it will have been a conscious choice by a user
             self.job.finish()
             return
 
+        # set up config reader using the worker's DB connection and the dataset
+        # creator. This ensures that if a value has been overridden for the
+        # owner, the overridden value is used instead.
+        config.with_db(self.db)
+        self.config = ConfigWrapper(config=config, user=self.owner)
+
         if self.dataset.data.get("key_parent", None):
             # search workers never have parents (for now), so we don't need to
             # find out what the source_dataset dataset is if it's a search worker
@@ -242,7 +256,7 @@ def after_process(self):
                 parent=self.dataset.key,
                 extension=available_processors[next_type].extension,
                 is_private=self.dataset.is_private,
-                owner=self.dataset.owner
+                owner=self.dataset.creator
             )
             self.queue.add_job(next_type, remote_id=next_analysis.key)
         else:
@@ -649,45 +663,6 @@ def get_status(cls):
         """
         return cls.status if hasattr(cls, "status") else None
 
-    @classmethod
-    def get_available_processors(cls, self):
-        """
-        Get list of processors compatible with this processor
-
-        Checks whether this dataset type is one that is listed as being accepted
-        by the processor, for each known type: if the processor does not
-        specify accepted types (via the `is_compatible_with` method), it is
-        assumed it accepts any top-level datasets
-
-        :return dict:  Compatible processors, `name => class` mapping
-        """
-        processors = backend.all_modules.processors
-
-        available = []
-        for processor_type, processor in processors.items():
-            if processor_type.endswith("-search"):
-                continue
-
-            # consider a processor compatible if its is_compatible_with
-            # method returns True *or* if it has no explicit compatibility
-            # check and this dataset is top-level (i.e. has no parent)
-            if hasattr(processor, "is_compatible_with"):
-                if processor.is_compatible_with(module=self):
-                    available.append(processor)
-
-        return available
-
-    @classmethod
-    def is_dataset(cls):
-        """
-        Confirm this is *not* a dataset, but a processor.
-
-        Used for processor compatibility checks.
-
-        :return bool:  Always `False`, because this is a processor.
-        """
-        return False
-
     @classmethod
     def is_top_dataset(cls):
         """
@@ -761,3 +736,5 @@ def process(self):
         To be defined by the child processor.
         """
         pass
+
+
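`ConfigWrapper` is what makes settings owner-sensitive inside processors: `work()` binds `self.config` to the dataset creator, so code that previously read the global `config` object can now see per-user overrides. A sketch of the intended difference, assuming the wrapper mirrors the global object's `get()` interface (its internals are not shown in this diff); the setting key comes from the defaults removed from backend/database.sql above:

    def process(self):
        # same value for every user, as before this commit
        limit_global = config.get("explorer.max_posts")

        # owner-sensitive: if "explorer.max_posts" is overridden for a tag
        # carried by this dataset's creator, the override is returned
        limit_owner = self.config.get("explorer.max_posts")
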
4 changes: 2 additions & 2 deletions backend/abstract/scraper.py → backend/lib/scraper.py
@@ -8,9 +8,9 @@
 import abc
 
 from pathlib import Path
-from backend.abstract.worker import BasicWorker
+from backend.lib.worker import BasicWorker
 
-import common.config_manager as config
+from common.config_manager import config
 
 class BasicHTTPScraper(BasicWorker, metaclass=abc.ABCMeta):
     """
5 changes: 3 additions & 2 deletions backend/abstract/search.py → backend/lib/search.py
@@ -9,9 +9,9 @@
 from pathlib import Path
 from abc import ABC, abstractmethod
 
-import common.config_manager as config
+from common.config_manager import config
 from common.lib.dataset import DataSet
-from backend.abstract.processor import BasicProcessor
+from backend.lib.processor import BasicProcessor
 from common.lib.helpers import strip_tags, dict_search_and_update, remove_nuls, HashCache
 from common.lib.exceptions import WorkerInterruptedException, ProcessorInterruptedException
 
@@ -189,6 +189,7 @@ def import_from_file(self, path):
             }
 
         path.unlink()
+        self.dataset.delete_parameter("file")
 
     def items_to_csv(self, results, filepath):
         """