diff --git a/VERSION b/VERSION index d45beb68b..bef9f2c9d 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ -1.24 +1.25 This file should not be modified. It is used by 4CAT to determine whether it needs to run migration scripts to e.g. update the database structure to a more diff --git a/backend/abstract/processor.py b/backend/abstract/processor.py index e4a974e6e..988488a3f 100644 --- a/backend/abstract/processor.py +++ b/backend/abstract/processor.py @@ -230,8 +230,15 @@ def after_process(self): self.log.info("Not running follow-up processor of type %s for dataset %s, no input data for follow-up" % (next_type, self.dataset.key)) elif next_type in available_processors: - next_analysis = DataSet(parameters=next_parameters, type=next_type, db=self.db, parent=self.dataset.key, - extension=available_processors[next_type].extension) + next_analysis = DataSet( + parameters=next_parameters, + type=next_type, + db=self.db, + parent=self.dataset.key, + extension=available_processors[next_type].extension, + is_private=self.dataset.is_private, + owner=self.dataset.owner + ) self.queue.add_job(next_type, remote_id=next_analysis.key) else: self.log.warning("Dataset %s (of type %s) wants to run processor %s next, but it is incompatible" % (self.dataset.key, self.type, next_type)) diff --git a/backend/database.sql b/backend/database.sql index 552f34847..d0bfeb9e8 100644 --- a/backend/database.sql +++ b/backend/database.sql @@ -32,6 +32,7 @@ CREATE TABLE IF NOT EXISTS datasets ( key text, type text DEFAULT 'search', key_parent text DEFAULT '', + owner VARCHAR DEFAULT 'anonymous', query text, job integer DEFAULT 0, parameters text, @@ -40,6 +41,7 @@ CREATE TABLE IF NOT EXISTS datasets ( status text, num_rows integer DEFAULT 0, is_finished boolean DEFAULT FALSE, + is_private boolean DEFAULT TRUE, software_version text, software_file text DEFAULT '', annotation_fields text DEFAULT '' diff --git a/backend/workers/api.py b/backend/workers/api.py index 3a432e502..dd16299d2 100644 --- a/backend/workers/api.py +++ b/backend/workers/api.py @@ -239,7 +239,7 @@ def process_request(self, request, payload): "is_recurring": (int(job["interval"]) > 0), "is_maybe_crashed": job["timestamp_claimed"] > 0 and not worker, "dataset_key": worker.dataset.key if hasattr(worker, "dataset") else None, - "dataset_user": worker.dataset.parameters.get("user", None) if hasattr(worker, "dataset") else None, + "dataset_user": worker.dataset.owner if hasattr(worker, "dataset") else None, "dataset_parent_key": worker.dataset.top_parent().key if hasattr(worker, "dataset") else None, "timestamp_queued": job["timestamp"], "timestamp_claimed": job["timestamp_lastclaimed"] diff --git a/common/lib/dataset.py b/common/lib/dataset.py index 9d64682f7..40c05baf5 100644 --- a/common/lib/dataset.py +++ b/common/lib/dataset.py @@ -48,7 +48,7 @@ class DataSet(FourcatModule): staging_area = None def __init__(self, parameters={}, key=None, job=None, data=None, db=None, parent=None, extension="csv", - type=None): + type=None, is_private=True, owner="anonymous"): """ Create new dataset object @@ -101,12 +101,14 @@ def __init__(self, parameters={}, key=None, job=None, data=None, db=None, parent self.data = { "key": self.key, "query": self.get_label(parameters, default=type), + "owner": owner, "parameters": json.dumps(parameters), "result_file": "", "status": "", "type": type, "timestamp": int(time.time()), "is_finished": False, + "is_private": is_private, "software_version": get_software_version(), "software_file": "", "num_rows": 0, @@ -455,6 +457,23 @@ def delete(self): # already deleted, apparently pass + def update_children(self, **kwargs): + """ + Update an attribute for all child datasets + + Can be used to e.g. change the owner, version, finished status for all + datasets in a tree + + :param kwargs: Parameters corresponding to known dataset attributes + """ + children = self.db.fetchall("SELECT * FROM datasets WHERE key_parent = %s", (self.key,)) + for child in children: + child = DataSet(key=child["key"], db=self.db) + for attr, value in kwargs.items(): + child.__setattr__(attr, value) + + child.update_children(**kwargs) + def is_finished(self): """ Check if dataset is finished diff --git a/helper-scripts/migrate/migrate-1.24-1.25.py b/helper-scripts/migrate/migrate-1.24-1.25.py index e69de29bb..6b352cf53 100644 --- a/helper-scripts/migrate/migrate-1.24-1.25.py +++ b/helper-scripts/migrate/migrate-1.24-1.25.py @@ -0,0 +1,42 @@ +# Add 'is_deactivated' column to user table +import sys +import os + +sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)) + "'/../..") +from common.lib.database import Database +from common.lib.logger import Logger + +import config + +log = Logger(output=True) +db = Database(logger=log, dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST, + port=config.DB_PORT, appname="4cat-migrate") + +print(" Checking if datasets table has a column 'is_private'...") +has_column = db.fetchone("SELECT COUNT(*) AS num FROM information_schema.columns WHERE table_name = 'datasets' AND column_name = 'is_private'") +if has_column["num"] == 0: + print(" ...No, adding.") + db.execute("ALTER TABLE datasets ADD COLUMN is_private BOOLEAN DEFAULT TRUE") + db.commit() + + # make existing datasets all non-private, as they were before + db.execute("UPDATE datasets SET is_private = FALSE") + db.commit() +else: + print(" ...Yes, nothing to update.") + +print(" Checking if datasets table has a column 'owner'...") +has_column = db.fetchone("SELECT COUNT(*) AS num FROM information_schema.columns WHERE table_name = 'datasets' AND column_name = 'owner'") +if has_column["num"] == 0: + print(" ...No, adding.") + db.execute("ALTER TABLE datasets ADD COLUMN owner VARCHAR DEFAULT 'anonymous'") + db.commit() + + # make existing datasets all non-private, as they were before + db.execute("UPDATE datasets SET owner = parameters::json->>'user' WHERE parameters::json->>'user' IS NOT NULL") + db.commit() +else: + print(" ...Yes, nothing to update.") + + +print(" Done!") \ No newline at end of file diff --git a/webtool/api_standalone.py b/webtool/api_standalone.py index cda1a31c0..28be12d62 100644 --- a/webtool/api_standalone.py +++ b/webtool/api_standalone.py @@ -138,7 +138,14 @@ def process_standalone(processor): return error(402, error="Input is empty") # ok, valid input! - temp_dataset = DataSet(extension="csv", type="standalone", parameters={"user": current_user.get_id(), "after": [processor]}, db=db) + temp_dataset = DataSet( + extension="csv", + type="standalone", + parameters={"next": [processor]}, + db=db, + owner=current_user.get_id(), + is_private=True + ) temp_dataset.finish(len(input)) # make sure the file is deleted later, whichever way this request is diff --git a/webtool/api_tool.py b/webtool/api_tool.py index a98943484..fcf81f973 100644 --- a/webtool/api_tool.py +++ b/webtool/api_tool.py @@ -242,7 +242,12 @@ def import_dataset(): if not worker: return error(404, message="Unknown platform or source format") - dataset = DataSet(parameters={"user": current_user.get_id(), "datasource": platform}, type=worker.type, db=db) + dataset = DataSet( + parameters={"datasource": platform}, + type=worker.type, + db=db, + owner=current_user.get_id() + ) dataset.update_status("Importing uploaded file...") # store the file at the result path for the dataset, but with a different suffix @@ -295,7 +300,6 @@ def queue_dataset(): status and results. :return-error 404: If the datasource does not exist. """ - datasource_id = request.form.get("datasource", "") if datasource_id not in backend.all_modules.datasources: return error(404, message="Datasource '%s' does not exist" % datasource_id) @@ -318,14 +322,21 @@ def queue_dataset(): else: raise NotImplementedError("Data sources MUST sanitise input values with validate_query") - sanitised_query["user"] = current_user.get_id() sanitised_query["datasource"] = datasource_id sanitised_query["type"] = search_worker_id sanitised_query["pseudonymise"] = bool(request.form.to_dict().get("pseudonymise", False)) + is_private = bool(request.form.to_dict().get("make-private", True)) extension = search_worker.extension if hasattr(search_worker, "extension") else "csv" - dataset = DataSet(parameters=sanitised_query, db=db, type=search_worker_id, extension=extension) + dataset = DataSet( + parameters=sanitised_query, + db=db, + type=search_worker_id, + extension=extension, + is_private=is_private, + owner=current_user.get_id() + ) if request.form.get("label"): dataset.update_label(request.form.get("label")) @@ -374,6 +385,9 @@ def check_dataset(): except TypeError: return error(404, error="Dataset does not exist.") + if not current_user.can_access_dataset(dataset): + return error(403, error="Dataset is private") + results = dataset.check_dataset_finished() if results == 'empty': dataset_data = dataset.data @@ -438,7 +452,7 @@ def edit_dataset_label(key): except TypeError: return error(404, error="Dataset does not exist.") - if not current_user.is_admin and not current_user.get_id() == dataset.parameters.get("user"): + if not current_user.is_admin and not current_user.get_id() == dataset.owner: return error(403, message="Not allowed") dataset.update_label(label) @@ -594,7 +608,7 @@ def delete_dataset(key=None): except TypeError: return error(404, error="Dataset does not exist.") - if not current_user.is_admin and not current_user.get_id() == dataset.parameters.get("user"): + if not current_user.is_admin and not current_user.get_id() == dataset.owner: return error(403, message="Not allowed") # if there is an active or queued job for some child dataset, cancel and @@ -658,6 +672,9 @@ def toggle_favourite(key): except TypeError: return error(404, error="Dataset does not exist.") + if not current_user.can_access_dataset(dataset): + return error(403, error="This dataset is private") + current_status = db.fetchone("SELECT * FROM users_favourites WHERE name = %s AND key = %s", (current_user.get_id(), dataset.key)) if not current_status: @@ -667,6 +684,38 @@ def toggle_favourite(key): db.delete("users_favourites", where={"name": current_user.get_id(), "key": dataset.key}) return jsonify({"success": True, "favourite_status": False}) +@app.route("/api/toggle-dataset-private/") +@login_required +@openapi.endpoint("tool") +def toggle_private(key): + """ + Toggle whether a dataset is private or not + + Private datasets cannot be viewed by users that are not an admin or the + owner of the dataset. An exception is datasets assigned to the user + 'anonymous', which can be viewed by anyone. Only admins and owners can + toggle private status of a dataset. + + :param str key: Key of the dataset to mark as (not) private + + :return: A JSON object with the status of the request + :return-schema: {type=object,properties={success={type=boolean},is_private={type=boolean}}} + + :return-error 404: If the dataset key was not found + """ + try: + dataset = DataSet(key=key, db=db) + except TypeError: + return error(404, error="Dataset does not exist.") + + if dataset.owner != current_user.get_id() and not current_user.is_admin(): + return error(403, error="This dataset is private") + + # apply status to dataset and all children + dataset.is_private = not dataset.is_private + dataset.update_children(is_private=dataset.is_private) + + return jsonify({"success": True, "is_private": dataset.is_private}) @app.route("/api/queue-processor/", methods=["POST"]) @api_ratelimit @@ -731,6 +780,9 @@ def queue_processor(key=None, processor=None): print("KEY", key) return error(404, error="Not a valid dataset key.") + if not current_user.can_access_dataset(dataset): + return error(403, error="You cannot run processors on private datasets") + # check if processor is available for this dataset available_processors = dataset.get_available_processors() if processor not in available_processors: @@ -741,12 +793,19 @@ def queue_processor(key=None, processor=None): # create a dataset now try: options = UserInput.parse_all(available_processors[processor].get_options(dataset, current_user), request.form.to_dict(), silently_correct=False) - options["user"] = current_user.get_id() except QueryParametersException as e: return error(400, error=str(e)) - analysis = DataSet(parent=dataset.key, parameters=options, db=db, - extension=available_processors[processor].extension, type=processor) + # private or not is inherited from parent dataset + analysis = DataSet(parent=dataset.key, + parameters=options, + db=db, + extension=available_processors[processor].extension, + type=processor, + is_private=dataset.is_private, + owner=current_user.get_id() + ) + if analysis.is_new: # analysis has not been run or queued before - queue a job to run it queue.add_job(jobtype=processor, remote_id=analysis.key) @@ -803,6 +862,9 @@ def check_processor(): except TypeError: continue + if not current_user.can_access_dataset(dataset): + continue + genealogy = dataset.get_genealogy() parent = genealogy[-2] top_parent = genealogy[0] diff --git a/webtool/lib/user.py b/webtool/lib/user.py index ac140df70..897963cc6 100644 --- a/webtool/lib/user.py +++ b/webtool/lib/user.py @@ -86,7 +86,35 @@ def get_by_token(db, token): else: return User(db, user) - def __init__(self, db, data, authenticated=False): + def can_access_dataset(self, dataset): + """ + Check if this user should be able to access a given dataset. + + This depends mostly on the dataset's owner, which should match the + user if the dataset is private. If the dataset is not private, or + if the user is an admin or the dataset is private but assigned to + an anonymous user, the dataset can be accessed. + + :param dataset: The dataset to check access to + :return bool: + """ + if not dataset.is_private: + return True + + elif self.is_admin(): + return True + + elif self.get_id() == dataset.owner: + return True + + elif dataset.owner == "anonymous": + return True + + else: + return False + + + def __init__(self, data, authenticated=False): """ Instantiate user object @@ -325,4 +353,4 @@ def set_password(self, password): salt = bcrypt.gensalt() password_hash = bcrypt.hashpw(password.encode("ascii"), salt) - self.db.update("users", where={"name": self.data["name"]}, data={"password": password_hash.decode("utf-8")}) \ No newline at end of file + self.db.update("users", where={"name": self.data["name"]}, data={"password": password_hash.decode("utf-8")}) diff --git a/webtool/templates/create-dataset.html b/webtool/templates/create-dataset.html index 32c9ea73f..eefab9364 100644 --- a/webtool/templates/create-dataset.html +++ b/webtool/templates/create-dataset.html @@ -46,10 +46,19 @@

Create new dataset

appropriate.

-
+
+
+ +
+ +
+ + + +
diff --git a/webtool/templates/result-child.html b/webtool/templates/result-child.html index e0432486a..c81933d68 100644 --- a/webtool/templates/result-child.html +++ b/webtool/templates/result-child.html @@ -111,7 +111,7 @@

{{ processors[item.type].title if not deprecated else "(Deprecated analysis) {% endif %} {% endif %} - {% if current_user.is_authenticated and (current_user.get_id() == dataset.parameters.user or current_user.is_admin or item.parameters.userparameters.user == current_user.get_id()) %} + {% if current_user.is_authenticated and (current_user.get_id() == dataset.owner or current_user.is_admin or item.owner == current_user.get_id()) %}
  • Delete this analysis diff --git a/webtool/templates/result-details.html b/webtool/templates/result-details.html index 0f2af9509..633b3fb2c 100644 --- a/webtool/templates/result-details.html +++ b/webtool/templates/result-details.html @@ -13,8 +13,11 @@

  • Explore & annotate
  • {% endif %}
  • {% if is_favourite %}Delete from{% else %}Add to{% endif %} favourites
  • + {% if current_user.is_authenticated and (current_user.is_admin() or current_user.get_id() == dataset.owner) %} +
  • {% if dataset.is_private %}Make public{% else %}Make private{% endif %}
  • + {% endif %}
  • Permalink
  • - {% if current_user.get_id() == dataset.parameters.user or current_user.is_admin %} + {% if current_user.get_id() == dataset.owner or current_user.is_admin %}
  • Delete dataset
  • Re-run dataset
  • {% endif %} @@ -48,10 +51,10 @@

    {{ dataset.timestamp|datetime(fmt="%d %b %Y, %H:%M") }}

  • - {% if current_user.is_authenticated and current_user.is_admin and dataset.parameters.user %} + {% if current_user.is_authenticated and current_user.is_admin and dataset.owner %}
    Queued by
    -
    {{ dataset.parameters.user }}
    +
    {{ dataset.owner }}
    {% endif %} diff --git a/webtool/templates/result-metadata.html b/webtool/templates/result-metadata.html index 27832ba4d..d4f85d973 100644 --- a/webtool/templates/result-metadata.html +++ b/webtool/templates/result-metadata.html @@ -1,3 +1,8 @@ +{% if dataset.is_private %} + This dataset is private and can only be viewed by your user and instance maintainers + +{% endif %} + {% if "pseudonymise" in dataset.parameters and dataset.parameters.pseudonymise %} Usernames have been pseudonymised diff --git a/webtool/views.py b/webtool/views.py index b5bb689a6..36e287839 100644 --- a/webtool/views.py +++ b/webtool/views.py @@ -7,6 +7,9 @@ import csv import json import glob + +import flask + import config import markdown @@ -21,7 +24,7 @@ from webtool import app, db, log from webtool.lib.helpers import Pagination, error -from webtool.api_tool import delete_dataset, toggle_favourite, queue_processor, nuke_dataset +from webtool.api_tool import delete_dataset, toggle_favourite, toggle_private, queue_processor, nuke_dataset from common.lib.dataset import DataSet from common.lib.queue import JobQueue @@ -271,6 +274,9 @@ def get_mapped_result(key): except TypeError: abort(404) + if dataset.is_private and not (current_user.is_admin() or dataset.owner == current_user.get_id()): + return error(403, error="This dataset is private.") + if dataset.get_extension() == ".csv": # if it's already a csv, just return the existing file return url_for(get_result, query_file=dataset.get_results_path().name) @@ -333,7 +339,7 @@ def show_results(page): depth = "own" if depth == "own": - where.append("parameters::json->>'user' = %s") + where.append("owner = %s") replacements.append(current_user.get_id()) if depth == "favourites": @@ -344,6 +350,10 @@ def show_results(page): where.append("query LIKE %s") replacements.append("%" + query_filter + "%") + if not current_user.is_admin(): + where.append("(is_private = FALSE OR owner = %s)") + replacements.append(current_user.get_id()) + where = " AND ".join(where) num_datasets = db.fetchone("SELECT COUNT(*) AS num FROM datasets WHERE " + where, tuple(replacements))["num"] @@ -353,6 +363,9 @@ def show_results(page): datasets = db.fetchall("SELECT key FROM datasets WHERE " + where + " ORDER BY timestamp DESC LIMIT %s OFFSET %s", tuple(replacements)) + print("SELECT key FROM datasets WHERE " + where + " ORDER BY timestamp DESC LIMIT %s OFFSET %s") + print(replacements) + if not datasets and page != 1: abort(404) @@ -384,7 +397,10 @@ def show_result(key): try: dataset = DataSet(key=key, db=db) except TypeError: - abort(404) + return error(404) + + if not current_user.can_access_dataset(dataset): + return error(403, error="This dataset is private.") # child datasets are not available via a separate page - redirect to parent if dataset.key_parent: @@ -432,7 +448,10 @@ def preview_items(key): try: dataset = DataSet(key=key, db=db) except TypeError: - return error(404, "Dataset not found.") + return error(404, error="Dataset not found.") + + if dataset.is_private and not (current_user.is_admin() or dataset.owner == current_user.get_id()): + return error(403, error="This dataset is private.") preview_size = 1000 @@ -506,6 +525,34 @@ def toggle_favourite_interactive(key): return render_template("error.html", message="Error while toggling favourite status for dataset %s." % key) +@app.route("/result//toggle-private/") +@login_required +def toggle_private_interactive(key): + """ + Toggle dataset 'private' status + + Uses code from corresponding API endpoint, but redirects to a normal page + rather than returning JSON as the API does, so this can be used for + 'normal' links. + + :param str key: Dataset key + :return: + """ + success = toggle_private(key) + if not success.is_json: + return success + + if success.json["success"]: + if success.json["is_private"]: + flash("Dataset has been made private") + else: + flash("Dataset has been made public") + + return redirect("/results/" + key + "/") + else: + return render_template("error.html", message="Error while toggling private status for dataset %s." % key) + + @app.route("/result//restart/") @login_required def restart_dataset(key): @@ -522,8 +569,11 @@ def restart_dataset(key): dataset = DataSet(key=key, db=db) except TypeError: return error(404, message="Dataset not found.") + + if dataset.is_private and not (current_user.is_admin() or dataset.owner == current_user.get_id()): + return error(403, error="This dataset is private.") - if current_user.get_id() != dataset.parameters.get("user", "") and not current_user.is_admin: + if current_user.get_id() != dataset.owner and not current_user.is_admin(): return error(403, message="Not allowed.") if not dataset.is_finished(): @@ -561,6 +611,9 @@ def nuke_dataset_interactive(key): dataset = DataSet(key=key, db=db) except TypeError: return error(404, message="Dataset not found.") + + if not current_user.can_access_dataset(dataset): + return error(403, error="This dataset is private.") top_key = dataset.top_parent().key reason = request.form.get("reason", "") @@ -592,6 +645,9 @@ def delete_dataset_interactive(key): dataset = DataSet(key=key, db=db) except TypeError: return error(404, message="Dataset not found.") + + if not current_user.can_access_dataset(dataset): + return error(403, error="This dataset is private.") top_key = dataset.top_parent().key