diff --git a/VERSION b/VERSION
index d45beb68b..bef9f2c9d 100644
--- a/VERSION
+++ b/VERSION
@@ -1,4 +1,4 @@
-1.24
+1.25
This file should not be modified. It is used by 4CAT to determine whether it
needs to run migration scripts to e.g. update the database structure to a more
recent version.
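The comment above is why this one-line bump matters: on upgrade, 4CAT compares the recorded version against the code's version and runs the matching `helper-scripts/migrate/` scripts, including the new `migrate-1.24-1.25.py` further down in this diff. A minimal sketch of that selection step, with made-up helper names (only the `migrate-X-Y.py` naming scheme comes from this diff):

```python
# Illustrative sketch; helper names are made up, only the migrate-X-Y.py
# naming scheme comes from this diff.
from pathlib import Path

def pending_migrations(current="1.24", target="1.25", folder="helper-scripts/migrate"):
	"""Return the migration scripts bridging `current` to `target`, in order."""
	def as_tuple(version):
		return tuple(int(part) for part in version.split("."))

	# file names encode the versions they migrate between
	scripts = sorted(
		Path(folder).glob("migrate-*-*.py"),
		key=lambda path: as_tuple(path.stem.split("-")[1]),
	)
	return [
		script for script in scripts
		if as_tuple(current) <= as_tuple(script.stem.split("-")[1])
		and as_tuple(script.stem.split("-")[2]) <= as_tuple(target)
	]
```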
diff --git a/backend/abstract/processor.py b/backend/abstract/processor.py
index e4a974e6e..988488a3f 100644
--- a/backend/abstract/processor.py
+++ b/backend/abstract/processor.py
@@ -230,8 +230,15 @@ def after_process(self):
self.log.info("Not running follow-up processor of type %s for dataset %s, no input data for follow-up" % (next_type, self.dataset.key))
elif next_type in available_processors:
- next_analysis = DataSet(parameters=next_parameters, type=next_type, db=self.db, parent=self.dataset.key,
- extension=available_processors[next_type].extension)
+ next_analysis = DataSet(
+ parameters=next_parameters,
+ type=next_type,
+ db=self.db,
+ parent=self.dataset.key,
+ extension=available_processors[next_type].extension,
+ is_private=self.dataset.is_private,
+ owner=self.dataset.owner
+ )
self.queue.add_job(next_type, remote_id=next_analysis.key)
else:
self.log.warning("Dataset %s (of type %s) wants to run processor %s next, but it is incompatible" % (self.dataset.key, self.type, next_type))
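Without the two new keyword arguments, a follow-up dataset would fall back to the defaults added to `DataSet.__init__` below (`is_private=True`, `owner="anonymous"`) and so could end up hidden even from the user who queued it. The change makes an analysis chain inherit visibility from its root, roughly:

```python
# Keys, type and owner are illustrative; the point is that the follow-up
# copies privacy and ownership from its parent dataset.
parent = DataSet(key="abcd1234", db=db)  # say owner="alice", is_private=True

child = DataSet(
	parameters={},
	type="tokenise-posts",
	db=db,
	parent=parent.key,
	extension="csv",
	is_private=parent.is_private,  # stays hidden while the parent is
	owner=parent.owner,            # but visible to the same user
)
```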
diff --git a/backend/database.sql b/backend/database.sql
index 552f34847..d0bfeb9e8 100644
--- a/backend/database.sql
+++ b/backend/database.sql
@@ -32,6 +32,7 @@ CREATE TABLE IF NOT EXISTS datasets (
key text,
type text DEFAULT 'search',
key_parent text DEFAULT '',
+ owner VARCHAR DEFAULT 'anonymous',
query text,
job integer DEFAULT 0,
parameters text,
@@ -40,6 +41,7 @@ CREATE TABLE IF NOT EXISTS datasets (
status text,
num_rows integer DEFAULT 0,
is_finished boolean DEFAULT FALSE,
+ is_private boolean DEFAULT TRUE,
software_version text,
software_file text DEFAULT '',
annotation_fields text DEFAULT ''
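These two columns are what the access checks further down key on; existing rows are backfilled by the 1.24→1.25 migration script below. As a sketch of what they enable, listing the datasets visible to a given user might look like this (query and column choice are illustrative, using the `db` wrapper seen elsewhere in this diff):

```python
# Illustrative query over the new columns: everything public, plus the
# user's own datasets, newest first.
visible = db.fetchall(
	"SELECT key, query, owner, is_private FROM datasets "
	"WHERE is_private = FALSE OR owner = %s "
	"ORDER BY timestamp DESC",
	("alice",)
)
```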
diff --git a/backend/workers/api.py b/backend/workers/api.py
index 3a432e502..dd16299d2 100644
--- a/backend/workers/api.py
+++ b/backend/workers/api.py
@@ -239,7 +239,7 @@ def process_request(self, request, payload):
"is_recurring": (int(job["interval"]) > 0),
"is_maybe_crashed": job["timestamp_claimed"] > 0 and not worker,
"dataset_key": worker.dataset.key if hasattr(worker, "dataset") else None,
- "dataset_user": worker.dataset.parameters.get("user", None) if hasattr(worker, "dataset") else None,
+ "dataset_user": worker.dataset.owner if hasattr(worker, "dataset") else None,
"dataset_parent_key": worker.dataset.top_parent().key if hasattr(worker, "dataset") else None,
"timestamp_queued": job["timestamp"],
"timestamp_claimed": job["timestamp_lastclaimed"]
diff --git a/common/lib/dataset.py b/common/lib/dataset.py
index 9d64682f7..40c05baf5 100644
--- a/common/lib/dataset.py
+++ b/common/lib/dataset.py
@@ -48,7 +48,7 @@ class DataSet(FourcatModule):
staging_area = None
def __init__(self, parameters={}, key=None, job=None, data=None, db=None, parent=None, extension="csv",
- type=None):
+ type=None, is_private=True, owner="anonymous"):
"""
Create new dataset object
@@ -101,12 +101,14 @@ def __init__(self, parameters={}, key=None, job=None, data=None, db=None, parent
self.data = {
"key": self.key,
"query": self.get_label(parameters, default=type),
+ "owner": owner,
"parameters": json.dumps(parameters),
"result_file": "",
"status": "",
"type": type,
"timestamp": int(time.time()),
"is_finished": False,
+ "is_private": is_private,
"software_version": get_software_version(),
"software_file": "",
"num_rows": 0,
@@ -455,6 +457,23 @@ def delete(self):
# already deleted, apparently
pass
+ def update_children(self, **kwargs):
+ """
+ Update an attribute for all child datasets
+
+ Can be used to e.g. change the owner, version, finished status for all
+ datasets in a tree
+
+ :param kwargs: Parameters corresponding to known dataset attributes
+ """
+ children = self.db.fetchall("SELECT * FROM datasets WHERE key_parent = %s", (self.key,))
+ for child in children:
+ child = DataSet(key=child["key"], db=self.db)
+ for attr, value in kwargs.items():
+ child.__setattr__(attr, value)
+
+ child.update_children(**kwargs)
+
def is_finished(self):
"""
Check if dataset is finished
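Note that `update_children` never calls an explicit save: it relies on `DataSet.__setattr__` persisting known attributes (such as the new `owner` and `is_private`) straight to the database, which is not shown in this excerpt. Typical use, with an illustrative key:

```python
# Assumes DataSet.__setattr__ writes known attributes through to the
# database; the key is illustrative.
dataset = DataSet(key="abcd1234", db=db)
dataset.is_private = False                  # update this dataset...
dataset.update_children(is_private=False)   # ...and its entire subtree
```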
diff --git a/helper-scripts/migrate/migrate-1.24-1.25.py b/helper-scripts/migrate/migrate-1.24-1.25.py
index e69de29bb..6b352cf53 100644
--- a/helper-scripts/migrate/migrate-1.24-1.25.py
+++ b/helper-scripts/migrate/migrate-1.24-1.25.py
@@ -0,0 +1,42 @@
+# Add 'is_private' and 'owner' columns to the datasets table
+import sys
+import os
+
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)) + "/../..")
+from common.lib.database import Database
+from common.lib.logger import Logger
+
+import config
+
+log = Logger(output=True)
+db = Database(logger=log, dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST,
+ port=config.DB_PORT, appname="4cat-migrate")
+
+print(" Checking if datasets table has a column 'is_private'...")
+has_column = db.fetchone("SELECT COUNT(*) AS num FROM information_schema.columns WHERE table_name = 'datasets' AND column_name = 'is_private'")
+if has_column["num"] == 0:
+ print(" ...No, adding.")
+ db.execute("ALTER TABLE datasets ADD COLUMN is_private BOOLEAN DEFAULT TRUE")
+ db.commit()
+
+ # make existing datasets all non-private, as they were before
+ db.execute("UPDATE datasets SET is_private = FALSE")
+ db.commit()
+else:
+ print(" ...Yes, nothing to update.")
+
+print(" Checking if datasets table has a column 'owner'...")
+has_column = db.fetchone("SELECT COUNT(*) AS num FROM information_schema.columns WHERE table_name = 'datasets' AND column_name = 'owner'")
+if has_column["num"] == 0:
+ print(" ...No, adding.")
+ db.execute("ALTER TABLE datasets ADD COLUMN owner VARCHAR DEFAULT 'anonymous'")
+ db.commit()
+
+	# set the owner of existing datasets to the 'user' recorded in their parameters, if any
+ db.execute("UPDATE datasets SET owner = parameters::json->>'user' WHERE parameters::json->>'user' IS NOT NULL")
+ db.commit()
+else:
+ print(" ...Yes, nothing to update.")
+
+
+print(" Done!")
\ No newline at end of file
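The `owner` backfill uses PostgreSQL's `->>` operator to pull the legacy `user` key out of the JSON-encoded `parameters` column as text. In isolation:

```python
# What the backfill extracts, shown on a literal JSON value. Rows whose
# parameters lack a 'user' key yield NULL and keep the 'anonymous' default.
row = db.fetchone("""SELECT '{"user": "alice", "datasource": "4chan"}'::json->>'user' AS owner""")
print(row["owner"])  # alice
```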
diff --git a/webtool/api_standalone.py b/webtool/api_standalone.py
index cda1a31c0..28be12d62 100644
--- a/webtool/api_standalone.py
+++ b/webtool/api_standalone.py
@@ -138,7 +138,14 @@ def process_standalone(processor):
return error(402, error="Input is empty")
# ok, valid input!
- temp_dataset = DataSet(extension="csv", type="standalone", parameters={"user": current_user.get_id(), "after": [processor]}, db=db)
+ temp_dataset = DataSet(
+ extension="csv",
+ type="standalone",
+ parameters={"next": [processor]},
+ db=db,
+ owner=current_user.get_id(),
+ is_private=True
+ )
temp_dataset.finish(len(input))
# make sure the file is deleted later, whichever way this request is
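Besides moving `user` out of the parameters, this hunk renames the chaining key from `after` to `next`, which is what `after_process()` (first hunk in this diff) appears to consume when queuing the follow-up processor. Schematically, with illustrative processor id and owner:

```python
# Schematic version of the new flow; processor id and owner are examples.
temp_dataset = DataSet(
	extension="csv",
	type="standalone",
	parameters={"next": ["tokenise-posts"]},  # consumed by after_process()
	db=db,
	owner="alice",
	is_private=True,
)
num_rows = 10  # illustrative
temp_dataset.finish(num_rows)  # finishing is what triggers the follow-up
```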
diff --git a/webtool/api_tool.py b/webtool/api_tool.py
index a98943484..fcf81f973 100644
--- a/webtool/api_tool.py
+++ b/webtool/api_tool.py
@@ -242,7 +242,12 @@ def import_dataset():
if not worker:
return error(404, message="Unknown platform or source format")
- dataset = DataSet(parameters={"user": current_user.get_id(), "datasource": platform}, type=worker.type, db=db)
+ dataset = DataSet(
+ parameters={"datasource": platform},
+ type=worker.type,
+ db=db,
+ owner=current_user.get_id()
+ )
dataset.update_status("Importing uploaded file...")
# store the file at the result path for the dataset, but with a different suffix
@@ -295,7 +300,6 @@ def queue_dataset():
status and results.
:return-error 404: If the datasource does not exist.
"""
-
datasource_id = request.form.get("datasource", "")
if datasource_id not in backend.all_modules.datasources:
return error(404, message="Datasource '%s' does not exist" % datasource_id)
@@ -318,14 +322,21 @@ def queue_dataset():
else:
raise NotImplementedError("Data sources MUST sanitise input values with validate_query")
- sanitised_query["user"] = current_user.get_id()
sanitised_query["datasource"] = datasource_id
sanitised_query["type"] = search_worker_id
sanitised_query["pseudonymise"] = bool(request.form.to_dict().get("pseudonymise", False))
+ is_private = bool(request.form.to_dict().get("make-private", True))
extension = search_worker.extension if hasattr(search_worker, "extension") else "csv"
- dataset = DataSet(parameters=sanitised_query, db=db, type=search_worker_id, extension=extension)
+ dataset = DataSet(
+ parameters=sanitised_query,
+ db=db,
+ type=search_worker_id,
+ extension=extension,
+ is_private=is_private,
+ owner=current_user.get_id()
+ )
if request.form.get("label"):
dataset.update_label(request.form.get("label"))
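The privacy flag now comes from a `make-private` form field. Note the semantics of `bool()` here: any non-empty string (even `"0"`) is truthy, and an omitted field falls back to `True`, so only an explicitly empty value yields a public dataset. A client-side sketch (instance URL and endpoint path are assumptions, not taken from this diff):

```python
import requests

API = "https://4cat.example"  # illustrative instance

response = requests.post(
	API + "/api/queue-query/",  # assumed path for the queue_dataset view
	data={
		"datasource": "fourchan",  # illustrative datasource id
		"make-private": "yes",     # any non-empty value counts as private
		"label": "my private dataset",
	},
)
print(response.json())
```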
@@ -374,6 +385,9 @@ def check_dataset():
except TypeError:
return error(404, error="Dataset does not exist.")
+ if not current_user.can_access_dataset(dataset):
+ return error(403, error="Dataset is private")
+
results = dataset.check_dataset_finished()
if results == 'empty':
dataset_data = dataset.data
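`can_access_dataset` itself is not part of this excerpt; given the semantics of the new columns and the owner checks elsewhere in this file, a plausible shape for it (on the user model) would be:

```python
# Hedged reconstruction, not the actual 4CAT implementation: public
# datasets are visible to everyone, private ones to owner and admins only.
def can_access_dataset(self, dataset):
	if not dataset.is_private:
		return True
	return self.is_admin or self.get_id() == dataset.owner
```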
@@ -438,7 +452,7 @@ def edit_dataset_label(key):
except TypeError:
return error(404, error="Dataset does not exist.")
- if not current_user.is_admin and not current_user.get_id() == dataset.parameters.get("user"):
+ if not current_user.is_admin and not current_user.get_id() == dataset.owner:
return error(403, message="Not allowed")
dataset.update_label(label)
@@ -594,7 +608,7 @@ def delete_dataset(key=None):
except TypeError:
return error(404, error="Dataset does not exist.")
- if not current_user.is_admin and not current_user.get_id() == dataset.parameters.get("user"):
+ if not current_user.is_admin and not current_user.get_id() == dataset.owner:
return error(403, message="Not allowed")
# if there is an active or queued job for some child dataset, cancel and
@@ -658,6 +672,9 @@ def toggle_favourite(key):
except TypeError:
return error(404, error="Dataset does not exist.")
+ if not current_user.can_access_dataset(dataset):
+ return error(403, error="This dataset is private")
+
current_status = db.fetchone("SELECT * FROM users_favourites WHERE name = %s AND key = %s",
(current_user.get_id(), dataset.key))
if not current_status:
@@ -667,6 +684,38 @@ def toggle_favourite(key):
db.delete("users_favourites", where={"name": current_user.get_id(), "key": dataset.key})
return jsonify({"success": True, "favourite_status": False})
+@app.route("/api/toggle-dataset-private/Create new dataset
appropriate.
This will only hide your dataset from other users. It will NOT encrypt your data and instance maintainers will still be able to view it. If you are working with sensitive data, you should consider running your own 4CAT instance.
+
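Only the route survives in the truncated final hunk. Based on the neighbouring `toggle_favourite` endpoint and the `update_children` helper added in `dataset.py` above, a plausible reconstruction of the new endpoint is:

```python
# Hedged reconstruction of the truncated hunk; decorators and body are
# inferred from toggle_favourite and update_children, not from the diff.
@app.route("/api/toggle-dataset-private/<string:key>/")
@api_ratelimit
@login_required
def toggle_private(key):
	try:
		dataset = DataSet(key=key, db=db)
	except TypeError:
		return error(404, error="Dataset does not exist.")

	# only the dataset's owner or an admin may change its visibility
	if not current_user.is_admin and not current_user.get_id() == dataset.owner:
		return error(403, message="Not allowed")

	# flip the flag, then propagate it through the whole dataset tree
	dataset.is_private = not dataset.is_private
	dataset.update_children(is_private=dataset.is_private)

	return jsonify({"success": True, "is_private": dataset.is_private})
```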