From 68c5131a24477a15701d9f6c2ae8e563428ecdab Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 15 Feb 2022 12:12:14 +0100 Subject: [PATCH 1/8] 'instant' argument for 'DataSet.delete_parameter' --- common/lib/dataset.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/common/lib/dataset.py b/common/lib/dataset.py index c2a517406..9034cda2f 100644 --- a/common/lib/dataset.py +++ b/common/lib/dataset.py @@ -822,14 +822,15 @@ def update_version(self, version): return updated > 0 - def delete_parameter(self, parameter): + def delete_parameter(self, parameter, instant=True): """ Delete a parameter from the dataset metadata :param string parameter: Parameter to delete + :param bool instant: Also delete parameters in this instance object? :return bool: Update successul? """ - parameters = self.parameters + parameters = self.parameters.copy() if parameter in parameters: del parameters[parameter] else: @@ -837,7 +838,9 @@ def delete_parameter(self, parameter): updated = self.db.update("datasets", where={"key": self.data["key"]}, data={"parameters": json.dumps(parameters)}) - self.parameters = parameters + + if instant: + self.parameters = parameters return updated > 0 From 5e30d7cc4bfec63b8e933984c7130668d3eea8a9 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 15 Feb 2022 12:39:56 +0100 Subject: [PATCH 2/8] Some misc Flask view cleanup & fixing --- webtool/views/api_tool.py | 2 +- webtool/views/views_dataset.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/webtool/views/api_tool.py b/webtool/views/api_tool.py index ccfbf233f..80ac96379 100644 --- a/webtool/views/api_tool.py +++ b/webtool/views/api_tool.py @@ -947,7 +947,7 @@ def datasource_call(datasource, action): return error(400, error="Datasource '%s' has no call '%s'" % (datasource, action)) folder = backend.all_modules.datasources[datasource]["path"] - views_file = folder.joinpath("webtool", "views_misc.py") + views_file = folder.joinpath("webtool", "views.py") if not views_file.exists(): return error(400, error="Datasource '%s' has no call '%s'" % (datasource, action)) diff --git a/webtool/views/views_dataset.py b/webtool/views/views_dataset.py index f93a55a6f..274473cfe 100644 --- a/webtool/views/views_dataset.py +++ b/webtool/views/views_dataset.py @@ -9,7 +9,7 @@ import flask import markdown -from flask import render_template, abort, request, redirect, send_from_directory, flash, get_flashed_messages, \ +from flask import render_template, request, redirect, send_from_directory, flash, get_flashed_messages, \ url_for, stream_with_context from flask_login import login_required, current_user @@ -49,7 +49,7 @@ def show_page(page): page_path = page_folder + "/" + page + ".md" if not os.path.exists(page_path): - abort(404) + return error(404, error="Page not found") with open(page_path, encoding="utf-8") as file: page_raw = file.read() @@ -117,7 +117,7 @@ def show_results(page): print(replacements) if not datasets and page != 1: - abort(404) + return error(404) pagination = Pagination(page, page_size, num_datasets) filtered = [] @@ -165,7 +165,7 @@ def get_mapped_result(key): try: dataset = DataSet(key=key, db=db) except TypeError: - abort(404) + return error(404, error="Dataset not found.") if dataset.is_private and not (current_user.is_admin or dataset.owner == current_user.get_id()): return error(403, error="This dataset is private.") @@ -176,7 +176,7 @@ def get_mapped_result(key): if not hasattr(dataset.get_own_processor(), "map_item"): # cannot map without a mapping method - abort(404) + return error(404, error="File not found.") mapper = dataset.get_own_processor().map_item @@ -216,7 +216,7 @@ def view_log(key): try: dataset = DataSet(key=key, db=db) except TypeError: - return error(404, "Dataset not found.") + return error(404, error="Dataset not found.") if dataset.is_private and not (current_user.is_admin or dataset.owner == current_user.get_id()): return error(403, error="This dataset is private.") @@ -553,10 +553,10 @@ def delete_dataset_interactive(key): @login_required def erase_credentials_interactive(key): """ - Erase sensitive parameters from dataset + Erase sensitive parameters from dataset - Removes all parameters starting with `api_`. This heuristic could be made - more expansive if more fine-grained control is required. + Removes all parameters starting with `api_`. This heuristic could be made + more expansive if more fine-grained control is required. Uses code from corresponding API endpoint, but redirects to a normal page rather than returning JSON as the API does, so this can be used for From 7b23d43c86a10bfb6bfac263f0b2250b7e3c38a9 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 15 Feb 2022 12:41:58 +0100 Subject: [PATCH 3/8] Don't crash RankFlow if no items left after filter --- processors/visualisation/rankflow.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/processors/visualisation/rankflow.py b/processors/visualisation/rankflow.py index b9883d909..8729994cb 100644 --- a/processors/visualisation/rankflow.py +++ b/processors/visualisation/rankflow.py @@ -159,6 +159,10 @@ def process(self): max_weight = max(max_weight, weight) max_item_length = max(max_item_length, len(row["date"])) + if not items: + return self.dataset.finish_with_error("No items remain after filtering. Try disabling 'Remove items that " + "do not occur...'.") + # determine per-period changes # this is used for determining what colour to give to nodes, and # visualise outlying items in the data From 9f9e5e5b906e4104d0921c7c17a6e77e3da4381c Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 15 Feb 2022 12:44:16 +0100 Subject: [PATCH 4/8] =?UTF-8?q?Don't=20re-use=20the=20same=20processor=20I?= =?UTF-8?q?D=20=F0=9F=A4=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- processors/visualisation/download-telegram-images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processors/visualisation/download-telegram-images.py b/processors/visualisation/download-telegram-images.py index ab9f87629..727ec29aa 100644 --- a/processors/visualisation/download-telegram-images.py +++ b/processors/visualisation/download-telegram-images.py @@ -28,7 +28,7 @@ class TelegramImageDownloader(BasicProcessor): Downloads attached images from Telegram messages and saves as zip archive """ - type = "image-downloader" # job type ID + type = "image-downloader-telegram" # job type ID category = "Visual" # category title = "Download images" # title displayed in UI description = "Download images and compress as a zip file. May take a while to complete as images are downloaded " \ From a735b09b825423e3a733fb1482250037b526c2d9 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 15 Feb 2022 13:12:41 +0100 Subject: [PATCH 5/8] Fix Telegram datasource API credential handling --- datasources/telegram/search_telegram.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/datasources/telegram/search_telegram.py b/datasources/telegram/search_telegram.py index 0fa41379d..51a680418 100644 --- a/datasources/telegram/search_telegram.py +++ b/datasources/telegram/search_telegram.py @@ -127,12 +127,13 @@ def get_items(self, query): "creating it again from scratch.", is_final=True) return None - if not query.get("save-sensitive"): - self.dataset.delete_parameter("api_hash", instant=False) - self.dataset.delete_parameter("api_phone", instant=False) - self.dataset.delete_parameter("api_id", instant=False) - results = asyncio.run(self.execute_queries()) + + if not query.get("save-sensitive"): + self.dataset.delete_parameter("api_hash", instant=True) + self.dataset.delete_parameter("api_phone", instant=True) + self.dataset.delete_parameter("api_id", instant=True) + return results async def execute_queries(self): @@ -506,6 +507,7 @@ def validate_query(query, request, user): "api_id": query.get("api_id"), "api_hash": query.get("api_hash"), "api_phone": query.get("api_phone"), + "save-sensitive": query.get("save-sensitive"), "min_date": min_date, "max_date": max_date } From 3656fe9c007535d7d9722351034ec051383fa79b Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 15 Feb 2022 13:16:20 +0100 Subject: [PATCH 6/8] A space can go a long way --- webtool/templates/result-details.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webtool/templates/result-details.html b/webtool/templates/result-details.html index 86e6cc396..048f81707 100644 --- a/webtool/templates/result-details.html +++ b/webtool/templates/result-details.html @@ -67,7 +67,7 @@

API Credentials
- {% for credential in has_credentials %}{{ credential }}{% endfor %} + {% for credential in has_credentials %}{{ credential }} {% endfor %} (erase)
From a01772484216b58598ea4d9d27d86d2f55862715 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 15 Feb 2022 13:17:24 +0100 Subject: [PATCH 7/8] Don't confuse Reddit API track for sensitive param --- datasources/reddit/search_reddit.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datasources/reddit/search_reddit.py b/datasources/reddit/search_reddit.py index a62e77c00..d508fab8b 100644 --- a/datasources/reddit/search_reddit.py +++ b/datasources/reddit/search_reddit.py @@ -41,7 +41,7 @@ class SearchReddit(SearchWithScope): "careful** when using this privilege.", "requires": "reddit.can_query_without_keyword" }, - "api_type": { + "pushshift_track": { "type": UserInput.OPTION_CHOICE, "help": "API version", "options": { @@ -149,7 +149,7 @@ def get_items_complex(self, query): :return list: Posts, sorted by thread and post ID, in ascending order """ scope = query.get("search_scope") - self.api_type = query.get("api_type", "regular") + self.api_type = query.get("pushshift_track", "regular") # first, build the request parameters if self.api_type == "regular": @@ -646,7 +646,7 @@ def validate_query(query, request, user): raise QueryParametersException("Please provide body queries that do not start with a minus sign.") # URL queries are not possible (yet) for the beta API - if query.get("api_type") == "beta" and query.get("subject_url", None): + if query.get("pushshift_track") == "beta" and query.get("subject_url", None): raise QueryParametersException("URL querying is not possible (yet) for the beta endpoint.") # both dates need to be set, or none From 8b536de20d7aa6fa89b77d6ab05eeaba5b7b907f Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 15 Feb 2022 13:21:04 +0100 Subject: [PATCH 8/8] Image processor compatibility with Telegram images --- processors/metrics/google_vision_api.py | 2 +- processors/visualisation/image_wall.py | 2 +- processors/visualisation/pix-plot.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/processors/metrics/google_vision_api.py b/processors/metrics/google_vision_api.py index ec1898bbd..5c98a6b00 100644 --- a/processors/metrics/google_vision_api.py +++ b/processors/metrics/google_vision_api.py @@ -46,7 +46,7 @@ def is_compatible_with(cls, module=None): :param module: Dataset or processor to determine compatibility with """ - return module.type == "image-downloader" + return module.type.startswith("image-downloader") options = { "amount": { diff --git a/processors/visualisation/image_wall.py b/processors/visualisation/image_wall.py index 0ce758622..1978b2440 100644 --- a/processors/visualisation/image_wall.py +++ b/processors/visualisation/image_wall.py @@ -80,7 +80,7 @@ def is_compatible_with(cls, module=None): :param module: Dataset or processor to determine compatibility with """ - return module.type == "image-downloader" + return module.type.startswith("image-downloader") def process(self): """ diff --git a/processors/visualisation/pix-plot.py b/processors/visualisation/pix-plot.py index cbef45c36..ab90b4f4f 100644 --- a/processors/visualisation/pix-plot.py +++ b/processors/visualisation/pix-plot.py @@ -98,7 +98,7 @@ def is_compatible_with(cls, module=None): :param module: Dataset or processor to determine compatibility with """ - return module.type == "image-downloader" and hasattr(config, 'PIXPLOT_SERVER') and config.PIXPLOT_SERVER + return module.type.startswith("image-downloader") and hasattr(config, 'PIXPLOT_SERVER') and config.PIXPLOT_SERVER def process(self): """