Skip to content

Commit

Permalink
Add finished_at timestamp to datasets (#462)
Browse files Browse the repository at this point in the history
* get last_update timestamp from log; display in dataset results

* get_last_update catch unable to parse datetime

* add migrate script and use database to store finished_at timestamp

* rename finished_at to timestamp_finished
  • Loading branch information
dale-wahl authored Feb 18, 2025
1 parent 9b9fab9 commit 6cf7ae7
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 29 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
1.47

This file should not be modified. It is used by 4CAT to determine whether it
needs to run migration scripts to e.g. update the database structure to a more
Expand Down
39 changes: 20 additions & 19 deletions backend/database.sql
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,26 @@ CREATE UNIQUE INDEX IF NOT EXISTS unique_job

-- queries
-- Dataset metadata. One row per dataset/query; the actual result rows live
-- in the file referenced by result_file, not in the database.
CREATE TABLE IF NOT EXISTS datasets (
  id                 SERIAL PRIMARY KEY,
  key                text,                       -- unique dataset identifier used throughout 4CAT
  type               text DEFAULT 'search',
  key_parent         text DEFAULT '' NOT NULL,   -- key of parent dataset ('' for top-level datasets)
  creator            VARCHAR DEFAULT 'anonymous',
  query              text,
  job                BIGINT DEFAULT 0,
  parameters         text,
  result_file        text DEFAULT '',
  timestamp          integer,                    -- unix time the dataset was created
  status             text,
  num_rows           integer DEFAULT 0,
  progress           float DEFAULT 0.0,
  is_finished        boolean DEFAULT FALSE,
  -- unix time the dataset finished processing; NULL for unfinished datasets
  -- and for datasets created before this column existed (see migrate-1.46-1.47.py)
  timestamp_finished integer DEFAULT NULL,
  is_private         boolean DEFAULT TRUE,
  software_version   text,
  software_file      text DEFAULT '',
  software_source    text DEFAULT '',
  annotation_fields  text DEFAULT ''
);

CREATE TABLE datasets_owners (
Expand Down
2 changes: 1 addition & 1 deletion common/lib/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def finish(self, num_rows=0):
raise RuntimeError("Cannot finish a finished dataset again")

self.db.update("datasets", where={"key": self.data["key"]},
data={"is_finished": True, "num_rows": num_rows, "progress": 1.0})
data={"is_finished": True, "num_rows": num_rows, "progress": 1.0, "timestamp_finished": int(time.time())})
self.data["is_finished"] = True
self.data["num_rows"] = num_rows

Expand Down
67 changes: 67 additions & 0 deletions helper-scripts/migrate/migrate-1.46-1.47.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Migration script 1.46 -> 1.47: add a timestamp_finished column to the
# datasets table and backfill it for already-finished datasets by parsing
# the timestamp of the last line of each dataset's log file.
import configparser
import datetime
import os
import sys

from pathlib import Path

# Make the 4CAT root importable when running this script directly.
sys.path.insert(0, os.path.join(os.path.abspath(os.path.dirname(__file__)), "../.."))
from common.lib.database import Database
from common.lib.logger import Logger
from common.lib.dataset import DataSet
from common.lib.helpers import get_last_line

log = Logger(output=True)

# Database credentials come from the standard 4CAT config file.
ini = configparser.ConfigParser()
ini.read(Path(__file__).parent.parent.parent.resolve().joinpath("config/config.ini"))
db_config = ini["DATABASE"]

db = Database(logger=log, dbname=db_config["db_name"], user=db_config["db_user"], password=db_config["db_password"],
              host=db_config["db_host"], port=db_config["db_port"], appname="4cat-migrate")

print(" Checking if datasets table has a column 'timestamp_finished'...")
has_column = db.fetchone(
    "SELECT COUNT(*) AS num FROM information_schema.columns WHERE table_name = 'datasets' AND column_name = 'timestamp_finished'")
if has_column["num"] == 0:
    print(" ...No, adding.")
    db.execute("ALTER TABLE datasets ADD COLUMN timestamp_finished INTEGER DEFAULT NULL")
    print(" ...Added column. Updating datasets with information based on logs.")
    finished_rows = db.fetchall("SELECT key FROM datasets WHERE is_finished = TRUE")
    unable_to_update = []
    update_data = []

    for row in finished_rows:
        key = row["key"]
        dataset = DataSet(key=key, db=db)

        if dataset.get_log_path().exists():
            try:
                # Log lines start with a "%c"-formatted timestamp (24 chars,
                # e.g. "Mon Feb 17 12:34:56 2025"); use the last line's as the
                # finish time. NOTE(review): "%c" is locale-dependent — assumes
                # logs were written under the default C/English locale.
                timestamp_finished = datetime.datetime.strptime(get_last_line(dataset.get_log_path())[:24], "%c")
                update_data.append((key, int(timestamp_finished.timestamp())))
            except ValueError as e:
                # Unable to parse datetime from last line
                print(f" ...Unable to parse datetime from last line for dataset {key}: {e}")
                unable_to_update.append(key)
        else:
            # No log file; unable to determine timestamp_finished
            print(f" ...Unable to determine timestamp_finished for dataset {key}; no log file.")
            unable_to_update.append(key)

    if update_data:
        # Single batched UPDATE via VALUES join instead of one UPDATE per dataset.
        db.execute_many("UPDATE datasets SET timestamp_finished = data.timestamp_finished FROM (VALUES %s) AS data (key, timestamp_finished) WHERE datasets.key = data.key", replacements=update_data)

    db.commit()
    print(f" ...Updated {len(update_data)} datasets.")
    if unable_to_update:
        print(" ...Unable to update the following datasets:")
        for key in unable_to_update:
            print(f"  {key}")
    else:
        print(" ...All datasets updated.")

else:
    print(" ...Yes, nothing to update.")
23 changes: 15 additions & 8 deletions webtool/templates/components/result-details.html
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,21 @@ <h2 class="blocktitle{% if current_user.is_authenticated and (__user_config("pri
{% endif %}

<div class="onequarter">
    {# Finished datasets with a recorded finish time show that; everything
       else (unfinished, or finished before timestamp_finished existed)
       falls back to the creation timestamp. #}
    {% if dataset.is_finished() and dataset.timestamp_finished %}
    <dt>Finished</dt>
    <dd>
        {{ dataset.timestamp_finished|int|datetime(fmt="%d %B %Y, %H:%M")|safe }}
    </dd>
    {% else %}
    <dt>Created</dt>
    <dd>
        {% if dataset.parameters.original_timestamp %}
        <i class="fa fa-file-import tooltip-trigger" aria-hidden="true" aria-controls="tooltip-{{ dataset.key }}-imported"></i> <span class="sr-only">This dataset was imported and was originally created on {{ dataset.parameters.original_timestamp|datetime(fmt="%d %B %Y, %H:%M")|safe }}.</span>
        <span role="tooltip" aria-hidden="true" id="tooltip-{{ dataset.key }}-imported">This dataset was imported and was originally created on {{ dataset.parameters.original_timestamp|datetime(fmt="%d %B %Y, %H:%M")|safe }}.</span>
        {% endif %}
        {{ dataset.timestamp|datetime(fmt="%d %B %Y, %H:%M")|safe }}
    </dd>
    {% endif %}
</div>

<div class="threequarters dataset-owner-list">
Expand Down

0 comments on commit 6cf7ae7

Please sign in to comment.