3 changes: 2 additions & 1 deletion data_pipeline_api/__init__.py
@@ -3,6 +3,7 @@
"link_read",
"link_write",
"finalise",
"search",
"raise_issue_by_data_product",
"raise_issue_by_index",
"raise_issue_with_config",
@@ -15,7 +16,7 @@

from .fdp_utils import get_handle_index_from_path
from .link import link_read, link_write
from .pipeline import finalise, initialise
from .pipeline import finalise, initialise, search
from .raise_issue import (
    raise_issue_by_data_product,
    raise_issue_by_existing_data_product,
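With this change `search` is re-exported at the package root via `__all__` and the `.pipeline` import above. A minimal import sketch (not part of the diff):

    # `search` is now importable directly from the package root,
    # alongside the existing pipeline entry points.
    from data_pipeline_api import finalise, initialise, search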
46 changes: 46 additions & 0 deletions data_pipeline_api/pipeline.py
@@ -1,12 +1,58 @@
import datetime
import json
import logging
import os

import requests
import yaml

from data_pipeline_api import fdp_utils


def search(
    handle: dict,
    token: str,
    wildcard: str,
    endpoint: str,
    key: str,
) -> str:
    """
    Searches the given registry endpoint and returns, as a JSON string,
    the list of results whose `key` field contains the wildcard.

    Args:
        | handle: pipeline handle
        | token: registry token
        | wildcard: substring to search for
        | endpoint: registry API endpoint to query
        | key: field of each result in which the wildcard is searched

    Returns:
        | str: a JSON-formatted list of the matching result dictionaries
    """

    # The registry URL and API version are recorded in the handle at
    # initialise(); without them no request can be made.
    try:
        api_version = handle["yaml"]["run_metadata"]["api_version"]
        registry_url = handle["yaml"]["run_metadata"][
            "local_data_registry_url"
        ]
    except KeyError:
        return json.dumps([], indent=4, sort_keys=True)

    headers = fdp_utils.get_headers(token=token, api_version=api_version)
    if registry_url[-1] != "/":
        registry_url += "/"
    registry_url += endpoint + "/?"

    response = requests.get(registry_url, headers=headers)
    if response.status_code != 200:
        return json.dumps([], indent=4, sort_keys=True)
    data = response.json()

    # Filter client-side: keep results whose `key` field contains the wildcard.
    res = [obj for obj in data["results"] if wildcard in obj[key]]  # type: ignore

    return json.dumps(res, indent=4, sort_keys=True)


def initialise(token: str, config: str, script: str) -> dict:
"""Reads in token, config file and script, creates necessary registry items
and creates new code run.
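A minimal usage sketch for the new `search` function (not part of the diff): it assumes a handle produced by `initialise` whose run_metadata provides `local_data_registry_url` and `api_version`; the token and file paths below are placeholders. `search` fetches the endpoint listing, filters it client-side on the given key, and returns a JSON string (an empty list on a non-200 response or missing registry metadata).

    import json

    from data_pipeline_api import pipeline

    token = "<registry token>"  # placeholder: a valid local registry token
    handle = pipeline.initialise(token, "config.yaml", "script.sh")  # placeholder paths

    # Query the 'data_product' endpoint for entries whose 'name' contains
    # "find"; decode the returned JSON string before use.
    matches = json.loads(pipeline.search(handle, token, "find", "data_product", "name"))
    for entry in matches:
        print(entry["name"])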
78 changes: 36 additions & 42 deletions pyproject.toml
@@ -1,15 +1,14 @@
[tool.poetry]
authors = [
"Ryan J Field <[email protected]>",
"Dennis Reddyhoff <[email protected]>",
"Robert D Turner <[email protected]>",
"Bruno Viola <[email protected]>",
"Kristian Zarebski <[email protected]>",
]
description = "Python api to interact with the Fair Data Pipeline"
name = "data_pipeline_api"
version = "0.7.5"
description = "Python api to interact with the Fair Data Pipeline"
authors = [
"Ryan J Field <[email protected]>",
"Dennis Reddyhoff <[email protected]>",
"Robert D Turner <[email protected]>",
"Bruno Viola <[email protected]>",
"Kristian Zarebski <[email protected]>"
]


homepage = "https://www.fairdatapipeline.org/"

@@ -18,74 +17,69 @@ repository = "https://github.com/FAIRDataPipeline/pyDataPipeline"
license = "GNU General Public License v3.0"

packages = [
    {include = "data_pipeline_api"}
    {include = "data_pipeline_api"},
]

readme = "README.md"

keywords = [
"FAIR Data Pipeline",
"FAIR",
"Data Management",
"Provenance"
"FAIR Data Pipeline",
"FAIR",
"Data Management",
"Provenance",
]

[tool.poetry.dependencies]
PyYAML = "^6.0"
h5py = "^3.6.0"
matplotlib = "^3.5.1"
python = "^3.7.1,<3.11"
requests = "^2.27.1"
matplotlib = "^3.5.1"
scipy = "^1.7.3"
h5py = "^3.6.0"
PyYAML = "^6.0"

[tool.poetry.dev-dependencies]
pytest = "^7.0.0"
Sphinx = "^4.3.2"
black = "^22.1"
mypy = "^0.931"
flake8 = "^4.0.1"
isort = "^5.10.1"
mypy = "^0.931"
poetry = "^1.1.6"
pytest-mock = "^3.7.0"
pytest-dependency = "^0.5.1"
pytest-cov = "^3.0.0"
pre-commit = "^2.17.0"
isort = "^5.10.1"
Sphinx = "^4.3.2"
pytest = "^7.0.0"
pytest-cov = "^3.0.0"
pytest-dependency = "^0.5.1"
pytest-mock = "^3.7.0"
sphinx-rtd-theme = "^1.0.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

requires = ["poetry-core>=1.0.0"]

[tool.poetry.urls]
"Issue Tracker" = "https://github.com/FAIRDataPipeline/pyDataPipeline/issues"



[tool.isort]
profile = 'black'
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true
force_grid_wrap = 0
include_trailing_comma = true
line_length = 79


multi_line_output = 3
profile = 'black'
use_parentheses = true

[tool.black]
line-length = 79

[tool.pytest.ini_options]
addopts = '-s -v --cov=fairdatapipeline --cov-report=html --cov-report=term'
addopts = '-s -v --cov=data_pipeline_api --cov-report=html --cov-report=term'
markers = [
"pipeline: tests for 'pipeline' module ",
"issue: tests for raising issues ",
"utilities: tests for 'utilities' functions ",
"apiversion: tests for api versioning ",
"pipeline: tests for 'pipeline' module ",
"issue: tests for raising issues ",
"utilities: tests for 'utilities' functions ",
"apiversion: tests for api versioning ",
]

[tool.mypy]
ignore_missing_imports = true
disallow_untyped_defs = true
disallow_untyped_calls = true
disallow_untyped_defs = true
ignore_missing_imports = true
20 changes: 20 additions & 0 deletions tests/ext/find_csv.yaml
@@ -0,0 +1,20 @@
run_metadata:
  description: Write csv file
  local_data_registry_url: http://127.0.0.1:8000/api/
  remote_data_registry_url: https://data.scrc.uk/api/
  default_input_namespace: testing
  default_output_namespace: testing
  write_data_store: tmp
  local_repo: ./
  script: |-
    python3 py.test
  public: true
  latest_commit: 221bfe8b52bbfb3b2dbdc23037b7dd94b49aaa70
  remote_repo: https://github.com/FAIRDataPipeline/pyDataPipeline

write:
- data_product: find/csv
  description: test csv to test find data products
  file_type: csv
  use:
    version: 0.0.1
31 changes: 31 additions & 0 deletions tests/test_raise_issue.py
@@ -1,3 +1,4 @@
import json
import os
import shutil

@@ -29,6 +30,11 @@ def config(test_dir: str) -> str:
return os.path.join(test_dir, "write_csv.yaml")


@pytest.fixture
def fconfig(test_dir: str) -> str:
return os.path.join(test_dir, "find_csv.yaml")


@pytest.mark.pipeline
def test_initialise(token: str, config: str, script: str) -> None:
    handle = pipeline.initialise(token, config, script)
@@ -122,6 +128,31 @@ def test_link_read(
    assert type(link_read_1) == str and type(link_read_2) == str


@pytest.mark.pipeline
def test_find_data_product(
    token: str,
    fconfig: str,
    script: str,
    test_dir: str,
    wildcard: str = "find",
    key: str = "name",
) -> None:

    # Write and finalise a 'find/csv' data product, then search the registry for it.
    handle = pipeline.initialise(token, fconfig, script)
    tmp_csv = os.path.join(test_dir, "test.csv")
    link_write = pipeline.link_write(handle, "find/csv")
    shutil.copy(tmp_csv, link_write)
    pipeline.finalise(token, handle)
    config = os.path.join(test_dir, "read_csv.yaml")
    handle = pipeline.initialise(token, config, script)

    results = pipeline.search(handle, token, wildcard, "data_product", "name")
    res = json.loads(results)
    assert len(res) == 1
    result = fdp_utils.get_first_entry(res)
    assert wildcard in result[key]


@pytest.mark.pipeline
def test_link_read_data_product_exists(
    token: str, config: str, script: str, test_dir: str