Skip to content

Commit

Permalink
feat: Pluggable transfers and multipart transfer implementation
Browse files Browse the repository at this point in the history
* Implementation of RFC 0072 - Pluggable Transfer Types for Record Files

* Pluggable transfer types in invenio_records_resources/services/files/transfer

* Implementation of multipart transfer in the same place

* Permission generator for per-transfer-type permissions

Co-authored-by: Mirek Simek <[email protected]>
  • Loading branch information
mesemus and mesemus committed Jan 20, 2025
1 parent d8cc925 commit 29f5365
Show file tree
Hide file tree
Showing 35 changed files with 1,839 additions and 411 deletions.
14 changes: 14 additions & 0 deletions invenio_records_resources/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# Copyright (C) 2020-2022 CERN.
# Copyright (C) 2020 Northwestern University.
# Copyright (C) 2025 CESNET.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand All @@ -24,3 +25,16 @@

RECORDS_RESOURCES_ALLOW_EMPTY_FILES = True
"""Allow empty files to be uploaded."""

# Import paths of the transfer classes. Each entry is resolved with
# ``obj_or_import_string`` and registered when the extension builds its
# transfer registry.
RECORDS_RESOURCES_TRANSFERS = [
"invenio_records_resources.services.files.transfer.LocalTransfer",
"invenio_records_resources.services.files.transfer.FetchTransfer",
"invenio_records_resources.services.files.transfer.RemoteTransfer",
"invenio_records_resources.services.files.transfer.MultipartTransfer",
]
"""List of transfer classes to register."""


# Transfer type code (not a class) handed to ``TransferRegistry`` as the
# default transfer type when the registry is created.
RECORDS_RESOURCES_DEFAULT_TRANSFER_TYPE = "L"
"""Default transfer class to use.
One of 'L' (local), 'F' (fetch), 'R' (point to remote), 'M' (multipart)."""
19 changes: 19 additions & 0 deletions invenio_records_resources/ext.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020-2022 CERN.
# Copyright (C) 2025 CESNET.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
# details.

"""Invenio Records Resources module to create REST APIs."""

from functools import cached_property

from invenio_base.utils import obj_or_import_string

from . import config
from .registry import NotificationRegistry, ServiceRegistry

Expand All @@ -22,11 +27,25 @@ def __init__(self, app=None):

def init_app(self, app):
"""Flask application initialization.

Stores the application on the extension, calls ``init_config``, creates the
service and notification registries and registers the extension under
``app.extensions["invenio-records-resources"]``.

:param app: The Flask application the extension is initialized for.
"""
# Kept on the instance because ``transfer_registry`` reads ``self.app.config``
# lazily, after initialization has finished.
self.app = app
self.init_config(app)
self.registry = ServiceRegistry()
self.notification_registry = NotificationRegistry()
# Key used by the proxies module to look the extension up again.
app.extensions["invenio-records-resources"] = self

@cached_property
def transfer_registry(self):
    """Build the registry of file transfer types from the app configuration.

    The registry is created with ``RECORDS_RESOURCES_DEFAULT_TRANSFER_TYPE``
    as its default type and every entry of ``RECORDS_RESOURCES_TRANSFERS`` is
    resolved and registered. Being a ``cached_property``, the result is
    computed on first access and reused afterwards.

    :returns: The populated ``TransferRegistry``.
    """
    # Deferred import: a module-level import would be circular.
    from .services.files.transfer.registry import TransferRegistry

    app_config = self.app.config
    transfers = TransferRegistry(
        app_config["RECORDS_RESOURCES_DEFAULT_TRANSFER_TYPE"]
    )
    for import_path in app_config["RECORDS_RESOURCES_TRANSFERS"]:
        transfer_class = obj_or_import_string(import_path)
        transfers.register(transfer_class)
    return transfers

def init_config(self, app):
"""Initialize configuration."""
for k in dir(config):
Expand Down
5 changes: 5 additions & 0 deletions invenio_records_resources/proxies.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021-2022 CERN.
# Copyright (C) 2025 CESNET.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand All @@ -21,3 +22,7 @@
lambda: current_app.extensions["invenio-records-resources"].notification_registry
)
"""Helper proxy to get the current notifications registry."""

# Helper proxy to get the current transfer registry, i.e. the
# ``transfer_registry`` attribute of the "invenio-records-resources" extension
# of the current application.
current_transfer_registry = LocalProxy(
lambda: current_app.extensions["invenio-records-resources"].transfer_registry
)
5 changes: 5 additions & 0 deletions invenio_records_resources/records/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# Copyright (C) 2020-2024 CERN.
# Copyright (C) 2020 Northwestern University.
# Copyright (C) 2025 CESNET.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand All @@ -20,6 +21,8 @@
from invenio_records.systemfields import DictField, SystemField, SystemFieldsMixin
from invenio_records.systemfields.model import ModelField

from .transfer import TransferField


class Record(RecordBase, SystemFieldsMixin):
"""Base class for record APIs.
Expand Down Expand Up @@ -224,6 +227,8 @@ def remove_all(cls, record_id):
record_id = ModelField()
_record = ModelField("record", dump=False)

transfer = TransferField()

def __repr__(
self,
):
Expand Down
14 changes: 13 additions & 1 deletion invenio_records_resources/records/systemfields/files/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# Copyright (C) 2020-2024 CERN.
# Copyright (C) 2020-2021 Northwestern University.
# Copyright (C) 2025 CESNET.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand Down Expand Up @@ -153,7 +154,16 @@ def unlock(self):

# TODO: "create" and "update" should be merged somehow...
@ensure_enabled
def create(self, key, obj=None, stream=None, data=None, **kwargs):
def create(
self,
key,
*,
obj=None,
stream=None,
data=None,
transfer=None,
**kwargs,
):
"""Create/initialize a file."""
assert not (obj and stream)

Expand All @@ -172,6 +182,8 @@ def create(self, key, obj=None, stream=None, data=None, **kwargs):
rf.object_version = obj
if data:
rf.update(data)
if transfer:
rf.transfer = transfer
rf.commit()
self._entries[key] = rf
return rf
Expand Down
98 changes: 98 additions & 0 deletions invenio_records_resources/records/transfer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020-2023 CERN.
# Copyright (C) 2020-2021 Northwestern University.
# Copyright (C) 2025 CESNET.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
# details.
"""Transfer-related system fields."""

#
# Implementation Note:
#
# This module cannot be placed under `systemfields/files` because `systemfields/files`
# imports several classes from outside the `records` module (e.g., `FilesAttrConfig`
# and `PartialFileDumper`). In turn, those classes import `records.api`, creating a
# circular import.
#
# Furthermore, we need `TransferField` defined directly on `FileRecord`. We cannot
# delegate this to the user (as is done with `FilesField`) because if a target
# repository has not declared the `transfer` field on its own `FileRecord`, file
# uploads would fail. Therefore, `TransferField` must be defined here.
#
# TODO: A cleaner solution would be to refactor `systemfields/files` so that it does
# not introduce dependencies outside the `records` module.
#

from collections.abc import Mapping

from invenio_records.systemfields import SystemField


class TransferFieldData(Mapping):
    """Mapping-style view over the transfer metadata of a file record.

    The object wraps the dictionary it is given without copying it, so every
    change made through this view is written straight into that dictionary.
    The transfer type is stored under the ``"type"`` key.
    """

    def __init__(self, field):
        """Initialize the view.

        :param field: The dictionary holding the transfer metadata.
        """
        self._field = field

    @property
    def transfer_type(self):
        """Get the transfer type, or ``None`` when no type is stored."""
        return self._field.get("type", None)

    @transfer_type.setter
    def transfer_type(self, value):
        """Set the transfer type."""
        self._field["type"] = value

    def get(self, key, default=None):
        """Get the value from the transfer metadata.

        :param key: The metadata key to look up.
        :param default: Value returned when ``key`` is not present.
        """
        return self._field.get(key, default)

    def set(self, values):
        """Set values of transfer metadata, keeping the transfer type.

        All previous metadata is replaced by ``values``. A ``"type"`` entry
        in ``values`` is ignored: the type stored before the call is kept.
        When no type was stored before the call, none is stored afterwards.

        :param values: Mapping with the new transfer metadata.
        """
        # Snapshot first: ``values`` may be this view or the wrapped
        # dictionary itself, and clearing would empty it before it is read.
        new_values = dict(values)
        transfer_type = self.transfer_type
        self._field.clear()
        self._field.update(new_values)
        if transfer_type is None:
            # Nothing to keep; do not persist a ``"type": None`` entry.
            self._field.pop("type", None)
        else:
            self.transfer_type = transfer_type

    def __iter__(self):
        """Iterate over the keys of the transfer metadata."""
        return iter(self._field)

    def __len__(self):
        """Return the number of entries in the transfer metadata."""
        return len(self._field)

    def __getitem__(self, key):
        """Get a value from the transfer metadata.

        :raises KeyError: If ``key`` is not present.
        """
        return self._field[key]

    def __setitem__(self, key, value):
        """Set a value in the transfer metadata."""
        self._field[key] = value


class TransferField(SystemField):
    """System field exposing the transfer metadata of a file record.

    Reading the field yields a ``TransferFieldData`` wrapper around the
    dictionary stored under the field's key; writing replaces that value.
    """

    def __get__(self, record, owner=None):
        """Return the wrapped transfer metadata, or the field on class access.

        When the record holds no value yet, an empty dictionary is stored
        first so that changes made through the wrapper end up in the record.
        """
        if record is None:
            # Accessed on the class itself rather than on an instance.
            return self

        stored = self.get_dictkey(record)
        if stored is not None:
            return TransferFieldData(stored)

        stored = {}
        self.set_dictkey(record, stored)
        return TransferFieldData(stored)

    def __set__(self, record, value):
        """Store ``value`` as the transfer metadata of the record."""
        self.set_dictkey(record, value)
15 changes: 14 additions & 1 deletion invenio_records_resources/resources/files/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
#
# Copyright (C) 2020 CERN.
# Copyright (C) 2020 Northwestern University.
# Copyright (C) 2025 CESNET.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
# details.

"""File resource configuration."""

from flask_resources import ResourceConfig
from flask_resources import HTTPJSONException, ResourceConfig, create_error_handler

from invenio_records_resources.services.errors import TransferException


class FileResourceConfig(ResourceConfig):
Expand All @@ -24,6 +27,16 @@ class FileResourceConfig(ResourceConfig):
"list": "/files",
"item": "/files/<path:key>",
"item-content": "/files/<path:key>/content",
"item-multipart-content": "/files/<path:key>/content/<int:part>",
"item-commit": "/files/<path:key>/commit",
"list-archive": "/files-archive",
}
error_handlers = {
**ResourceConfig.error_handlers,
TransferException: create_error_handler(
lambda e: HTTPJSONException(
code=400,
description=str(e),
)
),
}
39 changes: 31 additions & 8 deletions invenio_records_resources/resources/files/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (C) 2020 CERN.
# Copyright (C) 2020 Northwestern University.
# Copyright (C) 2023 TU Wien.
# Copyright (C) 2025 CESNET.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand All @@ -27,8 +28,6 @@
from invenio_stats.proxies import current_stats
from zipstream import ZIP_STORED, ZipStream

from invenio_records_resources.services.errors import FailedFileUploadException

from ..errors import ErrorHandlersMixin
from .parser import RequestStreamParser

Expand All @@ -50,6 +49,15 @@
default_content_type="application/octet-stream",
)

# Parser for the URL view arguments of the multipart content endpoint
# ("item-multipart-content" route): the record ``pid_value`` (required), the
# file ``key`` and the integer ``part`` of the upload.
request_multipart_args = request_parser(
{
"pid_value": ma.fields.Str(required=True),
"key": ma.fields.Str(),
"part": ma.fields.Int(),
},
location="view_args",
)


#
# Resource
Expand Down Expand Up @@ -83,6 +91,11 @@ def create_url_rules(self):
route("DELETE", routes["item"], self.delete),
route("POST", routes["item-commit"], self.create_commit),
route("PUT", routes["item-content"], self.update_content),
route(
"PUT",
routes["item-multipart-content"],
self.upload_multipart_content,
),
]
return url_rules

Expand Down Expand Up @@ -181,7 +194,7 @@ def read_content(self):
if obj is not None and emitter is not None:
emitter(current_app, record=item._record, obj=obj, via_api=True)

return item.send_file(), 200
return item.send_file()

@request_view_args
def read_archive(self):
Expand Down Expand Up @@ -229,10 +242,20 @@ def update_content(self):
content_length=resource_requestctx.data["request_content_length"],
)

# if errors are set then there was a `TransferException` raised
if item.to_dict().get("errors"):
raise FailedFileUploadException(
file_key=item.file_id, recid=item.id, file=item.to_dict()
)
return item.to_dict(), 200

@request_multipart_args
@request_stream
@response_handler()
def upload_multipart_content(self):
    """Upload the content of a single part of a multipart file.

    The record id, file key and part number come from the parsed view
    arguments; the request stream and its content length come from the
    parsed request data. Everything is forwarded to the service's
    ``set_multipart_file_content``.

    :returns: Tuple of the resulting item as a dictionary and status 200.
    """
    view_args = resource_requestctx.view_args
    payload = resource_requestctx.data

    item = self.service.set_multipart_file_content(
        g.identity,
        view_args["pid_value"],
        view_args["key"],
        view_args["part"],
        payload["request_stream"],
        content_length=payload["request_content_length"],
    )
    return item.to_dict(), 200
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021 CERN.
# Copyright (C) 2025 CESNET.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand All @@ -11,11 +12,13 @@
from .base import FileServiceComponent
from .content import FileContentComponent
from .metadata import FileMetadataComponent
from .multipart import FileMultipartContentComponent
from .processor import FileProcessorComponent

__all__ = (
"FileContentComponent",
"FileMetadataComponent",
"FileProcessorComponent",
"FileServiceComponent",
"FileMultipartContentComponent",
)
Loading

0 comments on commit 29f5365

Please sign in to comment.