Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

files: Add tiles processor for tif generation #574

Merged
merged 3 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
Changes
=======

Version 5.7.0 (released 2024-05-06)

- files-schema: hide `uri` from serialization
- records: added access field to files

Version 5.6.0 (released 2024-04-23)

- services: add support for nested links
Expand Down
2 changes: 1 addition & 1 deletion invenio_records_resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@

from .ext import InvenioRecordsResources

__version__ = "5.6.0"
__version__ = "5.7.0"

__all__ = ("__version__", "InvenioRecordsResources")
99 changes: 97 additions & 2 deletions invenio_records_resources/records/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020-2023 CERN.
# Copyright (C) 2020-2024 CERN.
# Copyright (C) 2020 Northwestern University.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
Expand All @@ -17,7 +17,7 @@
from invenio_files_rest.models import FileInstance, ObjectVersion
from invenio_records.api import Record as RecordBase
from invenio_records.dumpers import SearchDumper
from invenio_records.systemfields import DictField, SystemFieldsMixin
from invenio_records.systemfields import DictField, SystemField, SystemFieldsMixin
from invenio_records.systemfields.model import ModelField


Expand Down Expand Up @@ -50,6 +50,98 @@ class Record(RecordBase, SystemFieldsMixin):
# files = FilesField(...)


# NOTE: Defined here to avoid circular imports
class FileAccess:
    """Access management for files.

    Holds the ``hidden`` flag of a file and tracks, through ``dirty``,
    whether a value was explicitly provided or changed since creation.
    """

    def __init__(self, hidden=None):
        """Create a new FileAccess object for a file.

        :param hidden: Initial hidden status. ``None`` means "not specified":
            the status then defaults to ``False`` and the object is not
            marked as dirty.
        :raises ValueError: If ``hidden`` is neither ``None`` nor a boolean.
        """
        if hidden is not None:
            # Apply the same check as the setter, so that a non-boolean value
            # cannot slip in through the constructor or ``from_dict``.
            self._validate_hidden(hidden)
        self._hidden = bool(hidden)
        self.dirty = hidden is not None

    @staticmethod
    def _validate_hidden(value):
        """Raise ``ValueError`` unless ``value`` is a boolean."""
        if not isinstance(value, bool):
            raise ValueError("Invalid value for 'hidden', it must be a boolean.")

    @property
    def hidden(self):
        """Get the hidden status."""
        return self._hidden

    @hidden.setter
    def hidden(self, value):
        """Set the hidden status and mark the object as dirty.

        :raises ValueError: If ``value`` is not a boolean.
        """
        self._validate_hidden(value)
        self._hidden = value
        self.dirty = True

    @classmethod
    def from_dict(cls, access_dict):
        """Create a new FileAccess object from the specified 'access' property.

        :param access_dict: Dictionary with an optional ``hidden`` key.
            ``None`` is treated like an empty dictionary.
        :returns: A new instance of ``cls``.
        :raises ValueError: If the ``hidden`` value is not a boolean.
        """
        # provide defaults in case there is no 'access' property
        access_dict = access_dict or {}
        return cls(
            hidden=access_dict.get("hidden", False),
        )

    def dump(self):
        """Dump the field values as dictionary."""
        return {
            "hidden": self.hidden,
        }


class FileAccessField(SystemField):
    """File access field.

    Exposes the access information of a record as an access object (by
    default a ``FileAccess``). The object is kept in the field cache and is
    only dumped back into the record data in ``pre_commit``.
    """

    def __init__(self, key=None, access_obj_class=FileAccess):
        """Initialize the access field.

        :param key: Dictionary key of the access data, passed on to
            ``SystemField``.
        :param access_obj_class: Class used to build access objects. It must
            provide ``from_dict()``, ``dump()`` and a ``dirty`` attribute.
        """
        self._access_obj_class = access_obj_class
        super().__init__(key=key)

    def obj(self, instance):
        """Get the access object.

        Returns the cached object if there is one. Otherwise the object is
        built from the data stored under the field key (or with defaults if
        there is no such data) and cached.
        """
        obj = self._get_cache(instance)
        if obj is not None:
            return obj

        data = self.get_dictkey(instance)
        if data:
            obj = self._access_obj_class.from_dict(data)
        else:
            obj = self._access_obj_class()

        self._set_cache(instance, obj)
        return obj

    def set_obj(self, record, obj):
        """Set the access object.

        :param record: The record to set the access object on.
        :param obj: Either a dict (converted with ``from_dict``) or an
            instance of the configured access object class.
        """
        # We accept both dicts and access class objects.
        if isinstance(obj, dict):
            obj = self._access_obj_class.from_dict(obj)

        assert isinstance(obj, self._access_obj_class)

        # We do not dump the object until the pre_commit hook
        # I.e. record.access != record['access']
        self._set_cache(record, obj)

    def __get__(self, record, owner=None):
        """Get the record's access object."""
        if record is None:
            # access by class
            return self

        # access by object
        return self.obj(record)

    def __set__(self, record, obj):
        """Set the record's access object."""
        self.set_obj(record, obj)

    def pre_commit(self, record):
        """Dump the configured values before the record is committed."""
        obj = self.obj(record)
        if obj.dirty:
            # Write to the field key, i.e. the same location ``obj()`` reads
            # through ``get_dictkey``, instead of a hard-coded "access" key.
            # Otherwise a field configured with a custom ``key`` would read
            # from one place and write to another.
            self.set_dictkey(record, obj.dump(), create_if_missing=True)


class FileRecord(RecordBase, SystemFieldsMixin):
"""Base class for a record describing a file."""

Expand Down Expand Up @@ -123,6 +215,9 @@ def remove_all(cls, record_id):
#: Metadata system field.
metadata = DictField(clear_none=True, create_if_missing=True)

#: Access system field
access = FileAccessField()

key = ModelField()
object_version_id = ModelField()
object_version = ModelField(dump=False)
Expand Down
10 changes: 8 additions & 2 deletions invenio_records_resources/records/dumpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022-2023 CERN.
# Copyright (C) 2022-2024 CERN.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand Down Expand Up @@ -53,6 +53,9 @@ def dump(self, record, data):
"metadata": deepcopy(dict(record.get("metadata", {}))),
"key": record.key,
}
access = record.get("access")
if access:
data.update({"access": access})
if record.file:
data.update(record.file.dumps())
return data
Expand All @@ -66,7 +69,10 @@ def load(self, data, record_cls):
"record_id": data["record_id"],
"object_version_id": data["object_version_id"],
}
record_data = {"metadata": data["metadata"]}
record_data = {"metadata": data.get("metadata", {})}
access = data.get("access")
if access:
record_data["access"] = access
model = record_cls.model_cls(**model_data)
record = record_cls(record_data, model=model)
f = File.from_dump(data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,17 @@
"key": {
"description": "Key (filename) of the file.",
"type": "string"
},
"access": {
"description": "Access properties of the file.",
"type": "object",
"additionalProperties": false,
"properties": {
"hidden": {
"description": "Control whether to hide the file in the UI.",
"type": "boolean"
}
}
}
}
},
Expand Down
15 changes: 8 additions & 7 deletions invenio_records_resources/records/systemfields/files/manager.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020-2023 CERN.
# Copyright (C) 2020-2024 CERN.
# Copyright (C) 2020-2021 Northwestern University.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
Expand Down Expand Up @@ -171,7 +171,7 @@ def create(self, key, obj=None, stream=None, data=None, **kwargs):
rf.object_version_id = obj.version_id
rf.object_version = obj
if data:
rf.metadata = data
rf.update(data)
rf.commit()
self._entries[key] = rf
return rf
Expand All @@ -190,6 +190,7 @@ def create_obj(self, key, stream, data=None, **kwargs):
def update(self, key, obj=None, stream=None, data=None, **kwargs):
"""Update a file."""
assert not (obj and stream)
data = data or {}
rf = self.get(key)

if stream:
Expand All @@ -198,8 +199,8 @@ def update(self, key, obj=None, stream=None, data=None, **kwargs):
rf.object_version_id = obj.version_id
rf.object_version = obj
if data:
rf.metadata = data
rf.commit()
rf.update(data)
rf.commit()
return rf

@ensure_enabled
Expand Down Expand Up @@ -298,7 +299,7 @@ def copy(self, src_files, copy_obj=True):
"record_id": record_id,
"version_id": 1,
"object_version_id": ovs_by_key[key]["version_id"],
"json": rf.metadata or {},
"json": dict(rf),
}
rf_to_bulk_insert.append(new_rf)

Expand All @@ -322,10 +323,9 @@ def copy(self, src_files, copy_obj=True):

# Copy file record
if rf.metadata is not None:
self[key] = dst_obj, rf.metadata
self[key] = dst_obj, dict(rf)
else:
self[key] = dst_obj

self.default_preview = src_files.default_preview
self.order = src_files.order

Expand All @@ -342,6 +342,7 @@ def sync(self, src_files, delete_extras=True):
True
Logic follows the bucket sync logic
"""
# TODO record file data is not synced
self.default_preview = src_files.default_preview
self.order = src_files.order

Expand Down
21 changes: 13 additions & 8 deletions invenio_records_resources/services/files/components/metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021-2022 CERN.
# Copyright (C) 2021-2024 CERN.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand All @@ -11,7 +11,6 @@
from copy import deepcopy

from ...errors import FilesCountExceededException
from ..schema import InitFileSchema
from ..transfer import Transfer
from .base import FileServiceComponent

Expand All @@ -21,7 +20,7 @@ class FileMetadataComponent(FileServiceComponent):

def init_files(self, identity, id, record, data):
"""Init files handler."""
schema = InitFileSchema(many=True)
schema = self.service.file_schema.schema(many=True)
validated_data = schema.load(data)

# Brand-new drafts must not exceed the files limit (when files are added via the REST API).
Expand All @@ -36,18 +35,24 @@ def init_files(self, identity, id, record, data):
max_files=maxFiles, resulting_files_count=resulting_files_count
)

for file_metadata in validated_data:
temporary_obj = deepcopy(file_metadata)
file_type = temporary_obj.pop("storage_class", None)
for file_data in validated_data:
copy_fdata = deepcopy(file_data)
file_type = copy_fdata.pop("storage_class", None)
transfer = Transfer.get_transfer(
file_type, service=self.service, uow=self.uow
)
_ = transfer.init_file(record, temporary_obj)
_ = transfer.init_file(record, copy_fdata)

def update_file_metadata(self, identity, id, file_key, record, data):
"""Update file metadata handler."""
# FIXME: move this call to a transfer call
record.files.update(file_key, data=data)
schema = self.service.file_schema.schema(many=False)

# 'key' is required in the schema, but might not be in the data
if "key" not in data:
data["key"] = file_key
validated_data = schema.load(data)
record.files.update(file_key, data=validated_data)

# TODO: `commit_file` might vary based on your storage backend (e.g. S3)
def commit_file(self, identity, id, file_key, record):
Expand Down
2 changes: 0 additions & 2 deletions invenio_records_resources/services/files/processors/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@

"""Image metadata extractor."""

import os

import pkg_resources
from flask import current_app

Expand Down
29 changes: 21 additions & 8 deletions invenio_records_resources/services/files/schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020-2022 CERN.
# Copyright (C) 2020-2024 CERN.
# Copyright (C) 2020 European Union.
#
# Invenio-Records-Resources is free software; you can redistribute it and/or
Expand All @@ -22,8 +22,8 @@
validate,
validates,
)
from marshmallow.fields import UUID, Dict, Integer, Str
from marshmallow_utils.fields import GenMethod, Links, SanitizedUnicode, TZDateTime
from marshmallow.fields import UUID, Boolean, Dict, Integer, Nested, Str
from marshmallow_utils.fields import GenMethod, Links, TZDateTime

from .transfer import TransferType

Expand Down Expand Up @@ -55,7 +55,7 @@ class Meta:

key = Str(required=True)
storage_class = Str()
uri = Str()
uri = Str(load_only=True)
checksum = Str()
size = Integer()

Expand Down Expand Up @@ -101,6 +101,17 @@ def fields_from_file_obj(self, data, **kwargs):
return data


class FileAccessSchema(Schema):
    """Service schema for the ``access`` property of a file.

    Declares a single boolean ``hidden`` field.
    """

    class Meta:
        """Schema options."""

        # Reject any key that is not declared on this schema.
        unknown = RAISE

    hidden = Boolean()


class FileSchema(InitFileSchema):
"""Service schema for files."""

Expand All @@ -113,11 +124,13 @@ class Meta:
updated = TZDateTime(timezone=timezone.utc, format="iso", dump_only=True)

status = GenMethod("dump_status")
metadata = Dict(dump_only=True)
mimetype = Str(dump_only=True, attribute="file.mimetype")
version_id = UUID(attribute="file.version_id")
file_id = UUID(attribute="file.file_id")
bucket_id = UUID(attribute="file.bucket_id")
version_id = UUID(attribute="file.version_id", dump_only=True)
file_id = UUID(attribute="file.file_id", dump_only=True)
bucket_id = UUID(attribute="file.bucket_id", dump_only=True)

metadata = Dict()
access = Nested(FileAccessSchema)

links = Links()

Expand Down
Loading
Loading