Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,39 @@ CHANGELOG
This project uses `semantic versioning <http://semver.org/>`_.
This change log uses principles from `keep a changelog <http://keepachangelog.com/>`_.

[3.20.0] - 2025-12-08
---------------------


Added
^^^^^

- ``freeze_with_manifest`` method on ``ProtoDataSet`` class for converting a
proto-dataset to a frozen dataset using a pre-computed manifest with
client-provided hashes. Validates that README and all manifest items exist
in storage before freezing. Useful for server applications that trust
client-computed hashes.

Changed
^^^^^^^

- Changed build system from ``setuptools`` to ``flit`` (``flit_scm`` build backend)

Deprecated
^^^^^^^^^^


Removed
^^^^^^^


Fixed
^^^^^


Security
^^^^^^^^


[3.19.0] - 2024-12-16
---------------------
Expand Down
104 changes: 104 additions & 0 deletions dtoolcore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,110 @@ def freeze(self, progressbar=None):
# Clean up using the storage broker's post freeze hook.
self._storage_broker.post_freeze_hook()

def freeze_with_manifest(self, manifest, frozen_at=None):
    """
    Convert :class:`dtoolcore.ProtoDataSet` to :class:`dtoolcore.DataSet`
    using a pre-computed manifest.

    This method freezes the dataset without computing hashes server-side.
    The caller provides a manifest with pre-computed item properties
    (hash, size, timestamp). This is useful for server-side operations
    where the client has already computed hashes during upload.

    Before freezing, this method validates that:

    - The README can be read from storage
    - Every identifier listed in ``manifest["items"]`` corresponds to an
      item handle that exists in storage

    Note: This method does NOT verify that the hashes match - it trusts
    the client-provided hashes in the manifest. Items present in storage
    but absent from the manifest are not reported.

    :param manifest: dict with structure::

        {
            "dtoolcore_version": <version>,
            "hash_function": <hash_function_name>,
            "items": {
                <identifier>: {
                    "relpath": <path>,
                    "size_in_bytes": <int>,
                    "hash": <hash_string>,
                    "utc_timestamp": <float>
                }
            }
        }

    :param frozen_at: optional timestamp for when the dataset was frozen.
                      If not provided, and the administrative metadata
                      does not already contain ``frozen_at``, the current
                      UTC time is used.
    :raises: DtoolCoreValueError if README or any manifest item is missing
    """
    logger.debug("Freeze dataset with manifest %s", self)

    # Validate that README exists. Any failure to read it is reported as a
    # DtoolCoreValueError; the original exception is chained explicitly so
    # the underlying storage error stays visible in the traceback.
    try:
        self._storage_broker.get_readme_content()
    except Exception as e:
        raise DtoolCoreValueError(
            f"README file is missing or cannot be read: {e}"
        ) from e

    # Validate that all items in the manifest exist in storage.
    manifest_items = manifest.get("items", {})
    if manifest_items:
        # Identifiers of the items that actually exist in storage.
        existing_identifiers = {
            dtoolcore.utils.generate_identifier(handle)
            for handle in self._storage_broker.iter_item_handles()
        }

        # Sorted so that the sample reported in the error message is
        # deterministic (set iteration order is not).
        missing_identifiers = sorted(
            set(manifest_items) - existing_identifiers
        )

        if missing_identifiers:
            max_reported = 5
            # Report relpaths (falling back to the identifier) of at most
            # ``max_reported`` missing items to keep the message readable.
            missing_relpaths = [
                manifest_items[ident].get("relpath", ident)
                for ident in missing_identifiers[:max_reported]
            ]
            num_unreported = len(missing_identifiers) - max_reported
            if num_unreported > 0:
                missing_relpaths.append(
                    f"... and {num_unreported} more"
                )
            raise DtoolCoreValueError(
                f"Missing {len(missing_identifiers)} item(s) in storage: "
                f"{missing_relpaths}"
            )

    # Call the storage broker pre_freeze hook.
    self._storage_broker.pre_freeze_hook()

    # Use provided manifest instead of computing one from the stored items.
    self._storage_broker.put_manifest(manifest)

    # Generate and persist overlays from any item metadata that has been
    # added.
    overlays = self._generate_overlays()
    for overlay_name, overlay in overlays.items():
        self._put_overlay(overlay_name, overlay)

    # Change the type of the dataset from "protodataset" to "dataset"
    # in the administrative metadata.
    metadata_update = {"type": "dataset"}

    # An explicit frozen_at always wins; otherwise a timestamp is only
    # generated when the administrative metadata has none yet.
    if frozen_at is not None:
        metadata_update["frozen_at"] = frozen_at
    elif "frozen_at" not in self._admin_metadata:
        # NOTE(review): utcnow() yields a naive datetime; presumably
        # dtoolcore.utils.timestamp expects that - confirm before
        # switching to a timezone-aware datetime.
        datetime_obj = datetime.datetime.utcnow()
        metadata_update["frozen_at"] = dtoolcore.utils.timestamp(datetime_obj)

    # Apply the change(s) to the administrative metadata.
    self._admin_metadata.update(metadata_update)
    self._storage_broker.put_admin_metadata(self._admin_metadata)

    # Clean up using the storage broker's post freeze hook.
    self._storage_broker.post_freeze_hook()


class DataSetCreator(object):
"""Context manager for creating a dataset.
Expand Down
24 changes: 16 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
[build-system]
requires = ["setuptools>=42", "setuptools_scm[toml]>=6.3"]
build-backend = "setuptools.build_meta"
requires = ["flit_scm"]
build-backend = "flit_scm:buildapi"

[project]
name = "dtoolcore"
description = "Core API for managing (scientific) data"
readme = "README.rst"
license = {file = "LICENSE"}
license = {text = "MIT"}
authors = [
{name = "Tjelvar Olsson", email = "tjelvar.olsson@gmail.com"}
]
dynamic = ["version"]
dependencies = ["setuptools"]
requires-python = ">=3.8"
dependencies = []

[project.optional-dependencies]
test = [
Expand All @@ -29,13 +30,20 @@ Documentation = "https://dtoolcore.readthedocs.io"
Repository = "https://github.com/jic-dtool/dtoolcore"
Changelog = "https://github.com/jic-dtool/dtoolcore/blob/master/CHANGELOG.rst"

[project.entry-points."dtool.storage_brokers"]
DiskStorageBroker = "dtoolcore.storagebroker:DiskStorageBroker"

[tool.flit.module]
name = "dtoolcore"

[tool.setuptools_scm]
version_scheme = "guess-next-dev"
local_scheme = "no-local-version"
write_to = "dtoolcore/version.py"

[tool.setuptools]
packages = ["dtoolcore"]
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "--cov=dtoolcore --cov-report=term-missing"

[project.entry-points."dtool.storage_brokers"]
"DiskStorageBroker" = "dtoolcore.storagebroker:DiskStorageBroker"
[tool.flake8]
exclude = ["env*", ".tox", ".git", "*.egg", "build", "docs", "venv"]
10 changes: 0 additions & 10 deletions setup.cfg

This file was deleted.

Loading