diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 23604d7..bf3f764 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,6 +4,39 @@ CHANGELOG
This project uses `semantic versioning <http://semver.org/>`_.
This change log uses principles from `keep a changelog <http://keepachangelog.com/>`_.
+[3.20.0] - 2025-12-08
+---------------------
+
+
+Added
+^^^^^
+
+- ``freeze_with_manifest`` method on ``ProtoDataSet`` class for converting a
+ proto-dataset to a frozen dataset using a pre-computed manifest with
+ client-provided hashes. Validates that README and all manifest items exist
+ in storage before freezing. Useful for server applications that trust
+ client-computed hashes.
+
+Changed
+^^^^^^^
+
+- changed build system to ``flit``
+
+Deprecated
+^^^^^^^^^^
+
+
+Removed
+^^^^^^^
+
+
+Fixed
+^^^^^
+
+
+Security
+^^^^^^^^
+
[3.19.0] - 2024-12-16
---------------------
diff --git a/dtoolcore/__init__.py b/dtoolcore/__init__.py
index 211aa30..208ea9f 100644
--- a/dtoolcore/__init__.py
+++ b/dtoolcore/__init__.py
@@ -858,6 +858,110 @@ def freeze(self, progressbar=None):
# Clean up using the storage broker's post freeze hook.
self._storage_broker.post_freeze_hook()
+ def freeze_with_manifest(self, manifest, frozen_at=None):
+ """
+ Convert :class:`dtoolcore.ProtoDataSet` to :class:`dtoolcore.DataSet`
+ using a pre-computed manifest.
+
+ This method freezes the dataset without computing hashes server-side.
+ The caller provides a manifest with pre-computed item properties
+ (hash, size, timestamp). This is useful for server-side operations
+ where the client has already computed hashes during upload.
+
+ Before freezing, this method validates that:
+ - The README file exists in storage
+ - All items listed in the manifest exist in storage
+
+ Note: This method does NOT verify that the hashes match - it trusts
+ the client-provided hashes in the manifest.
+
+ :param manifest: dict with structure::
+
+            {
+                "dtoolcore_version": <version string>,
+                "hash_function": <hash function name>,
+                "items": {
+                    <identifier>: {
+                        "relpath": <item relative path>,
+                        "size_in_bytes": <int>,
+                        "hash": <hash string>,
+                        "utc_timestamp": <float>
+                    }
+                }
+            }
+
+ :param frozen_at: optional timestamp for when the dataset was frozen.
+ If not provided, uses the current UTC time.
+ :raises: DtoolCoreValueError if README or any manifest item is missing
+ """
+ logger.debug("Freeze dataset with manifest {}".format(self))
+
+ # Validate that README exists
+ try:
+ self._storage_broker.get_readme_content()
+ except Exception as e:
+ raise DtoolCoreValueError(
+ f"README file is missing or cannot be read: {e}"
+ )
+
+ # Validate that all items in the manifest exist in storage
+ manifest_items = manifest.get("items", {})
+ if manifest_items:
+ # Get identifiers of items that actually exist in storage
+ existing_handles = set(self._storage_broker.iter_item_handles())
+ existing_identifiers = set(
+ dtoolcore.utils.generate_identifier(h) for h in existing_handles
+ )
+
+ # Check for missing items
+ expected_identifiers = set(manifest_items.keys())
+ missing_identifiers = expected_identifiers - existing_identifiers
+
+ if missing_identifiers:
+ # Get relpaths of missing items for better error message
+ missing_relpaths = [
+ manifest_items[ident].get("relpath", ident)
+ for ident in list(missing_identifiers)[:5] # Limit to 5
+ ]
+ if len(missing_identifiers) > 5:
+ missing_relpaths.append(
+ f"... and {len(missing_identifiers) - 5} more"
+ )
+ raise DtoolCoreValueError(
+ f"Missing {len(missing_identifiers)} item(s) in storage: "
+ f"{missing_relpaths}"
+ )
+
+ # Call the storage broker pre_freeze hook.
+ self._storage_broker.pre_freeze_hook()
+
+ # Use provided manifest instead of computing
+ self._storage_broker.put_manifest(manifest)
+
+ # Generate and persist overlays from any item metadata that has been
+ # added.
+ overlays = self._generate_overlays()
+ for overlay_name, overlay in overlays.items():
+ self._put_overlay(overlay_name, overlay)
+
+ # Change the type of the dataset from "protodataset" to "dataset"
+ # in the administrative metadata.
+ metadata_update = {"type": "dataset"}
+
+ # Use provided frozen_at or generate one
+ if frozen_at is not None:
+ metadata_update["frozen_at"] = frozen_at
+ elif "frozen_at" not in self._admin_metadata:
+ datetime_obj = datetime.datetime.utcnow()
+ metadata_update["frozen_at"] = dtoolcore.utils.timestamp(datetime_obj)
+
+ # Apply the change(s) to the administrative metadata.
+ self._admin_metadata.update(metadata_update)
+ self._storage_broker.put_admin_metadata(self._admin_metadata)
+
+ # Clean up using the storage broker's post freeze hook.
+ self._storage_broker.post_freeze_hook()
+
class DataSetCreator(object):
"""Context manager for creating a dataset.
diff --git a/pyproject.toml b/pyproject.toml
index 9dbfb00..cb56204 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,17 +1,18 @@
[build-system]
-requires = ["setuptools>=42", "setuptools_scm[toml]>=6.3"]
-build-backend = "setuptools.build_meta"
+requires = ["flit_scm"]
+build-backend = "flit_scm:buildapi"
[project]
name = "dtoolcore"
description = "Core API for managing (scientific) data"
readme = "README.rst"
-license = {file = "LICENSE"}
+license = {text = "MIT"}
authors = [
{name = "Tjelvar Olsson", email = "tjelvar.olsson@gmail.com"}
]
dynamic = ["version"]
-dependencies = ["setuptools"]
+requires-python = ">=3.8"
+dependencies = []
[project.optional-dependencies]
test = [
@@ -29,13 +30,20 @@ Documentation = "https://dtoolcore.readthedocs.io"
Repository = "https://github.com/jic-dtool/dtoolcore"
Changelog = "https://github.com/jic-dtool/dtoolcore/blob/master/CHANGELOG.rst"
+[project.entry-points."dtool.storage_brokers"]
+DiskStorageBroker = "dtoolcore.storagebroker:DiskStorageBroker"
+
+[tool.flit.module]
+name = "dtoolcore"
+
[tool.setuptools_scm]
version_scheme = "guess-next-dev"
local_scheme = "no-local-version"
write_to = "dtoolcore/version.py"
-[tool.setuptools]
-packages = ["dtoolcore"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+addopts = "--cov=dtoolcore --cov-report=term-missing"
-[project.entry-points."dtool.storage_brokers"]
-"DiskStorageBroker" = "dtoolcore.storagebroker:DiskStorageBroker"
+[tool.flake8]
+exclude = ["env*", ".tox", ".git", "*.egg", "build", "docs", "venv"]
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 95f3f57..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,10 +0,0 @@
-[flake8]
-exclude=env*,.tox,.git,*.egg,build,docs,venv
-
-[tool:pytest]
-testpaths = tests
-addopts = --cov=dtoolcore --cov-report=term-missing
-#addopts = -x --pdb
-
-[cov:run]
-source = dtoolcore
diff --git a/tests/test_freeze_with_manifest.py b/tests/test_freeze_with_manifest.py
new file mode 100644
index 0000000..db35550
--- /dev/null
+++ b/tests/test_freeze_with_manifest.py
@@ -0,0 +1,583 @@
+"""Test the freeze_with_manifest method of ProtoDataSet."""
+
+import os
+import uuid as uuid_module
+
+import pytest
+
+from . import tmp_dir_fixture # NOQA
+
+from dtoolcore.utils import (
+ IS_WINDOWS,
+ generous_parse_uri,
+ windows_to_unix_path,
+ generate_identifier,
+)
+
+
+def _sanitise_base_uri(tmp_dir):
+ base_uri = tmp_dir
+ if IS_WINDOWS:
+ parsed_base_uri = generous_parse_uri(tmp_dir)
+ unix_path = windows_to_unix_path(parsed_base_uri.path)
+ base_uri = "file://{}".format(unix_path)
+ return base_uri
+
+
+def test_freeze_with_manifest_basic(tmp_dir_fixture): # NOQA
+ """Test basic freezing of a proto dataset with provided manifest."""
+ import dtoolcore
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-freeze-manifest"
+ creator_username = "tester"
+ frozen_at = 1234567890.123
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": {},
+ }
+
+ # Create a proto dataset
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ creator_username=creator_username,
+ )
+
+ # Freeze with the provided manifest
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=frozen_at)
+
+ # Load the dataset and verify it's frozen
+ dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
+
+ assert isinstance(dataset, dtoolcore.DataSet)
+ assert dataset.name == name
+ assert dataset.admin_metadata["creator_username"] == creator_username
+ assert dataset.admin_metadata["frozen_at"] == frozen_at
+ assert dataset.admin_metadata["type"] == "dataset"
+
+
+def test_freeze_with_manifest_with_items(tmp_dir_fixture): # NOQA
+ """Test freezing with manifest containing items."""
+ import dtoolcore
+ import tempfile
+ import os
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-items"
+ frozen_at = 1234567890.0
+
+ # Create manifest with items
+ items = {
+ generate_identifier("data/file1.txt"): {
+ "relpath": "data/file1.txt",
+ "size_in_bytes": 100,
+ "hash": "abc123",
+ "utc_timestamp": 1234567890.0,
+ },
+ generate_identifier("data/file2.csv"): {
+ "relpath": "data/file2.csv",
+ "size_in_bytes": 500,
+ "hash": "def456",
+ "utc_timestamp": 1234567891.0,
+ },
+ }
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": items,
+ }
+
+ # Create a proto dataset
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+
+ # Add items to storage
+ temp_files = []
+ for relpath in ["data/file1.txt", "data/file2.csv"]:
+ with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
+ f.write(f"content for {relpath}")
+ temp_files.append((f.name, relpath))
+
+ try:
+ for temp_path, relpath in temp_files:
+ proto_dataset.put_item(temp_path, relpath)
+ finally:
+ for temp_path, _ in temp_files:
+ os.unlink(temp_path)
+
+ # Freeze with the provided manifest
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=frozen_at)
+
+ # Load and verify
+ dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
+
+ assert set(dataset.identifiers) == set(items.keys())
+ for identifier, props in items.items():
+ item_props = dataset.item_properties(identifier)
+ assert item_props["relpath"] == props["relpath"]
+ assert item_props["size_in_bytes"] == props["size_in_bytes"]
+ assert item_props["hash"] == props["hash"]
+
+
+def test_freeze_with_manifest_auto_frozen_at(tmp_dir_fixture): # NOQA
+ """Test that frozen_at is auto-generated if not provided."""
+ import dtoolcore
+ import time
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-auto-frozen-at"
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": {},
+ }
+
+ # Create a proto dataset
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+
+ before_freeze = time.time()
+ proto_dataset.freeze_with_manifest(manifest)
+ after_freeze = time.time()
+
+ # Load and verify frozen_at was auto-generated
+ dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
+
+ assert "frozen_at" in dataset.admin_metadata
+ frozen_at = dataset.admin_metadata["frozen_at"]
+ assert before_freeze <= frozen_at <= after_freeze
+
+
+def test_freeze_with_manifest_with_readme(tmp_dir_fixture): # NOQA
+ """Test freezing with README content."""
+ import dtoolcore
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-readme"
+ readme_content = "---\ndescription: Test dataset\nproject: Testing"
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": {},
+ }
+
+ # Create a proto dataset with README
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ readme_content=readme_content,
+ )
+
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=1234567890.0)
+
+ # Load and verify README persisted
+ dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
+ assert dataset.get_readme_content() == readme_content
+
+
+def test_freeze_with_manifest_with_tags(tmp_dir_fixture): # NOQA
+ """Test freezing with tags added to proto dataset."""
+ import dtoolcore
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-tags"
+ tags = ["production", "validated"]
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": {},
+ }
+
+ # Create a proto dataset and add tags
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+ for tag in tags:
+ proto_dataset.put_tag(tag)
+
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=1234567890.0)
+
+ # Load and verify tags persisted
+ dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
+ assert set(dataset.list_tags()) == set(tags)
+
+
+def test_freeze_with_manifest_with_annotations(tmp_dir_fixture): # NOQA
+ """Test freezing with annotations added to proto dataset."""
+ import dtoolcore
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-annotations"
+ annotations = {
+ "project": "test-project",
+ "version": 42,
+ }
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": {},
+ }
+
+ # Create a proto dataset and add annotations
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+ for ann_name, ann_value in annotations.items():
+ proto_dataset.put_annotation(ann_name, ann_value)
+
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=1234567890.0)
+
+ # Load and verify annotations persisted
+ dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
+ assert set(dataset.list_annotation_names()) == set(annotations.keys())
+ for ann_name, ann_value in annotations.items():
+ assert dataset.get_annotation(ann_name) == ann_value
+
+
+def test_freeze_with_manifest_full(tmp_dir_fixture): # NOQA
+ """Test freezing with all features combined."""
+ import dtoolcore
+ import tempfile
+ import os
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "full-test-dataset"
+ creator_username = "scientist"
+ frozen_at = 1609459200.0 # 2021-01-01 00:00:00 UTC
+ readme_content = "---\nproject: Full Test\ndescription: Complete test"
+ tags = ["experiment", "simulation"]
+ annotations = {
+ "experiment_id": "EXP-001",
+ "parameters": {"temp": 300, "pressure": 1.0},
+ }
+
+ items = {
+ generate_identifier("results.json"): {
+ "relpath": "results.json",
+ "size_in_bytes": 1024,
+ "hash": "result_hash",
+ "utc_timestamp": frozen_at,
+ },
+ }
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": items,
+ }
+
+ # Create proto dataset with all features
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ readme_content=readme_content,
+ creator_username=creator_username,
+ )
+ for tag in tags:
+ proto_dataset.put_tag(tag)
+ for ann_name, ann_value in annotations.items():
+ proto_dataset.put_annotation(ann_name, ann_value)
+
+ # Add item to storage
+ with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f:
+ f.write('{"results": "test data"}')
+ temp_path = f.name
+
+ try:
+ proto_dataset.put_item(temp_path, "results.json")
+ finally:
+ os.unlink(temp_path)
+
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=frozen_at)
+
+ # Load and verify everything
+ dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
+ assert dataset.name == name
+ assert dataset.admin_metadata["creator_username"] == creator_username
+ assert dataset.admin_metadata["frozen_at"] == frozen_at
+ assert dataset.get_readme_content() == readme_content
+ assert set(dataset.list_tags()) == set(tags)
+ assert set(dataset.list_annotation_names()) == set(annotations.keys())
+ assert set(dataset.identifiers) == set(items.keys())
+
+
+def test_freeze_with_manifest_different_hash_function(tmp_dir_fixture): # NOQA
+ """Test that hash_function in manifest is preserved."""
+ import dtoolcore
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-hash-function"
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "sha256sum_hexdigest",
+ "items": {},
+ }
+
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=1234567890.0)
+
+ # Load and verify hash function is preserved
+ dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
+ loaded_manifest = dataset._storage_broker.get_manifest()
+ assert loaded_manifest["hash_function"] == "sha256sum_hexdigest"
+
+
+def test_proto_dataset_type_before_freeze(tmp_dir_fixture): # NOQA
+ """Test that proto dataset has type 'protodataset' before freezing."""
+ import dtoolcore
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-proto-type"
+
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+
+ # Before freeze, should be a protodataset
+ assert proto_dataset.admin_metadata["type"] == "protodataset"
+
+ # Can load as ProtoDataSet
+ loaded_proto = dtoolcore.ProtoDataSet.from_uri(proto_dataset.uri)
+ assert loaded_proto.admin_metadata["type"] == "protodataset"
+
+
+def test_dataset_type_after_freeze(tmp_dir_fixture): # NOQA
+ """Test that dataset has type 'dataset' after freezing."""
+ import dtoolcore
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-dataset-type"
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": {},
+ }
+
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=1234567890.0)
+
+ # After freeze, should be a dataset
+ dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
+ assert dataset.admin_metadata["type"] == "dataset"
+
+ # Cannot load as ProtoDataSet anymore
+ with pytest.raises(dtoolcore.DtoolCoreTypeError):
+ dtoolcore.ProtoDataSet.from_uri(proto_dataset.uri)
+
+
+def test_freeze_with_manifest_missing_readme(tmp_dir_fixture): # NOQA
+ """Test that freezing fails if README is missing."""
+ import dtoolcore
+ import os
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-missing-readme"
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": {},
+ }
+
+ # Create proto dataset
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+
+ # Delete the README file
+ from dtoolcore.utils import generous_parse_uri
+ parsed = generous_parse_uri(proto_dataset.uri)
+ readme_path = os.path.join(parsed.path, "README.yml")
+ os.remove(readme_path)
+
+ # Freezing should fail because README is missing
+ with pytest.raises(dtoolcore.DtoolCoreValueError) as excinfo:
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=1234567890.0)
+
+ assert "README" in str(excinfo.value)
+
+
+def test_freeze_with_manifest_missing_items(tmp_dir_fixture): # NOQA
+ """Test that freezing fails if manifest items are missing from storage."""
+ import dtoolcore
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-missing-items"
+
+ # Create manifest with items that don't exist in storage
+ items = {
+ generate_identifier("data/nonexistent1.txt"): {
+ "relpath": "data/nonexistent1.txt",
+ "size_in_bytes": 100,
+ "hash": "abc123",
+ "utc_timestamp": 1234567890.0,
+ },
+ generate_identifier("data/nonexistent2.txt"): {
+ "relpath": "data/nonexistent2.txt",
+ "size_in_bytes": 200,
+ "hash": "def456",
+ "utc_timestamp": 1234567890.0,
+ },
+ }
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": items,
+ }
+
+ # Create proto dataset (no items uploaded)
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+
+ # Freezing should fail because items are missing
+ with pytest.raises(dtoolcore.DtoolCoreValueError) as excinfo:
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=1234567890.0)
+
+ assert "Missing" in str(excinfo.value)
+ assert "2" in str(excinfo.value) # Should mention 2 missing items
+
+
+def test_freeze_with_manifest_partial_items(tmp_dir_fixture): # NOQA
+ """Test that freezing fails if some manifest items are missing."""
+ import dtoolcore
+ import tempfile
+ import os
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-partial-items"
+
+ # Create manifest with 2 items
+ items = {
+ generate_identifier("data/exists.txt"): {
+ "relpath": "data/exists.txt",
+ "size_in_bytes": 100,
+ "hash": "abc123",
+ "utc_timestamp": 1234567890.0,
+ },
+ generate_identifier("data/missing.txt"): {
+ "relpath": "data/missing.txt",
+ "size_in_bytes": 200,
+ "hash": "def456",
+ "utc_timestamp": 1234567890.0,
+ },
+ }
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": items,
+ }
+
+ # Create proto dataset
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+
+ # Create a temporary file to add to the dataset
+ with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
+ f.write("test content")
+ temp_path = f.name
+
+ try:
+ # Add only one item to storage
+ proto_dataset.put_item(temp_path, "data/exists.txt")
+ finally:
+ os.unlink(temp_path)
+
+ # Freezing should fail because one item is missing
+ with pytest.raises(dtoolcore.DtoolCoreValueError) as excinfo:
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=1234567890.0)
+
+ assert "Missing" in str(excinfo.value)
+ assert "1" in str(excinfo.value) # Should mention 1 missing item
+ assert "missing.txt" in str(excinfo.value)
+
+
+def test_freeze_with_manifest_items_exist(tmp_dir_fixture): # NOQA
+ """Test that freezing succeeds when all items exist."""
+ import dtoolcore
+ import tempfile
+ import os
+
+ base_uri = _sanitise_base_uri(tmp_dir_fixture)
+ name = "test-items-exist"
+
+ # Create manifest with items
+ items = {
+ generate_identifier("data/file1.txt"): {
+ "relpath": "data/file1.txt",
+ "size_in_bytes": 100,
+ "hash": "abc123",
+ "utc_timestamp": 1234567890.0,
+ },
+ generate_identifier("data/file2.txt"): {
+ "relpath": "data/file2.txt",
+ "size_in_bytes": 200,
+ "hash": "def456",
+ "utc_timestamp": 1234567890.0,
+ },
+ }
+
+ manifest = {
+ "dtoolcore_version": dtoolcore.__version__,
+ "hash_function": "md5sum_hexdigest",
+ "items": items,
+ }
+
+ # Create proto dataset
+ proto_dataset = dtoolcore.create_proto_dataset(
+ name=name,
+ base_uri=base_uri,
+ )
+
+ # Create temporary files and add them to the dataset
+ temp_files = []
+ for relpath in ["data/file1.txt", "data/file2.txt"]:
+ with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
+ f.write(f"content for {relpath}")
+ temp_files.append((f.name, relpath))
+
+ try:
+ for temp_path, relpath in temp_files:
+ proto_dataset.put_item(temp_path, relpath)
+ finally:
+ for temp_path, _ in temp_files:
+ os.unlink(temp_path)
+
+ # Freezing should succeed because all items exist
+ proto_dataset.freeze_with_manifest(manifest, frozen_at=1234567890.0)
+
+ # Verify the dataset was frozen correctly
+ dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
+ assert dataset.admin_metadata["type"] == "dataset"
+ assert set(dataset.identifiers) == set(items.keys())