Merged

29 commits
48db248
the crop_ds change is now needed, because we now have date instead of…
MarkMelotto Dec 8, 2025
47a918c
ruff checks
MarkMelotto Dec 8, 2025
f98bfcb
ruff format
MarkMelotto Dec 8, 2025
514efd1
big improvement to the code, thanks Bart!
MarkMelotto Dec 9, 2025
7bc8825
ruff things
MarkMelotto Dec 9, 2025
e8ce6ac
small change
MarkMelotto Dec 9, 2025
1a16047
bug fix
MarkMelotto Dec 9, 2025
b118688
test test
MarkMelotto Dec 9, 2025
ed67331
test test
MarkMelotto Dec 9, 2025
451f776
test test
MarkMelotto Dec 9, 2025
34268b2
test test
MarkMelotto Dec 9, 2025
ee249c5
added docstring
MarkMelotto Dec 9, 2025
72e4512
added getting the shapefile from dcache
MarkMelotto Dec 9, 2025
706580d
testing shapefile
MarkMelotto Dec 9, 2025
c34bc8f
testing shapefile
MarkMelotto Dec 9, 2025
47ac024
testing shapefile
MarkMelotto Dec 9, 2025
b56a148
testing shapefile and updated 4tu to v2
MarkMelotto Dec 9, 2025
6a2a2ff
remove test to see results
MarkMelotto Dec 9, 2025
47788c8
remove test to see results
MarkMelotto Dec 9, 2025
0f014b3
added shapefiles dir to check if this works with the tests
MarkMelotto Dec 9, 2025
67918a7
testing
MarkMelotto Dec 9, 2025
746d75f
testing
MarkMelotto Dec 9, 2025
301b53e
testing
MarkMelotto Dec 9, 2025
e5c7e28
testing linter
MarkMelotto Dec 9, 2025
8043f0d
added suggestions from Bart
MarkMelotto Dec 9, 2025
7b6207d
added suggestions from Bart
MarkMelotto Dec 9, 2025
092e5ab
lint fix
MarkMelotto Dec 9, 2025
e4b0dc7
testing fix
MarkMelotto Dec 9, 2025
c8f021c
Update src/ewatercycle/_forcings/caravan.py
MarkMelotto Jan 8, 2026
9 changes: 9 additions & 0 deletions pyproject.toml
@@ -139,6 +139,15 @@ files = ["src"]
target-version = "py310"
extend-include = ["*.ipynb"]

# Add this to ignore shapefiles
exclude = [
"shapefiles",
"*.shp",
"*.shx",
"*.dbf",
"*.prj"
]

[tool.ruff.lint]
select = [
"A",
27 changes: 24 additions & 3 deletions src/ewatercycle/_forcings/caravan.py
@@ -1,3 +1,4 @@
import os
import shutil
import zipfile
from pathlib import Path
@@ -12,7 +13,7 @@
from ewatercycle.util import get_time

COMMON_URL = "ca13056c-c347-4a27-b320-930c2a4dd207"
OPENDAP_URL = f"https://opendap.4tu.nl/thredds/dodsC/data2/djht/{COMMON_URL}/1/"
OPENDAP_URL = f"https://opendap.4tu.nl/thredds/dodsC/data2/djht/{COMMON_URL}/2/"
SHAPEFILE_URL = (
f"https://data.4tu.nl/file/{COMMON_URL}/bbe94526-cf1a-4b96-8155-244f20094719"
)
@@ -106,7 +107,12 @@ class CaravanForcing(DefaultForcing):

@classmethod
def get_dataset(cls: type["CaravanForcing"], dataset: str) -> xr.Dataset:
"""Opens specified dataset from data.4tu.nl OPeNDAP server.
"""Opens dataset from data.4tu.nl OPeNDAP server, or cache if available.

By default, the dataset is opened from the data.4tu.nl OPeNDAP server.
This can be overridden by setting the environment variable CARAVAN_CACHE
to the directory containing the netCDF files.

Args:
dataset (str): name of dataset, choose from:
@@ -118,6 +124,10 @@ def get_dataset(cls: type["CaravanForcing"], dataset: str) -> xr.Dataset:
'hysets',
'lamah'
"""
cache_dir = os.environ.get("CARAVAN_CACHE")
# Check if we want to load from 4TU or dCache
if cache_dir:
return xr.open_dataset(Path(cache_dir) / f"{dataset}.nc")
return xr.open_dataset(f"{OPENDAP_URL}{dataset}.nc")

@classmethod
@@ -246,7 +256,18 @@ def generate( # type: ignore[override]


def get_shapefiles(directory: Path, basin_id: str) -> Path:
"""Retrieve shapefiles from data 4TU.nl ."""
"""Retrieve shapefiles from data 4TU.nl or cache."""
cache_dir = os.environ.get("CARAVAN_CACHE")
# Check if we want to load from 4TU or dCache
if cache_dir:
shape_path = directory / f"{basin_id}.shp"
combined_shapefile_path = Path(cache_dir) / "shapefiles" / "combined.shp"

if not shape_path.is_file():
extract_basin_shapefile(basin_id, combined_shapefile_path, shape_path)

return shape_path

zip_path = directory / "shapefiles.zip"
output_path = directory / "shapefiles"
shape_path = directory / f"{basin_id}.shp"
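The two cache branches above share one switch: if the environment variable CARAVAN_CACHE points at a local directory, both the netCDF datasets and the combined shapefile are read from it, and the 4TU server is never contacted. A minimal usage sketch, assuming a hypothetical `~/caravan_cache` directory laid out the way the code expects:

```python
import os
from pathlib import Path

from ewatercycle._forcings.caravan import CaravanForcing

# Hypothetical local mirror of the Caravan data (e.g. synced from dCache):
#   ~/caravan_cache/camels.nc                <- one netCDF file per sub-dataset
#   ~/caravan_cache/shapefiles/combined.shp  <- combined basin shapefile
os.environ["CARAVAN_CACHE"] = str(Path.home() / "caravan_cache")

# With CARAVAN_CACHE set, this opens <cache>/camels.nc locally instead of
# requesting camels.nc from the 4TU OPeNDAP server.
ds = CaravanForcing.get_dataset("camels")
```

`get_shapefiles` keys off the same variable: instead of downloading and unzipping the 4TU shapefile archive, it extracts the requested basin from `shapefiles/combined.shp` inside the cache.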
2 changes: 1 addition & 1 deletion tests/src/base/forcing_files/README.md
@@ -4,6 +4,6 @@ The data only includes a year of forcing for one catchment.

For own use, please download from the original source and cite correctly. The Caravan dataset itself is also a combination of data from seperate sources.

The Carvan dataset is originanly obtained from https://zenodo.org/records/7944025 and is explained in a paper by Kratzert, F. :'Caravan - A global community dataset for large-sample hydrology' found here: https://doi-org.tudelft.idm.oclc.org/10.1038/s41597-023-01975-w
The Caravan dataset is originally obtained from https://zenodo.org/records/7944025 and is explained in a paper by Kratzert, F. :'Caravan - A global community dataset for large-sample hydrology' found here: https://doi-org.tudelft.idm.oclc.org/10.1038/s41597-023-01975-w

Distributed under Creative Commons Attribution 4.0 International.
2 changes: 1 addition & 1 deletion tests/src/base/forcing_files/camels_03439000.cpg
@@ -1 +1 @@
ISO-8859-1
ISO-8859-1
2 changes: 1 addition & 1 deletion tests/src/base/forcing_files/camels_03439000.prj
@@ -1 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file modified tests/src/base/forcing_files/camels_03439000.shx
Binary file not shown.
1 change: 1 addition & 0 deletions tests/src/base/forcing_files/shapefiles/combined.cpg
@@ -0,0 +1 @@
UTF-8
Binary file added tests/src/base/forcing_files/shapefiles/combined.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/src/base/forcing_files/shapefiles/combined.prj
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file added tests/src/base/forcing_files/shapefiles/combined.shp
Binary file not shown.
Binary file added tests/src/base/forcing_files/shapefiles/combined.shx
Binary file not shown.
38 changes: 38 additions & 0 deletions tests/src/base/test_forcing.py
@@ -427,3 +427,41 @@ def test_extract_basin_shapefile(tmp_path: Path):

assert len(records) == 1
assert records[0].attributes["gauge_id"] == basin_id


def test_get_dataset_using_cache(tmp_path, monkeypatch):
# Prepare cache directory
cache_dir = tmp_path / "cache"
cache_dir.mkdir()

basin_id = "camels_01022500"
# Use the existing fake Caravan dataset
test_files_dir = Path(__file__).parent / "forcing_files"
test_file = test_files_dir / "test_caravan_file.nc"
cache_target = cache_dir / "camels.nc"
cache_target.write_bytes(test_file.read_bytes())

# Copy shapefiles into the cache so Fiona can find them
shapefiles_dir = test_files_dir / "shapefiles"
cache_shapefiles_dir = cache_dir / "shapefiles"
copytree(shapefiles_dir, cache_shapefiles_dir)

# Point CARAVAN_CACHE to this directory
monkeypatch.setenv("CARAVAN_CACHE", str(cache_dir))

# Copy other forcing files to tmp_camels_dir
tmp_camels_dir = tmp_path / "camels"
copytree(test_files_dir, tmp_camels_dir)

# Call the method
ds = CaravanForcing.generate(
start_time="1981-01-01T00:00:00Z",
end_time="1981-03-01T00:00:00Z",
directory=str(tmp_camels_dir),
basin_id=basin_id,
).to_xarray()

# Assert that the file was loaded from cache
content = list(ds.data_vars.keys())
expected = ["Q", "evspsblpot", "pr", "tas", "tasmax", "tasmin"]
assert content == expected
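
One design note on the test above: `monkeypatch.setenv` is undone automatically when the test finishes, so CARAVAN_CACHE cannot leak into other tests. For contrast, a hedged sketch of the complementary no-cache path (not part of this PR, and network-bound, so it would need to be skipped in CI):

```python
def test_get_dataset_without_cache(monkeypatch):
    # Illustrative sketch only: with CARAVAN_CACHE unset, get_dataset
    # falls through to the 4TU OPeNDAP server, which requires network access.
    monkeypatch.delenv("CARAVAN_CACHE", raising=False)
    ds = CaravanForcing.get_dataset("camels")
    assert len(ds.data_vars) > 0
```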