diff --git a/.github/workflows/post-coverage.yml b/.github/workflows/post-coverage.yml new file mode 100644 index 0000000..6aab4b6 --- /dev/null +++ b/.github/workflows/post-coverage.yml @@ -0,0 +1,35 @@ +name: Post coverage report to PR + +on: + workflow_run: + workflows: ["Python test"] + types: + - completed + +permissions: + pull-requests: write + actions: read + +jobs: + comment: + runs-on: ubuntu-latest + if: > + github.event.workflow_run.event == 'pull_request' && + github.event.workflow_run.conclusion == 'success' + steps: + - name: Download coverage artifact + uses: actions/download-artifact@v4 + with: + name: coverage-report + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Get PR number + id: pr_number + run: echo "number=$(cat pr_number.txt)" >> $GITHUB_OUTPUT + + - name: Post coverage report to PR + uses: marocchino/sticky-pull-request-comment@v2 + with: + path: cov_report.txt + number: ${{ steps.pr_number.outputs.number }} diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 3c0bbdc..3d3805f 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -20,7 +20,6 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 with: - ref: ${{ github.head_ref }} fetch-depth: 0 - name: Install uv, set the python version, and enable cache @@ -40,11 +39,19 @@ jobs: coverage report -m --format markdown > cov_report.txt coverage xml - - name: Post coverage report to PR + - name: Save PR number if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest' - uses: marocchino/sticky-pull-request-comment@v2 + run: echo ${{ github.event.number }} > pr_number.txt + + - name: Save coverage report and PR number + if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest' + uses: actions/upload-artifact@v4 with: - path: cov_report.txt + name: coverage-report + path: | + cov_report.txt + pr_number.txt + retention-days: 1 - name: Upload 
coverage reports to Codecov uses: codecov/codecov-action@v4.0.1 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 6de56c5..4c8e3bc 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -60,7 +60,7 @@ representative at an online or offline event. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at -max.pargmann@dlr.de. +artist@lists.kit.edu. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the diff --git a/README.md b/README.md index 5887090..82460be 100644 --- a/README.md +++ b/README.md @@ -71,23 +71,27 @@ The ``PAINT`` repository is structured as shown below: . ├── html # Code for the paint-database.org website ├── markers # Saved markers for the WRI1030197 power plant in Jülich -├── paint # Python package +├── paint # Python package/ │ ├── data │ ├── preprocessing │ └── util ├── plots # Scripts used to generate plots found in our paper ├── preprocessing-scripts # Scripts used for preprocessing and STAC generation ├── scripts # Scripts highlighting example usage of the data -└── test # Tests for the python package - ├── data - ├── preprocessing - └── util +├── test # Tests for the python package/ +│ ├── data +│ ├── preprocessing +│ └── util +└── tutorials # Interactive notebooks showcasing how to get started with PAINT ``` ### Example usage: In the ``scripts`` folder there are multiple scripts highlighting how ``PAINT`` can be used. Detailed descriptions of these scripts are available via our [Documentation](http://paint.readthedocs.io). +Furthermore, an interactive notebook is available in the ``tutorials`` folder - this is the perfect starting point to +dive into ``PAINT``! + ## How to contribute Check out our [contribution guidelines](CONTRIBUTING.md) if you are interested in contributing to the `PAINT` project :fire:. 
Please also carefully check our [code of conduct](CODE_OF_CONDUCT.md) :blue_heart:. diff --git a/SECURITY.md b/SECURITY.md index 55a3284..529845b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,10 +2,15 @@ ## Supported Versions -We are currently supporting ``PAINT 1.0.0`` +We are currently supporting ``PAINT 2.0.1`` | Version | Supported | -| ------- | ------------------ | +|---------| ------------------ | +| 2.0.1 | :white_check_mark: | +| 2.0.0 | :white_check_mark: | +| 1.0.3 | :white_check_mark: | +| 1.0.2 | :white_check_mark: | +| 1.0.1 | :white_check_mark: | | 1.0.0 | :white_check_mark: | ## Reporting a Vulnerability diff --git a/docs/conf.py b/docs/conf.py index 5c7ae1d..10181d5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -10,7 +10,7 @@ project = "PAINT" copyright = f"{datetime.now().year}, ARTIST consortium" author = "ARTIST Consortium" -release = "2.0.0" +release = "2.0.1" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/dataset.rst b/docs/dataset.rst index f145c33..1324bd2 100644 --- a/docs/dataset.rst +++ b/docs/dataset.rst @@ -32,7 +32,7 @@ There are three ways of creating a ``PaintCalibrationDataset``: 2. **From a benchmark file** - You can also create the dataset from a benchmark file (see above). In this case, the ``benchmark_file`` must be provided: + You can also create the dataset from a benchmark file (see the :doc:`information on dataset splits <splitter>` for details). In this case, the ``benchmark_file`` containing information on the train, validation, and test split must be provided: .. code-block:: python diff --git a/docs/splitter.rst b/docs/splitter.rst index 27bb5e8..5190fbc 100644 --- a/docs/splitter.rst +++ b/docs/splitter.rst @@ -34,7 +34,7 @@ Supported Splits Again, the goal is to create diverse and challenging training and validation datasets. 
- **Balanced Split:** - This method uses KMeans clustering on azimuth and elevation features to ensure a stratified selection. The process includes: + This method uses k-means clustering on azimuth and elevation features to ensure a stratified selection. The process includes: - Clustering the data into ``validation_size`` clusters. - Selecting one data point per cluster for the validation split. @@ -76,3 +76,5 @@ To generate the splits, simply call the ``get_dataset_splits()`` function: azimuth_splits = splitter.get_dataset_splits( split_type="azimuth", training_size=10, validation_size=30 ) + +This returns a ``pd.DataFrame`` containing information on the splits, i.e. which samples belong to which split, and also saves this information as a CSV file. diff --git a/docs/usage.rst b/docs/usage.rst index 77f8b42..873699e 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -2,7 +2,23 @@ How To Use ========== -Here, you can find an overview of how to use ``PAINT``. + +To get started with ``PAINT`` we have included an interactive notebook, which is available here: https://github.com/ARTIST-Association/PAINT/blob/main/tutorials/paint_data_tutorial.ipynb. + +This tutorial provides an interactive introduction to the PAINT database, demonstrating how to: +- Initialize the STAC client. +- Download and inspect metadata. +- Generate calibration data splits. +- Load calibration data using a dataloader. +- Download and inspect other types of PAINT data. + +To run the tutorial make sure you install the tutorial dependencies, i.e.: + +.. code-block:: console + + $ pip install "paint-csp[tutorial]" + +Most of the concepts covered in the interactive tutorial are also covered in the documentation and associated scripts listed below: .. 
toctree:: :maxdepth: 1 diff --git a/paint/__init__.py b/paint/__init__.py index a9d1c74..33c0184 100644 --- a/paint/__init__.py +++ b/paint/__init__.py @@ -1,6 +1,12 @@ import os +from importlib.metadata import PackageNotFoundError, version PAINT_ROOT = f"{os.sep}".join(__file__.split(os.sep)[:-2]) """Reference to the root directory of ARTIST.""" +try: + __version__ = version("paint-csp") +except PackageNotFoundError: + # Allows running from source without installation. + __version__ = "0.0.0" -__all__ = ["PAINT_ROOT", "preprocessing", "util"] +__all__ = ["PAINT_ROOT", "preprocessing", "util", "__version__"] diff --git a/paint/data/dataset.py b/paint/data/dataset.py index 964dcdb..fe11e6a 100644 --- a/paint/data/dataset.py +++ b/paint/data/dataset.py @@ -142,7 +142,7 @@ def _check_accepted_keys(key: str) -> None: @classmethod def from_benchmark( cls, - benchmark_file: str | Path, + benchmark_file: str | Path | pd.DataFrame, root_dir: str | Path, item_type: str, download: bool = False, @@ -157,8 +157,8 @@ def from_benchmark( Parameters ---------- - benchmark_file : str | Path - Path to the file containing the benchmark information. + benchmark_file : str | Path | pd.DataFrame + Path to the file containing the benchmark information, or dataframe containing this information. root_dir : str | Path Directory where the dataset will be stored. item_type : str @@ -182,12 +182,29 @@ def from_benchmark( Validation dataset. """ root_dir = Path(root_dir) - log.info( - f"Begining the process of generating benchmark datasets. The file used to generate the benchmarks is:\n" - f" {benchmark_file}!" - ) - # Load the splits data. - splits = pd.read_csv(benchmark_file) + if not isinstance(benchmark_file, pd.DataFrame): + log.info( + f"Begining the process of generating benchmark datasets. The file used to generate the benchmarks is:\n" + f" {benchmark_file}!" + ) + # Load the splits data. 
+ splits = pd.read_csv(benchmark_file) + else: + log.info( + "Begining the process of generating benchmark datasets using provided pandas dataframe!" + ) + benchmark_file.reset_index(inplace=True) + splits = benchmark_file + + expected_cols = ["Id", "HeliostatId", "Split"] + try: + pd.testing.assert_index_equal(splits.columns, pd.Index(expected_cols)) + except AssertionError as e: + raise ValueError( + f"The dataset split file provide has an incorrect schema. Please verify and try again.\n" + f"Expected: {expected_cols}\n" + f"Details: {e}" + ) # Check whether to download the data or not. if download: # pragma: no cover diff --git a/paint/data/dataset_splits.py b/paint/data/dataset_splits.py index 208b30a..2436ef8 100644 --- a/paint/data/dataset_splits.py +++ b/paint/data/dataset_splits.py @@ -457,6 +457,11 @@ def get_dataset_splits( Size of the training split. validation_size : int Size of the validation split. + + Returns + ------- + pd.DataFrame + Dataframe containing information on the dataset splits. """ allowed_split_types = [ mappings.AZIMUTH_SPLIT, diff --git a/paint/data/stac_client.py b/paint/data/stac_client.py index ccce994..3686179 100644 --- a/paint/data/stac_client.py +++ b/paint/data/stac_client.py @@ -69,6 +69,7 @@ def __init__( self.output_dir = pathlib.Path(output_dir) self.output_dir.mkdir(parents=True, exist_ok=True) self.chunk_size = chunk_size + log.info(f"Initializing STAC client to download data to: {output_dir}.") @staticmethod def load_checkpoint(path: pathlib.Path) -> dict[str, Any]: @@ -715,7 +716,7 @@ def get_heliostat_data( # Download the data for each heliostat. for heliostat_catalog in heliostat_catalogs_list: log.info(f"Processing heliostat catalog {heliostat_catalog.id}") - success = False + success = True # Download calibration data. 
if get_calibration: diff --git a/plots/04_create_distribution_plots.py b/plots/04_create_distribution_plots.py index 744d54e..32dbf91 100644 --- a/plots/04_create_distribution_plots.py +++ b/plots/04_create_distribution_plots.py @@ -79,9 +79,8 @@ def __init__( self.output_path.mkdir(parents=True, exist_ok=True) self.figure_size = (4, 4) - self.data = self._load_data() - # Power plant position as tensor + # Power plant position as tensor. power_plant_lat, power_plant_lon = convert_gk_to_lat_lon( mappings.GK_RIGHT_BASE, mappings.GK_HEIGHT_BASE ) @@ -92,7 +91,11 @@ def __init__( mappings.POWER_PLANT_ALT, ] ) - # Precompute receiver corners once + + # Load data. + self.data = self._load_data() + + # Precompute receiver corners once. self.receiver_coordinates = [ convert_wgs84_coordinates_to_local_enu( torch.tensor(coords), self.power_plant_position diff --git a/pyproject.toml b/pyproject.toml index 82b4071..0ea95ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ packages = ["paint"] [project] name = "paint-csp" -version = "2.0.0" +version = "2.0.1" authors = [ { name="ARTIST Consortium", email="artist@lists.kit.edu" }, ] @@ -17,7 +17,7 @@ requires-python = ">=3.10" classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", - "Development Status :: 1 - Planning", + "Development Status :: 5 - Production/Stable", ] dependencies = [ "numpy", @@ -51,6 +51,7 @@ dev = [ "sphinxcontrib-napoleon", "sphinxemoji" ] +tutorial = ["jupyter"] [project.urls] Homepage = "https://github.com/ARTIST-Association/PAINT" diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py index c5b9c30..4892b20 100644 --- a/tests/data/test_dataset.py +++ b/tests/data/test_dataset.py @@ -4,6 +4,7 @@ import cv2 import deepdiff +import pandas as pd import pytest import torch from torchvision import transforms @@ -191,6 +192,25 @@ def test_from_benchmark( assert len(test) == 4 assert len(val) == 3 + # Test with Pandas dataframe as input 
instead of file. + benchmark_df = pd.read_csv( + pathlib.Path(PAINT_ROOT) + / "tests" + / "data" + / "test_data" + / "test_benchmark.csv", + index_col=0, + ) + train, test, val = PaintCalibrationDataset.from_benchmark( + benchmark_file=benchmark_df, + root_dir=pathlib.Path(PAINT_ROOT) / "tests" / "data" / "test_data" / "dataset", + item_type=item_type, + download=download, + ) + assert len(train) == 3 + assert len(test) == 4 + assert len(val) == 3 + @pytest.mark.parametrize( "item_type, heliostats", @@ -284,3 +304,24 @@ def test_str_method() -> None: "-The dataset contains 4 items\n" ) assert str(dataset) == expected + + +def test_from_benchmark_fails_with_incorrect_dataframe( + tmp_path: pathlib.Path, +) -> None: + """ + Verify that ``from_benchmark`` raises ``ValueError`` when the input dataframe has incorrect columns. + + Parameters + ---------- + tmp_path : pathlib.Path + Fixture to the temporary folder. + """ + # Create invalid data frame. + invalid_df = pd.DataFrame(columns=["Id", "HeliostatId", "WrongCol"]) + + # Expect a ValueError. + with pytest.raises(ValueError, match="incorrect schema"): + PaintCalibrationDataset.from_benchmark( + benchmark_file=invalid_df, root_dir=tmp_path, item_type="raw_image" + ) diff --git a/tests/test_package.py b/tests/test_package.py new file mode 100644 index 0000000..fa309df --- /dev/null +++ b/tests/test_package.py @@ -0,0 +1,33 @@ +import importlib +import importlib.metadata +from importlib.metadata import PackageNotFoundError +from unittest.mock import MagicMock + +import pytest + +import paint + + +def test_version_fallback_when_package_missing(monkeypatch: pytest.MonkeyPatch) -> None: + """ + Verify that ``__version__`` falls back to '0.0.0' if the package is not installed. + + This test mocks ``importlib.metadata.version`` to raise ``PackageNotFoundError``, + then reloads the module to trigger the except block. + + Parameters + ---------- + monkeypatch : pytest.MonkeyPatch + MonkeyPatch fixture. 
+ """ + # Create a mock that raises the specific error. + mock_raiser = MagicMock(side_effect=PackageNotFoundError) + + # Apply the mock to the standard library function. + monkeypatch.setattr(importlib.metadata, "version", mock_raiser) + + # Reload the module to force the top-level try/except block to run again. + importlib.reload(paint) + + # Assert the fallback behavior. + assert paint.__version__ == "0.0.0" diff --git a/tutorials/paint_data_tutorial.ipynb b/tutorials/paint_data_tutorial.ipynb new file mode 100644 index 0000000..20af578 --- /dev/null +++ b/tutorials/paint_data_tutorial.ipynb @@ -0,0 +1,1909 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "23d8c4e94a4b55f4", + "metadata": {}, + "source": [ + "# ``PAINT`` Data Tutorial\n", + "\n", + "This interactive notebook provides a brief overview of the ``PAINT`` database, demonstrating how to:\n", + "- Initialize the STAC client.\n", + "- Download and inspect metadata.\n", + "- Generate calibration data splits.\n", + "- Load calibration data using a dataloader.\n", + "- Download and inspect other types of ``PAINT`` data.\n", + "\n", + "> **Note:** Python executable scripts for each step are available in the ``scripts`` folder of the [PAINT GitHub](https://github.com/ARTIST-Association/PAINT/tree/main/scripts). We recommend using those scripts if you plan to download and process large amounts of ``PAINT`` data." 
+ ] + }, + { + "cell_type": "markdown", + "id": "3e82d5e92da63968", + "metadata": {}, + "source": [ + "## Getting Started\n", + "\n", + "To run this tutorial, ensure you have the ``PAINT`` tutorial dependencies installed:\n", + "```\n", + "pip install \"paint-csp[tutorial]\"\n", + "```\n", + "To verify the installation, let's import ``PAINT`` and check the version attribute:" + ] + }, + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:12:26.352122Z", + "start_time": "2026-01-30T11:12:26.344571Z" + } + }, + "source": [ + "import paint\n", + "\n", + "print(f\"``PAINT`` is running with version: {paint.__version__}\")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "``PAINT`` is running with version: 2.0.0\n" + ] + } + ], + "execution_count": 1 + }, + { + "cell_type": "markdown", + "id": "b2a2d5be158a05b9", + "metadata": {}, + "source": [ + "We also need to specify a directory where all downloaded data will be saved. **Update the file path below to a location that works for your system:**" + ] + }, + { + "cell_type": "code", + "id": "ee4635a10ae20007", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:12:26.361699Z", + "start_time": "2026-01-30T11:12:26.359796Z" + } + }, + "source": [ + "from pathlib import Path\n", + "\n", + "download_path = Path(\"./PAINT_tutorial_data\")" + ], + "outputs": [], + "execution_count": 2 + }, + { + "cell_type": "markdown", + "id": "57e39a37b0d41383", + "metadata": {}, + "source": [ + "## Downloading Metadata\n", + "\n", + "Before working with the actual ``PAINT`` data, we will inspect the metadata to understand what is available. For this tutorial, we will focus on a small subset of heliostats: those with IDs starting with \"AA\". 
This includes the range from **AA23 to AA51**.\n", + "\n", + "In the next step, we will:\n", + "- Generate a list of heliostats to access.\n", + "- Create a STAC client.\n", + "- Download the metadata and save it to the specified location." + ] + }, + { + "cell_type": "code", + "id": "95276b7d0af455cc", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:16.335654Z", + "start_time": "2026-01-30T11:12:26.445975Z" + } + }, + "source": [ + "# Import the STAC client.\n", + "from paint.data import StacClient\n", + "\n", + "# Generate heliostat list.\n", + "heliostat_list = [f\"AA{i}\" for i in range(23, 52)]\n", + "\n", + "# Initialize STAC client.\n", + "client = StacClient(output_dir=download_path)\n", + "\n", + "# Download metadata.\n", + "client.get_heliostat_metadata(heliostats=heliostat_list)" + ], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No collections selected - downloading data for all collections!\n", + "Processing Heliostat Catalogs: 0%| | 0/29 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdHeliostatIdlatitudelongitudeElevationDateTime
0AA23-heliostat-propertiesAA2350.9136476.38701288.5900572021-07-20 05:09:00+00:00
1AA24-heliostat-propertiesAA2450.9136466.38707588.5998082021-07-20 05:09:00+00:00
2AA25-heliostat-propertiesAA2550.9136466.38713888.6205982021-07-20 05:09:00+00:00
3AA26-heliostat-propertiesAA2650.9136466.38720088.6030582021-07-20 05:09:00+00:00
4AA27-heliostat-propertiesAA2750.9136466.38726388.6156542021-07-20 05:09:00+00:00
\n", + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 5 + }, + { + "cell_type": "markdown", + "id": "3b7c1e4028dc1cf9", + "metadata": {}, + "source": [ + "Above we can see the first five rows of this metadata table. Now let's look at the calibration metadata:" + ] + }, + { + "cell_type": "code", + "id": "e6ef7037a3e832e0", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:16.426869Z", + "start_time": "2026-01-30T11:13:16.425059Z" + } + }, + "source": [ + "# Inspect the calibration metadata.\n", + "print(\n", + " f\"The calibration metadata file contains {len(calibration_metadata)} rows and {len(calibration_metadata.columns)} columns.\\n\"\n", + " f\"The columns are: {', '.join(calibration_metadata.columns)}\"\n", + ")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The calibration metadata file contains 4691 rows and 17 columns.\n", + "The columns are: Id, HeliostatId, Azimuth, Elevation, lower_left_latitude, lower_left_longitude, lower_left_Elevation, upper_left_latitude, upper_left_longitude, upper_left_Elevation, upper_right_latitude, upper_right_longitude, upper_right_Elevation, lower_right_latitude, lower_right_longitude, lower_right_Elevation, DateTime\n" + ] + } + ], + "execution_count": 6 + }, + { + "cell_type": "markdown", + "id": "e21629a42491489b", + "metadata": {}, + "source": [ + "This dataframe contains significantly more rows because there are often multiple calibration measurements for each heliostat. 
The columns include:\n", + "- **Id:** The measurement ID of the calibration measurement\n", + "- **HeliostatId:** The ID of the heliostat used for this measurement\n", + "- **Azimuth:** The sun's azimuth at the time of measurement\n", + "- **Elevation:** The sun's elevation at the time of measurement\n", + "- **Target Coordinates:** The latitude, longitude, and elevation for the *lower_left*, *upper_left*, *upper_right*, and *lower_right* corners of the calibration target\n", + "- **DateTime:** The timestamp of the measurement\n", + "\n", + "The first five rows are displayed below:" + ] + }, + { + "cell_type": "code", + "id": "cdf7447636c43830", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:16.561059Z", + "start_time": "2026-01-30T11:13:16.555687Z" + } + }, + "source": [ + "calibration_metadata.head()" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " Id HeliostatId Azimuth Elevation lower_left_latitude \\\n", + "0 225295 AA23 81.839158 37.047879 50.913396 \n", + "1 199617 AA23 -24.275629 48.834090 50.913396 \n", + "2 62302 AA23 -42.017068 8.527271 50.913396 \n", + "3 222963 AA23 -6.400352 62.327916 50.913392 \n", + "4 212358 AA23 66.411607 45.213617 50.913392 \n", + "\n", + " lower_left_longitude lower_left_Elevation upper_left_latitude \\\n", + "0 6.387613 135.789 50.913396 \n", + "1 6.387613 135.789 50.913396 \n", + "2 6.387613 135.789 50.913396 \n", + "3 6.387886 119.268 50.913392 \n", + "4 6.387886 119.268 50.913392 \n", + "\n", + " upper_left_longitude upper_left_Elevation upper_right_latitude \\\n", + "0 6.387613 142.175 50.913397 \n", + "1 6.387613 142.175 50.913397 \n", + "2 6.387613 142.175 50.913397 \n", + "3 6.387886 126.470 50.913392 \n", + "4 6.387886 126.470 50.913392 \n", + "\n", + " upper_right_longitude upper_right_Elevation lower_right_latitude \\\n", + "0 6.387536 142.172 50.913397 \n", + "1 6.387536 142.172 50.913397 \n", + "2 6.387536 142.172 50.913397 \n", + "3 6.387763 126.506 50.913392 \n", + "4 6.387763 126.506 
50.913392 \n", + "\n", + " lower_right_longitude lower_right_Elevation DateTime \n", + "0 6.387536 135.783 2023-06-27 05:39:56+00:00 \n", + "1 6.387536 135.783 2023-04-21 10:37:26+00:00 \n", + "2 6.387536 135.783 2022-01-18 13:44:45+00:00 \n", + "3 6.387763 119.279 2023-06-16 09:48:04+00:00 \n", + "4 6.387763 119.279 2023-05-31 06:35:41+00:00 " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdHeliostatIdAzimuthElevationlower_left_latitudelower_left_longitudelower_left_Elevationupper_left_latitudeupper_left_longitudeupper_left_Elevationupper_right_latitudeupper_right_longitudeupper_right_Elevationlower_right_latitudelower_right_longitudelower_right_ElevationDateTime
0225295AA2381.83915837.04787950.9133966.387613135.78950.9133966.387613142.17550.9133976.387536142.17250.9133976.387536135.7832023-06-27 05:39:56+00:00
1199617AA23-24.27562948.83409050.9133966.387613135.78950.9133966.387613142.17550.9133976.387536142.17250.9133976.387536135.7832023-04-21 10:37:26+00:00
262302AA23-42.0170688.52727150.9133966.387613135.78950.9133966.387613142.17550.9133976.387536142.17250.9133976.387536135.7832022-01-18 13:44:45+00:00
3222963AA23-6.40035262.32791650.9133926.387886119.26850.9133926.387886126.47050.9133926.387763126.50650.9133926.387763119.2792023-06-16 09:48:04+00:00
4212358AA2366.41160745.21361750.9133926.387886119.26850.9133926.387886126.47050.9133926.387763126.50650.9133926.387763119.2792023-05-31 06:35:41+00:00
\n", + "
" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 7 + }, + { + "cell_type": "markdown", + "id": "e00b86252619e38e", + "metadata": {}, + "source": [ + "Now finally it is time to inspect the deflectometry metadata:" + ] + }, + { + "cell_type": "code", + "id": "55674ae61e53e85", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:16.621784Z", + "start_time": "2026-01-30T11:13:16.620017Z" + } + }, + "source": [ + "# Inspect the deflectometry metadata.\n", + "print(\n", + " f\"The deflectometry metadata file contains {len(deflectometry_metadata)} rows and {len(deflectometry_metadata.columns)} columns.\\n\"\n", + " f\"The columns are: {', '.join(deflectometry_metadata.columns)}\"\n", + ")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The deflectometry metadata file contains 39 rows and 6 columns.\n", + "The columns are: Id, HeliostatId, latitude, longitude, Elevation, DateTime\n" + ] + } + ], + "execution_count": 8 + }, + { + "cell_type": "markdown", + "id": "9d8b2f0fd819e366", + "metadata": {}, + "source": [ + "Again, we see more rows than the number of heliostats because some heliostats contain multiple deflectometry measurements. 
The columns are nearly identical to the properties metadata, with one key difference: the **Id** column refers to the *deflectometry STAC ID*, not the properties ID.\n", + "\n", + "The first five rows are displayed below:" + ] + }, + { + "cell_type": "code", + "id": "f4a270d14e53662a", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:16.679975Z", + "start_time": "2026-01-30T11:13:16.676206Z" + } + }, + "source": [ + "deflectometry_metadata.head()" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " Id HeliostatId latitude longitude \\\n", + "0 AA23-2021-10-13Z09-27-07Z-deflectometry AA23 50.913647 6.387012 \n", + "1 AA24-2021-10-13Z09-29-29Z-deflectometry AA24 50.913646 6.387075 \n", + "2 AA25-2021-10-13Z09-32-36Z-deflectometry AA25 50.913646 6.387138 \n", + "3 AA26-2021-10-13Z09-34-21Z-deflectometry AA26 50.913646 6.387200 \n", + "4 AA27-2021-10-12Z13-27-32Z-deflectometry AA27 50.913646 6.387263 \n", + "\n", + " Elevation DateTime \n", + "0 88.590057 2021-10-13 09:27:07+00:00 \n", + "1 88.599808 2021-10-13 09:29:29+00:00 \n", + "2 88.620598 2021-10-13 09:32:36+00:00 \n", + "3 88.603058 2021-10-13 09:34:21+00:00 \n", + "4 88.615654 2021-10-12 13:27:32+00:00 " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdHeliostatIdlatitudelongitudeElevationDateTime
0AA23-2021-10-13Z09-27-07Z-deflectometryAA2350.9136476.38701288.5900572021-10-13 09:27:07+00:00
1AA24-2021-10-13Z09-29-29Z-deflectometryAA2450.9136466.38707588.5998082021-10-13 09:29:29+00:00
2AA25-2021-10-13Z09-32-36Z-deflectometryAA2550.9136466.38713888.6205982021-10-13 09:32:36+00:00
3AA26-2021-10-13Z09-34-21Z-deflectometryAA2650.9136466.38720088.6030582021-10-13 09:34:21+00:00
4AA27-2021-10-12Z13-27-32Z-deflectometryAA2750.9136466.38726388.6156542021-10-12 13:27:32+00:00
\n", + "
" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 9 + }, + { + "cell_type": "markdown", + "id": "cb071d62c5c816ea", + "metadata": {}, + "source": [ + "Since we will be using the calibration dataset later, let's inspect it in more detail. Specifically, we look at how many of our heliostats have calibration measurements and how the number of calibration measurements varies across the heliostats:" + ] + }, + { + "cell_type": "code", + "id": "ae5fafd83c0957e0", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:16.727793Z", + "start_time": "2026-01-30T11:13:16.722279Z" + } + }, + "source": [ + "from IPython.display import HTML, display\n", + "\n", + "# Calculate counts once.\n", + "counts = calibration_metadata[\"HeliostatId\"].value_counts()\n", + "unique_heliostats = calibration_metadata[\"HeliostatId\"].nunique()\n", + "\n", + "# Create DataFrames for better rendering.\n", + "top_5 = counts.head(5).to_frame(name=\"Measurement Count\")\n", + "bottom_5 = counts.tail(5).to_frame(name=\"Measurement Count\")\n", + "\n", + "display(\n", + " HTML(f\"\"\"\n", + "

Unique Heliostats: {unique_heliostats}

\n", + "
\n", + "
\n", + " Top 5 (Most Measurements)\n", + " {top_5.to_html()}\n", + "
\n", + "
\n", + " Bottom 5 (Least Measurements)\n", + " {bottom_5.to_html()}\n", + "
\n", + "
\n", + "\"\"\")\n", + ")" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "

Unique Heliostats: 27

\n", + "
\n", + "
\n", + " Top 5 (Most Measurements)\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Measurement Count
HeliostatId
AA23262
AA24228
AA51223
AA45215
AA49214
\n", + "
\n", + "
\n", + " Bottom 5 (Least Measurements)\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Measurement Count
HeliostatId
AA42140
AA39139
AA31135
AA4195
AA431
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "execution_count": 10 + }, + { + "cell_type": "markdown", + "id": "b0d59ca652a6474c", + "metadata": {}, + "source": [ + "**Key takeaways for the next steps:**\n", + "- We have 27 heliostats with calibration measurements.\n", + "- One heliostat (AA43) has only a single calibration measurement; the rest have at least 95 each." + ] + }, + { + "cell_type": "markdown", + "id": "5f668d8bbd3cbe87", + "metadata": {}, + "source": [ + "## Creating a Calibration Dataset Split from the Metadata\n", + "\n", + "Now that we have inspected the metadata, we can create a dataset split for the calibration data. To summarize the nature of calibration data:\n", + "- Heliostats often have unknown offsets and deformations, which cause deviations from the intended pointing direction.\n", + "- Power plant operators use *calibration targets* to capture photos of the flux image generated by a single heliostat. These images help determine pointing errors.\n", + "- Multiple measurements are taken across different times and seasons to assist with operations.\n", + "\n", + "This data is ideal for machine learning. We can use these images to train algorithms that improve power plant operation.\n", + "\n", + "For such algorithms, training, validation, and test splits are required. 
``PAINT`` provides multiple splitting methods, including the *Azimuth Split*, *Solstice Split*, *Balanced Split*, and *High-Variance Split* (see the [documentation here](https://paint.readthedocs.io/en/latest/splitter.html)).\n", + "\n", + "In this tutorial, we will use the **Balanced Split**, which uses k-means clustering on the azimuth and elevation features to ensure a stratified selection:\n", + " - Data is clustered into ``validation_size`` clusters.\n", + " - One data point per cluster is selected for the **validation** subset.\n", + " - A distinct point from the same cluster is selected for the **test** subset (if possible).\n", + " - Missing test samples are filled from the overall pool to maintain balance.\n", + " - Remaining data points are assigned to the **training** subset." + ] + }, + { + "cell_type": "code", + "id": "c9bed2babdbf9116", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:19.261389Z", + "start_time": "2026-01-30T11:13:16.985615Z" + } + }, + "source": [ + "# Import paint mappings and dataset splitter.\n", + "import paint.util.paint_mappings as mappings\n", + "from paint.data.dataset_splits import DatasetSplitter\n", + "\n", + "# Set train and validation size.\n", + "training_size = 25\n", + "validation_size = 5\n", + "\n", + "# Create the dataset splitter.\n", + "splitter = DatasetSplitter(\n", + " input_file=calibration_metadata_file, output_dir=download_path\n", + ")\n", + "\n", + "# Perform the balanced split.\n", + "split_data = splitter.get_dataset_splits(\n", + " split_type=mappings.BALANCED_SPLIT,\n", + " training_size=training_size,\n", + " validation_size=validation_size,\n", + ")\n", + "\n", + "# Inspect the size of the splits.\n", + "split_data.Split.value_counts()" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "Split\n", + "train 650\n", + "validation 130\n", + "test 130\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"execution_count": 11 + }, + { + "cell_type": "markdown", + "id": "64a207a4d996fcbd", + "metadata": {}, + "source": [ + "**Verifying the Split**\n", + "\n", + "Does this output align with our earlier analysis?\n", + "- We have 27 heliostats with calibration measurements, but one (AA43) had only a single measurement.\n", + "- Because our validation size is 5, any heliostat with fewer than 5 measurements is excluded.\n", + "- This leaves 26 heliostats. With a validation size of 5 (and a matching test size of 5), we expect: $26 \\times 5 = 130$ samples for both validation and test sets.\n", + "- With a training size of 25, we expect: $25 \\times 26 = 650$ training samples.\n", + "\n", + "The numbers match! The split data has been automatically saved as a CSV in your download path (e.g., `benchmark_split-balanced_train-25_validation-5.csv`)." + ] + }, + { + "cell_type": "markdown", + "id": "95c3a3b0a33e56c8", + "metadata": {}, + "source": [ + "## Creating a Dataset\n", + "\n", + "Now that we have defined our splits, we can use the built-in ``PAINT`` functionality to create a ``torch.Dataset``. While there are several ways to create datasets (see [this tutorial](https://paint.readthedocs.io/en/latest/dataset.html)), we will use the benchmark split data we just generated.\n", + "\n", + "We must specify:\n", + "- The benchmark split file\n", + "- The root directory for downloads\n", + "- The item type\n", + "- Whether to download the data\n", + "\n", + "In this case, we use calibration images that have been cropped and centered on the **flux center of mass**. These are compressed and pre-processed, ensuring faster download times for this tutorial." 
+ ] + }, + { + "cell_type": "code", + "id": "d57ec839e47c7668", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:39.430908Z", + "start_time": "2026-01-30T11:13:19.306597Z" + } + }, + "source": [ + "from paint.data.dataset import PaintCalibrationDataset\n", + "\n", + "# Initialize dataset from benchmark splits.\n", + "train, test, val = PaintCalibrationDataset.from_benchmark(\n", + " benchmark_file=split_data,\n", + " root_dir=download_path,\n", + " item_type=mappings.CALIBRATION_FLUX_CENTERED_IMAGE_KEY,\n", + " download=True,\n", + ")" + ], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading benchmark data for the test split: 100%|██████████| 130/130 [00:02<00:00, 45.69Item/s]\n", + "Downloading benchmark data for the train split: 100%|██████████| 650/650 [00:12<00:00, 51.56Item/s]\n", + "Downloading benchmark data for the validation split: 100%|██████████| 130/130 [00:02<00:00, 49.34Item/s]\n" + ] + } + ], + "execution_count": 12 + }, + { + "cell_type": "markdown", + "id": "e9e1c60eccb6840e", + "metadata": {}, + "source": [ + "This results in a custom dataset that implements the standard PyTorch ``__getitem__()`` method. 
We can easily access the data (stored as tensors) for machine learning applications.\n", + "\n", + "Below is an example of loading and plotting the first four measurements from the training dataset:" + ] + }, + { + "cell_type": "code", + "id": "cdde5d097426998f", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:39.871761Z", + "start_time": "2026-01-30T11:13:39.465263Z" + } + }, + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Create a 2x2 grid.\n", + "fig, axes = plt.subplots(2, 2, figsize=(10, 10))\n", + "\n", + "for i in range(4):\n", + " # Determine the row and column index.\n", + " ax = axes[i // 2, i % 2]\n", + "\n", + " # Grab the i-th item from your dataset.\n", + " item = train[i]\n", + "\n", + " # Convert (Channel, Height, Width) -> (Height, Width, Channel) for plotting.\n", + " img_data = item.permute(1, 2, 0).detach().cpu().numpy()\n", + "\n", + " ax.imshow(img_data)\n", + " ax.set_title(f\"Train Index: {i}\")\n", + " ax.axis(\"off\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "
" + ], + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "execution_count": 13 + }, + { + "cell_type": "markdown", + "id": "801bba9f50c235fa", + "metadata": {}, + "source": [ + "## Downloading Further Heliostat Data\n", + "\n", + "We have spent most of this tutorial looking at the calibration data, since this holds the most potential for machine learning applications. However, it is worth considering the other available data as well. We will download deflectometry data and properties data for the heliostat \"AA23\" in the following and briefly inspect it:" + ] + }, + { + "cell_type": "code", + "id": "8687d5e3ddb27454", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:42.504761Z", + "start_time": "2026-01-30T11:13:39.906286Z" + } + }, + "source": [ + "client.get_heliostat_data(\n", + " heliostats=[\"AA23\"],\n", + " collections=[mappings.SAVE_DEFLECTOMETRY.lower(), mappings.SAVE_PROPERTIES.lower()],\n", + ")" + ], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing Items in Heliostat AA23-heliostat-catalog: 100%|██████████| 1/1 [00:01<00:00, 1.85s/Item]\n", + "Processing Items in Heliostat AA23-heliostat-catalog: 100%|██████████| 1/1 [00:00<00:00, 12.34Item/s]\n" + ] + } + ], + "execution_count": 14 + }, + { + "cell_type": "markdown", + "id": "60530f3677d0c415", + "metadata": {}, + "source": [ + "### Properties Data\n", + "\n", + "There should now be a new folder in your download path called ``AA23``. 
Within this folder there will be two more subfolders, ``Deflectometry`` and ``Properties``.\n", + "\n", + "Let's first look at the properties data:" + ] + }, + { + "cell_type": "code", + "id": "a038b7fc9aa256d6", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:42.515152Z", + "start_time": "2026-01-30T11:13:42.511660Z" + } + }, + "source": [ + "import json\n", + "\n", + "from IPython.display import JSON\n", + "\n", + "# Load the file.\n", + "with open(\n", + " Path(download_path) / \"AA23\" / \"Properties\" / \"AA23-heliostat-properties.json\", \"r\"\n", + ") as f:\n", + " properties_data = json.load(f)\n", + "\n", + "# Display the file (nice formatting).\n", + "JSON(properties_data)" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "application/json": { + "heliostat_position": [ + 50.9136467956509, + 6.387012480022248, + 88.59005737 + ], + "height": 2.559999942779541, + "width": 3.2200000286102295, + "initial_orientation": [ + 0.0, + -1.0, + 0.0 + ], + "kinematic_properties": { + "actuators": [ + { + "type_axis": "linear", + "min_increment": 0, + "max_increment": 69296, + "increment": 154166.6667, + "offset_shift": 0, + "initial_stroke_length": 0.075016089, + "offset": 0.335308, + "pivot_radius": 0.338095, + "radius_shift": 0, + "clockwise_axis_movement": 0, + "initial_angle": 0.005839586, + "min_movement_angle": 0.004434882, + "max_movement_angle": 1.570796327, + "movement_speed": 0 + }, + { + "type_axis": "linear", + "min_increment": 0, + "max_increment": 75451, + "increment": 154166.6667, + "offset_shift": 0, + "initial_stroke_length": 0.078892626, + "offset": 0.340771, + "pivot_radius": 0.3191, + "radius_shift": 0, + "clockwise_axis_movement": 1, + "initial_angle": 0.939715322, + "min_movement_angle": -0.95993, + "max_movement_angle": 0.929079209, + "movement_speed": 0 + } + ], + "joint_translation_e_1": 0.0, + "joint_translation_n_1": 0.0, + "joint_translation_u_1": 0.0, + "joint_translation_e_2": 0.0, + 
"joint_translation_n_2": 0.0, + "joint_translation_u_2": 0.0, + "concentrator_translation_e": 0.0, + "concentrator_translation_n": 0.175, + "concentrator_translation_u": 0.0 + }, + "facet_properties": { + "canting_type": "receiver canting", + "number_of_facets": 4, + "facets": [ + { + "translation_vector": [ + -0.8075, + 0.6425, + 0.0402 + ], + "canting_e": [ + 0.8024901549337139, + -0.0, + -0.003971726517017195 + ], + "canting_n": [ + 1.244240616731202E-5, + 0.6374950534642229, + 0.0025103732841759313 + ] + }, + { + "translation_vector": [ + 0.8075, + 0.6425, + 0.0402 + ], + "canting_e": [ + 0.8024901549337139, + -0.0, + 0.003971726517017195 + ], + "canting_n": [ + -1.244240616731202E-5, + 0.6374950534642229, + 0.0025103732841759313 + ] + }, + { + "translation_vector": [ + -0.8075, + -0.6425, + 0.0402 + ], + "canting_e": [ + 0.8024901549337139, + -0.0, + -0.003971726517017195 + ], + "canting_n": [ + -1.244240616731202E-5, + 0.6374950534642229, + -0.0025103732841759313 + ] + }, + { + "translation_vector": [ + 0.8075, + -0.6425, + 0.0402 + ], + "canting_e": [ + 0.8024901549337139, + -0.0, + 0.003971726517017195 + ], + "canting_n": [ + 1.244240616731202E-5, + 0.6374950534642229, + -0.0025103732841759313 + ] + } + ] + }, + "renovation": "2021-04-15" + } + }, + "execution_count": 15, + "metadata": { + "application/json": { + "expanded": false, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "execution_count": 15 + }, + { + "cell_type": "markdown", + "id": "403852905b2a9013", + "metadata": {}, + "source": [ + "The JSON output contains detailed information on the heliostat, including:\n", + "- **Position:** Its coordinates in the field\n", + "- **Dimensions:** Its height and width\n", + "- **Orientation:** Its standard initial orientation (East, North, Up coordinates)\n", + "- **Kinematics:** Properties of the actuators and joint offsets\n", + "- **Facets:** The number of facets, canting type, and translation vectors from the center\n", + "- 
**Renovation:** The date the heliostat was last renovated\n", + "\n", + "Detailed diagrams explaining these parameters are available on the [``PAINT`` website](https://paint-database.org/data)." + ] + }, + { + "cell_type": "markdown", + "id": "be71d0542674d482", + "metadata": {}, + "source": [ + "### Deflectometry Data\n", + "\n", + "Deflectometry data is stored in HDF5 files and contains detailed surface measurements for each heliostat facet. You will find two files in the ``Deflectometry`` folder:\n", + "- A raw deflectometry HDF5 file\n", + "- A \"filled\" HDF5 file, where missing values were substituted with ideal vectors. **Note:** This filling was performed by the measurement company using proprietary software; it is not part of the ``PAINT`` pre-processing.\n", + "\n", + "We can inspect the HDF5 structure using the helper function below:" + ] + }, + { + "cell_type": "code", + "id": "89f178d1a7eb0db3", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:13:42.981624Z", + "start_time": "2026-01-30T11:13:42.616493Z" + } + }, + "source": [ + "import h5py\n", + "\n", + "\n", + "# Helper function to print the structure of the HDF5 file.\n", + "def print_hdf5_structure(name: str, obj: h5py.Group | h5py.Dataset) -> None:\n", + " \"\"\"\n", + " Print clear summary of HDF5 file structures.\n", + "\n", + " Parameters\n", + " ----------\n", + " name : str\n", + " Name of the HDF5 element.\n", + " obj : h5py.Dataset | h5py.Group\n", + " Object to be inspected.\n", + " \"\"\"\n", + " indent = name.count(\"/\") * \" \"\n", + " if isinstance(obj, h5py.Group):\n", + " print(f\"{indent}📂 {name.split('/')[-1]}/\")\n", + " elif isinstance(obj, h5py.Dataset):\n", + " print(\n", + " f\"{indent}📄 {name.split('/')[-1]} (shape: {obj.shape}, type: {obj.dtype})\"\n", + " )\n", + "\n", + "\n", + "filename = (\n", + " Path(download_path)\n", + " / \"AA23\"\n", + " / \"Deflectometry\"\n", + " / \"AA23-filled-2021-10-13Z09-27-07Z-deflectometry.h5\"\n", + ")\n", + "\n", + "with 
h5py.File(filename, \"r\") as f:\n", + " print(f\"Structure of {filename}:\")\n", + " f.visititems(print_hdf5_structure)" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Structure of PAINT_tutorial_data/AA23/Deflectometry/AA23-filled-2021-10-13Z09-27-07Z-deflectometry.h5:\n", + "📂 facet1/\n", + " 📄 surface_normals (shape: (80760, 3), type: float32)\n", + " 📄 surface_points (shape: (80760, 3), type: float32)\n", + "📂 facet2/\n", + " 📄 surface_normals (shape: (80760, 3), type: float32)\n", + " 📄 surface_points (shape: (80760, 3), type: float32)\n", + "📂 facet3/\n", + " 📄 surface_normals (shape: (80760, 3), type: float32)\n", + " 📄 surface_points (shape: (80760, 3), type: float32)\n", + "📂 facet4/\n", + " 📄 surface_normals (shape: (80760, 3), type: float32)\n", + " 📄 surface_points (shape: (80760, 3), type: float32)\n" + ] + } + ], + "execution_count": 16 + }, + { + "cell_type": "markdown", + "id": "ba0f532b6760688b", + "metadata": {}, + "source": [ + "This file contains detailed measurements (80,760 points) for each of the four facets. These can be used to recreate heliostat surfaces; however, that is beyond the scope of this tutorial." + ] + }, + { + "cell_type": "markdown", + "id": "e2320d302297779c", + "metadata": {}, + "source": [ + "## Further Data\n", + "\n", + "To conclude, let's look at two additional data types available via ``PAINT``.\n", + "\n", + "### Weather Data\n", + "\n", + "Weather data is available from a station located directly next to the tower in Jülich, as well as from the nearest DWD (German Weather Service) station. 
The code below downloads one month of Jülich data and the complete DWD dataset:" + ] + }, + { + "cell_type": "code", + "id": "a4f7bd6ef3e58743", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:14:08.487920Z", + "start_time": "2026-01-30T11:13:42.994005Z" + } + }, + "source": [ + "from datetime import datetime\n", + "\n", + "client.get_weather_data(\n", + " data_sources=[\"Jülich\"],\n", + " start_date=datetime.strptime(\"2023-01-01Z00:00:00Z\", mappings.TIME_FORMAT),\n", + " end_date=datetime.strptime(\"2023-02-01Z00:00:00Z\", mappings.TIME_FORMAT),\n", + ")\n", + "client.get_weather_data(data_sources=[\"DWD\"])" + ], + "outputs": [], + "execution_count": 17 + }, + { + "cell_type": "markdown", + "id": "8e648142d124475", + "metadata": {}, + "source": [ + "This weather data is also in HDF5 format, but each source has a slightly different structure. We can first consider the DWD data:" + ] + }, + { + "cell_type": "code", + "id": "fe1643257ddd3be5", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:14:08.524070Z", + "start_time": "2026-01-30T11:14:08.515926Z" + } + }, + "source": [ + "dwd_weather = Path(download_path) / \"Weather\" / \"dwd-weather.h5\"\n", + "\n", + "with h5py.File(dwd_weather, \"r\") as f:\n", + " print(\"Structure of DWD Weather Station (ID 15000):\")\n", + " f.visititems(print_hdf5_structure)" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Structure of DWD Weather Station (ID 15000):\n", + "📂 15000/\n", + " 📂 cloud_cover_1h/\n", + " 📄 time (shape: (25561,), type: object)\n", + " 📄 value (shape: (25561,), type: float64)\n", + " 📂 global_radiation_10min/\n", + " 📄 time (shape: (153361,), type: object)\n", + " 📄 value (shape: (153361,), type: float64)\n", + " 📂 humidity_1h/\n", + " 📄 time (shape: (25561,), type: object)\n", + " 📄 value (shape: (25561,), type: float64)\n", + " 📂 long_wave_radiation_10min/\n", + " 📄 time (shape: (153361,), type: object)\n", + " 📄 value (shape: 
(153361,), type: float64)\n", + " 📂 pressure_vapor_1h/\n", + " 📄 time (shape: (25561,), type: object)\n", + " 📄 value (shape: (25561,), type: float64)\n", + " 📂 short_wave_radiation_10min/\n", + " 📄 time (shape: (153361,), type: object)\n", + " 📄 value (shape: (153361,), type: float64)\n", + " 📂 sunshine_duration_10min/\n", + " 📄 time (shape: (153361,), type: object)\n", + " 📄 value (shape: (153361,), type: float64)\n", + " 📂 visibility_range_1h/\n", + " 📄 time (shape: (25561,), type: object)\n", + " 📄 value (shape: (25561,), type: float64)\n", + " 📂 weather_type_1h/\n", + " 📄 time (shape: (25561,), type: object)\n", + " 📄 value (shape: (25561,), type: float64)\n" + ] + } + ], + "execution_count": 18 + }, + { + "cell_type": "markdown", + "id": "3f2f48a15f309ec9", + "metadata": {}, + "source": [ + "Here the data is grouped by variable and we can clearly see that there are some variables available at 10min resolution and others at 1h resolution. For each variable we have:\n", + "- A time dataset containing the time stamps for each measurement\n", + "- The value dataset containing the recorded values\n", + "\n", + "The Jülich weather data on the other hand is all at the same temporal resolution - a very high 1s resolution which results in the following structure:" + ] + }, + { + "cell_type": "code", + "id": "d29fd15549c7ad30", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:14:08.549541Z", + "start_time": "2026-01-30T11:14:08.543611Z" + } + }, + "source": [ + "juelich_weather = Path(download_path) / \"Weather\" / \"2023-01-juelich-weather.h5\"\n", + "\n", + "with h5py.File(juelich_weather, \"r\") as f:\n", + " print(\"Structure of the Jülich weather data:\")\n", + " f.visititems(print_hdf5_structure)" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Structure of the Jülich weather data:\n", + "📄 atmospheric_pressure (shape: (2678303,), type: float64)\n", + "📄 diffuse_irradiation (shape: (2678303,), type: 
float64)\n", + "📄 direct_irradiation (shape: (2678303,), type: float64)\n", + "📄 global_irradiation (shape: (2678303,), type: float64)\n", + "📄 precipitation (shape: (2678303,), type: float64)\n", + "📄 relative_humidity (shape: (2678303,), type: float64)\n", + "📄 temperature (shape: (2678303,), type: float64)\n", + "📄 temperature_diffuse (shape: (2678303,), type: float64)\n", + "📄 temperature_direct (shape: (2678303,), type: float64)\n", + "📄 temperature_global (shape: (2678303,), type: float64)\n", + "📄 time (shape: (2678303,), type: object)\n", + "📄 wind_direction (shape: (2678303,), type: float64)\n", + "📄 wind_speed (shape: (2678303,), type: float64)\n" + ] + } + ], + "execution_count": 19 + }, + { + "cell_type": "markdown", + "id": "eb1d987b20864b02", + "metadata": {}, + "source": [ + "The Jülich data utilizes a flatter structure. All weather variable datasets contain the values, while a single ``time`` dataset contains the associated timestamps." + ] + }, + { + "cell_type": "markdown", + "id": "a737c94d4eb7606b", + "metadata": {}, + "source": [ + "### Tower Measurements Data\n", + "\n", + "Finally, we will download a small file containing properties of the solar tower itself." + ] + }, + { + "cell_type": "code", + "id": "b5c80db545b8cc51", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:14:08.762489Z", + "start_time": "2026-01-30T11:14:08.576527Z" + } + }, + "source": [ + "client.get_tower_measurements()" + ], + "outputs": [], + "execution_count": 20 + }, + { + "cell_type": "markdown", + "id": "4a4e245c031848b9", + "metadata": {}, + "source": [ + "After running this code you should see a new JSON file ``WRI1030197-tower-measurements.json`` in your download folder. 
We can inspect it with the same code as before:" + ] + }, + { + "cell_type": "code", + "id": "c97107916b2c01e0", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-30T11:14:08.784215Z", + "start_time": "2026-01-30T11:14:08.780482Z" + } + }, + "source": [ + "# Load the file.\n", + "with open(Path(download_path) / \"WRI1030197-tower-measurements.json\", \"r\") as f:\n", + " tower_data = json.load(f)\n", + "\n", + "# Display the file (nice formatting).\n", + "JSON(tower_data)" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "application/json": { + "power_plant_properties": { + "ID": "WRI1030197", + "coordinates": [ + 50.913421122592574, + 6.387824755874856, + 87.0 + ] + }, + "solar_tower_juelich_upper": { + "type": "planar", + "normal_vector": [ + 0, + 1, + 0 + ], + "coordinates": { + "center": [ + 50.91339203683997, + 6.387824563513243, + 130.09766666666667 + ], + "upper_left": [ + 50.91339196507306, + 6.387885982262168, + 133.684 + ], + "upper_middle": [ + 50.91339190867827, + 6.387824583774971, + 133.71 + ], + "upper_right": [ + 50.91339211259599, + 6.387763286988281, + 133.719 + ], + "lower_left": [ + 50.913391865959426, + 6.387886052532387, + 126.476 + ], + "lower_right": [ + 50.91339215692524, + 6.387763472205384, + 126.506 + ] + } + }, + "solar_tower_juelich_lower": { + "type": "planar", + "normal_vector": [ + 0, + 1, + 0 + ], + "coordinates": { + "center": [ + 50.91339203683997, + 6.387824563513243, + 122.8815 + ], + "upper_left": [ + 50.913391865959426, + 6.387886052532387, + 126.476 + ], + "upper_right": [ + 50.91339215692524, + 6.387763472205384, + 126.506 + ], + "lower_left": [ + 50.913391839040266, + 6.387886038089168, + 119.268 + ], + "lower_middle": [ + 50.913392106574314, + 6.387824542765121, + 119.269 + ], + "lower_right": [ + 50.9133923375531, + 6.387763217765236, + 119.279 + ] + } + }, + "multi_focus_tower": { + "type": "planar", + "normal_vector": [ + 0, + 1, + 0 + ], + "coordinates": { + "center": [ + 50.91339645088695, + 
6.387574436728054, + 138.97975 + ], + "upper_left": [ + 50.91339628900999, + 6.387612983329586, + 142.175 + ], + "upper_right": [ + 50.91339661677292, + 6.387536032350528, + 142.172 + ], + "lower_left": [ + 50.913396343415734, + 6.387612841591359, + 135.789 + ], + "lower_right": [ + 50.91339655432385, + 6.3875358896401675, + 135.783 + ] + } + }, + "receiver": { + "type": "convex_cylinder", + "normal_vector": [ + 0.0, + 0.90630779, + -0.42261826 + ], + "coordinates": { + "center": [ + 50.91341660151, + 6.387825304776098, + 142.22674999999998 + ], + "receiver_outer_upper_left": [ + 50.91342727218299, + 6.387856856914401, + 144.805 + ], + "receiver_outer_upper_right": [ + 50.91342773925188, + 6.387792121250146, + 144.82 + ], + "receiver_outer_lower_left": [ + 50.913405475562435, + 6.387856291534852, + 139.596 + ], + "receiver_outer_lower_right": [ + 50.91340570660374, + 6.3877922506716125, + 139.592 + ], + "receiver_inner_lower_left": [ + 50.913406544144294, + 6.387853925842859, + 139.86 + ], + "receiver_inner_lower_right": [ + 50.91340664929648, + 6.387795301404112, + 139.862 + ], + "receiver_inner_upper_left": [ + 50.91342645401072, + 6.387854205350705, + 144.592 + ], + "receiver_inner_upper_right": [ + 50.913426766473705, + 6.3877954119834275, + 144.593 + ] + } + } + } + }, + "execution_count": 21, + "metadata": { + "application/json": { + "expanded": false, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "execution_count": 21 + }, + { + "cell_type": "markdown", + "id": "8b2e4e8791029079", + "metadata": {}, + "source": [ + "This file contains crucial properties of the solar tower, including:\n", + "- **ID:** The tower identifier\n", + "- **Coordinates:** Latitude, longitude, and elevation\n", + "- **Targets:** Coordinates for the various calibration targets (corners and center) and the receiver" + ] + }, + { + "cell_type": "markdown", + "id": "7c8663ca1667f0bd", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "This concludes 
the tutorial. Please check our [documentation](https://paint.readthedocs.io/en/latest/usage.html) for further scripts and information. We hope you enjoy using the ``PAINT`` database!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}