From dc5a060b3057dd12b3c2782d1fb91cf50d6d8891 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Fri, 23 Jan 2026 15:14:02 +0100 Subject: [PATCH 01/21] add version attribute --- paint/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/paint/__init__.py b/paint/__init__.py index a9d1c747..003bcbd7 100644 --- a/paint/__init__.py +++ b/paint/__init__.py @@ -1,6 +1,12 @@ import os +from importlib.metadata import PackageNotFoundError, version PAINT_ROOT = f"{os.sep}".join(__file__.split(os.sep)[:-2]) """Reference to the root directory of ARTIST.""" +try: + __version__ = version("paint-csp") +except PackageNotFoundError: + # Allows running from source without installation + __version__ = "0.0.0" -__all__ = ["PAINT_ROOT", "preprocessing", "util"] +__all__ = ["PAINT_ROOT", "preprocessing", "util", "__version__"] From 250580b580715c273e3c34a91c0b7de964032221 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Fri, 23 Jan 2026 15:14:23 +0100 Subject: [PATCH 02/21] update status and version number --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 82b40713..44bb7088 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ packages = ["paint"] [project] name = "paint-csp" -version = "2.0.0" +version = "2.0.1" authors = [ { name="ARTIST Consortium", email="artist@lists.kit.edu" }, ] @@ -17,7 +17,7 @@ requires-python = ">=3.10" classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", - "Development Status :: 1 - Planning", + "Development Status :: 5 - Production/Stable", ] dependencies = [ "numpy", From f0ea548c2ba0d4efde829b8204adaf7579c8c53c Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Fri, 23 Jan 2026 15:23:07 +0100 Subject: [PATCH 03/21] add log message for location of downloaded data --- paint/data/stac_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/paint/data/stac_client.py b/paint/data/stac_client.py 
index ccce9941..6250a7cc 100644 --- a/paint/data/stac_client.py +++ b/paint/data/stac_client.py @@ -69,6 +69,7 @@ def __init__( self.output_dir = pathlib.Path(output_dir) self.output_dir.mkdir(parents=True, exist_ok=True) self.chunk_size = chunk_size + log.info(f"Initializing STAC client to download data to: {output_dir}.") @staticmethod def load_checkpoint(path: pathlib.Path) -> dict[str, Any]: From c4a92ac1d16733a46d3606524fbdf6df8b02070d Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:06:00 +0100 Subject: [PATCH 04/21] update docs --- docs/dataset.rst | 2 +- docs/splitter.rst | 4 ++- docs/usage.rst | 18 ++++++++++++- tutorials/paint_data_tutorial.ipynb | 41 +++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 tutorials/paint_data_tutorial.ipynb diff --git a/docs/dataset.rst b/docs/dataset.rst index f145c333..eeb776f8 100644 --- a/docs/dataset.rst +++ b/docs/dataset.rst @@ -32,7 +32,7 @@ There are three ways of creating a ``PaintCalibrationDataset``: 2. **From a benchmark file** - You can also create the dataset from a benchmark file (see above). In this case, the ``benchmark_file`` must be provided: + You can also create the dataset from a benchmark file (see the :doc:`information on dataset splits <splitter>` for details). In this case, the ``benchmark_file``, containing information on the train, validation, and test split, must be provided: .. code-block:: python diff --git a/docs/splitter.rst b/docs/splitter.rst index 27bb5e86..5190fbce 100644 --- a/docs/splitter.rst +++ b/docs/splitter.rst @@ -34,7 +34,7 @@ Supported Splits Again, the goal is to create diverse and challenging training and validation datasets. - **Balanced Split:** - This method uses KMeans clustering on azimuth and elevation features to ensure a stratified selection. The process includes: + This method uses k-means clustering on azimuth and elevation features to ensure a stratified selection.
The process includes: - Clustering the data into ``validation_size`` clusters. - Selecting one data point per cluster for the validation split. @@ -76,3 +76,5 @@ To generate the splits, simply call the ``get_dataset_splits()`` function: azimuth_splits = splitter.get_dataset_splits( split_type="azimuth", training_size=10, validation_size=30 ) + +This returns a ``pd.DataFrame`` containing information on the splits, i.e. which samples belong to which split, and also saves this information as a CSV file. diff --git a/docs/usage.rst b/docs/usage.rst index 77f8b427..6d3dd42f 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -2,7 +2,23 @@ How To Use ========== -Here, you can find an overview of how to use ``PAINT``. + +To get started with ``PAINT`` we have included an interactive notebook, which is available here: https://github.com/ARTIST-Association/PAINT/blob/main/tutorials/paint_data_tutorial.ipynb. + +This tutorial provides an interactive introduction to the PAINT database, demonstrating how to: +- Initialize the STAC Client. +- Download and inspect metadata. +- Generate calibration data splits. +- Load calibration data using a data loader. +- Download and inspect other types of PAINT data. + +To run the tutorial make sure you install the tutorial dependencies, i.e.: + +.. code-block:: console + + $ pip install "paint-csp[tutorial]" + +Most of the concepts covered in the interactive tutorial are also covered in the documentation and associated scripts listed below: ..
toctree:: :maxdepth: 1 diff --git a/tutorials/paint_data_tutorial.ipynb b/tutorials/paint_data_tutorial.ipynb new file mode 100644 index 00000000..889f7ef8 --- /dev/null +++ b/tutorials/paint_data_tutorial.ipynb @@ -0,0 +1,41 @@ +{ + "cells": [ + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2026-01-23T14:24:18.330142Z", + "start_time": "2026-01-23T14:24:18.328796Z" + } + }, + "source": [ + "" + ], + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 5c95412ff5c1403480c56ad0d840a2a04c6c295a Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:06:31 +0100 Subject: [PATCH 05/21] update tutorial dependencies --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 44bb7088..0ea95ef6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dev = [ "sphinxcontrib-napoleon", "sphinxemoji" ] +tutorial = ["jupyter"] [project.urls] Homepage = "https://github.com/ARTIST-Association/PAINT" From 73ab09ff50f3ca90aba9c84cd9775955e1668809 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:08:52 +0100 Subject: [PATCH 06/21] improve flexibility for dataset --- paint/data/dataset.py | 35 ++++++++++++++++++++++++++--------- tests/data/test_dataset.py | 20 ++++++++++++++++++++ 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/paint/data/dataset.py b/paint/data/dataset.py index 964dcdb8..fe11e6ae 100644 --- a/paint/data/dataset.py +++ b/paint/data/dataset.py @@ -142,7 +142,7 @@ def 
_check_accepted_keys(key: str) -> None: @classmethod def from_benchmark( cls, - benchmark_file: str | Path, + benchmark_file: str | Path | pd.DataFrame, root_dir: str | Path, item_type: str, download: bool = False, @@ -157,8 +157,8 @@ def from_benchmark( Parameters ---------- - benchmark_file : str | Path - Path to the file containing the benchmark information. + benchmark_file : str | Path | pd.DataFrame + Path to the file containing the benchmark information, or dataframe containing this information. root_dir : str | Path Directory where the dataset will be stored. item_type : str @@ -182,12 +182,29 @@ def from_benchmark( Validation dataset. """ root_dir = Path(root_dir) - log.info( - f"Begining the process of generating benchmark datasets. The file used to generate the benchmarks is:\n" - f" {benchmark_file}!" - ) - # Load the splits data. - splits = pd.read_csv(benchmark_file) + if not isinstance(benchmark_file, pd.DataFrame): + log.info( + f"Begining the process of generating benchmark datasets. The file used to generate the benchmarks is:\n" + f" {benchmark_file}!" + ) + # Load the splits data. + splits = pd.read_csv(benchmark_file) + else: + log.info( + "Begining the process of generating benchmark datasets using provided pandas dataframe!" + ) + benchmark_file.reset_index(inplace=True) + splits = benchmark_file + + expected_cols = ["Id", "HeliostatId", "Split"] + try: + pd.testing.assert_index_equal(splits.columns, pd.Index(expected_cols)) + except AssertionError as e: + raise ValueError( + f"The dataset split file provide has an incorrect schema. Please verify and try again.\n" + f"Expected: {expected_cols}\n" + f"Details: {e}" + ) # Check whether to download the data or not. 
if download: # pragma: no cover diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py index c5b9c30a..a296eafc 100644 --- a/tests/data/test_dataset.py +++ b/tests/data/test_dataset.py @@ -4,6 +4,7 @@ import cv2 import deepdiff +import pandas as pd import pytest import torch from torchvision import transforms @@ -191,6 +192,25 @@ def test_from_benchmark( assert len(test) == 4 assert len(val) == 3 + # Test with Pandas data frame as input instead of file. + benchmark_df = pd.read_csv( + pathlib.Path(PAINT_ROOT) + / "tests" + / "data" + / "test_data" + / "test_benchmark.csv", + index_col=0, + ) + train, test, val = PaintCalibrationDataset.from_benchmark( + benchmark_file=benchmark_df, + root_dir=pathlib.Path(PAINT_ROOT) / "tests" / "data" / "test_data" / "dataset", + item_type=item_type, + download=download, + ) + assert len(train) == 3 + assert len(test) == 4 + assert len(val) == 3 + @pytest.mark.parametrize( "item_type, heliostats", From ad157e4becb4d3e18a29acc6681b610456b6496a Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:09:28 +0100 Subject: [PATCH 07/21] fix docstring --- paint/data/dataset_splits.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/paint/data/dataset_splits.py b/paint/data/dataset_splits.py index 208b30a8..42aed11f 100644 --- a/paint/data/dataset_splits.py +++ b/paint/data/dataset_splits.py @@ -457,6 +457,11 @@ def get_dataset_splits( Size of the training split. validation_size : int Size of the validation split. + + Returns + ------- + pd.DataFrame + Data frame containing information on the dataset splits. 
""" allowed_split_types = [ mappings.AZIMUTH_SPLIT, From ec55d0e6c8a616ba734ba62e9f01dd9ac4d47dc0 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:09:52 +0100 Subject: [PATCH 08/21] fix bug in checkpoint deletion --- paint/data/stac_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paint/data/stac_client.py b/paint/data/stac_client.py index 6250a7cc..36861798 100644 --- a/paint/data/stac_client.py +++ b/paint/data/stac_client.py @@ -716,7 +716,7 @@ def get_heliostat_data( # Download the data for each heliostat. for heliostat_catalog in heliostat_catalogs_list: log.info(f"Processing heliostat catalog {heliostat_catalog.id}") - success = False + success = True # Download calibration data. if get_calibration: From c58c803e45bee254381b234842cab7de66867ff2 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:11:00 +0100 Subject: [PATCH 09/21] add interactive tutorial --- tutorials/paint_data_tutorial.ipynb | 1889 ++++++++++++++++++++++++++- 1 file changed, 1884 insertions(+), 5 deletions(-) diff --git a/tutorials/paint_data_tutorial.ipynb b/tutorials/paint_data_tutorial.ipynb index 889f7ef8..df61f2e4 100644 --- a/tutorials/paint_data_tutorial.ipynb +++ b/tutorials/paint_data_tutorial.ipynb @@ -1,20 +1,1899 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "23d8c4e94a4b55f4", + "metadata": {}, + "source": [ + "# PAINT Data Tutorial\n", + "\n", + "This interactive notebook provides a brief overview of the PAINT database, demonstrating how to:\n", + "- Initialize the STAC Client.\n", + "- Download and inspect metadata.\n", + "- Generate calibration data splits.\n", + "- Load calibration data using a data loader.\n", + "- Download and inspect other types of PAINT data.\n", + "\n", + "> **Note:** Python executable scripts for each step are available in the \"scripts\" folder of the PAINT GitHub. We recommend using those scripts if you plan to download and process large amounts of PAINT data." 
+ ] + }, + { + "cell_type": "markdown", + "id": "3e82d5e92da63968", + "metadata": {}, + "source": [ + "## Getting Started\n", + "\n", + "To run this tutorial, ensure you have the ``PAINT`` tutorial dependencies installed:\n", + "```\n", + "pip install \"paint-csp[tutorial]\"\n", + "```\n", + "To verify the installation, let's import ``PAINT`` and check the version attribute:" + ] + }, { "cell_type": "code", + "execution_count": 1, "id": "initial_id", "metadata": { - "collapsed": true, "ExecuteTime": { - "end_time": "2026-01-23T14:24:18.330142Z", - "start_time": "2026-01-23T14:24:18.328796Z" + "end_time": "2026-01-23T23:37:20.553645Z", + "start_time": "2026-01-23T23:37:20.547929Z" + }, + "collapsed": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PAINT is running with version: 2.0.0\n" + ] + } + ], + "source": [ + "import paint\n", + "\n", + "print(f\"PAINT is running with version: {paint.__version__}\")" + ] + }, + { + "cell_type": "markdown", + "id": "b2a2d5be158a05b9", + "metadata": {}, + "source": "We also need to specify a directory where all downloaded data will be saved. **Update the file path below to a location that works for your system:**" + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ee4635a10ae20007", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:37:20.634385Z", + "start_time": "2026-01-23T23:37:20.632831Z" + } + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "download_path = Path(\"./PAINT_tutorial_data\")" + ] + }, + { + "cell_type": "markdown", + "id": "57e39a37b0d41383", + "metadata": {}, + "source": [ + "## Downloading Metadata\n", + "\n", + "Before working with the actual PAINT data, we should inspect the metadata to understand what is available. For this tutorial, we will focus on a small subset of heliostats: those with IDs starting with \"AA\". 
This includes the range from **AA23 to AA51**.\n", + "\n", + "In the next step, we will:\n", + "- Generate a list of heliostats to access.\n", + "- Create a STAC client.\n", + "- Download the metadata and save it to the specified location." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "95276b7d0af455cc", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:38:14.506706Z", + "start_time": "2026-01-23T23:37:20.642495Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No collections selected - downloading data for all collections!\n", + "Processing Heliostat Catalogs: 0%| | 0/29 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdHeliostatIdlatitudelongitudeElevationDateTime
0AA23-heliostat-propertiesAA2350.9136476.38701288.5900572021-07-20 05:09:00+00:00
1AA24-heliostat-propertiesAA2450.9136466.38707588.5998082021-07-20 05:09:00+00:00
2AA25-heliostat-propertiesAA2550.9136466.38713888.6205982021-07-20 05:09:00+00:00
3AA26-heliostat-propertiesAA2650.9136466.38720088.6030582021-07-20 05:09:00+00:00
4AA27-heliostat-propertiesAA2750.9136466.38726388.6156542021-07-20 05:09:00+00:00
\n", + "" + ], + "text/plain": [ + " Id HeliostatId latitude longitude Elevation \\\n", + "0 AA23-heliostat-properties AA23 50.913647 6.387012 88.590057 \n", + "1 AA24-heliostat-properties AA24 50.913646 6.387075 88.599808 \n", + "2 AA25-heliostat-properties AA25 50.913646 6.387138 88.620598 \n", + "3 AA26-heliostat-properties AA26 50.913646 6.387200 88.603058 \n", + "4 AA27-heliostat-properties AA27 50.913646 6.387263 88.615654 \n", + "\n", + " DateTime \n", + "0 2021-07-20 05:09:00+00:00 \n", + "1 2021-07-20 05:09:00+00:00 \n", + "2 2021-07-20 05:09:00+00:00 \n", + "3 2021-07-20 05:09:00+00:00 \n", + "4 2021-07-20 05:09:00+00:00 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "properties_metadata.head()" + ] + }, + { + "cell_type": "markdown", + "id": "3b7c1e4028dc1cf9", + "metadata": {}, + "source": "Above we can see the first five rows of this metadata table. Now lets look at the calibration metadata:" + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e6ef7037a3e832e0", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:20.866969Z", + "start_time": "2026-01-23T23:43:20.865094Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The Calibration metadata file contains 4691 rows and 17 columns.\n", + "The columns are: Id, HeliostatId, Azimuth, Elevation, lower_left_latitude, lower_left_longitude, lower_left_Elevation, upper_left_latitude, upper_left_longitude, upper_left_Elevation, upper_right_latitude, upper_right_longitude, upper_right_Elevation, lower_right_latitude, lower_right_longitude, lower_right_Elevation, DateTime\n" + ] + } + ], + "source": [ + "# Inspect the calibration metadata.\n", + "print(\n", + " f\"The Calibration metadata file contains {len(calibration_metadata)} rows and {len(calibration_metadata.columns)} columns.\\n\"\n", + " f\"The columns are: {', '.join(calibration_metadata.columns)}\"\n", + ")" + ] + }, + { 
+ "cell_type": "markdown", + "id": "e21629a42491489b", + "metadata": {}, + "source": [ + "This dataframe contains significantly more rows because there are often multiple calibration measurements for each heliostat. The columns include:\n", + "- **Id:** The measurement ID of the calibration measurement.\n", + "- **HeliostatId:** The ID of the heliostat used for this measurement.\n", + "- **Azimuth:** The sun's azimuth at the time of measurement.\n", + "- **Elevation:** The sun's elevation at the time of measurement.\n", + "- **Target Coordinates:** The latitude, longitude, and elevation for the *lower_left*, *upper_left*, *upper_right*, and *lower_right* corners of the calibration target.\n", + "- **DateTime:** The timestamp of the measurement.\n", + "\n", + "The first five rows are displayed below:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cdf7447636c43830", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:21.007291Z", + "start_time": "2026-01-23T23:43:20.999861Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdHeliostatIdAzimuthElevationlower_left_latitudelower_left_longitudelower_left_Elevationupper_left_latitudeupper_left_longitudeupper_left_Elevationupper_right_latitudeupper_right_longitudeupper_right_Elevationlower_right_latitudelower_right_longitudelower_right_ElevationDateTime
0225295AA2381.83915837.04787950.9133966.387613135.78950.9133966.387613142.17550.9133976.387536142.17250.9133976.387536135.7832023-06-27 05:39:56+00:00
1199617AA23-24.27562948.83409050.9133966.387613135.78950.9133966.387613142.17550.9133976.387536142.17250.9133976.387536135.7832023-04-21 10:37:26+00:00
262302AA23-42.0170688.52727150.9133966.387613135.78950.9133966.387613142.17550.9133976.387536142.17250.9133976.387536135.7832022-01-18 13:44:45+00:00
3222963AA23-6.40035262.32791650.9133926.387886119.26850.9133926.387886126.47050.9133926.387763126.50650.9133926.387763119.2792023-06-16 09:48:04+00:00
4212358AA2366.41160745.21361750.9133926.387886119.26850.9133926.387886126.47050.9133926.387763126.50650.9133926.387763119.2792023-05-31 06:35:41+00:00
\n", + "
" + ], + "text/plain": [ + " Id HeliostatId Azimuth Elevation lower_left_latitude \\\n", + "0 225295 AA23 81.839158 37.047879 50.913396 \n", + "1 199617 AA23 -24.275629 48.834090 50.913396 \n", + "2 62302 AA23 -42.017068 8.527271 50.913396 \n", + "3 222963 AA23 -6.400352 62.327916 50.913392 \n", + "4 212358 AA23 66.411607 45.213617 50.913392 \n", + "\n", + " lower_left_longitude lower_left_Elevation upper_left_latitude \\\n", + "0 6.387613 135.789 50.913396 \n", + "1 6.387613 135.789 50.913396 \n", + "2 6.387613 135.789 50.913396 \n", + "3 6.387886 119.268 50.913392 \n", + "4 6.387886 119.268 50.913392 \n", + "\n", + " upper_left_longitude upper_left_Elevation upper_right_latitude \\\n", + "0 6.387613 142.175 50.913397 \n", + "1 6.387613 142.175 50.913397 \n", + "2 6.387613 142.175 50.913397 \n", + "3 6.387886 126.470 50.913392 \n", + "4 6.387886 126.470 50.913392 \n", + "\n", + " upper_right_longitude upper_right_Elevation lower_right_latitude \\\n", + "0 6.387536 142.172 50.913397 \n", + "1 6.387536 142.172 50.913397 \n", + "2 6.387536 142.172 50.913397 \n", + "3 6.387763 126.506 50.913392 \n", + "4 6.387763 126.506 50.913392 \n", + "\n", + " lower_right_longitude lower_right_Elevation DateTime \n", + "0 6.387536 135.783 2023-06-27 05:39:56+00:00 \n", + "1 6.387536 135.783 2023-04-21 10:37:26+00:00 \n", + "2 6.387536 135.783 2022-01-18 13:44:45+00:00 \n", + "3 6.387763 119.279 2023-06-16 09:48:04+00:00 \n", + "4 6.387763 119.279 2023-05-31 06:35:41+00:00 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "calibration_metadata.head()" + ] + }, + { + "cell_type": "markdown", + "id": "e00b86252619e38e", + "metadata": {}, + "source": "Now finally it is time to inspect the deflectometry metadata:" + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "55674ae61e53e85", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:21.042735Z", + "start_time": "2026-01-23T23:43:21.041333Z" + } + 
}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The Deflectometry metadata file contains 39 rows and 6 columns.\n", + "The columns are: Id, HeliostatId, latitude, longitude, Elevation, DateTime\n" + ] + } + ], + "source": [ + "# Inspect the deflectometry metadata.\n", + "print(\n", + " f\"The Deflectometry metadata file contains {len(deflectometry_metadata)} rows and {len(deflectometry_metadata.columns)} columns.\\n\"\n", + " f\"The columns are: {', '.join(deflectometry_metadata.columns)}\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9d8b2f0fd819e366", + "metadata": {}, + "source": [ + "Again, we see more rows than the number of heliostats because some heliostats contain multiple deflectometry measurements. The columns are nearly identical to the properties metadata, with one key difference: the **Id** column refers to the *deflectometry STAC ID*, not the properties ID.\n", + "\n", + "The first five rows are displayed below:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f4a270d14e53662a", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:21.092151Z", + "start_time": "2026-01-23T23:43:21.088500Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdHeliostatIdlatitudelongitudeElevationDateTime
0AA23-2021-10-13Z09-27-07Z-deflectometryAA2350.9136476.38701288.5900572021-10-13 09:27:07+00:00
1AA24-2021-10-13Z09-29-29Z-deflectometryAA2450.9136466.38707588.5998082021-10-13 09:29:29+00:00
2AA25-2021-10-13Z09-32-36Z-deflectometryAA2550.9136466.38713888.6205982021-10-13 09:32:36+00:00
3AA26-2021-10-13Z09-34-21Z-deflectometryAA2650.9136466.38720088.6030582021-10-13 09:34:21+00:00
4AA27-2021-10-12Z13-27-32Z-deflectometryAA2750.9136466.38726388.6156542021-10-12 13:27:32+00:00
\n", + "
" + ], + "text/plain": [ + " Id HeliostatId latitude longitude \\\n", + "0 AA23-2021-10-13Z09-27-07Z-deflectometry AA23 50.913647 6.387012 \n", + "1 AA24-2021-10-13Z09-29-29Z-deflectometry AA24 50.913646 6.387075 \n", + "2 AA25-2021-10-13Z09-32-36Z-deflectometry AA25 50.913646 6.387138 \n", + "3 AA26-2021-10-13Z09-34-21Z-deflectometry AA26 50.913646 6.387200 \n", + "4 AA27-2021-10-12Z13-27-32Z-deflectometry AA27 50.913646 6.387263 \n", + "\n", + " Elevation DateTime \n", + "0 88.590057 2021-10-13 09:27:07+00:00 \n", + "1 88.599808 2021-10-13 09:29:29+00:00 \n", + "2 88.620598 2021-10-13 09:32:36+00:00 \n", + "3 88.603058 2021-10-13 09:34:21+00:00 \n", + "4 88.615654 2021-10-12 13:27:32+00:00 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "deflectometry_metadata.head()" + ] + }, + { + "cell_type": "markdown", + "id": "cb071d62c5c816ea", + "metadata": {}, + "source": "Before we move on to the next step, let's inspect the calibration dataset in a bit more detail (since we will be using this more later). Specifically, how many of our heliostats have calibration measurements and how does the number of calibration measurements vary across the heliostats:" + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ae5fafd83c0957e0", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:21.147175Z", + "start_time": "2026-01-23T23:43:21.143518Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

Unique Heliostats: 27

\n", + "
\n", + "
\n", + " Top 5 (Most Measurements)\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Measurement Count
HeliostatId
AA23262
AA24228
AA51223
AA45215
AA49214
\n", + "
\n", + "
\n", + " Bottom 5 (Least Measurements)\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Measurement Count
HeliostatId
AA42140
AA39139
AA31135
AA4195
AA431
\n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import HTML, display\n", + "\n", + "# Calculate counts once.\n", + "counts = calibration_metadata[\"HeliostatId\"].value_counts()\n", + "unique_heliostats = calibration_metadata[\"HeliostatId\"].nunique()\n", + "\n", + "# Create DataFrames for better rendering.\n", + "top_5 = counts.head(5).to_frame(name=\"Measurement Count\")\n", + "bottom_5 = counts.tail(5).to_frame(name=\"Measurement Count\")\n", + "\n", + "display(\n", + " HTML(f\"\"\"\n", + "

Unique Heliostats: {unique_heliostats}

\n", + "
\n", + "
\n", + " Top 5 (Most Measurements)\n", + " {top_5.to_html()}\n", + "
\n", + "
\n", + " Bottom 5 (Least Measurements)\n", + " {bottom_5.to_html()}\n", + "
\n", + "
\n", + "\"\"\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b0d59ca652a6474c", + "metadata": {}, + "source": [ + "**Key takeaways for the next steps:**\n", + "- We have 27 heliostats with calibration measurements.\n", + "- One heliostat (AA43) has only a single calibration measurement; the rest have at least 95." + ] + }, + { + "cell_type": "markdown", + "id": "5f668d8bbd3cbe87", + "metadata": {}, + "source": [ + "## Creating a Calibration Dataset Split from the Metadata\n", + "\n", + "Now that we have inspected the metadata, we can create a dataset split for the calibration data. To summarize the nature of calibration data:\n", + "- Heliostats often have unknown offsets and deformations, meaning they do not point exactly as intended out of the box.\n", + "- Power plant operators use *calibration targets* to capture photos of the flux image generated by a single heliostat. These images help determine pointing errors.\n", + "- Multiple measurements are taken across different times and seasons to assist with operations.\n", + "\n", + "This data is ideal for machine learning. We can use these images to train algorithms that improve power plant operation.\n", + "\n", + "For such algorithms, training, validation, and test splits are required. 
PAINT provides multiple splitting methods, including the *Azimuth Split*, *Solstice Split*, *Balanced Split*, and *High-Variance Split* (see the [documentation here](https://paint.readthedocs.io/en/latest/splitter.html)).\n", + "\n", + "In this tutorial, we will use the **Balanced Split**, which uses k-means clustering on the azimuth and elevation features to ensure a stratified selection:\n", + " - Data is clustered into ``validation_size`` clusters.\n", + " - One data point per cluster is selected for the **validation** split.\n", + " - A distinct point from the same cluster is selected for the **test** split (if possible).\n", + " - Missing test samples are filled from the overall pool to maintain balance.\n", + " - Remaining data points are assigned to the **training** split." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c9bed2babdbf9116", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:22.598767Z", + "start_time": "2026-01-23T23:43:21.172100Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Split\n", + "train 650\n", + "validation 130\n", + "test 130\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Import paint mappings and dataset splitter\n", + "import paint.util.paint_mappings as mappings\n", + "from paint.data.dataset_splits import DatasetSplitter\n", + "\n", + "# Set train, and validation size\n", + "training_size = 25\n", + "validation_size = 5\n", + "\n", + "# Create the dataset splitter.\n", + "splitter = DatasetSplitter(\n", + " input_file=calibration_metadata_file, output_dir=download_path\n", + ")\n", + "\n", + "# Perform the balanced split.\n", + "split_data = splitter.get_dataset_splits(\n", + " split_type=mappings.BALANCED_SPLIT,\n", + " training_size=training_size,\n", + " validation_size=validation_size,\n", + ")\n", + "\n", + "# Inspect the size of the splits.\n", + 
"split_data.Split.value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "64a207a4d996fcbd", + "metadata": {}, + "source": [ + "**Verifying the Split**\n", + "\n", + "Does this output align with our earlier analysis?\n", + "- We have 27 heliostats with calibration measurements, but one (AA43) had only a single measurement.\n", + "- Because our validation size is 5, any heliostat with fewer than 5 measurements is excluded.\n", + "- This leaves 26 heliostats. With a validation size of 5 (and a matching test size of 5), we expect: $26 \\times 5 = 130$ samples for both validation and test sets.\n", + "- With a training size of 25, we expect: $25 \\times 26 = 650$ training samples.\n", + "\n", + "The numbers match! The split data has been automatically saved as a CSV in your download path (e.g., `benchmark_split-balanced_train-25_validation-5.csv`)." + ] + }, + { + "cell_type": "markdown", + "id": "95c3a3b0a33e56c8", + "metadata": {}, + "source": [ + "## Creating a Dataset\n", + "\n", + "Now that we have defined our splits, we can use the built-in ``PAINT`` functionality to create a ``torch.Dataset``. While there are several ways to create datasets (see [this tutorial](https://paint.readthedocs.io/en/latest/dataset.html)), we will use the benchmark split data we just generated.\n", + "\n", + "We must specify:\n", + "- The benchmark split file.\n", + "- The root directory for downloads.\n", + "- The item type.\n", + "- Whether to download the data.\n", + "\n", + "In this case, we use calibration images that have been cropped and centered on the **flux center of mass**. These are compressed and pre-processed, ensuring faster download times for this tutorial." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d57ec839e47c7668", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:44.197018Z", + "start_time": "2026-01-23T23:43:22.632605Z" } }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading benchmark data for the test split: 100%|██████████| 130/130 [00:03<00:00, 40.88Item/s]\n", + "Downloading benchmark data for the train split: 100%|██████████| 650/650 [00:13<00:00, 47.44Item/s]\n", + "Downloading benchmark data for the validation split: 100%|██████████| 130/130 [00:02<00:00, 43.55Item/s]\n" + ] + } + ], + "source": [ + "from paint.data.dataset import PaintCalibrationDataset\n", + "\n", + "# Initialize dataset from benchmark splits.\n", + "train, test, val = PaintCalibrationDataset.from_benchmark(\n", + " benchmark_file=split_data,\n", + " root_dir=download_path,\n", + " item_type=mappings.CALIBRATION_FLUX_CENTERED_IMAGE_KEY,\n", + " download=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e9e1c60eccb6840e", + "metadata": {}, "source": [ - "" + "This results in a custom dataset that implements the standard PyTorch ``__getitem__()`` method. We can easily access the data (stored as tensors) for machine learning applications.\n", + "\n", + "Below is an example of loading and plotting the first four measurements from the training dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "cdde5d097426998f", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:44.655912Z", + "start_time": "2026-01-23T23:43:44.221862Z" + } + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Create a 2x2 grid\n", + "fig, axes = plt.subplots(2, 2, figsize=(10, 10))\n", + "\n", + "for i in range(4):\n", + " # Determine the row and column index.\n", + " ax = axes[i // 2, i % 2]\n", + "\n", + " # Grab the i-th item from your dataset.\n", + " item = train[i]\n", + "\n", + " # Convert (C, H, W) -> (H, W, C) for plotting.\n", + " img_data = item.permute(1, 2, 0).detach().cpu().numpy()\n", + "\n", + " ax.imshow(img_data)\n", + " ax.set_title(f\"Train Index: {i}\")\n", + " ax.axis(\"off\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b71546d8695c85", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:44.690964Z", + "start_time": "2026-01-23T23:43:44.689560Z" + } + }, "outputs": [], - "execution_count": null + "source": [] + }, + { + "cell_type": "markdown", + "id": "801bba9f50c235fa", + "metadata": {}, + "source": [ + "## Downloading Further Heliostat Data\n", + "\n", + "We have spent most of this tutorial looking at the calibration data, since this holds the most potential for machine learning applications. However, it is worth also looking at the other data available. 
We will download deflectometry data and properties data for the heliostat \"AA23\" in the following and briefly inspect it:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8687d5e3ddb27454", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:47.792756Z", + "start_time": "2026-01-23T23:43:44.701654Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing Items in Heliostat AA23-heliostat-catalog: 100%|██████████| 1/1 [00:02<00:00, 2.21s/Item]\n", + "Processing Items in Heliostat AA23-heliostat-catalog: 100%|██████████| 1/1 [00:00<00:00, 13.39Item/s]\n" + ] + } + ], + "source": [ + "client.get_heliostat_data(\n", + " heliostats=[\"AA23\"],\n", + " collections=[mappings.SAVE_DEFLECTOMETRY.lower(), mappings.SAVE_PROPERTIES.lower()],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "60530f3677d0c415", + "metadata": {}, + "source": [ + "### Properties Data\n", + "\n", + "There should now be a new folder in your download path called \"AA23\". 
Within this folder there will be two more subfolders, \"Deflectometry\" and \"Properties\".\n", + "\n", + "Let's first look at the Properties data:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a038b7fc9aa256d6", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:47.809627Z", + "start_time": "2026-01-23T23:43:47.806863Z" + } + }, + "outputs": [ + { + "data": { + "application/json": { + "facet_properties": { + "canting_type": "receiver canting", + "facets": [ + { + "canting_e": [ + 0.8024901549337139, + -0.0, + -0.003971726517017195 + ], + "canting_n": [ + 1.244240616731202E-5, + 0.6374950534642229, + 0.0025103732841759313 + ], + "translation_vector": [ + -0.8075, + 0.6425, + 0.0402 + ] + }, + { + "canting_e": [ + 0.8024901549337139, + -0.0, + 0.003971726517017195 + ], + "canting_n": [ + -1.244240616731202E-5, + 0.6374950534642229, + 0.0025103732841759313 + ], + "translation_vector": [ + 0.8075, + 0.6425, + 0.0402 + ] + }, + { + "canting_e": [ + 0.8024901549337139, + -0.0, + -0.003971726517017195 + ], + "canting_n": [ + -1.244240616731202E-5, + 0.6374950534642229, + -0.0025103732841759313 + ], + "translation_vector": [ + -0.8075, + -0.6425, + 0.0402 + ] + }, + { + "canting_e": [ + 0.8024901549337139, + -0.0, + 0.003971726517017195 + ], + "canting_n": [ + 1.244240616731202E-5, + 0.6374950534642229, + -0.0025103732841759313 + ], + "translation_vector": [ + 0.8075, + -0.6425, + 0.0402 + ] + } + ], + "number_of_facets": 4 + }, + "height": 2.559999942779541, + "heliostat_position": [ + 50.9136467956509, + 6.387012480022248, + 88.59005737 + ], + "initial_orientation": [ + 0.0, + -1.0, + 0.0 + ], + "kinematic_properties": { + "actuators": [ + { + "clockwise_axis_movement": 0, + "increment": 154166.6667, + "initial_angle": 0.005839586, + "initial_stroke_length": 0.075016089, + "max_increment": 69296, + "max_movement_angle": 1.570796327, + "min_increment": 0, + "min_movement_angle": 0.004434882, + "movement_speed": 0, + "offset": 
0.335308, + "offset_shift": 0, + "pivot_radius": 0.338095, + "radius_shift": 0, + "type_axis": "linear" + }, + { + "clockwise_axis_movement": 1, + "increment": 154166.6667, + "initial_angle": 0.939715322, + "initial_stroke_length": 0.078892626, + "max_increment": 75451, + "max_movement_angle": 0.929079209, + "min_increment": 0, + "min_movement_angle": -0.95993, + "movement_speed": 0, + "offset": 0.340771, + "offset_shift": 0, + "pivot_radius": 0.3191, + "radius_shift": 0, + "type_axis": "linear" + } + ], + "concentrator_translation_e": 0.0, + "concentrator_translation_n": 0.175, + "concentrator_translation_u": 0.0, + "joint_translation_e_1": 0.0, + "joint_translation_e_2": 0.0, + "joint_translation_n_1": 0.0, + "joint_translation_n_2": 0.0, + "joint_translation_u_1": 0.0, + "joint_translation_u_2": 0.0 + }, + "renovation": "2021-04-15", + "width": 3.2200000286102295 + }, + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": { + "application/json": { + "expanded": false, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "\n", + "from IPython.display import JSON\n", + "\n", + "# Load the file.\n", + "with open(\n", + " Path(download_path) / \"AA23\" / \"Properties\" / \"AA23-heliostat-properties.json\", \"r\"\n", + ") as f:\n", + " properties_data = json.load(f)\n", + "\n", + "# Display the file (nice formatting).\n", + "JSON(properties_data)" + ] + }, + { + "cell_type": "markdown", + "id": "403852905b2a9013", + "metadata": {}, + "source": [ + "The JSON output contains detailed information on the heliostat, including:\n", + "- **Position:** Its coordinates in the field.\n", + "- **Dimensions:** Its height and width.\n", + "- **Orientation:** Its standard initial orientation (East, North, Up coordinates).\n", + "- **Kinematics:** Properties of the actuators and joint offsets.\n", + "- **Facets:** The number of facets, canting type, and translation vectors from the center.\n", + "- **Renovation:** 
The date the heliostat was last renovated.\n", + "\n", + "Detailed diagrams explaining these parameters are available on the [PAINT website](https://paint-database.org/data)." + ] + }, + { + "cell_type": "markdown", + "id": "be71d0542674d482", + "metadata": {}, + "source": [ + "### Deflectometry Data\n", + "\n", + "Deflectometry data is stored in HDF5 files and contains detailed surface measurements for each heliostat facet. You will find two files in the \"Deflectometry\" folder:\n", + "- A raw deflectometry HDF5 file.\n", + "- A \"filled\" HDF5 file, where missing values were substituted with ideal vectors. **Note:** This filling was performed by the measurement company using proprietary software; it is not part of the PAINT pre-processing.\n", + "\n", + "We can inspect the HDF5 structure using the helper function below:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "89f178d1a7eb0db3", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:43:48.099415Z", + "start_time": "2026-01-23T23:43:47.889543Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Structure of PAINT_tutorial_data/AA23/Deflectometry/AA23-filled-2021-10-13Z09-27-07Z-deflectometry.h5:\n", + "📂 facet1/\n", + " 📄 surface_normals (shape: (80760, 3), type: float32)\n", + " 📄 surface_points (shape: (80760, 3), type: float32)\n", + "📂 facet2/\n", + " 📄 surface_normals (shape: (80760, 3), type: float32)\n", + " 📄 surface_points (shape: (80760, 3), type: float32)\n", + "📂 facet3/\n", + " 📄 surface_normals (shape: (80760, 3), type: float32)\n", + " 📄 surface_points (shape: (80760, 3), type: float32)\n", + "📂 facet4/\n", + " 📄 surface_normals (shape: (80760, 3), type: float32)\n", + " 📄 surface_points (shape: (80760, 3), type: float32)\n" + ] + } + ], + "source": [ + "import h5py\n", + "\n", + "\n", + "# Helper function to print the structure of the HDF5 file.\n", + "def print_hdf5_structure(name: str, obj: h5py.Group | h5py.Dataset) 
-> None:\n", + " \"\"\"\n", + " Print clear summary HDF5 file structures.\n", + "\n", + " Parameters\n", + " ----------\n", + " name : str\n", + " Name of the HDF5 element.\n", + " obj : h5py.Dataset | h5py.Group\n", + " Object to be inspected.\n", + " \"\"\"\n", + " indent = name.count(\"/\") * \" \"\n", + " if isinstance(obj, h5py.Group):\n", + " print(f\"{indent}📂 {name.split('/')[-1]}/\")\n", + " elif isinstance(obj, h5py.Dataset):\n", + " print(\n", + " f\"{indent}📄 {name.split('/')[-1]} (shape: {obj.shape}, type: {obj.dtype})\"\n", + " )\n", + "\n", + "\n", + "filename = (\n", + " Path(download_path)\n", + " / \"AA23\"\n", + " / \"Deflectometry\"\n", + " / \"AA23-filled-2021-10-13Z09-27-07Z-deflectometry.h5\"\n", + ")\n", + "\n", + "with h5py.File(filename, \"r\") as f:\n", + " print(f\"Structure of {filename}:\")\n", + " f.visititems(print_hdf5_structure)" + ] + }, + { + "cell_type": "markdown", + "id": "ba0f532b6760688b", + "metadata": {}, + "source": "This file contains detailed measurements (80,760 points) for each of the four facets. These can be used to recreate heliostat surfaces; however, that is beyond the scope of this tutorial." + }, + { + "cell_type": "markdown", + "id": "e2320d302297779c", + "metadata": {}, + "source": [ + "## Further Data\n", + "\n", + "To conclude, let's look at two additional data types available via PAINT.\n", + "\n", + "### Weather Data\n", + "\n", + "Weather data is available from a station located directly next to the tower in Jülich, as well as from the nearest DWD (German Weather Service) station. 
The code below downloads one month of Jülich data and the complete DWD dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a4f7bd6ef3e58743", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:44:16.722117Z", + "start_time": "2026-01-23T23:43:48.114591Z" + } + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "\n", + "client.get_weather_data(\n", + " data_sources=[\"Jülich\"],\n", + " start_date=datetime.strptime(\"2023-01-01Z00:00:00Z\", mappings.TIME_FORMAT),\n", + " end_date=datetime.strptime(\"2023-02-01Z00:00:00Z\", mappings.TIME_FORMAT),\n", + ")\n", + "client.get_weather_data(data_sources=[\"DWD\"])" + ] + }, + { + "cell_type": "markdown", + "id": "8e648142d124475", + "metadata": {}, + "source": "This weather data is also in HDF5 format, but each source has a slightly different structure. We can first consider the DWD data:" + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "fe1643257ddd3be5", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:44:16.738972Z", + "start_time": "2026-01-23T23:44:16.733410Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Structure of DWD Weather Station ID: 15000:\n", + "📂 15000/\n", + " 📂 cloud_cover_1h/\n", + " 📄 time (shape: (25561,), type: object)\n", + " 📄 value (shape: (25561,), type: float64)\n", + " 📂 global_radiation_10min/\n", + " 📄 time (shape: (153361,), type: object)\n", + " 📄 value (shape: (153361,), type: float64)\n", + " 📂 humidity_1h/\n", + " 📄 time (shape: (25561,), type: object)\n", + " 📄 value (shape: (25561,), type: float64)\n", + " 📂 long_wave_radiation_10min/\n", + " 📄 time (shape: (153361,), type: object)\n", + " 📄 value (shape: (153361,), type: float64)\n", + " 📂 pressure_vapor_1h/\n", + " 📄 time (shape: (25561,), type: object)\n", + " 📄 value (shape: (25561,), type: float64)\n", + " 📂 short_wave_radiation_10min/\n", + " 📄 time (shape: (153361,), type: object)\n", + " 📄 
value (shape: (153361,), type: float64)\n", + " 📂 sunshine_duration_10min/\n", + " 📄 time (shape: (153361,), type: object)\n", + " 📄 value (shape: (153361,), type: float64)\n", + " 📂 visibility_range_1h/\n", + " 📄 time (shape: (25561,), type: object)\n", + " 📄 value (shape: (25561,), type: float64)\n", + " 📂 weather_type_1h/\n", + " 📄 time (shape: (25561,), type: object)\n", + " 📄 value (shape: (25561,), type: float64)\n" + ] + } + ], + "source": [ + "dwd_weather = Path(download_path) / \"Weather\" / \"dwd-weather.h5\"\n", + "\n", + "with h5py.File(dwd_weather, \"r\") as f:\n", + " print(\"Structure of DWD Weather Station ID: 15000:\")\n", + " f.visititems(print_hdf5_structure)" + ] + }, + { + "cell_type": "markdown", + "id": "3f2f48a15f309ec9", + "metadata": {}, + "source": [ + "Here the data is grouped by variable and we can clearly see that there are some variables available at 10min resolution and others at 1h resolution. For each variable we have:\n", + "- A time dataset containing the time stamps for each measurement.\n", + "- The value dataset containing the recorded values.\n", + "\n", + "The Jülich weather data on the other hand is all at the same temporal resolution - a very high one second resolution which results in the following structure:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "d29fd15549c7ad30", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:44:16.755177Z", + "start_time": "2026-01-23T23:44:16.751371Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Structure of the Jülich Weather Data:\n", + "📄 atmospheric_pressure (shape: (2678303,), type: float64)\n", + "📄 diffuse_irradiation (shape: (2678303,), type: float64)\n", + "📄 direct_irradiation (shape: (2678303,), type: float64)\n", + "📄 global_irradiation (shape: (2678303,), type: float64)\n", + "📄 precipitation (shape: (2678303,), type: float64)\n", + "📄 relative_humidity (shape: (2678303,), type: float64)\n", + 
"📄 temperature (shape: (2678303,), type: float64)\n", + "📄 temperature_diffuse (shape: (2678303,), type: float64)\n", + "📄 temperature_direct (shape: (2678303,), type: float64)\n", + "📄 temperature_global (shape: (2678303,), type: float64)\n", + "📄 time (shape: (2678303,), type: object)\n", + "📄 wind_direction (shape: (2678303,), type: float64)\n", + "📄 wind_speed (shape: (2678303,), type: float64)\n" + ] + } + ], + "source": [ + "juelich_weather = Path(download_path) / \"Weather\" / \"2023-01-juelich-weather.h5\"\n", + "\n", + "with h5py.File(juelich_weather, \"r\") as f:\n", + " print(\"Structure of the Jülich Weather Data:\")\n", + " f.visititems(print_hdf5_structure)" + ] + }, + { + "cell_type": "markdown", + "id": "eb1d987b20864b02", + "metadata": {}, + "source": "The Jülich data utilizes a flatter structure. All weather variable datasets contain the values, while a single ``time`` dataset contains the associated timestamps." + }, + { + "cell_type": "markdown", + "id": "a737c94d4eb7606b", + "metadata": {}, + "source": [ + "### Tower Measurements Data\n", + "\n", + "Finally, we will download a small file containing properties of the solar tower itself." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "b5c80db545b8cc51", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:44:17.020886Z", + "start_time": "2026-01-23T23:44:16.771728Z" + } + }, + "outputs": [], + "source": [ + "client.get_tower_measurements()" + ] + }, + { + "cell_type": "markdown", + "id": "4a4e245c031848b9", + "metadata": {}, + "source": "After running this code you should see a new JSON file \"WRI1030197-tower-measurements.json\" in your download folder. 
We can inspect it with the same code as before:" + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "c97107916b2c01e0", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:44:17.035423Z", + "start_time": "2026-01-23T23:44:17.032745Z" + } + }, + "outputs": [ + { + "data": { + "application/json": { + "multi_focus_tower": { + "coordinates": { + "center": [ + 50.91339645088695, + 6.387574436728054, + 138.97975 + ], + "lower_left": [ + 50.913396343415734, + 6.387612841591359, + 135.789 + ], + "lower_right": [ + 50.91339655432385, + 6.3875358896401675, + 135.783 + ], + "upper_left": [ + 50.91339628900999, + 6.387612983329586, + 142.175 + ], + "upper_right": [ + 50.91339661677292, + 6.387536032350528, + 142.172 + ] + }, + "normal_vector": [ + 0, + 1, + 0 + ], + "type": "planar" + }, + "power_plant_properties": { + "ID": "WRI1030197", + "coordinates": [ + 50.913421122592574, + 6.387824755874856, + 87.0 + ] + }, + "receiver": { + "coordinates": { + "center": [ + 50.91341660151, + 6.387825304776098, + 142.22674999999998 + ], + "receiver_inner_lower_left": [ + 50.913406544144294, + 6.387853925842859, + 139.86 + ], + "receiver_inner_lower_right": [ + 50.91340664929648, + 6.387795301404112, + 139.862 + ], + "receiver_inner_upper_left": [ + 50.91342645401072, + 6.387854205350705, + 144.592 + ], + "receiver_inner_upper_right": [ + 50.913426766473705, + 6.3877954119834275, + 144.593 + ], + "receiver_outer_lower_left": [ + 50.913405475562435, + 6.387856291534852, + 139.596 + ], + "receiver_outer_lower_right": [ + 50.91340570660374, + 6.3877922506716125, + 139.592 + ], + "receiver_outer_upper_left": [ + 50.91342727218299, + 6.387856856914401, + 144.805 + ], + "receiver_outer_upper_right": [ + 50.91342773925188, + 6.387792121250146, + 144.82 + ] + }, + "normal_vector": [ + 0.0, + 0.90630779, + -0.42261826 + ], + "type": "convex_cylinder" + }, + "solar_tower_juelich_lower": { + "coordinates": { + "center": [ + 50.91339203683997, + 6.387824563513243, + 
122.8815 + ], + "lower_left": [ + 50.913391839040266, + 6.387886038089168, + 119.268 + ], + "lower_middle": [ + 50.913392106574314, + 6.387824542765121, + 119.269 + ], + "lower_right": [ + 50.9133923375531, + 6.387763217765236, + 119.279 + ], + "upper_left": [ + 50.913391865959426, + 6.387886052532387, + 126.476 + ], + "upper_right": [ + 50.91339215692524, + 6.387763472205384, + 126.506 + ] + }, + "normal_vector": [ + 0, + 1, + 0 + ], + "type": "planar" + }, + "solar_tower_juelich_upper": { + "coordinates": { + "center": [ + 50.91339203683997, + 6.387824563513243, + 130.09766666666667 + ], + "lower_left": [ + 50.913391865959426, + 6.387886052532387, + 126.476 + ], + "lower_right": [ + 50.91339215692524, + 6.387763472205384, + 126.506 + ], + "upper_left": [ + 50.91339196507306, + 6.387885982262168, + 133.684 + ], + "upper_middle": [ + 50.91339190867827, + 6.387824583774971, + 133.71 + ], + "upper_right": [ + 50.91339211259599, + 6.387763286988281, + 133.719 + ] + }, + "normal_vector": [ + 0, + 1, + 0 + ], + "type": "planar" + } + }, + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": { + "application/json": { + "expanded": false, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "# Load the file.\n", + "with open(Path(download_path) / \"WRI1030197-tower-measurements.json\", \"r\") as f:\n", + " tower_data = json.load(f)\n", + "\n", + "# Display the file (nice formatting).\n", + "JSON(tower_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24bf47c0dcd6f428", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-23T23:44:17.102353Z", + "start_time": "2026-01-23T23:44:17.100369Z" + }, + "SqlCellData": { + "variableName$1": "df_sql1" + } + }, + "outputs": [], + "source": "%%sql\n" + }, + { + "cell_type": "markdown", + "id": "8b2e4e8791029079", + "metadata": {}, + "source": [ + "This file contains crucial properties of the solar tower, including:\n", + "- **ID:** The tower 
identifier.\n", + "- **Coordinates:** Latitude, longitude, and elevation.\n", + "- **Targets:** Coordinates for the various calibration targets (corners and center) and the receiver." + ] + }, + { + "cell_type": "markdown", + "id": "7c8663ca1667f0bd", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "This concludes the tutorial. Please check our [documentation](https://paint.readthedocs.io/en/latest/usage.html) for further scripts and information. We hope you enjoy using the PAINT database!" + ] } ], "metadata": { From 816b00a2e66b15ca605329327135e46fd23701fe Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:16:54 +0100 Subject: [PATCH 10/21] update code of conduct --- CODE_OF_CONDUCT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 6de56c5c..4c8e3bc4 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -60,7 +60,7 @@ representative at an online or offline event. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at -max.pargmann@dlr.de. +artist@lists.kit.edu. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the From 40609f7b3b0e7766191dd12b11a22bc40a5f78f7 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:17:09 +0100 Subject: [PATCH 11/21] update readme --- README.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5887090d..82460be6 100644 --- a/README.md +++ b/README.md @@ -71,23 +71,27 @@ The ``PAINT`` repository is structured as shown below: . 
├── html # Code for the paint-database.org website ├── markers # Saved markers for the WRI1030197 power plant in Jülich -├── paint # Python package +├── paint # Python package/ │ ├── data │ ├── preprocessing │ └── util ├── plots # Scripts used to generate plots found in our paper ├── preprocessing-scripts # Scripts used for preprocessing and STAC generation ├── scripts # Scripts highlighting example usage of the data -└── test # Tests for the python package - ├── data - ├── preprocessing - └── util +├── test # Tests for the python package/ +│ ├── data +│ ├── preprocessing +│ └── util +└── tutorials # Interactive notebooks showcasing how to get started with PAINT ``` ### Example usage: In the ``scripts`` folder there are multiple scripts highlighting how ``PAINT`` can be used. Detailed descriptions of these scripts are available via our [Documentation](http://paint.readthedocs.io). +Furthermore, an interactive notebook is available in the ``tutorials`` folder - this is the perfect starting point to +dive into ``PAINT``! + ## How to contribute Check out our [contribution guidelines](CONTRIBUTING.md) if you are interested in contributing to the `PAINT` project :fire:. Please also carefully check our [code of conduct](CODE_OF_CONDUCT.md) :blue_heart:. 
From 726526a2103fff4d16da58de7b1500fbe95c7fa4 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:17:24 +0100 Subject: [PATCH 12/21] update security support --- SECURITY.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 55a3284c..529845b2 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,10 +2,15 @@ ## Supported Versions -We are currently supporting ``PAINT 1.0.0`` +We are currently supporting ``PAINT 2.0.1`` | Version | Supported | -| ------- | ------------------ | +|---------| ------------------ | +| 2.0.1 | :white_check_mark: | +| 2.0.0 | :white_check_mark: | +| 1.0.3 | :white_check_mark: | +| 1.0.2 | :white_check_mark: | +| 1.0.1 | :white_check_mark: | | 1.0.0 | :white_check_mark: | ## Reporting a Vulnerability From ad85d750803468dc7bad950400eee61e741ace53 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:19:55 +0100 Subject: [PATCH 13/21] fix formatting --- tutorials/paint_data_tutorial.ipynb | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/tutorials/paint_data_tutorial.ipynb b/tutorials/paint_data_tutorial.ipynb index df61f2e4..3e802417 100644 --- a/tutorials/paint_data_tutorial.ipynb +++ b/tutorials/paint_data_tutorial.ipynb @@ -1114,19 +1114,6 @@ "plt.show()" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "b71546d8695c85", - "metadata": { - "ExecuteTime": { - "end_time": "2026-01-23T23:43:44.690964Z", - "start_time": "2026-01-23T23:43:44.689560Z" - } - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "id": "801bba9f50c235fa", @@ -1858,22 +1845,6 @@ "JSON(tower_data)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "24bf47c0dcd6f428", - "metadata": { - "ExecuteTime": { - "end_time": "2026-01-23T23:44:17.102353Z", - "start_time": "2026-01-23T23:44:17.100369Z" - }, - "SqlCellData": { - "variableName$1": "df_sql1" - } - }, - "outputs": [], - "source": "%%sql\n" - }, { 
"cell_type": "markdown", "id": "8b2e4e8791029079", From 2692a40bfce404a8d4eb1eaa332a9da2cf0efcd8 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Sat, 24 Jan 2026 01:38:07 +0100 Subject: [PATCH 14/21] fix test coverage --- tests/data/test_dataset.py | 21 +++++++++++++++++++++ tests/test_package.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 tests/test_package.py diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py index a296eafc..c024b9a7 100644 --- a/tests/data/test_dataset.py +++ b/tests/data/test_dataset.py @@ -304,3 +304,24 @@ def test_str_method() -> None: "-The dataset contains 4 items\n" ) assert str(dataset) == expected + + +def test_from_benchmark_fails_with_incorrect_dataframe( + tmp_path: pathlib.Path, +) -> None: + """ + Verifies that from_benchmark raises ValueError when the input DataFrame has incorrect columns. + + Parameters + ---------- + tmp_path : pathlib.Path + Fixture to the temporary folder. + """ + # Create invalid data frame. + invalid_df = pd.DataFrame(columns=["Id", "HeliostatId", "WrongCol"]) + + # Expect a ValueError. + with pytest.raises(ValueError, match="incorrect schema"): + PaintCalibrationDataset.from_benchmark( + benchmark_file=invalid_df, root_dir=tmp_path, item_type="raw_image" + ) diff --git a/tests/test_package.py b/tests/test_package.py new file mode 100644 index 00000000..ef47427a --- /dev/null +++ b/tests/test_package.py @@ -0,0 +1,33 @@ +import importlib +import importlib.metadata +from importlib.metadata import PackageNotFoundError +from unittest.mock import MagicMock + +import pytest + +import paint + + +def test_version_fallback_when_package_missing(monkeypatch: pytest.MonkeyPatch) -> None: + """ + Verifies that __version__ falls back to '0.0.0' if the package is not installed. + + This test mocks importlib.metadata.version to raise PackageNotFoundError, + then reloads the module to trigger the except block. 
+ + Parameters + ---------- + monkeypatch: pytest.MonkeyPatch + MonkeyPatch fixture. + """ + # Create a mock that raises the specific error. + mock_raiser = MagicMock(side_effect=PackageNotFoundError) + + # Apply the mock to the standard library function. + monkeypatch.setattr(importlib.metadata, "version", mock_raiser) + + # Reload the module to force the top-level try/except block to run again. + importlib.reload(paint) + + # Assert the fallback behavior. + assert paint.__version__ == "0.0.0" From 4a9faf853cf75d111585fb72e3e14a7072486a30 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Tue, 27 Jan 2026 11:50:31 +0100 Subject: [PATCH 15/21] fix error loading power plant position --- plots/04_create_distribution_plots.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/plots/04_create_distribution_plots.py b/plots/04_create_distribution_plots.py index 744d54ec..ebd30eb4 100644 --- a/plots/04_create_distribution_plots.py +++ b/plots/04_create_distribution_plots.py @@ -79,9 +79,8 @@ def __init__( self.output_path.mkdir(parents=True, exist_ok=True) self.figure_size = (4, 4) - self.data = self._load_data() - # Power plant position as tensor + # Power plant position as tensor. power_plant_lat, power_plant_lon = convert_gk_to_lat_lon( mappings.GK_RIGHT_BASE, mappings.GK_HEIGHT_BASE ) @@ -92,6 +91,10 @@ def __init__( mappings.POWER_PLANT_ALT, ] ) + + # Load data. + self.data = self._load_data() + # Precompute receiver corners once self.receiver_coordinates = [ convert_wgs84_coordinates_to_local_enu( From 5bb92fbadfe0d3ff5544846de31ebd0c039f51a4 Mon Sep 17 00:00:00 2001 From: Marie Weiel Date: Thu, 29 Jan 2026 11:48:08 +0100 Subject: [PATCH 16/21] polish language, fix typos, etc. 
--- tutorials/paint_data_tutorial.ipynb | 262 +++++++++++++++------------- 1 file changed, 140 insertions(+), 122 deletions(-) diff --git a/tutorials/paint_data_tutorial.ipynb b/tutorials/paint_data_tutorial.ipynb index 3e802417..78fa5cb9 100644 --- a/tutorials/paint_data_tutorial.ipynb +++ b/tutorials/paint_data_tutorial.ipynb @@ -8,13 +8,13 @@ "# PAINT Data Tutorial\n", "\n", "This interactive notebook provides a brief overview of the PAINT database, demonstrating how to:\n", - "- Initialize the STAC Client.\n", + "- Initialize the STAC client.\n", "- Download and inspect metadata.\n", "- Generate calibration data splits.\n", - "- Load calibration data using a data loader.\n", + "- Load calibration data using a dataloader.\n", "- Download and inspect other types of PAINT data.\n", "\n", - "> **Note:** Python executable scripts for each step are available in the \"scripts\" folder of the PAINT GitHub. We recommend using those scripts if you plan to download and process large amounts of PAINT data." + "> **Note:** Python executable scripts for each step are available in the ``scripts`` folder of the [PAINT GitHub](https://github.com/ARTIST-Association/PAINT/tree/main/scripts). We recommend using those scripts if you plan to download and process large amounts of PAINT data." ] }, { @@ -39,15 +39,14 @@ "ExecuteTime": { "end_time": "2026-01-23T23:37:20.553645Z", "start_time": "2026-01-23T23:37:20.547929Z" - }, - "collapsed": true + } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "PAINT is running with version: 2.0.0\n" + "PAINT is running with version: 2.0.1\n" ] } ], @@ -61,7 +60,9 @@ "cell_type": "markdown", "id": "b2a2d5be158a05b9", "metadata": {}, - "source": "We also need to specify a directory where all downloaded data will be saved. **Update the file path below to a location that works for your system:**" + "source": [ + "We also need to specify a directory where all downloaded data will be saved. 
**Update the file path below to a location that works for your system:**" + ] }, { "cell_type": "code", @@ -87,7 +88,7 @@ "source": [ "## Downloading Metadata\n", "\n", - "Before working with the actual PAINT data, we should inspect the metadata to understand what is available. For this tutorial, we will focus on a small subset of heliostats: those with IDs starting with \"AA\". This includes the range from **AA23 to AA51**.\n", + "Before working with the actual PAINT data, we will inspect the metadata to understand what is available. For this tutorial, we will focus on a small subset of heliostats: those with IDs starting with \"AA\". This includes the range from **AA23 to AA51**.\n", "\n", "In the next step, we will:\n", "- Generate a list of heliostats to access.\n", @@ -111,35 +112,35 @@ "output_type": "stream", "text": [ "No collections selected - downloading data for all collections!\n", - "Processing Heliostat Catalogs: 0%| | 0/29 [00:00" ] @@ -1093,7 +1103,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "# Create a 2x2 grid\n", + "# Create a 2x2 grid.\n", "fig, axes = plt.subplots(2, 2, figsize=(10, 10))\n", "\n", "for i in range(4):\n", @@ -1121,7 +1131,7 @@ "source": [ "## Downloading Further Heliostat Data\n", "\n", - "We have spent most of this tutorial looking at the calibration data, since this holds the most potential for machine learning applications. However, it is worth also looking at the other data available. We will download deflectometry data and properties data for the heliostat \"AA23\" in the following and briefly inspect it:" + "We have spent most of this tutorial looking at the calibration data, since this holds the most potential for machine learning applications. However, it is worth considering the other available data as well. 
We will download deflectometry data and properties data for the heliostat \"AA23\" in the following and briefly inspect it:" ] }, { @@ -1139,8 +1149,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Processing Items in Heliostat AA23-heliostat-catalog: 100%|██████████| 1/1 [00:02<00:00, 2.21s/Item]\n", - "Processing Items in Heliostat AA23-heliostat-catalog: 100%|██████████| 1/1 [00:00<00:00, 13.39Item/s]\n" + "Processing Items in Heliostat AA23-heliostat-catalog: 100%|█| 1/1 [00:02<00:00, 2.\n", + "Processing Items in Heliostat AA23-heliostat-catalog: 100%|█| 1/1 [00:00<00:00, 7.\n" ] } ], @@ -1158,9 +1168,9 @@ "source": [ "### Properties Data\n", "\n", - "There should now be a new folder in your download path called \"AA23\". Within this folder there will be two more subfolders, \"Deflectometry\" and \"Properties\".\n", + "There should now be a new folder in your download path called ``AA23``. Within this folder there will be two more subfolders, ``Deflectometry`` and ``Properties``.\n", "\n", - "Let's first look at the Properties data:" + "Let's first look at the properties data:" ] }, { @@ -1183,11 +1193,11 @@ { "canting_e": [ 0.8024901549337139, - -0.0, + 0, -0.003971726517017195 ], "canting_n": [ - 1.244240616731202E-5, + 1.244240616731202e-05, 0.6374950534642229, 0.0025103732841759313 ], @@ -1200,11 +1210,11 @@ { "canting_e": [ 0.8024901549337139, - -0.0, + 0, 0.003971726517017195 ], "canting_n": [ - -1.244240616731202E-5, + -1.244240616731202e-05, 0.6374950534642229, 0.0025103732841759313 ], @@ -1217,11 +1227,11 @@ { "canting_e": [ 0.8024901549337139, - -0.0, + 0, -0.003971726517017195 ], "canting_n": [ - -1.244240616731202E-5, + -1.244240616731202e-05, 0.6374950534642229, -0.0025103732841759313 ], @@ -1234,11 +1244,11 @@ { "canting_e": [ 0.8024901549337139, - -0.0, + 0, 0.003971726517017195 ], "canting_n": [ - 1.244240616731202E-5, + 1.244240616731202e-05, 0.6374950534642229, -0.0025103732841759313 ], @@ -1258,9 +1268,9 @@ 88.59005737 ], 
"initial_orientation": [ - 0.0, - -1.0, - 0.0 + 0, + -1, + 0 ], "kinematic_properties": { "actuators": [ @@ -1297,15 +1307,15 @@ "type_axis": "linear" } ], - "concentrator_translation_e": 0.0, + "concentrator_translation_e": 0, "concentrator_translation_n": 0.175, - "concentrator_translation_u": 0.0, - "joint_translation_e_1": 0.0, - "joint_translation_e_2": 0.0, - "joint_translation_n_1": 0.0, - "joint_translation_n_2": 0.0, - "joint_translation_u_1": 0.0, - "joint_translation_u_2": 0.0 + "concentrator_translation_u": 0, + "joint_translation_e_1": 0, + "joint_translation_e_2": 0, + "joint_translation_n_1": 0, + "joint_translation_n_2": 0, + "joint_translation_u_1": 0, + "joint_translation_u_2": 0 }, "renovation": "2021-04-15", "width": 3.2200000286102295 @@ -1345,12 +1355,12 @@ "metadata": {}, "source": [ "The JSON output contains detailed information on the heliostat, including:\n", - "- **Position:** Its coordinates in the field.\n", - "- **Dimensions:** Its height and width.\n", - "- **Orientation:** Its standard initial orientation (East, North, Up coordinates).\n", - "- **Kinematics:** Properties of the actuators and joint offsets.\n", - "- **Facets:** The number of facets, canting type, and translation vectors from the center.\n", - "- **Renovation:** The date the heliostat was last renovated.\n", + "- **Position:** Its coordinates in the field\n", + "- **Dimensions:** Its height and width\n", + "- **Orientation:** Its standard initial orientation (East, North, Up coordinates)\n", + "- **Kinematics:** Properties of the actuators and joint offsets\n", + "- **Facets:** The number of facets, canting type, and translation vectors from the center\n", + "- **Renovation:** The date the heliostat was last renovated\n", "\n", "Detailed diagrams explaining these parameters are available on the [PAINT website](https://paint-database.org/data)." 
] @@ -1362,9 +1372,9 @@ "source": [ "### Deflectometry Data\n", "\n", - "Deflectometry data is stored in HDF5 files and contains detailed surface measurements for each heliostat facet. You will find two files in the \"Deflectometry\" folder:\n", - "- A raw deflectometry HDF5 file.\n", - "- A \"filled\" HDF5 file, where missing values were substituted with ideal vectors. **Note:** This filling was performed by the measurement company using proprietary software; it is not part of the PAINT pre-processing.\n", + "Deflectometry data is stored in HDF5 files and contains detailed surface measurements for each heliostat facet. You will find two files in the ``Deflectometry`` folder:\n", + "- A raw deflectometry HDF5 file\n", + "- A \"filled\" HDF5 file, where missing values were substituted with ideal vectors. **Note:** This filling was performed by the measurement company using proprietary software; it is not part of the ``PAINT`` pre-processing.\n", "\n", "We can inspect the HDF5 structure using the helper function below:" ] @@ -1407,7 +1417,7 @@ "# Helper function to print the structure of the HDF5 file.\n", "def print_hdf5_structure(name: str, obj: h5py.Group | h5py.Dataset) -> None:\n", " \"\"\"\n", - " Print clear summary HDF5 file structures.\n", + " Print clear summary of HDF5 file structures.\n", "\n", " Parameters\n", " ----------\n", @@ -1441,7 +1451,9 @@ "cell_type": "markdown", "id": "ba0f532b6760688b", "metadata": {}, - "source": "This file contains detailed measurements (80,760 points) for each of the four facets. These can be used to recreate heliostat surfaces; however, that is beyond the scope of this tutorial." + "source": [ + "This file contains detailed measurements (80,760 points) for each of the four facets. These can be used to recreate heliostat surfaces; however, that is beyond the scope of this tutorial." 
+ ] }, { "cell_type": "markdown", @@ -1483,7 +1495,9 @@ "cell_type": "markdown", "id": "8e648142d124475", "metadata": {}, - "source": "This weather data is also in HDF5 format, but each source has a slightly different structure. We can first consider the DWD data:" + "source": [ + "This weather data is also in HDF5 format, but each source has a slightly different structure. We can first consider the DWD data:" + ] }, { "cell_type": "code", @@ -1500,7 +1514,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Structure of DWD Weather Station ID: 15000:\n", + "Structure of DWD Weather Station (ID 15000):\n", "📂 15000/\n", " 📂 cloud_cover_1h/\n", " 📄 time (shape: (25561,), type: object)\n", @@ -1536,7 +1550,7 @@ "dwd_weather = Path(download_path) / \"Weather\" / \"dwd-weather.h5\"\n", "\n", "with h5py.File(dwd_weather, \"r\") as f:\n", - " print(\"Structure of DWD Weather Station ID: 15000:\")\n", + " print(\"Structure of DWD Weather Station (ID 15000):\")\n", " f.visititems(print_hdf5_structure)" ] }, @@ -1546,10 +1560,10 @@ "metadata": {}, "source": [ "Here the data is grouped by variable and we can clearly see that there are some variables available at 10min resolution and others at 1h resolution. 
For each variable we have:\n", - "- A time dataset containing the time stamps for each measurement.\n", - "- The value dataset containing the recorded values.\n", + "- A time dataset containing the time stamps for each measurement\n", + "- The value dataset containing the recorded values\n", "\n", - "The Jülich weather data on the other hand is all at the same temporal resolution - a very high one second resolution which results in the following structure:" + "The Jülich weather data on the other hand is all at the same temporal resolution - a very high 1s resolution which results in the following structure:" ] }, { @@ -1567,7 +1581,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Structure of the Jülich Weather Data:\n", + "Structure of the Jülich weather data:\n", "📄 atmospheric_pressure (shape: (2678303,), type: float64)\n", "📄 diffuse_irradiation (shape: (2678303,), type: float64)\n", "📄 direct_irradiation (shape: (2678303,), type: float64)\n", @@ -1588,7 +1602,7 @@ "juelich_weather = Path(download_path) / \"Weather\" / \"2023-01-juelich-weather.h5\"\n", "\n", "with h5py.File(juelich_weather, \"r\") as f:\n", - " print(\"Structure of the Jülich Weather Data:\")\n", + " print(\"Structure of the Jülich weather data:\")\n", " f.visititems(print_hdf5_structure)" ] }, @@ -1596,7 +1610,9 @@ "cell_type": "markdown", "id": "eb1d987b20864b02", "metadata": {}, - "source": "The Jülich data utilizes a flatter structure. All weather variable datasets contain the values, while a single ``time`` dataset contains the associated timestamps." + "source": [ + "The Jülich data utilizes a flatter structure. All weather variable datasets contain the values, while a single ``time`` dataset contains the associated timestamps." 
+ ] }, { "cell_type": "markdown", @@ -1627,7 +1643,9 @@ "cell_type": "markdown", "id": "4a4e245c031848b9", "metadata": {}, - "source": "After running this code you should see a new JSON file \"WRI1030197-tower-measurements.json\" in your download folder. We can inspect it with the same code as before:" + "source": [ + "After running this code you should see a new JSON file ``WRI1030197-tower-measurements.json`` in your download folder. We can inspect it with the same code as before:" + ] }, { "cell_type": "code", @@ -1683,7 +1701,7 @@ "coordinates": [ 50.913421122592574, 6.387824755874856, - 87.0 + 87 ] }, "receiver": { @@ -1735,7 +1753,7 @@ ] }, "normal_vector": [ - 0.0, + 0, 0.90630779, -0.42261826 ], @@ -1851,9 +1869,9 @@ "metadata": {}, "source": [ "This file contains crucial properties of the solar tower, including:\n", - "- **ID:** The tower identifier.\n", - "- **Coordinates:** Latitude, longitude, and elevation.\n", - "- **Targets:** Coordinates for the various calibration targets (corners and center) and the receiver." 
+ "- **ID:** The tower identifier\n", + "- **Coordinates:** Latitude, longitude, and elevation\n", + "- **Targets:** Coordinates for the various calibration targets (corners and center) and the receiver" ] }, { @@ -1869,21 +1887,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.12.3" } }, "nbformat": 4, From 7860dad4298efcc18b6538afe671f15fb8655ecd Mon Sep 17 00:00:00 2001 From: Kaleb Phipps <58810809+kalebphipps@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:48:23 +0100 Subject: [PATCH 17/21] Apply suggestions from code review Co-authored-by: Marie Weiel --- docs/dataset.rst | 2 +- docs/usage.rst | 4 ++-- paint/__init__.py | 2 +- paint/data/dataset_splits.py | 2 +- plots/04_create_distribution_plots.py | 2 +- tests/data/test_dataset.py | 4 ++-- tests/test_package.py | 6 +++--- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/dataset.rst b/docs/dataset.rst index eeb776f8..1324bd27 100644 --- a/docs/dataset.rst +++ b/docs/dataset.rst @@ -32,7 +32,7 @@ There are three ways of creating a ``PaintCalibrationDataset``: 2. **From a benchmark file** - You can also create the dataset from a benchmark file (see the :information on dataset splits:`splitter` for details). In this case, the ``benchmark_file``, containing information on the train, validation, and test split must be provided: + You can also create the dataset from a benchmark file (see the :information on dataset splits:`splitter` for details). In this case, the ``benchmark_file`` containing information on the train, validation, and test split must be provided: .. 
code-block:: python diff --git a/docs/usage.rst b/docs/usage.rst index 6d3dd42f..873699eb 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -6,10 +6,10 @@ How To Use To get started with ``PAINT`` we have included a interactive notebook, which is available here: https://github.com/ARTIST-Association/PAINT/blob/main/tutorials/paint_data_tutorial.ipynb. This tutorial provides an interactive introduction to the PAINT database, demonstrating how to: -- Initialize the STAC Client. +- Initialize the STAC client. - Download and inspect metadata. - Generate calibration data splits. -- Load calibration data using a data loader. +- Load calibration data using a dataloader. - Download and inspect other types of PAINT data. To run the tutorial make sure you install the tutorial dependencies, i.e.: diff --git a/paint/__init__.py b/paint/__init__.py index 003bcbd7..33c01843 100644 --- a/paint/__init__.py +++ b/paint/__init__.py @@ -6,7 +6,7 @@ try: __version__ = version("paint-csp") except PackageNotFoundError: - # Allows running from source without installation + # Allows running from source without installation. __version__ = "0.0.0" __all__ = ["PAINT_ROOT", "preprocessing", "util", "__version__"] diff --git a/paint/data/dataset_splits.py b/paint/data/dataset_splits.py index 42aed11f..2436ef87 100644 --- a/paint/data/dataset_splits.py +++ b/paint/data/dataset_splits.py @@ -461,7 +461,7 @@ def get_dataset_splits( Returns ------- pd.DataFrame - Data frame containing information on the dataset splits. + Dataframe containing information on the dataset splits. """ allowed_split_types = [ mappings.AZIMUTH_SPLIT, diff --git a/plots/04_create_distribution_plots.py b/plots/04_create_distribution_plots.py index ebd30eb4..32dbf916 100644 --- a/plots/04_create_distribution_plots.py +++ b/plots/04_create_distribution_plots.py @@ -95,7 +95,7 @@ def __init__( # Load data. self.data = self._load_data() - # Precompute receiver corners once + # Precompute receiver corners once. 
self.receiver_coordinates = [ convert_wgs84_coordinates_to_local_enu( torch.tensor(coords), self.power_plant_position diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py index c024b9a7..4892b20b 100644 --- a/tests/data/test_dataset.py +++ b/tests/data/test_dataset.py @@ -192,7 +192,7 @@ def test_from_benchmark( assert len(test) == 4 assert len(val) == 3 - # Test with Pandas data frame as input instead of file. + # Test with Pandas dataframe as input instead of file. benchmark_df = pd.read_csv( pathlib.Path(PAINT_ROOT) / "tests" @@ -310,7 +310,7 @@ def test_from_benchmark_fails_with_incorrect_dataframe( tmp_path: pathlib.Path, ) -> None: """ - Verifies that from_benchmark raises ValueError when the input DataFrame has incorrect columns. + Verify that ``from_benchmark`` raises ``ValueError`` when the input dataframe has incorrect columns. Parameters ---------- diff --git a/tests/test_package.py b/tests/test_package.py index ef47427a..fa309df8 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -10,14 +10,14 @@ def test_version_fallback_when_package_missing(monkeypatch: pytest.MonkeyPatch) -> None: """ - Verifies that __version__ falls back to '0.0.0' if the package is not installed. + Verify that ``__version__`` falls back to '0.0.0' if the package is not installed. - This test mocks importlib.metadata.version to raise PackageNotFoundError, + This test mocks ``importlib.metadata.version`` to raise ``PackageNotFoundError``, then reloads the module to trigger the except block. Parameters ---------- - monkeypatch: pytest.MonkeyPatch + monkeypatch : pytest.MonkeyPatch MonkeyPatch fixture. """ # Create a mock that raises the specific error. 
From 600ab414ae846974d03588e3e65c2aa9f497922f Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Fri, 30 Jan 2026 12:10:59 +0100 Subject: [PATCH 18/21] update version --- .github/workflows/post-coverage.yml | 4 ++++ docs/conf.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/post-coverage.yml diff --git a/.github/workflows/post-coverage.yml b/.github/workflows/post-coverage.yml new file mode 100644 index 00000000..b340c3d4 --- /dev/null +++ b/.github/workflows/post-coverage.yml @@ -0,0 +1,4 @@ +name: post-coverage.yml +on: + +jobs: diff --git a/docs/conf.py b/docs/conf.py index 5c7ae1da..10181d5a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -10,7 +10,7 @@ project = "PAINT" copyright = f"{datetime.now().year}, ARTIST consortium" author = "ARTIST Consortium" -release = "2.0.0" +release = "2.0.1" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration From 1839d1c969dfb84fe89f0f8b98a7711320293e6a Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Fri, 30 Jan 2026 12:11:26 +0100 Subject: [PATCH 19/21] update workflow to deal with forks --- .github/workflows/python-test.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 3c0bbdce..3d3805f9 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -20,7 +20,6 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 with: - ref: ${{ github.head_ref }} fetch-depth: 0 - name: Install uv, set the python version, and enable cache @@ -40,11 +39,19 @@ jobs: coverage report -m --format markdown > cov_report.txt coverage xml - - name: Post coverage report to PR + - name: Save PR number if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest' - uses: marocchino/sticky-pull-request-comment@v2 + run: echo ${{ 
github.event.number }} > pr_number.txt + + - name: Save coverage report and PR number + if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest' + uses: actions/upload-artifact@v4 with: - path: cov_report.txt + name: coverage-report + path: | + cov_report.txt + pr_number.txt + retention-days: 1 - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v4.0.1 From d88032717fbd2b3d52c01ba4870aa4e389b11e99 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Fri, 30 Jan 2026 12:11:54 +0100 Subject: [PATCH 20/21] add workflow to post coverage to pr --- .github/workflows/post-coverage.yml | 33 ++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/.github/workflows/post-coverage.yml b/.github/workflows/post-coverage.yml index b340c3d4..6aab4b6e 100644 --- a/.github/workflows/post-coverage.yml +++ b/.github/workflows/post-coverage.yml @@ -1,4 +1,35 @@ -name: post-coverage.yml +name: Post coverage report to PR + on: + workflow_run: + workflows: ["Python test"] + types: + - completed + +permissions: + pull-requests: write + actions: read jobs: + comment: + runs-on: ubuntu-latest + if: > + github.event.workflow_run.event == 'pull_request' && + github.event.workflow_run.conclusion == 'success' + steps: + - name: Download coverage artifact + uses: actions/download-artifact@v4 + with: + name: coverage-report + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Get PR number + id: pr_number + run: echo "number=$(cat pr_number.txt)" >> $GITHUB_OUTPUT + + - name: Post coverage report to PR + uses: marocchino/sticky-pull-request-comment@v2 + with: + path: cov_report.txt + number: ${{ steps.pr_number.outputs.number }} From c8f7ffd45860b87c152ee9967511fba1e24f7673 Mon Sep 17 00:00:00 2001 From: Kaleb Phipps Date: Fri, 30 Jan 2026 12:14:44 +0100 Subject: [PATCH 21/21] final fixes --- tutorials/paint_data_tutorial.ipynb | 1176 +++++++++++++-------------- 1 file changed, 588 
insertions(+), 588 deletions(-) diff --git a/tutorials/paint_data_tutorial.ipynb b/tutorials/paint_data_tutorial.ipynb index 78fa5cb9..20af5783 100644 --- a/tutorials/paint_data_tutorial.ipynb +++ b/tutorials/paint_data_tutorial.ipynb @@ -5,16 +5,16 @@ "id": "23d8c4e94a4b55f4", "metadata": {}, "source": [ - "# PAINT Data Tutorial\n", + "# ``PAINT`` Data Tutorial\n", "\n", - "This interactive notebook provides a brief overview of the PAINT database, demonstrating how to:\n", + "This interactive notebook provides a brief overview of the ``PAINT`` database, demonstrating how to:\n", "- Initialize the STAC client.\n", "- Download and inspect metadata.\n", "- Generate calibration data splits.\n", "- Load calibration data using a dataloader.\n", - "- Download and inspect other types of PAINT data.\n", + "- Download and inspect other types of ``PAINT`` data.\n", "\n", - "> **Note:** Python executable scripts for each step are available in the ``scripts`` folder of the [PAINT GitHub](https://github.com/ARTIST-Association/PAINT/tree/main/scripts). We recommend using those scripts if you plan to download and process large amounts of PAINT data." + "> **Note:** Python executable scripts for each step are available in the ``scripts`` folder of the [PAINT GitHub](https://github.com/ARTIST-Association/PAINT/tree/main/scripts). We recommend using those scripts if you plan to download and process large amounts of ``PAINT`` data." 
] }, { @@ -33,28 +33,28 @@ }, { "cell_type": "code", - "execution_count": 1, "id": "initial_id", "metadata": { "ExecuteTime": { - "end_time": "2026-01-23T23:37:20.553645Z", - "start_time": "2026-01-23T23:37:20.547929Z" + "end_time": "2026-01-30T11:12:26.352122Z", + "start_time": "2026-01-30T11:12:26.344571Z" } }, + "source": [ + "import paint\n", + "\n", + "print(f\"``PAINT`` is running with version: {paint.__version__}\")" + ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "PAINT is running with version: 2.0.1\n" + "``PAINT`` is running with version: 2.0.0\n" ] } ], - "source": [ - "import paint\n", - "\n", - "print(f\"PAINT is running with version: {paint.__version__}\")" - ] + "execution_count": 1 }, { "cell_type": "markdown", @@ -66,20 +66,20 @@ }, { "cell_type": "code", - "execution_count": 2, "id": "ee4635a10ae20007", "metadata": { "ExecuteTime": { - "end_time": "2026-01-23T23:37:20.634385Z", - "start_time": "2026-01-23T23:37:20.632831Z" + "end_time": "2026-01-30T11:12:26.361699Z", + "start_time": "2026-01-30T11:12:26.359796Z" } }, - "outputs": [], "source": [ "from pathlib import Path\n", "\n", "download_path = Path(\"./PAINT_tutorial_data\")" - ] + ], + "outputs": [], + "execution_count": 2 }, { "cell_type": "markdown", @@ -88,7 +88,7 @@ "source": [ "## Downloading Metadata\n", "\n", - "Before working with the actual PAINT data, we will inspect the metadata to understand what is available. For this tutorial, we will focus on a small subset of heliostats: those with IDs starting with \"AA\". This includes the range from **AA23 to AA51**.\n", + "Before working with the actual ``PAINT`` data, we will inspect the metadata to understand what is available. For this tutorial, we will focus on a small subset of heliostats: those with IDs starting with \"AA\". 
This includes the range from **AA23 to AA51**.\n", "\n", "In the next step, we will:\n", "- Generate a list of heliostats to access.\n", @@ -98,38 +98,13 @@ }, { "cell_type": "code", - "execution_count": 3, "id": "95276b7d0af455cc", "metadata": { "ExecuteTime": { - "end_time": "2026-01-23T23:38:14.506706Z", - "start_time": "2026-01-23T23:37:20.642495Z" + "end_time": "2026-01-30T11:13:16.335654Z", + "start_time": "2026-01-30T11:12:26.445975Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No collections selected - downloading data for all collections!\n", - "Processing Heliostat Catalogs: 14%|█▍ | 4/29 [00:03<00:33, 1.35s/ catalog]The child with ID AA37-deflectometry-collection is not available, data for this child cannot be accessed.\n", - "Processing Heliostat Catalogs: 17%|█▋ | 5/29 [00:04<00:28, 1.17s/ catalog]The child with ID AA40-deflectometry-collection is not available, data for this child cannot be accessed.\n", - "Processing Heliostat Catalogs: 45%|████ | 13/29 [00:08<00:09, 1.73 catalog/s]The child with ID AA42-deflectometry-collection is not available, data for this child cannot be accessed.\n", - "Processing Heliostat Catalogs: 48%|████▎ | 14/29 [00:09<00:07, 1.91 catalog/s]The child with ID AA41-deflectometry-collection is not available, data for this child cannot be accessed.\n", - "Processing Heliostat Catalogs: 55%|████▉ | 16/29 [00:09<00:04, 3.17 catalog/s]The child with ID AA43-deflectometry-collection is not available, data for this child cannot be accessed.\n", - "The child with ID AA48-deflectometry-collection is not available, data for this child cannot be accessed.\n", - "Processing Heliostat Catalogs: 69%|██████▏ | 20/29 [00:09<00:01, 6.40 catalog/s]The child with ID AA47-deflectometry-collection is not available, data for this child cannot be accessed.\n", - "Processing Heliostat Catalogs: 76%|██████▊ | 22/29 [00:09<00:00, 7.38 catalog/s]The child with ID AA46-deflectometry-collection is not 
available, data for this child cannot be accessed.\n", - "The child with ID AA45-deflectometry-collection is not available, data for this child cannot be accessed.\n", - "Processing Heliostat Catalogs: 97%|████████▋| 28/29 [00:09<00:00, 12.58 catalog/s]The child with ID AA51-deflectometry-collection is not available, data for this child cannot be accessed.\n", - "Processing Heliostat Catalogs: 100%|█████████| 29/29 [00:09<00:00, 2.91 catalog/s]\n", - "Processing Heliostat Catalogs: 0%| | 0/29 [00:00\n", "