35 changes: 35 additions & 0 deletions .github/workflows/post-coverage.yml
@@ -0,0 +1,35 @@
name: Post coverage report to PR

on:
  workflow_run:
    workflows: ["Python test"]
    types:
      - completed

permissions:
  pull-requests: write
  actions: read

jobs:
  comment:
    runs-on: ubuntu-latest
    if: >
      github.event.workflow_run.event == 'pull_request' &&
      github.event.workflow_run.conclusion == 'success'
    steps:
      - name: Download coverage artifact
        uses: actions/download-artifact@v4
        with:
          name: coverage-report
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Get PR number
        id: pr_number
        run: echo "number=$(cat pr_number.txt)" >> $GITHUB_OUTPUT

      - name: Post coverage report to PR
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          path: cov_report.txt
          number: ${{ steps.pr_number.outputs.number }}
15 changes: 11 additions & 4 deletions .github/workflows/python-test.yml
@@ -20,7 +20,6 @@ jobs:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.head_ref }}
          fetch-depth: 0

      - name: Install uv, set the python version, and enable cache
@@ -40,11 +39,19 @@
          coverage report -m --format markdown > cov_report.txt
          coverage xml

      - name: Post coverage report to PR
      - name: Save PR number
        if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest'
        uses: marocchino/sticky-pull-request-comment@v2
        run: echo ${{ github.event.number }} > pr_number.txt

      - name: Save coverage report and PR number
        if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest'
        uses: actions/upload-artifact@v4
        with:
          path: cov_report.txt
          name: coverage-report
          path: |
            cov_report.txt
            pr_number.txt
          retention-days: 1

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v4.0.1
2 changes: 1 addition & 1 deletion CODE_OF_CONDUCT.md
@@ -60,7 +60,7 @@ representative at an online or offline event.

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
max.pargmann@dlr.de.
artist@lists.kit.edu.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
14 changes: 9 additions & 5 deletions README.md
@@ -71,23 +71,27 @@ The ``PAINT`` repository is structured as shown below:
.
├── html                     # Code for the paint-database.org website
├── markers                  # Saved markers for the WRI1030197 power plant in Jülich
├── paint                    # Python package
├── paint                    # Python package/
│   ├── data
│   ├── preprocessing
│   └── util
├── plots                    # Scripts used to generate plots found in our paper
├── preprocessing-scripts    # Scripts used for preprocessing and STAC generation
├── scripts                  # Scripts highlighting example usage of the data
└── test                     # Tests for the python package
    ├── data
    ├── preprocessing
    └── util
├── test                     # Tests for the python package/
│   ├── data
│   ├── preprocessing
│   └── util
└── tutorials                # Interactive notebooks showcasing how to get started with PAINT
```

### Example usage:
In the ``scripts`` folder there are multiple scripts highlighting how ``PAINT`` can be used. Detailed
descriptions of these scripts are available via our [Documentation](http://paint.readthedocs.io).

Furthermore, an interactive notebook is available in the ``tutorials`` folder - this is the perfect starting point to
dive into ``PAINT``!

## How to contribute
Check out our [contribution guidelines](CONTRIBUTING.md) if you are interested in contributing to the `PAINT` project :fire:.
Please also carefully check our [code of conduct](CODE_OF_CONDUCT.md) :blue_heart:.
9 changes: 7 additions & 2 deletions SECURITY.md
@@ -2,10 +2,15 @@

## Supported Versions

We are currently supporting ``PAINT 1.0.0``
We are currently supporting ``PAINT 2.0.1``

| Version | Supported |
| ------- | ------------------ |
|---------| ------------------ |
| 2.0.1 | :white_check_mark: |
| 2.0.0 | :white_check_mark: |
| 1.0.3 | :white_check_mark: |
| 1.0.2 | :white_check_mark: |
| 1.0.1 | :white_check_mark: |
| 1.0.0 | :white_check_mark: |

## Reporting a Vulnerability
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -10,7 +10,7 @@
project = "PAINT"
copyright = f"{datetime.now().year}, ARTIST consortium"
author = "ARTIST Consortium"
release = "2.0.0"
release = "2.0.1"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
2 changes: 1 addition & 1 deletion docs/dataset.rst
@@ -32,7 +32,7 @@ There are three ways of creating a ``PaintCalibrationDataset``:

2. **From a benchmark file**

You can also create the dataset from a benchmark file (see above). In this case, the ``benchmark_file`` must be provided:
You can also create the dataset from a benchmark file (see :doc:`splitter` for information on dataset splits). In this case, the ``benchmark_file`` containing information on the train, validation, and test splits must be provided:

.. code-block:: python

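A minimal sketch of such a call, assuming ``PaintCalibrationDataset`` is importable from ``paint.data.dataset`` and using the ``from_benchmark`` signature from ``paint/data/dataset.py`` (the paths and the CSV file name are placeholders):

.. code-block:: python

    from paint.data.dataset import PaintCalibrationDataset

    # Placeholder paths -- adjust to your local setup.
    train, test, val = PaintCalibrationDataset.from_benchmark(
        benchmark_file="benchmark_splits.csv",
        root_dir="data/datasets",
        item_type="raw_image",
        download=True,
    )
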
4 changes: 3 additions & 1 deletion docs/splitter.rst
@@ -34,7 +34,7 @@ Supported Splits
Again, the goal is to create diverse and challenging training and validation datasets.

- **Balanced Split:**
  This method uses KMeans clustering on azimuth and elevation features to ensure a stratified selection. The process includes:
  This method uses k-means clustering on azimuth and elevation features to ensure a stratified selection (a sketch follows below). The process includes:

  - Clustering the data into ``validation_size`` clusters.
  - Selecting one data point per cluster for the validation split.
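
A minimal sketch of this balanced selection, assuming scikit-learn's ``KMeans`` and hypothetical ``Azimuth``/``Elevation`` column names (not necessarily the exact implementation in ``paint/data/dataset_splits.py``):

.. code-block:: python

    import pandas as pd
    from sklearn.cluster import KMeans

    def balanced_validation_split(data: pd.DataFrame, validation_size: int) -> pd.DataFrame:
        """Select one sample per (azimuth, elevation) cluster for the validation split."""
        features = data[["Azimuth", "Elevation"]].to_numpy()
        # Cluster the data into ``validation_size`` clusters.
        labels = KMeans(n_clusters=validation_size, n_init="auto").fit_predict(features)
        # Select one data point per cluster for the validation split.
        return data.assign(cluster=labels).groupby("cluster").head(1)
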
@@ -76,3 +76,5 @@ To generate the splits, simply call the ``get_dataset_splits()`` function:
    azimuth_splits = splitter.get_dataset_splits(
        split_type="azimuth", training_size=10, validation_size=30
    )

This returns a ``pd.DataFrame`` containing information on the splits, i.e., which samples belong to which split, and also saves this information as a CSV file.
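
With the dataframe support added in this PR, the returned splits can also be passed straight to ``PaintCalibrationDataset.from_benchmark`` without the CSV round trip; a sketch (the root directory is a placeholder):

.. code-block:: python

    train, test, val = PaintCalibrationDataset.from_benchmark(
        benchmark_file=azimuth_splits,  # A pd.DataFrame instead of a CSV path.
        root_dir="data/datasets",       # Placeholder path.
        item_type="raw_image",
        download=True,
    )
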
18 changes: 17 additions & 1 deletion docs/usage.rst
@@ -2,7 +2,23 @@

How To Use
==========
Here, you can find an overview of how to use ``PAINT``.

To get started with ``PAINT``, we have included an interactive notebook, which is available here: https://github.com/ARTIST-Association/PAINT/blob/main/tutorials/paint_data_tutorial.ipynb.

This tutorial provides an interactive introduction to the PAINT database, demonstrating how to:

- Initialize the STAC client (sketched below).
- Download and inspect metadata.
- Generate calibration data splits.
- Load calibration data using a dataloader.
- Download and inspect other types of PAINT data.
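
A minimal sketch of the first step, assuming the client class in ``paint/data/stac_client.py`` is exposed as ``StacClient`` (a hypothetical name here) and using the ``output_dir`` parameter visible in this diff:

.. code-block:: python

    from paint.data.stac_client import StacClient  # Hypothetical class name.

    # The output directory is created if it does not exist yet.
    client = StacClient(output_dir="data/paint_downloads")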

To run the tutorial, make sure you install the tutorial dependencies, i.e.:

.. code-block:: console

    $ pip install "paint-csp[tutorial]"

Most of the concepts from the interactive tutorial are also covered in the documentation and associated scripts listed below:

.. toctree::
    :maxdepth: 1
8 changes: 7 additions & 1 deletion paint/__init__.py
@@ -1,6 +1,12 @@
import os
from importlib.metadata import PackageNotFoundError, version

PAINT_ROOT = f"{os.sep}".join(__file__.split(os.sep)[:-2])
"""Reference to the root directory of ARTIST."""
try:
    __version__ = version("paint-csp")
except PackageNotFoundError:
    # Allows running from source without installation.
    __version__ = "0.0.0"

__all__ = ["PAINT_ROOT", "preprocessing", "util"]
__all__ = ["PAINT_ROOT", "preprocessing", "util", "__version__"]
35 changes: 26 additions & 9 deletions paint/data/dataset.py
@@ -142,7 +142,7 @@ def _check_accepted_keys(key: str) -> None:
    @classmethod
    def from_benchmark(
        cls,
        benchmark_file: str | Path,
        benchmark_file: str | Path | pd.DataFrame,
        root_dir: str | Path,
        item_type: str,
        download: bool = False,
@@ -157,8 +157,8 @@

        Parameters
        ----------
        benchmark_file : str | Path
            Path to the file containing the benchmark information.
        benchmark_file : str | Path | pd.DataFrame
            Path to the file containing the benchmark information, or a dataframe containing this information.
        root_dir : str | Path
            Directory where the dataset will be stored.
        item_type : str
@@ -182,12 +182,29 @@
            Validation dataset.
        """
        root_dir = Path(root_dir)
        log.info(
            f"Begining the process of generating benchmark datasets. The file used to generate the benchmarks is:\n"
            f" {benchmark_file}!"
        )
        # Load the splits data.
        splits = pd.read_csv(benchmark_file)
        if not isinstance(benchmark_file, pd.DataFrame):
            log.info(
                f"Beginning the process of generating benchmark datasets. The file used to generate the benchmarks is:\n"
                f" {benchmark_file}!"
            )
            # Load the splits data.
            splits = pd.read_csv(benchmark_file)
        else:
            log.info(
                "Beginning the process of generating benchmark datasets using the provided pandas dataframe!"
            )
            benchmark_file.reset_index(inplace=True)
            splits = benchmark_file

        expected_cols = ["Id", "HeliostatId", "Split"]
        try:
            pd.testing.assert_index_equal(splits.columns, pd.Index(expected_cols))
        except AssertionError as e:
            raise ValueError(
                f"The dataset split file provided has an incorrect schema. Please verify and try again.\n"
                f"Expected: {expected_cols}\n"
                f"Details: {e}"
            )

        # Check whether to download the data or not.
        if download:  # pragma: no cover
5 changes: 5 additions & 0 deletions paint/data/dataset_splits.py
@@ -457,6 +457,11 @@ def get_dataset_splits(
            Size of the training split.
        validation_size : int
            Size of the validation split.

        Returns
        -------
        pd.DataFrame
            Dataframe containing information on the dataset splits.
        """
        allowed_split_types = [
            mappings.AZIMUTH_SPLIT,
3 changes: 2 additions & 1 deletion paint/data/stac_client.py
@@ -69,6 +69,7 @@ def __init__(
        self.output_dir = pathlib.Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.chunk_size = chunk_size
        log.info(f"Initializing STAC client to download data to: {output_dir}.")

    @staticmethod
    def load_checkpoint(path: pathlib.Path) -> dict[str, Any]:
@@ -715,7 +716,7 @@ def get_heliostat_data(
        # Download the data for each heliostat.
        for heliostat_catalog in heliostat_catalogs_list:
            log.info(f"Processing heliostat catalog {heliostat_catalog.id}")
            success = False
            success = True

            # Download calibration data.
            if get_calibration:
9 changes: 6 additions & 3 deletions plots/04_create_distribution_plots.py
@@ -79,9 +79,8 @@ def __init__(
        self.output_path.mkdir(parents=True, exist_ok=True)

        self.figure_size = (4, 4)
        self.data = self._load_data()

        # Power plant position as tensor
        # Power plant position as tensor.
        power_plant_lat, power_plant_lon = convert_gk_to_lat_lon(
            mappings.GK_RIGHT_BASE, mappings.GK_HEIGHT_BASE
        )
@@ -92,7 +91,11 @@
                mappings.POWER_PLANT_ALT,
            ]
        )
        # Precompute receiver corners once

        # Load data.
        self.data = self._load_data()

        # Precompute receiver corners once.
        self.receiver_coordinates = [
            convert_wgs84_coordinates_to_local_enu(
                torch.tensor(coords), self.power_plant_position
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -7,7 +7,7 @@ packages = ["paint"]

[project]
name = "paint-csp"
version = "2.0.0"
version = "2.0.1"
authors = [
    { name="ARTIST Consortium", email="artist@lists.kit.edu" },
]
@@ -17,7 +17,7 @@ requires-python = ">=3.10"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Development Status :: 1 - Planning",
"Development Status :: 5 - Production/Stable",
]
dependencies = [
"numpy",
@@ -51,6 +51,7 @@ dev = [
"sphinxcontrib-napoleon",
"sphinxemoji"
]
tutorial = ["jupyter"]

[project.urls]
Homepage = "https://github.com/ARTIST-Association/PAINT"
41 changes: 41 additions & 0 deletions tests/data/test_dataset.py
@@ -4,6 +4,7 @@

import cv2
import deepdiff
import pandas as pd
import pytest
import torch
from torchvision import transforms
@@ -191,6 +192,25 @@ def test_from_benchmark(
    assert len(test) == 4
    assert len(val) == 3

    # Test with Pandas dataframe as input instead of file.
    benchmark_df = pd.read_csv(
        pathlib.Path(PAINT_ROOT)
        / "tests"
        / "data"
        / "test_data"
        / "test_benchmark.csv",
        index_col=0,
    )
    train, test, val = PaintCalibrationDataset.from_benchmark(
        benchmark_file=benchmark_df,
        root_dir=pathlib.Path(PAINT_ROOT) / "tests" / "data" / "test_data" / "dataset",
        item_type=item_type,
        download=download,
    )
    assert len(train) == 3
    assert len(test) == 4
    assert len(val) == 3


@pytest.mark.parametrize(
    "item_type, heliostats",
@@ -284,3 +304,24 @@ def test_str_method() -> None:
"-The dataset contains 4 items\n"
)
assert str(dataset) == expected


def test_from_benchmark_fails_with_incorrect_dataframe(
    tmp_path: pathlib.Path,
) -> None:
    """
    Verify that ``from_benchmark`` raises a ``ValueError`` when the input dataframe has incorrect columns.

    Parameters
    ----------
    tmp_path : pathlib.Path
        Fixture providing a temporary folder.
    """
    # Create an invalid data frame.
    invalid_df = pd.DataFrame(columns=["Id", "HeliostatId", "WrongCol"])

    # Expect a ValueError.
    with pytest.raises(ValueError, match="incorrect schema"):
        PaintCalibrationDataset.from_benchmark(
            benchmark_file=invalid_df, root_dir=tmp_path, item_type="raw_image"
        )