From 8dfb169190c4e2144276d092fba9a23a4672663e Mon Sep 17 00:00:00 2001 From: Guillaume EB Date: Tue, 20 Sep 2022 08:42:24 +0000 Subject: [PATCH 1/4] Refactor for handling tests using dask gateway --- README.md | 4 +- .../EOSC-CESNET-small.readwrite.yaml | 27 +++++ benchmark-configs/EOSC-CESNET.readwrite.yaml | 35 ++++++ benchmark-configs/cheyenne.yaml | 11 +- benchmarks/datasets.py | 2 +- benchmarks/utils.py | 112 ++++++++---------- 6 files changed, 118 insertions(+), 73 deletions(-) create mode 100644 benchmark-configs/EOSC-CESNET-small.readwrite.yaml create mode 100644 benchmark-configs/EOSC-CESNET.readwrite.yaml diff --git a/README.md b/README.md index d5d4565..bd2bb69 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ To run the benchmarks, a command utility `pangeobench` is provided in this repos To use it to benchmark Pangeo computation, you need to specify subcommand `run` and the location of the benchmark configuration ```bash -./pangebench run benchmark-configs/cheyenne.computation.yaml +./pangeobench run benchmark-configs/cheyenne.computation.yaml ``` @@ -72,7 +72,7 @@ First, create data files: ``` Second, upload data files to S3 object store if you need to benchmark S3 object store: ```bash -./pangebench upload --config_file benchmark-configs/cheyenne.write.yaml +./pangeobench upload --config_file benchmark-configs/cheyenne.write.yaml ``` Last, read data files: diff --git a/benchmark-configs/EOSC-CESNET-small.readwrite.yaml b/benchmark-configs/EOSC-CESNET-small.readwrite.yaml new file mode 100644 index 0000000..cfe23b0 --- /dev/null +++ b/benchmark-configs/EOSC-CESNET-small.readwrite.yaml @@ -0,0 +1,27 @@ +operation_choice: readwrite +machine: cheyenne +cluster_manager: gateway +cluster_kwargs: + worker_memory: 4 +chunk_per_worker: 2 +spil: false +freq: 1D +parameters: + fixed_totalsize: False + number_of_workers_per_nodes: + - 1 + number_of_nodes: + - 1 + - 4 + chunk_size: + - 32MB + - 64MB + chunking_scheme: + - temporal + io_format: + - zarr + filesystem: + - s3 +profile: default +bucket: pangeo-benchmarking +endpoint_url: https://object-store.cloud.muni.cz diff --git a/benchmark-configs/EOSC-CESNET.readwrite.yaml b/benchmark-configs/EOSC-CESNET.readwrite.yaml new file mode 100644 index 0000000..4eb0cbc --- /dev/null +++ b/benchmark-configs/EOSC-CESNET.readwrite.yaml @@ -0,0 +1,35 @@ +operation_choice: readwrite +machine: cheyenne +cluster_manager: gateway +cluster_kwargs: + worker_memory: 4 +chunk_per_worker: 10 +spil: false +freq: 1D +parameters: + fixed_totalsize: False + number_of_workers_per_nodes: + - 1 + number_of_nodes: + - 1 + - 4 + - 8 + - 16 + - 32 + chunk_size: + - 32MB + - 64MB + - 128MB + - 256MB + - 512MB + - 1024MB + chunking_scheme: + - temporal + - auto + io_format: + - zarr + filesystem: + - s3 +profile: default +bucket: pangeo-benchmarking +endpoint_url: https://object-store.cloud.muni.cz diff --git a/benchmark-configs/cheyenne.yaml b/benchmark-configs/cheyenne.yaml index 8fa6a54..a127354 100644 --- a/benchmark-configs/cheyenne.yaml +++ b/benchmark-configs/cheyenne.yaml @@ -1,9 +1,10 @@ machine: cheyenne -job_scheduler: pbs -queue: regular -walltime: 1:00:00 -maxmemory_per_node: 109gb -maxcore_per_node: 36 +cluster_manager: pbs +cluster_kwargs: + queue: regular + walltime: 1:00:00 + memory: 109gb + cores: 36 chunk_per_worker: 10 spil: false parameters: diff --git a/benchmarks/datasets.py b/benchmarks/datasets.py index 6e2e5e4..41df847 100644 --- a/benchmarks/datasets.py +++ b/benchmarks/datasets.py @@ -5,7 +5,7 @@ import numpy as np import pandas as 
pd import xarray as xr -from distributed.utils import parse_bytes +from dask.utils import parse_bytes def timeseries( diff --git a/benchmarks/utils.py b/benchmarks/utils.py index b634211..14b7fba 100644 --- a/benchmarks/utils.py +++ b/benchmarks/utils.py @@ -7,11 +7,10 @@ import fsspec import pandas as pd from distributed import Client -from distributed.utils import format_bytes +from dask.utils import format_bytes from fsspec.implementations.local import LocalFileSystem from . import __version__ -from .conda_env_export import env_dump from .datasets import timeseries from .ops import ( anomaly, @@ -50,21 +49,6 @@ def dataframe(self): return pd.DataFrame(self.diagnostics) -def cluster_wait(client, n_workers): - """ Delay process until all workers in the cluster are available. """ - start = time() - wait_thresh = 600 - worker_thresh = n_workers * 0.95 - - while len(client.cluster.scheduler.workers) < n_workers: - sleep(2) - elapsed = time() - start - # If we are getting close to timeout but cluster is mostly available, - # just break out - if elapsed > wait_thresh and len(client.cluster.scheduler.workers) >= worker_thresh: - break - - class Runner: def __init__(self, input_file): import yaml @@ -81,43 +65,48 @@ def __init__(self, input_file): self.operations['read'] = [openfile, readfile] self.client = None - def create_cluster(self, job_scheduler, maxcore, walltime, memory, queue, wpn): - """ Creates a dask cluster using dask_jobqueue """ - logger.warning('Creating a dask cluster using dask_jobqueue') - logger.warning(f'Job Scheduler: {job_scheduler}') - logger.warning(f'Memory size for each node: {memory}') - logger.warning(f'Number of cores for each node: {maxcore}') - logger.warning(f'Number of workers for each node: {wpn}') + def create_cluster(self, cluster_manager, processes=1, **cluster_kwargs): + """ Creates a Dask cluster using dask_jobqueue or dask_gateway """ + logger.warning('Creating a Dask cluster') + logger.warning(f'Cluster Manager: {cluster_manager}') + logger.warning(f'Kwargs: {cluster_kwargs}') + + if cluster_manager in ('pbs', 'slurm'): + + from dask_jobqueue import PBSCluster, SLURMCluster - from dask_jobqueue import PBSCluster, SLURMCluster + job_schedulers = {'pbs': PBSCluster, 'slurm': SLURMCluster} - job_schedulers = {'pbs': PBSCluster, 'slurm': SLURMCluster} + # Note about OMP_NUM_THREADS=1, --threads 1: + # These two lines are to ensure that each benchmark workers + # only use one threads for benchmark. + # in the job script one sees twice --nthreads, + # but it get overwritten by --nthreads 1 + cluster = job_schedulers[cluster_manager]( + processes=proecesses, + local_directory='$TMPDIR', + interface='ib0', + env_extra=['OMP_NUM_THREADS=1'], + extra=['--nthreads 1'], + **cluster_kwargs + ) - # Note about OMP_NUM_THREADS=1, --threads 1: - # These two lines are to ensure that each benchmark workers - # only use one threads for benchmark. 
- # in the job script one sees twice --nthreads, - # but it get overwritten by --nthreads 1 - cluster = job_schedulers[job_scheduler]( - cores=maxcore, - memory=memory, - processes=wpn, - local_directory='$TMPDIR', - interface='ib0', - queue=queue, - walltime=walltime, - env_extra=['OMP_NUM_THREADS=1'], - extra=['--nthreads 1'], - ) + logger.warning( + '************************************\n' + 'Job script created by dask_jobqueue:\n' + f'{cluster.job_script()}\n' + '***************************************' + ) + elif cluster_manager == 'gateway': + if processes > 1: + logger.warning(f'Processing kwarg of value {processes} will be ignored with dask-gateway clusters.') + from dask_gateway import Gateway + gateway = Gateway() + cluster = gateway.new_cluster(**cluster_kwargs) + else: + raise ValueError(f"Unkown Cluster Manager: {cluster_manager}") self.client = Client(cluster) - - logger.warning( - '************************************\n' - 'Job script created by dask_jobqueue:\n' - f'{cluster.job_script()}\n' - '***************************************' - ) logger.warning(f'Dask cluster dashboard_link: {self.client.cluster.dashboard_link}') def run(self): @@ -125,11 +114,8 @@ def run(self): logger.warning('Reading configuration YAML config file') operation_choice = self.params['operation_choice'] machine = self.params['machine'] - job_scheduler = self.params['job_scheduler'] - queue = self.params['queue'] - walltime = self.params['walltime'] - maxmemory_per_node = self.params['maxmemory_per_node'] - maxcore_per_node = self.params['maxcore_per_node'] + cluster_manager = self.params['cluster_manager'] + cluster_kwargs = self.params['cluster_kwargs'] chunk_per_worker = self.params['chunk_per_worker'] freq = self.params['freq'] spil = self.params['spil'] @@ -146,21 +132,17 @@ def run(self): filesystems = parameters['filesystem'] fixed_totalsize = parameters['fixed_totalsize'] chsz = parameters['chunk_size'] - local_dir = self.params['local_dir'] + #TODO Dump the environment somewhere env_export_filename = f"{output_dir}/env_export_{now.strftime('%Y-%m-%d_%H-%M-%S')}.yml" - env_dump('./binder/environment.yml', env_export_filename) for wpn in num_workers: self.create_cluster( - job_scheduler=job_scheduler, - maxcore=maxcore_per_node, - walltime=walltime, - memory=maxmemory_per_node, - queue=queue, - wpn=wpn, + cluster_manager=cluster_manager, + processes=wpn, + **cluster_kwargs ) for num in num_nodes: self.client.cluster.scale(num * wpn) - cluster_wait(self.client, num * wpn) + self.client.wait_for_workers(n_workers=num * wpn, timeout=1800) timer = DiagnosticTimer() logger.warning( '#####################################################################\n' @@ -197,6 +179,7 @@ def run(self): root = f'{bucket}' elif filesystem == 'posix': fs = LocalFileSystem() + local_dir = self.params['local_dir'] root = local_dir if not os.path.isdir(f'{root}'): os.makedirs(f'{root}') @@ -244,10 +227,9 @@ def run(self): filesystem=filesystem, root=root, machine=machine, - maxmemory_per_node=maxmemory_per_node, - maxcore_per_node=maxcore_per_node, spil=spil, version=__version__, + **cluster_kwargs ): fname = f'{chunk_size}{chunking_scheme}{filesystem}{num}' if op.__name__ == 'writefile': From 278e98fbc1bbd55eb1e608df33cbec2bbbeca545 Mon Sep 17 00:00:00 2001 From: Guillaume EB Date: Sun, 25 Sep 2022 20:22:42 +0000 Subject: [PATCH 2/4] Working benchmarks on Cloud with dask-gateway --- .../EOSC-CESNET-small.readwrite.yaml | 2 +- benchmark-configs/EOSC-CESNET.readwrite.yaml | 2 +- benchmarks/utils.py | 88 
++++++++++++++++++- 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/benchmark-configs/EOSC-CESNET-small.readwrite.yaml b/benchmark-configs/EOSC-CESNET-small.readwrite.yaml index cfe23b0..5b6f60f 100644 --- a/benchmark-configs/EOSC-CESNET-small.readwrite.yaml +++ b/benchmark-configs/EOSC-CESNET-small.readwrite.yaml @@ -1,5 +1,5 @@ operation_choice: readwrite -machine: cheyenne +machine: EOSC-CESNET-small cluster_manager: gateway cluster_kwargs: worker_memory: 4 diff --git a/benchmark-configs/EOSC-CESNET.readwrite.yaml b/benchmark-configs/EOSC-CESNET.readwrite.yaml index 4eb0cbc..b8914ad 100644 --- a/benchmark-configs/EOSC-CESNET.readwrite.yaml +++ b/benchmark-configs/EOSC-CESNET.readwrite.yaml @@ -1,5 +1,5 @@ operation_choice: readwrite -machine: cheyenne +machine: EOSC-CESNET cluster_manager: gateway cluster_kwargs: worker_memory: 4 diff --git a/benchmarks/utils.py b/benchmarks/utils.py index 14b7fba..cd9428d 100644 --- a/benchmarks/utils.py +++ b/benchmarks/utils.py @@ -1,6 +1,8 @@ import datetime import logging import os +import uuid +import zipfile from contextlib import contextmanager from time import sleep, time @@ -9,6 +11,8 @@ from distributed import Client from dask.utils import format_bytes from fsspec.implementations.local import LocalFileSystem +from distributed.diagnostics.plugin import WorkerPlugin +from dask.utils import tmpfile from . import __version__ from .datasets import timeseries @@ -49,6 +53,9 @@ def dataframe(self): return pd.DataFrame(self.diagnostics) + + + class Runner: def __init__(self, input_file): import yaml @@ -108,6 +115,74 @@ def create_cluster(self, cluster_manager, processes=1, **cluster_kwargs): self.client = Client(cluster) logger.warning(f'Dask cluster dashboard_link: {self.client.cluster.dashboard_link}') + + if cluster_manager == 'gateway': + #We need to upload benchmarking Python files to use them on worker side + #This added plugin must be a closure for it to work on worker side (can't be a class declared in this file + class UploadDirectoryWorker(WorkerPlugin): + """A WorkerPlugin to upload a local directory to workers. + + Parameters + ---------- + path: str + A path to the directory to upload + + Examples + -------- + >>> from distributed.diagnostics.plugin import UploadDirectoryWorker + >>> client.register_worker_plugin(UploadDirectoryWorker("/path/to/directory")) # doctest: +SKIP + """ + + def __init__( + self, + path, + restart=False, + skip_words=(".git", ".github", ".pytest_cache", "tests", "docs"), + skip=(lambda fn: os.path.splitext(fn)[1] == ".pyc",), + ): + """ + Initialize the plugin by reading in the data from the given file. 
+ """ + path = os.path.expanduser(path) + self.path = os.path.split(path)[-1] + self.restart = restart + + self.name = "upload-directory-" + os.path.split(path)[-1] + + with tmpfile(extension="zip") as fn: + with zipfile.ZipFile(fn, "w", zipfile.ZIP_DEFLATED) as z: + for root, dirs, files in os.walk(path): + for file in files: + filename = os.path.join(root, file) + if any(predicate(filename) for predicate in skip): + continue + dirs = filename.split(os.sep) + if any(word in dirs for word in skip_words): + continue + + archive_name = os.path.relpath( + os.path.join(root, file), os.path.join(path, "..") + ) + z.write(filename, archive_name) + + with open(fn, "rb") as f: + self.data = f.read() + + async def setup(self, worker): + fn = os.path.join(worker.local_directory, f"tmp-{uuid.uuid4()}.zip") + with open(fn, "wb") as f: + f.write(self.data) + + import zipfile + + with zipfile.ZipFile(fn) as z: + z.extractall(path=worker.local_directory) + + os.remove(fn) + + logger.warning(f'Uploading directory {os.path.dirname(__file__)}') + plugin = UploadDirectoryWorker(os.path.dirname(__file__)) + self.client.register_worker_plugin(plugin) def run(self): @@ -143,6 +218,7 @@ def run(self): for num in num_nodes: self.client.cluster.scale(num * wpn) self.client.wait_for_workers(n_workers=num * wpn, timeout=1800) + timer = DiagnosticTimer() logger.warning( '#####################################################################\n' @@ -165,13 +241,21 @@ def run(self): f'### Skipping NetCDF S3 {operation_choice} benchmarking ###\n' ) continue + profile = self.params['profile'] bucket = self.params['bucket'] endpoint_url = self.params['endpoint_url'] + + #We need to get access/secret keys from the Client notebook AWS credential files. + #In the cloud, we have no guarantee that workers will have those files, so we + #need to passe those keys directly to the S3 filesystem object. 
+ import boto3 + session = boto3.Session(profile_name='default') fs = fsspec.filesystem( 's3', - profile=profile, anon=False, + key=session.get_credentials().access_key, + secret=session.get_credentials().secret_key, client_kwargs={'endpoint_url': endpoint_url}, skip_instance_cache=True, use_listings_cache=True, @@ -212,6 +296,7 @@ def run(self): logger.warning(f'Dataset total size: {dataset_size}') for op in self.operations[operation_choice]: + logger.warning(f'Operation begin: {op}') with timer.time( 'runtime', operation=op.__name__, @@ -240,6 +325,7 @@ def run(self): ds = op(fs, io_format, root, filename) else: op(ds) + logger.warning(f'Operation done: {op}') # kills ds, and every other dependent computation logger.warning('Computation done') self.client.cancel(ds) From 0cce38799c1f063d4a0a41d8e42f9516e4bf9d4d Mon Sep 17 00:00:00 2001 From: Guillaume EB Date: Mon, 26 Sep 2022 11:37:14 +0000 Subject: [PATCH 3/4] Correctly use UploadDirectory Dask plugin --- benchmarks/utils.py | 73 +------------------ .../compute_study_2022-09-26_10-55-25.csv | 9 +++ .../compute_study_2022-09-26_10-57-04.csv | 9 +++ 3 files changed, 22 insertions(+), 69 deletions(-) create mode 100644 results/EOSC-CESNET-small/2022-09-26/compute_study_2022-09-26_10-55-25.csv create mode 100644 results/EOSC-CESNET-small/2022-09-26/compute_study_2022-09-26_10-57-04.csv diff --git a/benchmarks/utils.py b/benchmarks/utils.py index cd9428d..8cd2b30 100644 --- a/benchmarks/utils.py +++ b/benchmarks/utils.py @@ -1,8 +1,6 @@ import datetime import logging import os -import uuid -import zipfile from contextlib import contextmanager from time import sleep, time @@ -11,8 +9,7 @@ from distributed import Client from dask.utils import format_bytes from fsspec.implementations.local import LocalFileSystem -from distributed.diagnostics.plugin import WorkerPlugin -from dask.utils import tmpfile +from distributed.diagnostics.plugin import UploadDirectory from . import __version__ from .datasets import timeseries @@ -118,71 +115,9 @@ def create_cluster(self, cluster_manager, processes=1, **cluster_kwargs): if cluster_manager == 'gateway': #We need to upload benchmarking Python files to use them on worker side - #This added plugin must be a closure for it to work on worker side (can't be a class declared in this file - class UploadDirectoryWorker(WorkerPlugin): - """A WorkerPlugin to upload a local directory to workers. - - Parameters - ---------- - path: str - A path to the directory to upload - - Examples - -------- - >>> from distributed.diagnostics.plugin import UploadDirectoryWorker - >>> client.register_worker_plugin(UploadDirectoryWorker("/path/to/directory")) # doctest: +SKIP - """ - - def __init__( - self, - path, - restart=False, - skip_words=(".git", ".github", ".pytest_cache", "tests", "docs"), - skip=(lambda fn: os.path.splitext(fn)[1] == ".pyc",), - ): - """ - Initialize the plugin by reading in the data from the given file. 
- """ - path = os.path.expanduser(path) - self.path = os.path.split(path)[-1] - self.restart = restart - - self.name = "upload-directory-" + os.path.split(path)[-1] - - with tmpfile(extension="zip") as fn: - with zipfile.ZipFile(fn, "w", zipfile.ZIP_DEFLATED) as z: - for root, dirs, files in os.walk(path): - for file in files: - filename = os.path.join(root, file) - if any(predicate(filename) for predicate in skip): - continue - dirs = filename.split(os.sep) - if any(word in dirs for word in skip_words): - continue - - archive_name = os.path.relpath( - os.path.join(root, file), os.path.join(path, "..") - ) - z.write(filename, archive_name) - - with open(fn, "rb") as f: - self.data = f.read() - - async def setup(self, worker): - fn = os.path.join(worker.local_directory, f"tmp-{uuid.uuid4()}.zip") - with open(fn, "wb") as f: - f.write(self.data) - - import zipfile - - with zipfile.ZipFile(fn) as z: - z.extractall(path=worker.local_directory) - - os.remove(fn) - - logger.warning(f'Uploading directory {os.path.dirname(__file__)}') - plugin = UploadDirectoryWorker(os.path.dirname(__file__)) - self.client.register_worker_plugin(plugin) + logger.warning(f'Uploading directory {here}') + plugin = UploadDirectory(here, restart=True, update_path=True) + self.client.register_worker_plugin(plugin, nanny=True) def run(self): diff --git a/results/EOSC-CESNET-small/2022-09-26/compute_study_2022-09-26_10-55-25.csv b/results/EOSC-CESNET-small/2022-09-26/compute_study_2022-09-26_10-55-25.csv new file mode 100644 index 0000000..fdb5e52 --- /dev/null +++ b/results/EOSC-CESNET-small/2022-09-26/compute_study_2022-09-26_10-55-25.csv @@ -0,0 +1,9 @@ +operation,fixed_totalsize,chunk_size,chunk_per_worker,dataset_size,worker_per_node,threads_per_worker,num_nodes,chunking_scheme,io_format,filesystem,root,machine,spil,version,worker_memory,runtime,dask,distributed,fsspec,gcsfs,netCDF4,numpy,pandas,s3fs,xarray,zarr +writefile,False,32MB,2,75.05 MiB,1,1,1,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,40.57333040237427,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +openfile,False,32MB,2,75.05 MiB,1,1,1,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,2.8290905952453613,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +readfile,False,32MB,2,75.05 MiB,1,1,1,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,1.1352343559265137,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +deletefile,False,32MB,2,75.05 MiB,1,1,1,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,1.2147369384765625,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +writefile,False,64MB,2,150.05 MiB,1,1,1,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,39.94281983375549,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +openfile,False,64MB,2,150.05 MiB,1,1,1,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,2.578951835632324,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +readfile,False,64MB,2,150.05 MiB,1,1,1,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,1.867366075515747,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +deletefile,False,64MB,2,150.05 
MiB,1,1,1,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,1.0949342250823975,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 diff --git a/results/EOSC-CESNET-small/2022-09-26/compute_study_2022-09-26_10-57-04.csv b/results/EOSC-CESNET-small/2022-09-26/compute_study_2022-09-26_10-57-04.csv new file mode 100644 index 0000000..87ffe7b --- /dev/null +++ b/results/EOSC-CESNET-small/2022-09-26/compute_study_2022-09-26_10-57-04.csv @@ -0,0 +1,9 @@ +operation,fixed_totalsize,chunk_size,chunk_per_worker,dataset_size,worker_per_node,threads_per_worker,num_nodes,chunking_scheme,io_format,filesystem,root,machine,spil,version,worker_memory,runtime,dask,distributed,fsspec,gcsfs,netCDF4,numpy,pandas,s3fs,xarray,zarr +writefile,False,32MB,2,300.05 MiB,1,1,4,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,43.10253405570984,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +openfile,False,32MB,2,300.05 MiB,1,1,4,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,2.622267484664917,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +readfile,False,32MB,2,300.05 MiB,1,1,4,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,3.698916435241699,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +deletefile,False,32MB,2,300.05 MiB,1,1,4,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,1.4919712543487549,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +writefile,False,64MB,2,525.05 MiB,1,1,4,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,40.31912088394165,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +openfile,False,64MB,2,525.05 MiB,1,1,4,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,2.6523828506469727,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +readfile,False,64MB,2,525.05 MiB,1,1,4,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,3.6914968490600586,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 +deletefile,False,64MB,2,525.05 MiB,1,1,4,temporal,zarr,s3,pangeo-benchmarking,EOSC-CESNET-small,False,0.0.0,4,1.2323241233825684,2022.7.0,2022.7.0,2022.5.0,2022.5.0,1.6.0,1.23.1,1.4.3,2022.5.0,2022.6.0,2.12.0 From 7dfe4cc4b27d71b03214ffbc875cb60e1e359276 Mon Sep 17 00:00:00 2001 From: Guillaume EB Date: Wed, 5 Oct 2022 17:42:17 +0000 Subject: [PATCH 4/4] Fix HPC code, update config files and Readme --- README.md | 33 +++++++++++------------ benchmark-configs/cheyenne.pri1-a.yaml | 22 +++++++++++---- benchmark-configs/cheyenne.pri1-b.yaml | 22 +++++++++++---- benchmark-configs/cheyenne.pri2.yaml | 22 +++++++++++---- benchmark-configs/cheyenne.readwrite.yaml | 13 +++++---- benchmark-configs/cheyenne.write.yaml | 13 +++++---- benchmark-configs/cheyenne.yaml | 33 ----------------------- benchmark-configs/hal.yaml | 26 ------------------ benchmark-configs/hal1D.yaml | 23 ++++++++++++---- benchmark-configs/halstrong11D.yaml | 21 +++++++++++---- benchmark-configs/halstrong11H.yaml | 22 +++++++++++---- benchmark-configs/halstrong21D.yaml | 21 +++++++++++---- benchmark-configs/halstrong21H.yaml | 22 +++++++++++---- benchmark-configs/halweak1D.yaml | 21 +++++++++++---- benchmark-configs/halweak1H.yaml | 22 +++++++++++---- benchmark-configs/wrangler.yaml | 22 +++++++++++---- benchmarks/utils.py | 11 ++++---- 
binder/environment.yml | 6 ++--- requirements.txt | 2 +- setup.py | 7 +++-- 20 files changed, 229 insertions(+), 155 deletions(-) delete mode 100644 benchmark-configs/cheyenne.yaml delete mode 100644 benchmark-configs/hal.yaml diff --git a/README.md b/README.md index bd2bb69..e45919b 100644 --- a/README.md +++ b/README.md @@ -3,15 +3,16 @@ Benchmarking & Scaling Studies of the Pangeo Platform - [Benchmarking](#benchmarking) - - [Creating an Environment](#creating-an-environment) + - [Creating an Environment on an HPC Center](#creating-an-environment-on-an-hpc-center) + - [Environment on a Kubernetes based system](#environment-on-a-kubernetes-based-system) - [Benchmark Configuration](#benchmark-configuration) - [Running the Benchmarks](#running-the-benchmarks) - [Benchmark Results](#benchmark-results) - [Visualization](#visualization) -## Creating an Environment +## Creating an Environment on an HPC Center -To run the benchmarks, it's recommended to create a dedicated conda environment by running: +To run the benchmarks on an HPC platform, it's recommended to create a dedicated conda environment by running: ```bash conda env create -f ./binder/environment.yml @@ -31,33 +32,31 @@ and then run the post build script: ./binder/postBuild ``` -## Benchmark Configuration +## Environment on a Kubernetes based system -The `benchmark-configs` directory contains YAML files that are used to run benchmarks on different machines. So far, the following HPC systems' configs are provided: +To run the benchmark on any Cloud platform using Kubernetes, it is recommanded to use [pangeo/pangeo-notebook Docker image](https://github.com/pangeo-data/pangeo-docker-images/tree/master/pangeo-notebook). -```bash -$ tree ./benchmark-configs/ -benchmark-configs/ -├── cheyenne.yaml -└── hal.yaml -└── wrangler.yaml +This package currently assumes a Dask Gateway cluster is available from the Kubernetes environment. -``` +## Benchmark Configuration + +The `benchmark-configs` directory contains YAML files that are used to run benchmarks on different machines. So far, HPC systems config have been provided for several clusters: Cheyenne from NCAR, HAL from CNES, Wrangler from TACC. It also contains configurations for CESNET Center based on a Kubernetes deployment over Openstack. There might be several configurations for each center. In case you are interested in running the benchmarks on another system, you will need to create a new YAML file for your system with the right configurations. See the existing config files for reference. ## Running the Benchmarks + ### from command line To run the benchmarks, a command utility `pangeobench` is provided in this repository. -To use it to benchmark Pangeo computation, you need to specify subcommand `run` and the location of the benchmark configuration +To use it to benchmark Pangeo computation, you need to specify subcommand `run` and the location of the benchmark configuration. ```bash -./pangeobench run benchmark-configs/cheyenne.computation.yaml +./pangeobench run benchmark-configs/cheyenne.pri2.yaml ``` -To use it to benchmark Pangeo IO with weak scaling analysis, you need to specify subcommand `run` and the location of the benchmark configuration +To use it to benchmark Pangeo IO with weak scaling analysis, you need to specify subcommand `run` and the location of the benchmark configuration. ```bash @@ -91,8 +90,8 @@ Commands: run Run benchmarking upload Upload benchmarking files from local directory to S3 object store ``` -## Running the Benchmarks -### from jupyter notebook. 
+ +### from Jupyter notebook. To run the benchmarks from jupyter notebook, install 'pangeo-bench' kernel to your jupyter notebook enviroment, then start run.ipynb notebook. You will need to specify the configuration file as described above in your notebook. diff --git a/benchmark-configs/cheyenne.pri1-a.yaml b/benchmark-configs/cheyenne.pri1-a.yaml index 3228315..018a3ed 100644 --- a/benchmark-configs/cheyenne.pri1-a.yaml +++ b/benchmark-configs/cheyenne.pri1-a.yaml @@ -1,12 +1,18 @@ +operation_choice: readwrite machine: cheyenne -job_scheduler: pbs -queue: regular -walltime: 1:00:00 -maxmemory_per_node: 109gb -maxcore_per_node: 36 +cluster_manager: pbs +cluster_kwargs: + queue: regular + walltime: 1:00:00 + memory: 109gb + cores: 36 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false +freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -18,3 +24,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_pri1-a diff --git a/benchmark-configs/cheyenne.pri1-b.yaml b/benchmark-configs/cheyenne.pri1-b.yaml index 66a2002..c2f0f38 100644 --- a/benchmark-configs/cheyenne.pri1-b.yaml +++ b/benchmark-configs/cheyenne.pri1-b.yaml @@ -1,12 +1,18 @@ +operation_choice: readwrite machine: cheyenne -job_scheduler: pbs -queue: regular -walltime: 1:00:00 -maxmemory_per_node: 109gb -maxcore_per_node: 36 +cluster_manager: pbs +cluster_kwargs: + queue: regular + walltime: 1:00:00 + memory: 109gb + cores: 36 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false +freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -18,3 +24,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_pri1-b diff --git a/benchmark-configs/cheyenne.pri2.yaml b/benchmark-configs/cheyenne.pri2.yaml index 84d7f43..b9509f5 100644 --- a/benchmark-configs/cheyenne.pri2.yaml +++ b/benchmark-configs/cheyenne.pri2.yaml @@ -1,12 +1,18 @@ +operation_choice: readwrite machine: cheyenne -job_scheduler: pbs -queue: regular -walltime: 1:00:00 -maxmemory_per_node: 109gb -maxcore_per_node: 36 +cluster_manager: pbs +cluster_kwargs: + queue: regular + walltime: 1:00:00 + memory: 109gb + cores: 36 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false +freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -25,3 +31,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_pri2 diff --git a/benchmark-configs/cheyenne.readwrite.yaml b/benchmark-configs/cheyenne.readwrite.yaml index 2850d07..aff4061 100644 --- a/benchmark-configs/cheyenne.readwrite.yaml +++ b/benchmark-configs/cheyenne.readwrite.yaml @@ -1,10 +1,13 @@ operation_choice: readwrite machine: cheyenne -job_scheduler: pbs -queue: regular -walltime: 1:00:00 -maxmemory_per_node: 109gb -maxcore_per_node: 36 +cluster_manager: pbs +cluster_kwargs: + queue: regular + walltime: 1:00:00 + memory: 109gb + cores: 36 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false freq: 1D diff --git a/benchmark-configs/cheyenne.write.yaml b/benchmark-configs/cheyenne.write.yaml index 0454179..ad00ec8 100644 --- a/benchmark-configs/cheyenne.write.yaml +++ b/benchmark-configs/cheyenne.write.yaml @@ -1,10 +1,13 @@ operation_choice: write machine: 
cheyenne -job_scheduler: pbs -queue: regular -walltime: 1:00:00 -maxmemory_per_node: 109gb -maxcore_per_node: 36 +cluster_manager: pbs +cluster_kwargs: + queue: regular + walltime: 1:00:00 + memory: 109gb + cores: 36 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false freq: 1D diff --git a/benchmark-configs/cheyenne.yaml b/benchmark-configs/cheyenne.yaml deleted file mode 100644 index a127354..0000000 --- a/benchmark-configs/cheyenne.yaml +++ /dev/null @@ -1,33 +0,0 @@ -machine: cheyenne -cluster_manager: pbs -cluster_kwargs: - queue: regular - walltime: 1:00:00 - memory: 109gb - cores: 36 -chunk_per_worker: 10 -spil: false -parameters: - number_of_workers_per_nodes: - - 1 - number_of_threads_per_workers: 1 - number_of_nodes: - - 1 - - 2 - - 4 - - 8 - - 16 - - 24 - - 32 - chunk_size: - - 32MB - - 64MB - - 128MB - - 256MB - - 512MB - - 1024MB - - 2048MB - chunking_scheme: - - spatial - - temporal - - auto diff --git a/benchmark-configs/hal.yaml b/benchmark-configs/hal.yaml deleted file mode 100644 index a1c6484..0000000 --- a/benchmark-configs/hal.yaml +++ /dev/null @@ -1,26 +0,0 @@ -machine: hal24 -job_scheduler: pbs -queue: batch -walltime: 1:00:00 -maxmemory_per_node: 128gb -maxcore_per_node: 24 -spil: false -parameters: - number_of_workers_per_nodes: - - 1 - number_of_threads_per_workers: 1 - number_of_nodes: - - 1 - - 2 - - 4 - - 8 - - 16 - chunk_size: - - 512MB - - 1024MB - - 2048MB - - 4096MB - chunking_scheme: - - spatial - - temporal - - auto diff --git a/benchmark-configs/hal1D.yaml b/benchmark-configs/hal1D.yaml index 92938bb..e03c464 100644 --- a/benchmark-configs/hal1D.yaml +++ b/benchmark-configs/hal1D.yaml @@ -1,11 +1,18 @@ +operation_choice: readwrite machine: hal1D -job_scheduler: pbs -queue: batch -walltime: 1:00:00 -maxmemory_per_node: 128gb -maxcore_per_node: 24 +cluster_manager: pbs +cluster_kwargs: + queue: batch + walltime: 1:00:00 + memory: 128gb + cores: 24 + local_directory: "$TMPDIR" + interface: "ib0" +chunk_per_worker: 10 spil: false +freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -24,3 +31,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_1D diff --git a/benchmark-configs/halstrong11D.yaml b/benchmark-configs/halstrong11D.yaml index c56e3a2..04ba080 100644 --- a/benchmark-configs/halstrong11D.yaml +++ b/benchmark-configs/halstrong11D.yaml @@ -1,13 +1,18 @@ +operation_choice: readwrite machine: hal1D -job_scheduler: pbs -queue: batch -walltime: 1:00:00 -maxmemory_per_node: 128gb -maxcore_per_node: 24 +cluster_manager: pbs +cluster_kwargs: + queue: batch + walltime: 1:00:00 + memory: 128gb + cores: 24 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -19,3 +24,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_11D diff --git a/benchmark-configs/halstrong11H.yaml b/benchmark-configs/halstrong11H.yaml index c89d93a..f039fc1 100644 --- a/benchmark-configs/halstrong11H.yaml +++ b/benchmark-configs/halstrong11H.yaml @@ -1,12 +1,18 @@ +operation_choice: readwrite machine: hal1H -job_scheduler: pbs -queue: batch -walltime: 1:00:00 -maxmemory_per_node: 128gb -maxcore_per_node: 24 +cluster_manager: pbs +cluster_kwargs: + queue: batch + walltime: 1:00:00 + memory: 128gb + cores: 24 + local_directory: 
"$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false +freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -18,3 +24,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_11H diff --git a/benchmark-configs/halstrong21D.yaml b/benchmark-configs/halstrong21D.yaml index 0cce1e7..3d6c732 100644 --- a/benchmark-configs/halstrong21D.yaml +++ b/benchmark-configs/halstrong21D.yaml @@ -1,13 +1,18 @@ +operation_choice: readwrite machine: hal1D -job_scheduler: pbs -queue: batch -walltime: 1:00:00 -maxmemory_per_node: 128gb -maxcore_per_node: 24 +cluster_manager: pbs +cluster_kwargs: + queue: batch + walltime: 1:00:00 + memory: 128gb + cores: 24 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -19,3 +24,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_21D diff --git a/benchmark-configs/halstrong21H.yaml b/benchmark-configs/halstrong21H.yaml index a549555..5b38a05 100644 --- a/benchmark-configs/halstrong21H.yaml +++ b/benchmark-configs/halstrong21H.yaml @@ -1,12 +1,18 @@ +operation_choice: readwrite machine: hal1H -job_scheduler: pbs -queue: batch -walltime: 1:00:00 -maxmemory_per_node: 128gb -maxcore_per_node: 24 +cluster_manager: pbs +cluster_kwargs: + queue: batch + walltime: 1:00:00 + memory: 128gb + cores: 24 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false +freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -18,3 +24,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_21H diff --git a/benchmark-configs/halweak1D.yaml b/benchmark-configs/halweak1D.yaml index e13a561..d9fb263 100644 --- a/benchmark-configs/halweak1D.yaml +++ b/benchmark-configs/halweak1D.yaml @@ -1,13 +1,18 @@ +operation_choice: readwrite machine: hal1D -job_scheduler: pbs -queue: batch -walltime: 1:00:00 -maxmemory_per_node: 128gb -maxcore_per_node: 24 +cluster_manager: pbs +cluster_kwargs: + queue: batch + walltime: 1:00:00 + memory: 128gb + cores: 24 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -26,3 +31,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_weak1D diff --git a/benchmark-configs/halweak1H.yaml b/benchmark-configs/halweak1H.yaml index 45ae844..874bbce 100644 --- a/benchmark-configs/halweak1H.yaml +++ b/benchmark-configs/halweak1H.yaml @@ -1,12 +1,18 @@ +operation_choice: readwrite machine: hal1H -job_scheduler: pbs -queue: batch -walltime: 1:00:00 -maxmemory_per_node: 128gb -maxcore_per_node: 24 +cluster_manager: pbs +cluster_kwargs: + queue: batch + walltime: 1:00:00 + memory: 128gb + cores: 24 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false +freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -25,3 +31,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_weak1H diff --git a/benchmark-configs/wrangler.yaml 
b/benchmark-configs/wrangler.yaml index 2eb280c..93cf3d6 100644 --- a/benchmark-configs/wrangler.yaml +++ b/benchmark-configs/wrangler.yaml @@ -1,12 +1,18 @@ +operation_choice: readwrite machine: wrangler -job_scheduler: slurm -queue: normal -walltime: 00:60:00 -maxmemory_per_node: 128gb -maxcore_per_node: 24 +cluster_manager: slurm +cluster_kwargs: + queue: normal + walltime: 00:60:00 + memory: 128gb + cores: 24 + local_directory: "$TMPDIR" + interface: "ib0" chunk_per_worker: 10 spil: false +freq: 1D parameters: + fixed_totalsize: False number_of_workers_per_nodes: - 1 number_of_threads_per_workers: 1 @@ -26,3 +32,9 @@ parameters: - spatial - temporal - auto + io_format: + - zarr + - netcdf + filesystem: + - posix +local_dir: test_readwrite diff --git a/benchmarks/utils.py b/benchmarks/utils.py index 8cd2b30..90e82b7 100644 --- a/benchmarks/utils.py +++ b/benchmarks/utils.py @@ -69,7 +69,7 @@ def __init__(self, input_file): self.operations['read'] = [openfile, readfile] self.client = None - def create_cluster(self, cluster_manager, processes=1, **cluster_kwargs): + def create_cluster(self, cluster_manager, processes=1, nthreads=1, **cluster_kwargs): """ Creates a Dask cluster using dask_jobqueue or dask_gateway """ logger.warning('Creating a Dask cluster') logger.warning(f'Cluster Manager: {cluster_manager}') @@ -87,11 +87,9 @@ def create_cluster(self, cluster_manager, processes=1, **cluster_kwargs): # in the job script one sees twice --nthreads, # but it get overwritten by --nthreads 1 cluster = job_schedulers[cluster_manager]( - processes=proecesses, - local_directory='$TMPDIR', - interface='ib0', - env_extra=['OMP_NUM_THREADS=1'], - extra=['--nthreads 1'], + processes=processes, + worker_extra_args=[f"--nthreads {nthreads}"], + job_script_prologue=['OMP_NUM_THREADS=1'], **cluster_kwargs ) @@ -148,6 +146,7 @@ def run(self): self.create_cluster( cluster_manager=cluster_manager, processes=wpn, + nthreads=num_threads, **cluster_kwargs ) for num in num_nodes: diff --git a/binder/environment.yml b/binder/environment.yml index 3be43a9..76de2b5 100644 --- a/binder/environment.yml +++ b/binder/environment.yml @@ -3,9 +3,9 @@ channels: - conda-forge dependencies: - click - - dask-jobqueue + - dask-jobqueue>=0.8.0 - dask-labextension - - dask>=2.3 + - dask>=2022.7.1 - fsspec>=0.7.3 - holoviews - hvplot @@ -16,7 +16,7 @@ dependencies: - nodejs - numpy - pandas - - python=3.7 + - python=3.9 - s3fs>=0.4.2 - scipy - setuptools diff --git a/requirements.txt b/requirements.txt index d964f2a..8834d56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ awscli xarray>=0.15.1 click -dask +dask>=2022.07.01 distributed s3fs>=0.4.2 fsspec>=0.7.3 diff --git a/setup.py b/setup.py index d585353..d7bb351 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ long_description=long_description, long_description_content_type='text/markdown', use_scm_version={'version_scheme': 'post-release', 'local_scheme': 'dirty-tag'}, - install_requires=['dask>=2.11', 'xarray>=0.14', 'numpy>1.17'], + install_requires=['dask>=2022.07.01', 'xarray>=0.14', 'numpy>1.17'], license='Apache 2.0', url='https://github.com/pangeo-data/benchmarking', packages=find_packages(), @@ -23,9 +23,8 @@ 'Intended Audience :: Science/Research', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', 
'Topic :: Scientific/Engineering', ], )
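
Patch 1 routes cluster creation through a single `cluster_manager` key, with `cluster_kwargs` forwarded untouched to either dask-jobqueue or dask-gateway. A minimal sketch of the gateway branch, assuming the gateway deployment exposes a `worker_memory` option (as the EOSC-CESNET configs do) and that the scale/wait pattern mirrors `Runner.run`:

```python
# Sketch of the dask-gateway path added in patch 1; `worker_memory` is a
# deployment-specific cluster option, here taken from cluster_kwargs in the
# EOSC-CESNET YAML files.
from dask_gateway import Gateway
from distributed import Client

gateway = Gateway()                              # uses the gateway configured for the platform
cluster = gateway.new_cluster(worker_memory=4)   # cluster_kwargs from the config are forwarded here
cluster.scale(4)                                 # num_nodes * workers_per_node, as in Runner.run()
client = Client(cluster)
client.wait_for_workers(n_workers=4, timeout=1800)
print(client.dashboard_link)
```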
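Patch 2 stops passing the `profile` argument to the S3 filesystem and instead resolves the access and secret keys on the client with boto3, because gateway workers generally do not have an `~/.aws/credentials` file. A minimal sketch, assuming a local `default` profile and the CESNET endpoint used in the EOSC configs:

```python
# Sketch of the credential handling added in patch 2: keys are read once on the
# client and passed explicitly so workers never need local AWS credential files.
import boto3
import fsspec

session = boto3.Session(profile_name='default')
creds = session.get_credentials()
fs = fsspec.filesystem(
    's3',
    anon=False,
    key=creds.access_key,
    secret=creds.secret_key,
    client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'},
    skip_instance_cache=True,
)
print(fs.ls('pangeo-benchmarking'))  # assumption: the bucket from the config exists and is readable
```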
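Patch 3 drops the hand-rolled `UploadDirectoryWorker` plugin in favour of `distributed.diagnostics.plugin.UploadDirectory`, which ships the `benchmarks` package to every worker so the benchmark operations can be deserialized there. A minimal sketch, assuming `here` points at the directory containing `benchmarks/utils.py`:

```python
# Sketch of the worker-side code upload used for gateway clusters in patch 3.
# restart=True bounces the nannies after extraction and update_path=True puts
# the uploaded directory on sys.path on each worker.
import os
from distributed import Client
from distributed.diagnostics.plugin import UploadDirectory

client = Client()                    # stands in for the gateway-backed client from Runner.create_cluster
here = os.path.dirname(__file__)     # assumption: evaluated inside benchmarks/utils.py
plugin = UploadDirectory(here, restart=True, update_path=True)
client.register_worker_plugin(plugin, nanny=True)
```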
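Patch 4 replaces the deprecated dask-jobqueue arguments `env_extra` and `extra` with `job_script_prologue` and `worker_extra_args`, which is why `binder/environment.yml` now pins `dask-jobqueue>=0.8.0`, and moves `local_directory`/`interface` into the per-machine `cluster_kwargs`. A minimal sketch of the PBS branch with the values from `benchmark-configs/cheyenne.readwrite.yaml`:

```python
# Sketch of the dask-jobqueue path after patch 4, using the renamed 0.8.0 arguments.
from dask_jobqueue import PBSCluster
from distributed import Client

cluster = PBSCluster(
    queue='regular',
    walltime='1:00:00',
    memory='109gb',
    cores=36,
    processes=1,                                # one worker per node
    local_directory='$TMPDIR',
    interface='ib0',
    job_script_prologue=['OMP_NUM_THREADS=1'],  # keep each benchmark worker single-threaded
    worker_extra_args=['--nthreads 1'],
)
print(cluster.job_script())                     # same job script that create_cluster logs
client = Client(cluster)
```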