From edc6f7460c3202370fc29112be2e8a591c3b8161 Mon Sep 17 00:00:00 2001
From: drlyamzin
Date: Wed, 29 Jan 2025 06:54:43 +0000
Subject: [PATCH 1/2] enabling precipitation inference: initial code corrections

---
 README.md                               | 22 ++++++++++++++
 config/AFNO.yaml                        | 39 +++++++++++++++----------
 data_process/parallel_copy_small_set.py | 27 +++++++++++++++++
 docker/Dockerfile                       |  9 +++---
 utils/data_loader_multifiles.py         |  1 +
 5 files changed, 79 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 705f03a..12ed099 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,25 @@
+# Recursive Forked Version
+
+0. Clone the repository:
+
+```
+git clone https://github.com/recursiveai/FourCastNet
+cd FourCastNet
+```
+
+1. Build and run the container.
+
+NOTE: Assumes the nvidia runtime is available for docker.
+
+```docker build -f docker/Dockerfile -t fourcastnet .```
+```docker run --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 --runtime nvidia -v ${PWD}:/workspace -it fourcastnet:latest```
+
+2. Copy data and checkpoints from the bucket.
+
+NOTE: Checkpoints are 1 GB each and take time to download.
+
+```bash download_data.sh```
+
 # FourCastNet
 ![nvidia](assets/nvidia.png) ![nersc](assets/nersc.png)
diff --git a/config/AFNO.yaml b/config/AFNO.yaml
index ef42448..9039768 100644
--- a/config/AFNO.yaml
+++ b/config/AFNO.yaml
@@ -7,8 +7,8 @@ full_field: &FULL_FIELD
   dt: 1 # how many timesteps ahead the model will predict
   n_history: 0 #how many previous timesteps to consider
   prediction_type: 'iterative'
-  prediction_length: 41 #applicable only if prediction_type == 'iterative'
-  n_initial_conditions: 5 #applicable only if prediction_type == 'iterative'
+  prediction_length: 4 #applicable only if prediction_type == 'iterative'
+  n_initial_conditions: 1 #applicable only if prediction_type == 'iterative'
   ics_type: "default"
   save_raw_forecasts: !!bool True
   save_channel: !!bool False
@@ -35,11 +35,11 @@
   normalization: 'zscore' #options zscore (minmax not supported)
   train_data_path: '/pscratch/sd/j/jpathak/wind/train'
   valid_data_path: '/pscratch/sd/j/jpathak/wind/test'
-  inf_data_path: '/pscratch/sd/j/jpathak/wind/out_of_sample' # test set path for inference
+  inf_data_path: "/workspace/data" # test set path for inference; the repo root (FourCastNet/) is mounted at /workspace/
   exp_dir: '/pscratch/sd/j/jpathak/ERA5_expts_gtc/wind'
-  time_means_path: '/pscratch/sd/j/jpathak/wind/time_means.npy'
-  global_means_path: '/pscratch/sd/j/jpathak/wind/global_means.npy'
-  global_stds_path: '/pscratch/sd/j/jpathak/wind/global_stds.npy'
+  time_means_path: "/workspace/data/stats/time_means.npy"
+  global_means_path: "/workspace/data/stats/global_means.npy"
+  global_stds_path: "/workspace/data/stats/global_stds.npy"

   orography: !!bool False
   orography_path: None
@@ -73,10 +73,11 @@ afno_backbone: &backbone
   exp_dir: '/pscratch/sd/s/shas1693/results/era5_wind'
   train_data_path: '/pscratch/sd/s/shas1693/data/era5/train'
   valid_data_path: '/pscratch/sd/s/shas1693/data/era5/test'
-  inf_data_path: '/pscratch/sd/s/shas1693/data/era5/out_of_sample'
-  time_means_path: '/pscratch/sd/s/shas1693/data/era5/time_means.npy'
-  global_means_path: '/pscratch/sd/s/shas1693/data/era5/global_means.npy'
-  global_stds_path: '/pscratch/sd/s/shas1693/data/era5/global_stds.npy'
+  inf_data_path: "/workspace/data"
+  time_means_path: "/workspace/data/stats/time_means.npy" # backbone time means
+  global_means_path: "/workspace/data/stats/global_means.npy"
+  global_stds_path: "/workspace/data/stats/global_stds.npy"
+
 afno_backbone_orography: &backbone_orography
   <<: *backbone
@@ -120,12 +121,20 @@ precip: &precip
   out_channels: [0]
   nettype: 'afno'
   nettype_wind: 'afno'
-  log_to_wandb: !!bool True
+  log_to_wandb: !!bool False
   lr: 2.5E-4
   batch_size: 64
   max_epochs: 25
-  precip: '/pscratch/sd/p/pharring/ERA5/precip/total_precipitation'
-  time_means_path_tp: '/pscratch/sd/p/pharring/ERA5/precip/total_precipitation/time_means.npy'
-  model_wind_path: '/pscratch/sd/s/shas1693/results/era5_wind/afno_backbone_finetune/0/training_checkpoints/best_ckpt.tar'
+  precip: "/workspace" # parent directory of the "out_of_sample" test data directory
+  time_means_path_tp: "/workspace/data/stats/time_means_tp.npy"
+  model_wind_path: "/workspace/checkpoints/precip.ckpt" # checkpoint loaded as the wind (backbone) model
   precip_eps: !!float 1e-5
-
+  # consider moving the following to constants
+  era5_source: "gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3/"
+  era5_vars: ['geopotential', 'temperature', 'u_component_of_wind',
+              'v_component_of_wind', '2m_dewpoint_temperature',
+              '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature',
+              'mean_sea_level_pressure', 'surface_pressure', 'total_column_water_vapour']
+  time_sel: ["2023-01-01T00:00:00", "2023-01-01T06:00:00", "2023-01-01T12:00:00", "2023-01-01T18:00:00"]
+  pressure_level: [50, 500, 850, 1000]
diff --git a/data_process/parallel_copy_small_set.py b/data_process/parallel_copy_small_set.py
index 20c9e03..f81603e 100644
--- a/data_process/parallel_copy_small_set.py
+++ b/data_process/parallel_copy_small_set.py
@@ -106,6 +106,33 @@ def writetofile(src, dest, channel_idx, varslist, src_idx=0, frmt='nc'):
         mins = (ttot - 3600*hrs)//60
         secs = (ttot - 3600*hrs - 60*mins)
     channel_idx += 1
+
+
+def writetofile_simplest(src, dest, channel_idx, variable_name):
+    """Write ERA5 data from an nc file to hdf5, with channels ordered as FourCastNet expects.
+
+    Args:
+        src - str: path to the source nc file
+        dest - str: path to the destination hdf5 file
+        channel_idx - int: index of the channel in the target hdf5 file
+        variable_name - str: variable to copy from the source file
+    """
+    batch = 4
+    nfeatures = 20
+    latlon = (721, 1440)
+
+    with h5py.File(dest, 'a') as fdest:
+        # create the target dataset on first use
+        if "fields" not in fdest:
+            shape = (batch, nfeatures, *latlon)
+            dtype = "float32"
+            fdest.create_dataset("fields", shape, dtype=dtype)
+
+        # copy one variable into its channel slot; the source file is closed on exit
+        with DS(src, 'r', format="NETCDF4") as fsrc:
+            fdest['fields'][:, channel_idx, :, :] = fsrc.variables[variable_name][:]
+
+
 filestr = 'oct_2021_19_31'
 dest = '/global/cscratch1/sd/jpathak/21var/oct_2021_19_21.h5'

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 90985d2..300d249 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -14,7 +14,11 @@ RUN pip install wandb && \
     pip install ruamel.yaml && \
     pip install --upgrade tqdm && \
     pip install timm && \
-    pip install einops
+    pip install einops && \
+    pip install zarr && \
+    pip install netCDF4 && \
+    pip install xarray && \
+    pip install gcsfs

 # benchy
 RUN pip install git+https://github.com/romerojosh/benchy.git
@@ -29,11 +33,8 @@ COPY copernicus /opt/ERA5_wind/copernicus
 COPY docker /opt/ERA5_wind/docker
 COPY networks /opt/ERA5_wind/networks
 COPY utils /opt/ERA5_wind/utils
-COPY plotting /opt/ERA5_wind/plotting
-COPY mpu /opt/ERA5_wind/mpu
 COPY *.py /opt/ERA5_wind/
 COPY *.sh /opt/ERA5_wind/
-COPY perf_tests /opt/perf_tests

 # create dummy git image
 RUN cd /opt/ERA5_wind && git init
diff --git a/utils/data_loader_multifiles.py b/utils/data_loader_multifiles.py
index eae359f..c1bbefa 100644
--- a/utils/data_loader_multifiles.py
+++ b/utils/data_loader_multifiles.py
@@ -113,6 +113,7 @@ def _get_files_stats(self):
     self.files_paths = glob.glob(self.location + "/*.h5")
     self.files_paths.sort()
     self.n_years = len(self.files_paths)
+    logging.info(f"Inference files paths: {self.files_paths}")
     with h5py.File(self.files_paths[0], 'r') as _f:
         logging.info("Getting file stats from {}".format(self.files_paths[0]))
         self.n_samples_per_year = _f['fields'].shape[0]

From 6ea0c87c4fcdc3dc581c24b65a8656a321b6ed62 Mon Sep 17 00:00:00 2001
From: drlyamzin
Date: Wed, 29 Jan 2025 07:10:42 +0000
Subject: [PATCH 2/2] enabling precipitation inference: added data download script

---
 README.md                          |  8 ++++++--
 data_process/data_for_inference.py | 31 ++++++++++++++++++++++++++++++
 download_data.sh                   |  8 ++++++++
 3 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 data_process/data_for_inference.py
 create mode 100644 download_data.sh

diff --git a/README.md b/README.md
index 12ed099..4513736 100644
--- a/README.md
+++ b/README.md
@@ -11,8 +11,12 @@ cd FourCastNet

 NOTE: Assumes the nvidia runtime is available for docker.

-```docker build -f docker/Dockerfile -t fourcastnet .```
-```docker run --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 --runtime nvidia -v ${PWD}:/workspace -it fourcastnet:latest```
+```
+docker build -f docker/Dockerfile -t fourcastnet .
+```
+```
+docker run --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 --runtime nvidia -v ${PWD}:/workspace -it fourcastnet:latest
+```

 2. Copy data and checkpoints from the bucket.
diff --git a/data_process/data_for_inference.py b/data_process/data_for_inference.py
new file mode 100644
index 0000000..19cf8aa
--- /dev/null
+++ b/data_process/data_for_inference.py
@@ -0,0 +1,31 @@
+import xarray as xr
+import argparse
+import os
+from utils.YParams import YParams
+
+def get_era5_data(params):
+    """Load ERA5 data from the zarr store, subselect variables and time range, and save as an nc file.
+    """
+
+    era5_zarr = xr.open_zarr(params.era5_source)
+    all_vars = params.era5_vars
+    era5_data = era5_zarr.sel(time=params.time_sel, level=params.pressure_level)[all_vars]
+    era5_data = era5_data.compute()
+    era5_data.to_netcdf("data/era5_data.nc")
+    return
+
+
+def main(params):
+    # download and subset the data
+    get_era5_data(params)
+    # TODO: format for inference (FourCastNet channel order)
+    # TODO: save as h5
+    return
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--yaml_config", default='./config/AFNO.yaml', type=str)
+    args = parser.parse_args()
+    params = YParams(os.path.abspath(args.yaml_config), 'precip')  # the era5_* keys live under the precip config
+    main(params)
\ No newline at end of file
diff --git a/download_data.sh b/download_data.sh
new file mode 100644
index 0000000..9754228
--- /dev/null
+++ b/download_data.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# download variable stats and checkpoints from our bucket, see README for original source
+
+set -e
+mkdir -p data/stats
+mkdir -p checkpoints
+gsutil -m cp -r gs://borealis-models/fourcastnet/stats/* data/stats/
+gsutil -m cp -r gs://borealis-models/fourcastnet/checkpoints/* checkpoints/
\ No newline at end of file
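
For reference, a minimal usage sketch of the `writetofile_simplest` helper added in the first patch. It is a sketch only: the paths and the channel index are illustrative assumptions, the call is assumed to run in a scope where the function is defined (importing `parallel_copy_small_set.py` directly would also execute its module-level script code), and the channel index must match the 20-channel ordering the model was trained with.

```
# Illustrative sketch, not part of the patches: copy one single-level field
# from the downloaded nc file into its channel slot of the inference hdf5
# file. Both paths and the channel index are assumptions.
src = "data/era5_data.nc"        # produced by data_process/data_for_inference.py
dest = "data/era5_inference.h5"  # hypothetical output location under inf_data_path
writetofile_simplest(src, dest, channel_idx=2, variable_name="2m_temperature")
```

Note that this works as-is only for single-level fields: pressure-level variables in the downloaded file carry an extra `level` dimension and would not fit the `(batch, lat, lon)` slot without selecting a level first. The sketch below handles that case.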
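`main` in `data_for_inference.py` still stubs out the "format for inference" and "save as h5" steps. Below is a minimal sketch of what they might look like, under loud assumptions: `CHANNELS` (the variable names, levels, and their order) is hypothetical and must be replaced by the exact 20-channel ordering the model was trained with, and the output path is illustrative.

```
import h5py
import numpy as np
import xarray as xr

# HYPOTHETICAL channel spec: (variable_name, pressure_level or None) in model
# channel order. The true ordering is not part of these patches and must match
# the layout used to build the training data (see parallel_copy_small_set.py).
CHANNELS = [
    ("10m_u_component_of_wind", None),
    ("10m_v_component_of_wind", None),
    ("2m_temperature", None),
    ("surface_pressure", None),
    ("mean_sea_level_pressure", None),
    ("temperature", 850),
    ("u_component_of_wind", 1000),
    # ... remaining channels, in training order
]

def format_for_inference(src="data/era5_data.nc", dest="data/era5_inference.h5"):
    """Sketch: pack the downloaded variables into the (time, channel, lat, lon)
    hdf5 layout read by utils/data_loader_multifiles.py."""
    ds = xr.open_dataset(src)
    with h5py.File(dest, "a") as fdest:
        if "fields" not in fdest:
            fdest.create_dataset("fields", (4, 20, 721, 1440), dtype="float32")
        for channel_idx, (name, level) in enumerate(CHANNELS):
            da = ds[name]
            if level is not None:
                # pressure-level variables carry a `level` dimension
                da = da.sel(level=level)
            fdest["fields"][:, channel_idx, :, :] = da.values.astype(np.float32)
```

With all 20 channels filled in, the file lands under the configured `inf_data_path` ("/workspace/data", since the repo root is mounted at /workspace/), the loader logs its path via the line added in the first patch, and `fields` comes out as (4, 20, 721, 1440): the four timestamps in `time_sel` by the model's 20 input channels.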