A01 - ERA5 data download

This notebook downloads ERA5 air temperature from the ECMWF Data Stores API for a given bounding box and period. It uses pooch to manage a local cache and avoid re-downloading files.

TODO: try using Earthmover’s arraylake as an alternative approach to access ERA5 data (see https://docs.earthmover.io/sample-data/era5).

import os
from os import path

import pooch
import xarray as xr
from ecmwf.datastores import Client

Provide ecmwf_key below or set ECMWF_DATASTORES_KEY/CDSAPI_KEY in the environment. The default ecmwf_url points to the CDS API base (https://cds.climate.copernicus.eu/api).

# --- study area -----------------------------------------------------------
# bounding box given as [west, south, east, north]
region = [8.34, 47.28, 8.67, 47.54]

# --- study period ---------------------------------------------------------
# first and last year (both inclusive)
start_year = 2022
end_year = 2024
# first and last month (both inclusive) queried within each year
start_month = 6
end_month = 8

# --- ECMWF Data Stores Service --------------------------------------------
ecmwf_key = None  # optional override (e.g., "<uid>:<api_key>")
ecmwf_url = "https://cds.climate.copernicus.eu/api"
era5_dataset = "reanalysis-era5-single-levels"
era5_variable = "2m_temperature"
cache_dir = None  # optional, otherwise use pooch default

# --- output file ----------------------------------------------------------
# the file name encodes the bounding box, variable, month range and year range
dst_dir = "data"
era5_filename = (
    f"{'-'.join(map(str, region))}_era5_{era5_variable}_"
    f"m{start_month:02d}-m{end_month:02d}_{start_year}-{end_year}.nc"
)
dst_filepath = path.join(dst_dir, era5_filename)

Now we will download our requested ERA5 data (or retrieve it from the local cache if it has already been downloaded):

# Resolve the API key: the explicit `ecmwf_key` setting takes precedence, then
# the ECMWF_DATASTORES_KEY and CDSAPI_KEY environment variables, in that order.
_ecmwf_key = ecmwf_key or os.getenv("ECMWF_DATASTORES_KEY") or os.getenv("CDSAPI_KEY")

# Truthiness test rather than `is None`: an environment variable that is set
# but empty makes the chain above evaluate to "", which must be treated as
# "no key" instead of being handed to the client.
if not _ecmwf_key:
    print("ECMWF API key not provided; skipping ERA5 download")
    era5_ds = None
else:
    # `region` is ordered west, south, east, north, whereas the request's
    # "area" entry is built as north, west, south, east.
    west, south, east, north = region
    era5_request = {
        "product_type": "reanalysis",
        "variable": era5_variable,
        # every year/month of the study period (bounds inclusive)
        "year": [str(y) for y in range(start_year, end_year + 1)],
        "month": [f"{m:02d}" for m in range(start_month, end_month + 1)],
        # all days 01..31 and all hours 00:00..23:00
        "day": [f"{d:02d}" for d in range(1, 32)],
        "time": [f"{h:02d}:00" for h in range(24)],
        "area": [north, west, south, east],
        "data_format": "netcdf",
        "download_format": "unarchived",
    }

    client = Client(url=ecmwf_url, key=_ecmwf_key)

    def _era5_downloader(url, output_file, pooch_obj, check_only=False):
        """Fetch the ERA5 request through the ECMWF client on behalf of pooch.

        The signature follows the custom-downloader interface that pooch
        calls; `url` and `pooch_obj` are accepted but unused because the
        request is fully described by `era5_dataset` and `era5_request`.

        Parameters
        ----------
        url : str
            Ignored (placeholder passed through by pooch).
        output_file : path-like
            Destination path where the downloaded file is written.
        pooch_obj : object
            Ignored.
        check_only : bool, optional
            If True, return immediately without downloading anything.
        """
        if check_only:
            return
        client.retrieve(era5_dataset, era5_request, target=str(output_file))

    # pooch keys the cached file on `fname` (inside `path`, or its default
    # cache location when `cache_dir` is None); `known_hash=None` means no
    # checksum is supplied for the file. The `url` is only a placeholder
    # since the custom downloader above does the actual retrieval.
    era5_path = pooch.retrieve(
        url=ecmwf_url,
        known_hash=None,
        fname=era5_filename,
        path=cache_dir,
        downloader=_era5_downloader,
    )
    era5_ds = xr.open_dataset(era5_path)

# Post-process and persist the dataset only when the download step ran
# (`era5_ds` is None when no API key was available).
if era5_ds is not None:
    # rename valid_time to time
    era5_ds = era5_ds.rename({"valid_time": "time"})
    # make sure the output directory exists before writing: `to_netcdf` does
    # not create missing parent directories (skip when `dst_dir` is empty,
    # i.e. the file goes to the current working directory)
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)
    # and dump it to a netcdf file:
    era5_ds.to_netcdf(dst_filepath)
ECMWF API key not provided; skipping ERA5 download