A01 - ERA5 data download
This notebook downloads ERA5 air temperature from the ECMWF Data Stores API for a
given bounding box and period. It uses pooch to manage a local cache and avoid
re-downloading files.
TODO: try using Earthmover’s arraylake as an alternative approach to access ERA5 data https://docs.earthmover.io/sample-data/era5
import os
from os import path
import pooch
import xarray as xr
from ecmwf.datastores import Client
Provide ecmwf_key below or set ECMWF_DATASTORES_KEY/CDSAPI_KEY in the environment.
The default ecmwf_url points to the CDS API base (https://cds.climate.copernicus.eu/api).
# bounding box of the study area; unpacked later as (west, south, east, north)
region = [8.34, 47.28, 8.67, 47.54]

# study period: first and last year (both inclusive)
start_year = 2022
end_year = 2024

# months queried within each year: first and last month (both inclusive)
start_month = 6
end_month = 8

# ECMWF Data Stores Service
ecmwf_key = None  # optional override (e.g., "<uid>:<api_key>")
ecmwf_url = "https://cds.climate.copernicus.eu/api"
era5_dataset = "reanalysis-era5-single-levels"
era5_variable = "2m_temperature"
cache_dir = None  # optional, otherwise use pooch default

# output file: its name encodes the bounding box, the variable, the month
# range and the year range
era5_filename = (
    "-".join(map(str, region))
    + f"_era5_{era5_variable}_"
    + f"m{start_month:02d}-m{end_month:02d}_{start_year}-{end_year}.nc"
)
dst_dir = "data"
dst_filepath = path.join(dst_dir, era5_filename)
Now we will download our requested ERA5 data (or retrieve it from the local cache if it was already downloaded):
# Resolve the API key: the explicit `ecmwf_key` override wins, then the
# ECMWF_DATASTORES_KEY and CDSAPI_KEY environment variables, in that order.
_ecmwf_key = ecmwf_key or os.getenv("ECMWF_DATASTORES_KEY") or os.getenv("CDSAPI_KEY")

# Truthiness check rather than `is None`: when every source is unset or blank
# the `or` chain above may yield an empty string (e.g. CDSAPI_KEY exported but
# empty), which must also count as "no key provided".
if not _ecmwf_key:
    print("ECMWF API key not provided; skipping ERA5 download")
    era5_ds = None
else:
    west, south, east, north = region
    era5_request = {
        "product_type": "reanalysis",
        "variable": era5_variable,
        # every year/month of the (inclusive) study period
        "year": [str(y) for y in range(start_year, end_year + 1)],
        "month": [f"{m:02d}" for m in range(start_month, end_month + 1)],
        # every day number 01-31 and every full hour 00:00-23:00
        "day": [f"{d:02d}" for d in range(1, 32)],
        "time": [f"{h:02d}:00" for h in range(24)],
        # the request lists the box as north, west, south, east
        "area": [north, west, south, east],
        "data_format": "netcdf",
        "download_format": "unarchived",
    }
    client = Client(url=ecmwf_url, key=_ecmwf_key)

    def _era5_downloader(url, output_file, pooch_obj, check_only=False):
        """Fetch the ERA5 request into `output_file` on behalf of pooch.

        `url` and `pooch_obj` are part of the downloader call signature but are
        not used here: the data is obtained by submitting `era5_request` for
        `era5_dataset` through `client`. When `check_only` is true nothing is
        downloaded and the function returns immediately.
        """
        if check_only:
            return
        client.retrieve(era5_dataset, era5_request, target=str(output_file))

    # pooch manages the cached file under `fname`; the actual transfer is
    # delegated to the custom downloader above, which ignores `url`.
    era5_path = pooch.retrieve(
        url=ecmwf_url,
        known_hash=None,
        fname=era5_filename,
        path=cache_dir,
        downloader=_era5_downloader,
    )
    era5_ds = xr.open_dataset(era5_path)
# Only post-process and save when the download step produced a dataset
# (it is None when no API key was available).
if era5_ds is not None:
    # rename valid_time to time
    era5_ds = era5_ds.rename({"valid_time": "time"})
    # make sure the destination directory exists before writing, since writing
    # the file does not create missing parent directories
    _dst_parent = path.dirname(dst_filepath)
    if _dst_parent:
        os.makedirs(_dst_parent, exist_ok=True)
    # and dump it to a netcdf file:
    era5_ds.to_netcdf(dst_filepath)
ECMWF API key not provided; skipping ERA5 download