{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0",
   "metadata": {},
   "source": [
    "# Why meteora?\n",
    "\n",
    "While the API (and especially the underlying code) of meteora may appear rather abstract, the library serves a very simple purpose: making retrieving data from meteorological stations from different sources *as easy as possible* (or at least, this is what we intend).\n",
    "\n",
    "Let us illustrate the purpose of meteora with a very simple use case. Imagine you want to get meteorological observations for the Zurich (Switzerland) area. A good starting point is always [the Global Historical Climatology Network hourly (GHCNh)](https://www.ncei.noaa.gov/products/global-historical-climatology-network-hourly) dataset:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1",
   "metadata": {
    "tags": [
     "hide-input"
    ]
   },
   "outputs": [],
   "source": [
    "import datetime as dt\n",
    "import os\n",
    "import types\n",
    "\n",
    "import contextily as cx\n",
    "import geopandas as gpd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import xarray as xr\n",
    "import xclim.indices as xci\n",
    "from dask import diagnostics\n",
    "from matplotlib import cm\n",
    "from matplotlib import colors as mcolors\n",
    "\n",
    "from meteora import clients, climate_indices, settings\n",
    "\n",
    "\n",
    "# Suppress Dask progress bars used by the GHCNh client.\n",
    "class _NoProgressBar:\n",
    "    def __init__(self, *args, **kwargs):\n",
    "        pass\n",
    "\n",
    "    def __enter__(self):\n",
    "        return self\n",
    "\n",
    "    def __exit__(self, exc_type, exc, tb):\n",
    "        return False\n",
    "\n",
    "    def register(self):\n",
    "        pass\n",
    "\n",
    "    def unregister(self):\n",
    "        pass\n",
    "\n",
    "\n",
    "diagnostics.ProgressBar = _NoProgressBar\n",
    "\n",
    "\n",
    "def _get_first_crs(client_dict):\n",
    "    return list(client_dict.values())[0].CRS\n",
    "\n",
    "\n",
    "def plot_single_axis(client_dict, *, crs=None, ax=None):\n",
    "    \"\"\"Plot stations from different clients in a single figure.\"\"\"\n",
    "    if crs is None:\n",
    "        crs = _get_first_crs(client_dict)\n",
    "    if ax is None:\n",
    "        _, ax = plt.subplots()\n",
    "\n",
    "    pd.concat(\n",
    "        [\n",
    "            client.stations_gdf.assign(source=client_label).to_crs(crs)\n",
    "            for client_label, client in client_dict.items()\n",
    "        ]\n",
    "    ).plot(\"source\", legend=True, ax=ax)\n",
    "    cx.add_basemap(ax, crs=crs, attribution=False)\n",
    "\n",
    "    return ax\n",
    "\n",
    "\n",
    "def plot_multi_axes(client_dict, *, crs=None, col_wrap=2, colors=None):\n",
    "    \"\"\"Plot stations from different clients in a grid of subplots.\"\"\"\n",
    "    n_rows = len(client_dict) // col_wrap\n",
    "    if crs is None:\n",
    "        crs = _get_first_crs(client_dict)\n",
    "    if len(client_dict) % col_wrap != 0:\n",
    "        n_rows += 1\n",
    "    if colors is None:\n",
    "        colors = sns.color_palette()\n",
    "    fig, axes = plt.subplots(\n",
    "        nrows=n_rows,\n",
    "        ncols=col_wrap,\n",
    "        sharex=True,\n",
    "        sharey=True,\n",
    "        figsize=(col_wrap * figwidth, n_rows * figheight),\n",
    "    )\n",
    "    for (client_label, client), ax, color in zip(\n",
    "        client_dict.items(), axes.flat, colors\n",
    "    ):\n",
    "        client.stations_gdf.to_crs(crs).plot(ax=ax, color=color)\n",
    "        ax.set_title(client_label)\n",
    "\n",
    "    xmin, xmax = np.inf, -np.inf\n",
    "    ymin, ymax = np.inf, -np.inf\n",
    "    for ax in axes.flat:\n",
    "        xmin = min(xmin, ax.get_xlim()[0])\n",
    "        xmax = max(xmax, ax.get_xlim()[1])\n",
    "        ymin = min(ymin, ax.get_ylim()[0])\n",
    "        ymax = max(ymax, ax.get_ylim()[1])\n",
    "\n",
    "    for ax in axes.flat:\n",
    "        ax.set_xlim(xmin, xmax)\n",
    "        ax.set_ylim(ymin, ymax)\n",
    "        cx.add_basemap(ax, crs=crs, attribution=False)\n",
    "\n",
    "    return fig\n",
    "\n",
    "\n",
    "def plot_ts_by_source(ts_df, variables, *, hue=\"source\"):\n",
    "    \"\"\"Plot time series for multiple variables with a shared legend.\"\"\"\n",
    "    fig, axes = plt.subplots(\n",
    "        ncols=len(variables),\n",
    "        sharex=True,\n",
    "        sharey=False,\n",
    "        figsize=(2 * figwidth, figheight),\n",
    "    )\n",
    "    legend_flags = [False] * len(variables)\n",
    "    if legend_flags:\n",
    "        legend_flags[-1] = True\n",
    "    plot_df = ts_df.reset_index()\n",
    "    for variable, ax, legend in zip(variables, axes, legend_flags):\n",
    "        sns.lineplot(plot_df, x=\"time\", y=variable, hue=hue, ax=ax, legend=legend)\n",
    "        ax.tick_params(axis=\"x\", labelrotation=45)\n",
    "    if legend_flags:\n",
    "        sns.move_legend(axes[-1], \"center left\", bbox_to_anchor=(1.05, 0.5))\n",
    "    fig.tight_layout()\n",
    "    return fig\n",
    "\n",
    "\n",
    "def get_seasonal_ts_df(client, start_year, end_year, start_month, end_month, variables):\n",
    "    \"\"\"Get multi-year seasonal time series data frame.\"\"\"\n",
    "    return pd.concat(\n",
    "        [\n",
    "            client.get_ts_df(\n",
    "                variables,\n",
    "                dt.date(year, start_month, 1),\n",
    "                dt.datetime.combine(\n",
    "                    dt.date(year, end_month + 1, 1) - dt.timedelta(days=1),\n",
    "                    dt.time.max,\n",
    "                ),\n",
    "            )\n",
    "            for year in range(start_year, end_year + 1)\n",
    "        ],\n",
    "        axis=\"rows\",\n",
    "    )\n",
    "\n",
    "\n",
    "def plot_tn_facetgrid(\n",
    "    tn_gdf,\n",
    "    *,\n",
    "    vmin=None,\n",
    "    vmax=None,\n",
    "    cmap=None,\n",
    "    colorbar_label=\"N. tropical nights\",\n",
    "    **plot_kwargs,\n",
    "):\n",
    "    \"\"\"Plot tropical nights in a FacetGrid with a shared colorbar.\"\"\"\n",
    "    with sns.axes_style(\"white\"):\n",
    "        g = sns.FacetGrid(tn_gdf, row=\"source\", col=\"time\")\n",
    "\n",
    "    def _gdf_plot(data, **kwargs):\n",
    "        _kwargs = kwargs.copy()\n",
    "        _kwargs.pop(\"color\", None)\n",
    "        data.plot(\"tn\", ax=plt.gca(), **_kwargs)\n",
    "\n",
    "    g.map_dataframe(_gdf_plot, vmin=vmin, vmax=vmax, cmap=cmap, **plot_kwargs)\n",
    "\n",
    "    for ax in g.axes.flat:\n",
    "        cx.add_basemap(ax, crs=tn_gdf.crs, attribution=False)\n",
    "\n",
    "    for ax in g.axes.flat:\n",
    "        ax.axes.get_xaxis().set_visible(False)\n",
    "        ax.axes.get_yaxis().set_ticks([])\n",
    "        for spine in [\"bottom\", \"left\"]:\n",
    "            ax.spines[spine].set_visible(False)\n",
    "\n",
    "    g.set_titles(col_template=\"{col_name}\", row_template=\"{row_name}\")\n",
    "    for ax_row in g.axes:\n",
    "        ax_row[0].set_ylabel(ax_row[0].get_title().split(\" | \")[0])\n",
    "    for ax in g.axes[0]:\n",
    "        ax.set_title(ax.get_title().split(\" | \")[1])\n",
    "    for ax_row in g.axes[1:]:\n",
    "        for ax in ax_row:\n",
    "            ax.set_title(\"\")\n",
    "\n",
    "    g.tight_layout()\n",
    "    norm = mcolors.Normalize(vmin=vmin, vmax=vmax)\n",
    "    sm = cm.ScalarMappable(norm=norm, cmap=cmap)\n",
    "    cax = g.figure.add_axes([1, 0.3, 0.02, 0.4])\n",
    "    g.figure.colorbar(sm, cax=cax, label=colorbar_label)\n",
    "\n",
    "    return g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2",
   "metadata": {},
   "outputs": [],
   "source": [
    "region = \"Zurich, Switzerland\"\n",
    "\n",
    "netatmo_client_id = os.getenv(\"NETATMO_CLIENT_ID\", default=\"\")\n",
    "netatmo_client_secret = os.getenv(\"NETATMO_CLIENT_SECRET\", default=\"\")\n",
    "netatmo_stations_filepath = \"data/zurich-netatmo-cws.gpkg\"\n",
    "era5_filepath = \"data/8.34-47.28-8.67-47.54_era5_2m_temperature_m06-m08_2022-2024.nc\"\n",
    "crs = \"epsg:4326\"\n",
    "\n",
    "# viz args\n",
    "figwidth, figheight = plt.rcParams[\"figure.figsize\"]\n",
    "cmap = \"coolwarm\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3",
   "metadata": {},
   "outputs": [],
   "source": [
    "ghcnh_client = clients.GHCNHourlyClient(region)\n",
    "ax = ghcnh_client.stations_gdf.plot()\n",
    "cx.add_basemap(ax, crs=ghcnh_client.stations_gdf.crs, attribution=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4",
   "metadata": {},
   "source": [
    "*(C) OpenStreetMap contributors, Tiles style by Humanitarian OpenStreetMap Team hosted by OpenStreetMap France*\n",
    "\n",
    "What if we wanted more stations? In this case, we could also try the [Swiss Federal Office of Meteorology and Climatology (MeteoSwiss)](https://www.meteoswiss.admin.ch) via the `MeteoSwissClient`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5",
   "metadata": {},
   "outputs": [],
   "source": [
    "meteoswiss_client = clients.MeteoSwissClient(region)\n",
    "_ = plot_single_axis({\"GHCNh\": ghcnh_client, \"MeteoSwiss\": meteoswiss_client})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6",
   "metadata": {},
   "source": [
    "*(C) OpenStreetMap contributors, Tiles style by Humanitarian OpenStreetMap Team hosted by OpenStreetMap France*\n",
    "\n",
    "As we can see, using both clients improves the spatial density of the stations. What if we wanted even more? We can add the data from the Office for Waste, Water, Energy and Air (AWEL) of the canton of Zurich via the `AWELClient`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7",
   "metadata": {},
   "outputs": [],
   "source": [
    "awel_client = clients.AWELClient(region)\n",
    "client_dict = {\n",
    "    \"GHCNh\": ghcnh_client,\n",
    "    \"MeteoSwiss\": meteoswiss_client,\n",
    "    \"AWEL\": awel_client,\n",
    "}\n",
    "ax = plot_single_axis(\n",
    "    client_dict,\n",
    ")\n",
    "sns.move_legend(ax, \"center left\", bbox_to_anchor=(1, 0.5))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8",
   "metadata": {},
   "source": [
    "*(C) OpenStreetMap contributors, Tiles style by Humanitarian OpenStreetMap Team hosted by OpenStreetMap France*\n",
    "\n",
    "Can we still improve the spatial density of stations? In many cases, yes - enter citizen weather stations (CWS). Meteora features the `NetatmoClient`, which makes it easy to access public data from Netatmo weather stations (one of the major CWS providers). Let us focus on the city of Zurich:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9",
   "metadata": {},
   "outputs": [],
   "source": [
    "use_netatmo_files = not netatmo_client_id or not netatmo_client_secret\n",
    "\n",
    "if use_netatmo_files:\n",
    "    # emulate the client `stations_gdf` attribute\n",
    "    netatmo_client = types.SimpleNamespace(\n",
    "        stations_gdf=gpd.read_file(netatmo_stations_filepath).set_index(\n",
    "            settings.STATIONS_ID_COL\n",
    "        )\n",
    "    )\n",
    "else:\n",
    "    netatmo_client = clients.NetatmoClient(\n",
    "        region, netatmo_client_id, netatmo_client_secret\n",
    "    )\n",
    "\n",
    "client_dict = {\n",
    "    \"GHCNh\": ghcnh_client,\n",
    "    \"MeteoSwiss\": meteoswiss_client,\n",
    "    \"AWEL\": awel_client,\n",
    "    \"Netatmo\": netatmo_client,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = plot_multi_axes(client_dict)\n",
    "fig.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "11",
   "metadata": {},
   "source": [
    "*(C) OpenStreetMap contributors, Tiles style by Humanitarian OpenStreetMap Team hosted by OpenStreetMap France*\n",
    "\n",
    "As we can see, CWS can vastly improve the spatial density of weather observation, especially in urban areas of Europe and North America. Note that it is strongly recommended to quality-control (QC) CWS data before any climatological analysis - see [a section dedicated to CWS QC with Meteora](netatmo-qc.ipynb) for more details on the subject.\n",
    "\n",
    "To sum up: despite great standardization efforts of global climatological datasets such as GHCNh, there can be many other sources of meteorological data. *The central objective of meteora is to provide a standardized API and data representation format for meteorological stations data in Python*.\n",
    "\n",
    "Then, given a list of climate variables and time period of interest, assemble the observations from the above sources into a single data frame:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12",
   "metadata": {},
   "outputs": [],
   "source": [
    "variables = [\"temperature\", \"relative_humidity\"]\n",
    "start = \"08-13-2021\"\n",
    "end = \"08-15-2021\"\n",
    "\n",
    "# from now on compare MeteoSwiss and AWEL data only\n",
    "ts_client_dict = {\n",
    "    \"MeteoSwiss\": meteoswiss_client,\n",
    "    \"AWEL\": awel_client,\n",
    "}\n",
    "\n",
    "ts_df = pd.concat(\n",
    "    [\n",
    "        ts_client_dict[client_label]\n",
    "        .get_ts_df(variables, start, end)\n",
    "        .assign(source=client_label)\n",
    "        for client_label in ts_client_dict\n",
    "    ]\n",
    ")\n",
    "_ = plot_ts_by_source(ts_df, variables)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "13",
   "metadata": {},
   "source": [
    "## Why this matters? an example on computing urban climate indices\n",
    "\n",
    "Nowadays the computation of climate indices often relies on global gridded products such as ERA5 reanalysis. However, these datasets can have major limitations, especially regarding the smoothing of local weather weather extremes due to interpolation (both spatial and temporal) {cite}`wagner2025stationbench`. These issues are exacerbated by the major spatial gaps in the underlying climate observation networks in developing regions such as Africa {cite}`wmo2020gbon,vancutsem2010modis,vandegiesen2014tahmo`. Similarly, the observation networks in urban areas are too spatially sparse to accurately capture urban climate phenomena {cite}`muller2013sensors,baklanov2018urban`. Accordingly, reanalysis datasets have a rather coarse spatial resolution (e.g. 31 km for ERA5) which is likely too coarse given the spatial heterogeneity of cities and their impact on local climatic conditions.\n",
    "\n",
    "To overcome this, Meteora allows to easily retrieve data from multiple meteorological stations networks, which can then be used to compute climate indices at a higher spatial resolution. We will illustrate this by computing the number of tropical nights (i.e. nights where the minimum temperature remains above 20 °C) for June, July and August (JJA) for the years 2022 to 2024 using both ERA5 reanalysis data (downloaded with the [\"A01 - ERA5 data download\" notebook](a01-era5-download.ipynb)) as well as local meteorological stations data from MeteoSwiss and AWEL:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14",
   "metadata": {},
   "outputs": [],
   "source": [
    "# input data (temporal range)\n",
    "start_year = 2022\n",
    "end_year = 2024\n",
    "start_month = 6\n",
    "end_month = 8\n",
    "# tropical night parameters\n",
    "thresh = \"20.0 degC\"\n",
    "freq = \"YS\"\n",
    "\n",
    "# read ERA5 for the study region\n",
    "era5_ds = xr.open_dataset(era5_filepath)\n",
    "\n",
    "# compute tropical nights with xclim\n",
    "era5_tn = xci.tn_days_above(\n",
    "    era5_ds.resample(time=\"D\").min()[\"t2m\"], thresh=thresh, freq=freq\n",
    ").rename(\"N. tropical nights\")\n",
    "era5_tn = era5_tn.assign_coords(year=era5_tn[\"time\"].dt.year).swap_dims(\n",
    "    {\"time\": \"year\"}\n",
    ")\n",
    "# plot the results\n",
    "era5_tn.plot(\n",
    "    col=\"year\",\n",
    "    cmap=cmap,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "15",
   "metadata": {},
   "source": [
    "We will now use Meteora to retrieve temperature data from MeteoSwiss and AWEL stations for the same period and compute tropical nights from these observations:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16",
   "metadata": {},
   "outputs": [],
   "source": [
    "tn_dfs = []\n",
    "for label, client in ts_client_dict.items():\n",
    "    ts_df = get_seasonal_ts_df(\n",
    "        client,\n",
    "        start_year,\n",
    "        end_year,\n",
    "        start_month,\n",
    "        end_month,\n",
    "        \"temperature\",\n",
    "    )\n",
    "    tn_df = (\n",
    "        climate_indices.tn_days_above(\n",
    "            ts_df,\n",
    "            thresh=thresh,\n",
    "            freq=freq,\n",
    "        )\n",
    "        .stack()\n",
    "        .rename(\"tn\")\n",
    "        .reset_index()\n",
    "        .assign(source=label)\n",
    "    )\n",
    "    # set geometry\n",
    "    tn_df[\"geometry\"] = tn_df[\"station_id\"].map(\n",
    "        client.stations_gdf[\"geometry\"].to_crs(crs)\n",
    "    )\n",
    "    # set time as year only\n",
    "    tn_df[\"time\"] = tn_df[\"time\"].dt.year\n",
    "    # append it to the list\n",
    "    tn_dfs.append(tn_df)\n",
    "# put it together as a geo-data frame\n",
    "tn_gdf = gpd.GeoDataFrame(pd.concat(tn_dfs, ignore_index=True))\n",
    "\n",
    "# get global min/max values\n",
    "vmin = tn_gdf[\"tn\"].min()\n",
    "vmax = tn_gdf[\"tn\"].max()\n",
    "\n",
    "plot_tn_facetgrid(tn_gdf, vmin=vmin, vmax=vmax, cmap=cmap, edgecolor=\"k\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "17",
   "metadata": {},
   "source": [
    "*(C) OpenStreetMap contributors, Tiles style by Humanitarian OpenStreetMap Team hosted by OpenStreetMap France*\n",
    "\n",
    "In order to better appreciate the difference, we will plot the ERA5-based number of tropical nights using the same color scale as above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "18",
   "metadata": {},
   "outputs": [],
   "source": [
    "# note that vmin and vmax come from the cell above in order to use the same color scale\n",
    "grid = era5_tn.plot(\n",
    "    col=\"year\",\n",
    "    cmap=cmap,\n",
    "    vmin=vmin,\n",
    "    vmax=vmax,\n",
    "    add_colorbar=False,\n",
    ")\n",
    "\n",
    "cbar = grid.fig.colorbar(grid._mappables[0], ax=grid.axs.ravel().tolist())\n",
    "cbar.set_label(\"N. tropical nights\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "19",
   "metadata": {},
   "source": [
    "This shows how ERA5 underestimates the number of tropical nights compared to local meteorological stations and highlights the added value of using local meteorological stations data to compute urban climate indices.\n",
    "\n",
    "## References\n",
    "\n",
    "```{bibliography}\n",
    ":filter: docname in docnames\n",
    "```"
   ]
  }
 ],
 "metadata": {
  "jupytext": {
   "cell_metadata_filter": "tags,-all"
  },
  "kernelspec": {
   "display_name": "Python (Pixi)",
   "language": "python",
   "name": "pixi-kernel-python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.11"
  },
  "pixi-kernel": {
   "environment": "doc"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}