{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "from glob import glob\n",
    "import xarray as xr\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "# folder that is searched recursively for *.nc files\n",
    "fdir = '/home/www/oggm/cmip5-ng'\n",
    "fl_2100 = []\n",
    "fl_2300 = []\n",
    "for dirpath, _dirnames, _filenames in os.walk(fdir):\n",
    "    for ncfile in glob(os.path.join(dirpath, '*.nc')):\n",
    "        # exclude the redundant folder!\n",
    "        if 'redundant' in ncfile:\n",
    "            continue\n",
    "        # the GCMs until 2300 are stored separately in 2300/ subfolders\n",
    "        # (the check looks for '2300' anywhere in the path)\n",
    "        if '2300' in ncfile:\n",
    "            fl_2300.append(ncfile)\n",
    "        else:\n",
    "            fl_2100.append(ncfile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one row per file; all other columns are derived from `path` below\n",
    "df_2100 = pd.DataFrame(fl_2100, columns=['path'])\n",
    "df_2300 = pd.DataFrame(fl_2300, columns=['path'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_filename_columns(df, n_parts):\n",
    "    \"\"\"Return `df` extended by the columns parsed from each file name.\n",
    "\n",
    "    The file names are expected to look like\n",
    "    `<var>_mon_<gcm>_<rcp>_<rea>[_<grid>].nc`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pd.DataFrame\n",
    "        needs a `path` column with one file path per row\n",
    "    n_parts : int\n",
    "        expected number of '_'-separated parts of the file name\n",
    "        (without its extension)\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pd.DataFrame\n",
    "        a new frame with the additional (or overwritten) columns\n",
    "        `fname`, `var`, `gcm`, `rcp` and `rea`; they are also created\n",
    "        when `df` has no rows\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    AssertionError\n",
    "        if a file name does not have `n_parts` parts or its second part\n",
    "        is not 'mon'\n",
    "    \"\"\"\n",
    "    records = []\n",
    "    for path in df['path']:\n",
    "        fname = os.path.basename(path)\n",
    "        # split only once, after removing the '.nc' extension\n",
    "        parts = os.path.splitext(fname)[0].split('_')\n",
    "        assert len(parts) == n_parts, fname\n",
    "        assert parts[1] == 'mon', fname\n",
    "        records.append((fname, parts[0], parts[2], parts[3], parts[4]))\n",
    "    parsed = pd.DataFrame(records, index=df.index,\n",
    "                          columns=['fname', 'var', 'gcm', 'rcp', 'rea'])\n",
    "    # `assign` overwrites existing columns, so re-running this cell is safe\n",
    "    return df.assign(**{col: parsed[col] for col in parsed.columns})\n",
    "\n",
    "\n",
    "df_2100 = add_filename_columns(df_2100, n_parts=6)\n",
    "# there is no _g025 in the 2300 GCM names\n",
    "df_2300 = add_filename_columns(df_2300, n_parts=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Note that the 
more recently downloaded 2300 GCMs are on a different grid than the 2100 GCMs, which are bilinearly interpolated to 2.5° (and are thus named `_g025.nc`).**\n", "Therefore, we use the `_g025.nc` files for the 2100 simulations where possible. There is just one GCM running until 2300 that is not available among the 2100 files. We will add that one to `all_gcm_list_2100.csv`." ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:    (time: 5412, bnds: 2, lat: 96, lon: 144)\n",
       "Coordinates:\n",
       "  * time       (time) object 1850-01-16 12:00:00 ... 2300-12-16 12:00:00\n",
       "  * lat        (lat) float64 -90.0 -88.11 -86.21 -84.32 ... 86.21 88.11 90.0\n",
       "  * lon        (lon) float64 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5\n",
       "Dimensions without coordinates: bnds\n",
       "Data variables:\n",
       "    time_bnds  (time, bnds) object 1850-01-01 00:00:00 ... 2301-01-01 00:00:00\n",
       "    lat_bnds   (lat, bnds) float64 -90.0 -89.05 -89.05 ... 89.05 89.05 90.0\n",
       "    lon_bnds   (lon, bnds) float64 -1.25 1.25 1.25 3.75 ... 356.2 356.2 358.8\n",
       "    pr         (time, lat, lon) float32 ...\n",
       "Attributes: (12/27)\n",
       "    institution:            Norwegian Climate Centre\n",
       "    institute_id:           NCC\n",
       "    experiment_id:          rcp45\n",
       "    source:                 NorESM1-M 2011  atmosphere: CAM-Oslo (CAM4-Oslo-n...\n",
       "    model_id:               NorESM1-M\n",
       "    forcing:                GHG, SA, Oz, Sl, Vl, BC, OC\n",
       "    ...                     ...\n",
       "    title:                  NorESM1-M model output prepared for CMIP5 RCP4.5\n",
       "    parent_experiment:      pre-industrial control\n",
       "    modeling_realm:         atmos\n",
       "    realization:            1\n",
       "    cmor_version:           2.6.0\n",
       "    modification:           Downloaded from Copernicus (Copernicus Climate Ch...
" ],
      "text/plain": [
       "\n",
       "Dimensions: (time: 5412, bnds: 2, lat: 96, lon: 144)\n",
       "Coordinates:\n",
       " * time (time) object 1850-01-16 12:00:00 ... 2300-12-16 12:00:00\n",
       " * lat (lat) float64 -90.0 -88.11 -86.21 -84.32 ... 86.21 88.11 90.0\n",
       " * lon (lon) float64 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5\n",
       "Dimensions without coordinates: bnds\n",
       "Data variables:\n",
       " time_bnds (time, bnds) object ...\n",
       " lat_bnds (lat, bnds) float64 ...\n",
       " lon_bnds (lon, bnds) float64 ...\n",
       " pr (time, lat, lon) float32 ...\n",
       "Attributes: (12/27)\n",
       " institution: Norwegian Climate Centre\n",
       " institute_id: NCC\n",
       " experiment_id: rcp45\n",
       " source: NorESM1-M 2011 atmosphere: CAM-Oslo (CAM4-Oslo-n...\n",
       " model_id: NorESM1-M\n",
       " forcing: GHG, SA, Oz, Sl, Vl, BC, OC\n",
       " ... ...\n",
       " title: NorESM1-M model output prepared for CMIP5 RCP4.5\n",
       " parent_experiment: pre-industrial control\n",
       " modeling_realm: atmos\n",
       " realization: 1\n",
       " cmor_version: 2.6.0\n",
       " modification: Downloaded from Copernicus (Copernicus Climate Ch..."
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xr.open_dataset(df_2300.loc[df_2300.gcm=='NorESM1-M'].iloc[0].path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2300 files: \n",
      "[-90. -88.10526316 -86.21052632 -84.31578947 -82.42105263\n",
      " -80.52631579 -78.63157895 -76.73684211 -74.84210526 -72.94736842]\n",
      "_g025.nc files: \n",
      "[-88.75 -86.25 -83.75 -81.25 -78.75 -76.25 -73.75 -71.25 -68.75 -66.25]\n"
     ]
    }
   ],
   "source": [
    "# here you see the different grids\n",
    "for label, files in [('2300 files: ', df_2300), ('_g025.nc files: ', df_2100)]:\n",
    "    example_path = files.loc[files.gcm == 'NorESM1-M'].iloc[0].path\n",
    "    # `with` closes the file again after the first ten latitudes are read\n",
    "    with xr.open_dataset(example_path) as ds:\n",
    "        # separate statements: a tuple of print() calls would make the\n",
    "        # cell return `(None, None)`\n",
    "        print(label)\n",
    "        print(ds.lat[:10].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_2100['interpolation'] = '_g025'  # bilinear latitude-longitude grid\n",
    "df_2300['interpolation'] = 'regular'  # \"Regular latitude-longitude grid\"\n",
    "\n",
    "\n",
    "def add_time_and_grid_columns(df):\n",
    "    \"\"\"Add the columns `y0`, `y1` and `lon_resolution` to `df` (in place).\n",
    "\n",
    "    Every file listed in the `path` column is opened once to read the year\n",
    "    of its first and last time step and the difference between its first\n",
    "    two longitudes. All three values are stored as strings.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pd.DataFrame\n",
    "        needs a `path` column with one file path per row\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    AssertionError\n",
    "        if the first time step of a file is not in month 1 or the last\n",
    "        one is not in month 12\n",
    "    \"\"\"\n",
    "    first_years, last_years, lon_steps = [], [], []\n",
    "    for path in df['path']:\n",
    "        with xr.open_dataset(path, use_cftime=True) as ds:\n",
    "            years = ds['time.year']\n",
    "            months = ds['time.month']\n",
    "            assert str(months[0].data) == '1', path\n",
    "            assert str(months[-1].data) == '12', path\n",
    "            first_years.append(str(years[0].data))\n",
    "            last_years.append(str(years[-1].data))\n",
    "            lon_steps.append('{:.2f}'.format(float(ds.lon[1] - ds.lon[0])))\n",
    "    # assign whole columns, so that they also exist if `df` has no rows\n",
    "    df['y0'] = first_years\n",
    "    df['y1'] = last_years\n",
    "    df['lon_resolution'] = lon_steps\n",
    "\n",
    "\n",
    "add_time_and_grid_columns(df_2100)\n",
    "add_time_and_grid_columns(df_2300)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CESM1-CAM5rcp26\n",
      "CESM1-CAM5rcp45\n",
      "CESM1-CAM5rcp60\n"
     ]
    }
   ],
   "source": [
    "# gcm + rcp combinations of the files until 2100, computed only once\n",
    "gcm_rcp_2100 = set(df_2100['gcm'] + df_2100['rcp'])\n",
    "# print the combinations that only exist in the files until 2300\n",
    "for gcm_rcp in (df_2300['gcm'] + df_2300['rcp']).unique():\n",
    "    if gcm_rcp not in gcm_rcp_2100:\n",
    "        print(gcm_rcp)\n",
    "# CESM1-CAM5 is missing in the GCMs until 2100\n",
    "df_2100x = pd.concat([df_2100, df_2300.loc[df_2300.gcm == 'CESM1-CAM5']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array(['r1i1p1'], dtype=object), array(['r1i1p1'], dtype=object))"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2100x.rea.unique(), df_2300.rea.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array(['rcp45', 'historicalGHG', 'rcp60', 'historicalNat', 'rcp26',\n",
       " 'rcp85'], dtype=object),\n",
       " array(['rcp26', 'rcp85', 'rcp45', 'rcp60'], dtype=object))"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2100x.rcp.unique(), df_2300.rcp.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array(['pr', 'tas'], dtype=object), array(['pr', 'tas'], dtype=object))"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2100x['var'].unique(), df_2300['var'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['NorESM1-M', 'CanESM2', 'CCSM4', 'IPSL-CM5A-LR', 'CNRM-CM5',\n",
       " 'CSIRO-Mk3-6-0', 'GISS-E2-R', 'MPI-ESM-LR', 'GFDL-CM3',\n",
       " 'GFDL-ESM2G', 'CESM1-CAM5'], dtype=object)"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2100x.gcm.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['CanESM2', 'MPI-ESM-LR', 'CESM1-CAM5', 'CSIRO-Mk3-6-0',\n",
       " 'NorESM1-M', 'CCSM4'], dtype=object)"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
"df_2300.gcm.unique()" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "assert np.all(df_2300.y1=='2300')\n", "df_2100x = df_2100x.loc[df_2100x.y1!='2005']\n", "df_2100x.to_csv(os.path.join(fdir, 'all_gcm_list_2100.csv'))\n", "df_2300.to_csv(os.path.join(fdir, 'all_gcm_list_2300.csv'))\n", "df = pd.concat([df_2100, df_2300])\n", "df.to_csv(os.path.join(fdir, 'all_gcm_list.csv'))" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "df = df.sort_values(by=['fname',\n", " 'var'], ascending=True)\n", "with open(os.path.join(fdir, 'all_gcm_table.html'), 'w') as fo:\n", " df.to_html(fo, columns=[\"fname\", \"gcm\", \"rcp\", \"rea\", \"var\", \"interpolation\", 'lon_resolution', \"y0\", \"y1\"])" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2100\n", " rcp26 rcp45 rcp60 rcp85\n", "CCSM4 X X X X\n", "CESM1-CAM5 X X X \n", "CNRM-CM5 X X X\n", "CSIRO-Mk3-6-0 X X X X\n", "CanESM2 X X X\n", "GFDL-CM3 X X X X\n", "GFDL-ESM2G X X X X\n", "GISS-E2-R X X X X\n", "IPSL-CM5A-LR X X X X\n", "MPI-ESM-LR X X X\n", "NorESM1-M X X X X\n", "\n", "\n", "2300\n", " rcp26 rcp45 rcp60 rcp85\n", "CCSM4 X X\n", "CESM1-CAM5 X X X \n", "CSIRO-Mk3-6-0 X X\n", "CanESM2 X \n", "MPI-ESM-LR X X X\n", "NorESM1-M X \n", "\n", "\n" ] } ], "source": [ "for _df,endyr in zip([df_2100x, df_2300], ['2100', '2300']):\n", " odf = pd.DataFrame()\n", " for gcm in _df.gcm.unique():\n", " s = _df.loc[_df.gcm == gcm]\n", " for rcp in s.rcp.unique():\n", " ss = s.loc[s.rcp == rcp]\n", " assert ss['var'].str.contains('pr').sum() == 1\n", " assert ss['var'].str.contains('tas').sum() == 1\n", " odf.loc[gcm, rcp] = 'X'\n", " odf = odf[sorted(odf.columns)]\n", " odf = odf.fillna('')\n", " odf = odf.sort_index()\n", " with open(os.path.join(fdir, f'gcm_table_{endyr}.html'), 'w') as fo:\n", " odf.to_html(fo)\n", " print(endyr)\n", " 
print(odf)\n", " print('\\n')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 4 }