{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "415a8b22-4c2c-4af6-8aeb-c3d57c65af12", "metadata": {}, "outputs": [], "source": [ "# updated script version for flattening W5E5 v2025.11.25\n", "# before it was 2023.3 (for 11900 glaciers not the nearest climate gridpoint was chosen in v2022.2, but should now work \n", "# in this v2023.2)\n", "\n", "#- from: https://data.isimip.org/search/query/w5e5/climate_forcing/w5e5v2.0/climate_variable/pr/climate_variable/orog/climate_variable/tas/\n", "#- download in terminal: `wget -i download_W5E5_script.txt` \n", "\n", "###\n", "# for the monthly files, I had to do that here to remove the time variable from lon/lat\n", "# with xr.open_dataset(w5e5_files[n]) as _d:\n", "# _d['latitude'] = _d.latitude.isel(time=0, drop=True)\n", "# _d['longitude'] = _d.longitude.isel(time=0, drop=True)\n", "# _d.to_netcdf(isimip3a_files[n][:-3]+'x.nc')\n", "\n", "import xarray as xr\n", "import numpy as np\n", "import pandas as pd\n", "from oggm import utils\n", "\n", "import sys\n", "import matplotlib.pyplot as plt\n", "import os\n", "\n", "version = '2025.11.25'\n", "v = '_v'+version\n", "res = 0.5" ] }, { "cell_type": "code", "execution_count": null, "id": "06aa4812-f5cd-474f-84c7-307d3b9f80b6", "metadata": {}, "outputs": [], "source": [ "\n", "\n", "#### this part here is the same for all W5E5 / ISIMIP3a/ISIMIP3b files\n", "ds_inv = xr.open_dataset('orog_W5E5v2.0.nc')\n", "ds_inv = ds_inv.rename({'lat': 'latitude'})\n", "ds_inv = ds_inv.rename({'lon': 'longitude'})\n", "ds_inv.coords['longitude'] = np.where(ds_inv.longitude.values < 0,\n", " ds_inv.longitude.values + 360,\n", " ds_inv.longitude.values)\n", "ds_inv = ds_inv.sortby(ds_inv.longitude)\n", "ds_inv.longitude.attrs['units'] = 'degrees_onlypositive'\n", "\n", "nx, ny = ds_inv.sizes['longitude'], ds_inv.sizes['latitude']\n", "# get the dataset where coordinates of glaciers are stored\n", "frgi_6 = utils.file_downloader('https://cluster.klima.uni-bremen.de/~oggm/rgi/rgi62_stats.h5')\n", "odf_6 = pd.read_hdf(frgi_6, index_col=0)\n", "#### glacier complexes\n", "frgi_7C = utils.file_downloader('https://cluster.klima.uni-bremen.de/~oggm/rgi/RGI2000-v7.0-C-global-attributes.csv')\n", "odf_7C = pd.read_csv(frgi_7C, index_col=0)\n", "#### glaciers \n", "frgi_7G = utils.file_downloader('https://cluster.klima.uni-bremen.de/~oggm/rgi/RGI2000-v7.0-G-global-attributes.csv')\n", "odf_7G = pd.read_csv(frgi_7G, index_col=0)\n", "\n", "odf_l = []\n", "for odf,n in zip([odf_6,odf_7C,odf_7G],['RGI6', 'RGI7C', 'RGI7G']):\n", " # need to repeat this for every option here to have one flattened file that has the necessary points for all three versions\n", " \n", " if (n == 'RGI7C') or (n=='RGI7G'):\n", " # has slightly different column names ... \n", " odf['CenLon'] = odf['cenlon']\n", " odf['CenLat'] = odf['cenlat']\n", " odf['Area'] = odf['area_km2']\n", " # attention the rgi62 stats are in longitude from -179 to 180 \n", " assert odf.CenLon.max() <180\n", " assert odf.CenLon.min() <-175 \n", " cenlon_for_bins = np.where(odf['CenLon'] < 0, odf['CenLon']+360, odf['CenLon'])\n", " # Nearest neighbor lookup --> W5E5 is 0.5° grid, ERA5 is 0.25° grid ... --> here we use 0.125\n", " lon_bins = np.linspace(ds_inv.longitude.data[0] - res/2, ds_inv.longitude.data[-1] + res/2, nx+1)\n", " # !!! 
    "    lat_bins = np.linspace(ds_inv.latitude.data[0] + res/2, ds_inv.latitude.data[-1] - res/2, ny+1)\n",
    "    assert np.diff(lon_bins).min() == res\n",
    "    assert np.diff(lon_bins).max() == res\n",
    "    odf['lon_id'] = np.digitize(cenlon_for_bins, lon_bins)-1\n",
    "    odf['lat_id'] = np.digitize(odf['CenLat'], lat_bins)-1\n",
    "    # if a glacier has a longitude at or above the last bin edge (360°), it should be wrapped back into the first bin;\n",
    "    # thus if lon_id == nx it actually belongs to the first bin ...\n",
    "    odf.loc[odf['lon_id'] == nx, 'lon_id'] = 0\n",
    "\n",
    "    odf['lon_val'] = ds_inv.longitude.data[odf.lon_id]\n",
    "    odf['lat_val'] = ds_inv.latitude.data[odf.lat_id]\n",
    "    # use unique grid points as index and compute the area per location\n",
    "    odf['unique_id'] = ['{:03d}_{:03d}'.format(lon, lat) for (lon, lat) in zip(odf['lon_id'], odf['lat_id'])]\n",
    "    mdf = odf.drop_duplicates(subset='unique_id').set_index('unique_id')\n",
    "    mdf['Area'] = odf.groupby('unique_id').sum()['Area']\n",
    "    if n == 'RGI7C':\n",
    "        print('{} ––– Total number of glacier complexes: {} and number of W5E5/ISIMIP3ab gridpoints with glacier complexes in them: {}'.format(n, len(odf), len(mdf)))\n",
    "    else:\n",
    "        print('{} ––– Total number of glaciers: {} and number of W5E5/ISIMIP3ab gridpoints with glaciers in them: {}'.format(n, len(odf), len(mdf)))\n",
    "\n",
    "    # at the end, we want one combined dataframe over all three RGI versions\n",
    "    odf['rgi_v'] = n\n",
    "    odf_l.append(odf.copy())\n",
    "odf_all = pd.concat(odf_l)\n",
    "mdf = odf_all.drop_duplicates(subset='unique_id').set_index('unique_id')\n",
    "mdf['Area'] = odf_all.groupby('unique_id').sum()['Area']\n",
    "print('All versions ––– Total number of glacier IDs: {} and number of W5E5/ISIMIP3ab gridpoints with glaciers in them: {}'.format(len(odf_all), len(mdf)))\n",
    "\n",
    "# this is the mask that we need to remove all non-glacierized gridpoints\n",
    "mask = np.full((ny, nx), np.nan)\n",
    "mask[mdf['lat_id'], mdf['lon_id']] = 1\n",
    "ds_inv['glacier_mask'] = (('latitude', 'longitude'), np.isfinite(mask))\n",
    "\n",
    "# check the distance to the chosen gridpoints (here for the last odf of the loop, i.e. RGI7G)\n",
    "# -> it should never be larger than half the grid-cell diagonal, i.e. sqrt(2)*res/2\n",
    "diff_lon = ds_inv.longitude.data[odf.lon_id] - odf.CenLon\n",
    "# a longitude difference of ~360° is the same as one of ~0° (wrap-around)\n",
    "diff_lon = np.where(np.abs(diff_lon - 360) < 170, diff_lon - 360, diff_lon)\n",
    "odf['ll_dist_to_point'] = ((diff_lon)**2 + (ds_inv.latitude.data[odf.lat_id] - odf.CenLat)**2)**0.5\n",
    "assert odf['ll_dist_to_point'].max() < ((res/2)**2 + (res/2)**2)**0.5\n",
    "\n",
    "### from here on it is different for every script ...\n",
    "\n",
    "\n",
    "make_daily = True\n",
    "if make_daily:\n",
    "\n",
    "    os.makedirs('tmp_pr', exist_ok=True)\n",
    "    os.makedirs('tmp_tas', exist_ok=True)\n",
    "\n",
    "    yss = [1979, 1981, 1991, 2001, 2011]\n",
    "    yee = [1980, 1990, 2000, 2010, 2019]\n",
    "    for var in ['pr', 'tas']:\n",
    "        for ys, ye in zip(yss, yee):\n",
    "            ds = xr.open_dataset('{}_W5E5v2.0_{}0101-{}1231.nc'.format(var, ys, ye))\n",
    "\n",
    "            ds = ds.rename({'lon': 'longitude'}).rename({'lat': 'latitude'})\n",
    "            ds.coords['longitude'] = np.where(ds.longitude.values < 0,\n",
    "                                              ds.longitude.values + 360,\n",
    "                                              ds.longitude.values)\n",
    "\n",
    "            ds = ds.sortby(ds.longitude)\n",
    "            ds.longitude.attrs['units'] = 'degrees_onlypositive'\n",
    "\n",
    "            # this takes too long !!!\n",
    "            # ds_merged_glaciers = xr_prcp.where(ds_inv['glacier_mask'], drop=True)\n",
    "\n",
    "            ds['ASurf'] = ds_inv['orog']  # ds_inv['ASurf']\n",
    "            if ys == 1979 and var == 'pr':\n",
    "\n",
    "                # as we only keep the gridpoints that contain glaciers, the mask has to be applied to dsi as well!\n",
    "                # dsi = ds_inv.where(ds_inv['glacier_mask'], drop=True)  # this reduces the total of 6483600 points to only 116280 points!!!\n",
    "                dsi = ds.isel(time=[0]).where(ds_inv['glacier_mask'], drop=True)\n",
    "                # we do not want any dependency on latitude and longitude\n",
    "                dsif = dsi.stack(points=('latitude', 'longitude')).reset_index(('points'))\n",
    "                # dsif  # so far still many points\n",
    "\n",
    "                # drop the non-glacierized points\n",
    "                dsifs = dsif.where(np.isfinite(dsi[var].stack(points=('latitude',\n",
    "                                                                      'longitude')).reset_index(('time',\n",
    "                                                                                                 'points'))), drop=True)\n",
    "                # drop the time dimension to match the era5_land example: the invariant file should not have any time dependence!\n",
    "                dsifs = dsifs.drop_vars(var)\n",
    "                dsifs = dsifs.drop_vars('time')\n",
    "                try:\n",
    "                    dsifs = dsifs.drop_vars('time_')\n",
    "                except Exception:\n",
    "                    pass\n",
    "                dsifs.attrs['version'] = version\n",
    "                if version == '2025.11.25':\n",
    "                    dsifs.attrs['note'] = 'includes points from RGI6, RGI7C and RGI7G central longitude and latitude'\n",
    "                dsifs.to_netcdf(f'../flattened/{version}/w5e5v2.0_glacier_invariant_flat{v}.nc')\n",
    "\n",
    "                # check that the gridpoint nearest to HEF (Hintereisferner) is correct!\n",
    "                # it happened once that something went wrong here ...\n",
    "                lon, lat = (10.7584, 46.8003)\n",
    "                # orog = xr.open_dataset('/home/lilianschuster/Downloads/w5e5v2.0_glacier_invariant_flat.nc').ASurf\n",
    "                orog = dsifs.ASurf\n",
    "                c = (orog.longitude - lon)**2 + (orog.latitude - lat)**2\n",
    "                surf_hef = orog.isel(points=c.argmin())\n",
    "                np.testing.assert_allclose(surf_hef, 2250, rtol=0.1)\n",
    "\n",
    "                # check the glacier that got a wrong gridpoint in the previous version\n",
    "                # RGI60-19.00124\n",
    "                lon, lat = (-70.8931 + 360, -72.4474)\n",
    "                orog = dsifs.ASurf\n",
    "                c = (orog.longitude - lon)**2 + (orog.latitude - lat)**2\n",
    "                surf_g = orog.isel(points=c.argmin())\n",
    "                np.testing.assert_allclose(surf_g.longitude, lon, rtol=0.2)\n",
    "                np.testing.assert_allclose(surf_g.latitude, lat, rtol=0.2)\n",
    "\n",
    "\n",
    "            ds = ds.drop_vars('ASurf')\n",
    "\n",
    "            for t in ds.time.values[:]:  # ds_merged_glaciers.time.values: .sel(time=slice('1901-04','2016-12'))\n",
    "                # produce a temporary file for each time step (day)\n",
    "                sel_l = ds.sel(time=[t])\n",
    "                # don't do the dropping twice!!!!\n",
    "                # sel = sel_l.where(ds_inv['glacier_mask'], drop=True)\n",
    "                sel = sel_l.where(ds_inv['glacier_mask'])\n",
    "                sel = sel.stack(points=('latitude', 'longitude')).reset_index(('time', 'points'))\n",
    "                sel = sel.where(np.isfinite(sel[var]), drop=True)\n",
    "                sel.to_netcdf('tmp_{}/tmp_{}.nc'.format(var, str(t)[:10]))\n",
    "\n",
    "\n",
    "###\n",
    "\n",
    "# make yearly files from the daily temporary files\n",
    "for var in ['pr', 'tas']:\n",
    "    for y in np.arange(1979, 2020):\n",
    "        dso_y = xr.open_mfdataset('tmp_{}/tmp_{}-*.nc'.format(var, str(y)),\n",
    "                                  concat_dim='time', combine='nested')  # .rename_vars({'time_':'time'})\n",
    "        dso_y.to_netcdf('tmp_{}/tmp2_{}.nc'.format(var, str(y)))\n",
    "        dso_y.close()\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ddeebbd-70df-40d6-9b24-bbf7154129c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# aggregate the yearly files into one daily file per variable\n",
    "for var in ['pr', 'tas']:\n",
    "    dso_all2 = xr.open_mfdataset('tmp_{}/tmp2_*.nc'.format(var),\n",
    "                                 concat_dim='time', combine='nested', parallel=True)  # .rename_vars({'time_':'time'})\n",
    "\n",
    "    dso_all2.attrs['history'] = 'longitudes to 0 -> 360, only glacier gridpoints chosen and flattened latitude/longitude --> points'\n",
    "    dso_all2.attrs['postprocessing_date'] = str(np.datetime64('today', 'D'))\n",
    "    dso_all2.attrs['postprocessing_scientist'] = 'lilian.schuster@uibk.ac.at'\n",
    "    dso_all2.attrs['version'] = version\n",
    "    if version == '2025.11.25':\n",
    "        dso_all2.attrs['note'] = 'includes points from RGI6, RGI7C and RGI7G central longitude and latitude'\n",
    "    dso_all2.to_netcdf(f'../flattened/{version}/daily/w5e5v2.0_{var}_global_daily_flat_glaciers_1979_2019{v}.nc')"
   ]
  },
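  {
   "cell_type": "markdown",
   "id": "added-usage-note",
   "metadata": {},
   "source": [
    "Optional sanity check, a minimal sketch only: it reads back one of the flattened daily files written above and selects the gridpoint nearest to a glacier, reusing the nearest-point logic from the invariant-file check. The file path, the `tas` variable and the Hintereisferner coordinates come from the cells above; the guard flag `check_flat_files` is purely illustrative, and the cell assumes the daily files have already been written.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-usage-example",
   "metadata": {},
   "outputs": [],
   "source": [
    "# minimal usage sketch (assumes the daily flattened files above exist); set the flag to True to run it\n",
    "check_flat_files = False\n",
    "if check_flat_files:\n",
    "    ds_flat = xr.open_dataset(f'../flattened/{version}/daily/w5e5v2.0_tas_global_daily_flat_glaciers_1979_2019{v}.nc')\n",
    "    lon, lat = (10.7584, 46.8003)  # Hintereisferner, as in the invariant-file check above\n",
    "    if lon < 0:\n",
    "        lon = lon + 360  # the flattened files use 0 -> 360 longitudes\n",
    "    # squared lon/lat distance to all flattened points, then pick the nearest one\n",
    "    c = (ds_flat.longitude - lon)**2 + (ds_flat.latitude - lat)**2\n",
    "    tas_nearest = ds_flat.tas.isel(points=int(c.argmin()))\n",
    "    print(tas_nearest.mean().values)\n"
   ]
  },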
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e10dc303-341a-4c8e-bdab-8df43f8f72af",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "# make monthly files\n",
    "for var in ['pr', 'tas']:\n",
    "\n",
    "    pathi = f'../flattened/{version}/daily/w5e5v2.0_{var}_global_daily_flat_glaciers_1979_2019{v}.nc'\n",
    "    ds = xr.open_dataset(pathi)\n",
    "\n",
    "    ds_monthly = ds.resample(time='MS').mean(keep_attrs=True)\n",
    "\n",
    "    ds_monthly.attrs['postprocessing_scientist'] = 'lilian.schuster@uibk.ac.at'\n",
    "    ds_monthly.attrs['postprocessing_actions'] = (\"using xarray: ds_monthly = ds.resample(time='MS').mean(keep_attrs=True)\\n\"\n",
    "                                                  \"ds_monthly.to_netcdf()\\n\")\n",
    "    ds_monthly.attrs['version'] = version\n",
    "    if version == '2025.11.25':\n",
    "        ds_monthly.attrs['note'] = 'includes points from RGI6, RGI7C and RGI7G central longitude and latitude'\n",
    "    ds_monthly.to_netcdf(f'../flattened/{version}/monthly/w5e5v2.0_{var}_global_monthly_flat_glaciers_1979_2019{v}.nc')\n",
    "\n",
    "    if var == 'tas':\n",
    "        # also compute the monthly standard deviation of the daily tas values:\n",
    "\n",
    "        # attention -> the commented line below is slightly wrong: it sets lon/lat to 0 because the std of lon/lat is zero\n",
    "        # ds_tas_daily_std = _d.resample(time='MS').std(keep_attrs=True)\n",
    "        # instead, only compute the std over tas and add lat/lon again afterwards ...\n",
    "        ds_tas_daily_std = ds.tas.resample(time='MS').std(keep_attrs=True)\n",
    "        ds_tas_daily_std['latitude'] = ds.latitude\n",
    "        ds_tas_daily_std['longitude'] = ds.longitude\n",
    "        ds_tas_daily_std = ds_tas_daily_std.to_dataset()\n",
    "\n",
    "        # rename variable tas to tas_std and adapt its attributes\n",
    "        ds_tas_daily_std = ds_tas_daily_std.rename_vars(dict(tas='tas_std'))\n",
    "        ds_tas_daily_std.tas_std.attrs['standard_name'] = 'air_temperature_daily_std'\n",
    "        ds_tas_daily_std.tas_std.attrs['long_name'] = 'Near-Surface Air Temperature daily standard deviation'\n",
    "        ds_tas_daily_std.attrs['postprocessing_date'] = str(np.datetime64('today', 'D'))\n",
    "        ds_tas_daily_std.attrs['postprocessing_scientist'] = 'lilian.schuster@uibk.ac.at'\n",
    "        ds_tas_daily_std.attrs['version'] = version\n",
    "        if version == '2025.11.25':\n",
    "            ds_tas_daily_std.attrs['note'] = 'includes points from RGI6, RGI7C and RGI7G central longitude and latitude'\n",
    "        ds_tas_daily_std.to_netcdf(f'../flattened/{version}/monthly/w5e5v2.0_tas_std_global_monthly_flat_glaciers_1979_2019{v}.nc')\n",
    "\n",
    "\n",
    "# remove the temporary files\n",
    "import glob\n",
    "for var in ['pr', 'tas']:\n",
    "    files = glob.glob('tmp_{}/tmp*'.format(var))\n",
    "    for f in files:\n",
    "        os.remove(f)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f7f197c-959f-41be-822f-5cc171818401",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:oggm_env_2025]",
   "language": "python",
   "name": "conda-env-oggm_env_2025-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.14"
  }
 },
 "nbformat": 4,
"nbformat_minor": 5 }