{ "cells": [ { "cell_type": "markdown", "id": "dffd70cd-400c-4634-afb7-2afdf7f23209", "metadata": {}, "source": [ "# Flatten daily or monthly ERA5 files and select only glaciated gridpoints from RGI6, RGI7C and RGI7G\n", "\n", "--> currently from 1940-01 to 2025-12\n", "\n", "In http://nbviewer.org/urls/cluster.klima.uni-bremen.de/~oggm/climate/notebooks/flatten_glacier_gridpoint_tests.ipynb, we check if the flattening approach works as it should. \n" ] }, { "cell_type": "code", "execution_count": 1, "id": "83acffa7-6af9-402e-92df-a24ccd179136", "metadata": {}, "outputs": [], "source": [ "import xarray as xr\n", "import numpy as np\n", "import pandas as pd\n", "from oggm import utils\n", "\n", "import sys\n", "import matplotlib.pyplot as plt\n", "import os\n", "import glob\n" ] }, { "cell_type": "code", "execution_count": 20, "id": "5299a1f3-1437-47bc-bdda-871264d1f807", "metadata": {}, "outputs": [], "source": [ "resolution = 'Daily' # Choose here Monthly ... or Daily\n", "y0 = 2025 # 1940\n", "y1 = 2025 #2024\n", "### this version includes RGI6, RGI7C and RGI7G\n", "v = 'v2026.01.14'\n", "res = 0.25 # resolution of ERA5\n", "path = '/home/www/oggm/climate/notebooks/era5_both/'\n", "\n", "if resolution == 'Daily':\n", " path_unflattened = '/home/www/oggm/climate/era5/daily/v1.2/unflattened/' \n", "else:\n", " path_unflattened = '/home/www/oggm/climate/era5/monthly/v1.2/unflattened/' \n" ] }, { "cell_type": "code", "execution_count": 21, "id": "ee88791e-13d8-4c11-909b-b679096e71e4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Size: 6kB\n", "array([ 90. , 89.75, 89.5 , ..., -89.5 , -89.75, -90. ])\n", "Coordinates:\n", " * latitude (latitude) float64 6kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0\n", " number int64 8B ...\n", " expver Size: 12kB\n", "array([0.0000e+00, 2.5000e-01, 5.0000e-01, ..., 3.5925e+02, 3.5950e+02,\n", " 3.5975e+02])\n", "Coordinates:\n", " * longitude (longitude) float64 12kB 0.0 0.25 0.5 0.75 ... 359.2 359.5 359.8\n", " number int64 8B ...\n", " expver = 0)\n", "nx, ny = ds_inv.sizes['longitude'], ds_inv.sizes['latitude']\n", "# get the dataset where coordinates of glaciers are stored\n", "frgi_6 = utils.file_downloader('https://cluster.klima.uni-bremen.de/~oggm/rgi/rgi62_stats.h5')\n", "odf_6 = pd.read_hdf(frgi_6, index_col=0)\n", "#### glacier complexes\n", "frgi_7C = utils.file_downloader('https://cluster.klima.uni-bremen.de/~oggm/rgi/RGI2000-v7.0-C-global-attributes.csv')\n", "odf_7C = pd.read_csv(frgi_7C, index_col=0)\n", "#### glaciers \n", "frgi_7G = utils.file_downloader('https://cluster.klima.uni-bremen.de/~oggm/rgi/RGI2000-v7.0-G-global-attributes.csv')\n", "odf_7G = pd.read_csv(frgi_7G, index_col=0)\n", "\n", "odf_l = []\n", "for odf,n in zip([odf_6,odf_7C,odf_7G],['RGI6', 'RGI7C', 'RGI7G']):\n", " # need to repeat this for every option here to have one flattened file that has the necessary points for all three versions\n", " \n", " if (n == 'RGI7C') or (n=='RGI7G'):\n", " # has slightly different column names ... \n", " odf['CenLon'] = odf['cenlon']\n", " odf['CenLat'] = odf['cenlat']\n", " odf['Area'] = odf['area_km2']\n", " # attention the rgi62 stats are in longitude from -179 to 180 \n", " assert odf.CenLon.max() <180\n", " assert odf.CenLon.min() <-175 \n", " cenlon_for_bins = np.where(odf['CenLon'] < 0, odf['CenLon']+360, odf['CenLon'])\n", " # Nearest neighbor lookup --> W5E5 is 0.5° grid, ERA5 is 0.25° grid ... --> here we use 0.125\n", " lon_bins = np.linspace(ds_inv.longitude.data[0] - res/2, ds_inv.longitude.data[-1] + res/2, nx+1)\n", " # !!! attention W5E5 and ERA5 sorted from 90 to -90 !!!!\n", " lat_bins = np.linspace(ds_inv.latitude.data[0] + res/2, ds_inv.latitude.data[-1] - res/2, ny+1)\n", " assert np.diff(lon_bins).min() == res\n", " assert np.diff(lon_bins).max() == res\n", " odf['lon_id'] = np.digitize(cenlon_for_bins, lon_bins)-1\n", " odf['lat_id'] = np.digitize(odf['CenLat'], lat_bins)-1\n", " # if a glacier has a longitude above or equal 359.875, it hould be again in the first bin \n", " # thus if \"lon_id\" ==1440 it is actually in the first bin ... \n", " odf.loc[odf['lon_id'] == nx, 'lon_id'] = 0\n", " \n", " odf['lon_val'] = ds_inv.longitude.data[odf.lon_id]\n", " odf['lat_val'] = ds_inv.latitude.data[odf.lat_id]\n", " # Use unique grid points as index and compute the area per location\n", " odf['unique_id'] = ['{:03d}_{:03d}'.format(lon, lat) for (lon, lat) in zip(odf['lon_id'], odf['lat_id'])]\n", " mdf = odf.drop_duplicates(subset='unique_id').set_index('unique_id')\n", " mdf['Area'] = odf.groupby('unique_id').sum()['Area']\n", " if n == 'RGI7C':\n", " print('{} ––– Total number of glacier complexes: {} and number of ERA5 gridpoints with glacier complexes in them: {}'.format(n, len(odf), len(mdf)))\n", " else:\n", " print('{} ––– Total number of glaciers: {} and number of ERA5 gridpoints with glaciers in them: {}'.format(n, len(odf), len(mdf)))\n", "\n", " # at the end, we want to have one \n", " odf['rgi_v'] = n\n", " odf_l.append(odf.copy())\n", "odf_all = pd.concat(odf_l)\n", "mdf = odf_all.drop_duplicates(subset='unique_id').set_index('unique_id')\n", "mdf['Area'] = odf_all.groupby('unique_id').sum()['Area']\n", "print('All versions ––– Total number of glacier IDs: {} and number of ERA5 gridpoints with glaciers in them: {}'.format(len(odf_all), len(mdf)))" ] }, { "cell_type": "code", "execution_count": 22, "id": "96fa21bb-6162-4ea5-8091-de185d58be12", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "532\n", "CenLon -141.670274\n", "CenLat 69.166921\n", "Name: 873_083, dtype: object\n" ] } ], "source": [ "# let's look into those gripoints that are new for RGI7C\n", "print(len(mdf.loc[mdf.rgi_v == 'RGI7C']))\n", "print(mdf.loc[mdf.rgi_v == 'RGI7C'].iloc[0][['CenLon','CenLat']])" ] }, { "cell_type": "code", "execution_count": 23, "id": "42363681-5200-4f73-96cc-0f179f83aa25", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "255\n", "CenLon -66.855668\n", "CenLat -67.535551\n", "Name: 1173_630, dtype: object\n" ] } ], "source": [ "# let's look into those gripoints that are new for RGI7G\n", "print(len(mdf.loc[mdf.rgi_v == 'RGI7G']))\n", "print(mdf.loc[mdf.rgi_v == 'RGI7G'].iloc[-1][['CenLon','CenLat']])" ] }, { "cell_type": "code", "execution_count": 24, "id": "3813435a-d58f-4582-819c-676e1dc017e7", "metadata": {}, "outputs": [], "source": [ "# check the distance to the gridpoints-> it should never be larger than the resolution of the chosen dataset (for ERA5 : 0.125)\n", "diff_lon = ds_inv.longitude.data[odf_all.lon_id] - odf_all.CenLon\n", "# if the distance is 360 -> it is the same as 0,\n", "diff_lon = np.where(np.abs(diff_lon - 360) < 170, diff_lon-360, diff_lon)\n", "odf_all['ll_dist_to_point'] = ((diff_lon)**2 + (ds_inv.latitude.data[odf_all.lat_id] - odf_all.CenLat)**2)**0.5\n", "assert odf_all['ll_dist_to_point'].max() < ((res/2)**2 + (res/2)**2)**0.5" ] }, { "cell_type": "code", "execution_count": 25, "id": "bbc660ee-c05c-41b1-b879-d29796fc2e48", "metadata": {}, "outputs": [], "source": [ "# this is the mask that we need to remove all non-glacierized gridpoints\n", "mask = np.full((ny, nx), np.NaN)\n", "mask[mdf['lat_id'], mdf['lon_id']] = 1 \n", "ds_inv['glacier_mask'] = (('latitude', 'longitude'), np.isfinite(mask))\n", "\n", "# check the distance to the gridpoints-> it should never be larger than the resolution of the chosen dataset (for ERA5 : 0.125)\n", "diff_lon = ds_inv.longitude.data[odf_all.lon_id] - odf_all.CenLon\n", "# if the distance is 360 -> it is the same as 0,\n", "diff_lon = np.where(np.abs(diff_lon - 360) < 170, diff_lon-360, diff_lon)\n", "odf_all['ll_dist_to_point'] = ((diff_lon)**2 + (ds_inv.latitude.data[odf_all.lat_id] - odf_all.CenLat)**2)**0.5\n", "assert odf_all['ll_dist_to_point'].max() < ((res/2)**2 + (res/2)**2)**0.5\n", "\n", "# somehow the geopotential has a date, but we want to remove tat\n", "ds_inv['z'] = ds_inv['z'].sel(valid_time=\"2020-01-01\") \n", "\n", "###### create the invariant file from the first Monthly dataset using one of the two variables\n", "if resolution == 'Monthly':\n", " var = 'tp'\n", " ds = xr.open_dataset(path_unflattened +f\"ERA5_Monthly_{var}_1940.nc\").isel(time=[0])\n", " # they are named already correctly, and are correctly going already from 0 to 360 \n", " # (so I removed all transformation steps)\n", " assert np.all(ds.longitude >= 0)\n", " assert np.all(ds.latitude <= 90)\n", " ds['z'] = ds_inv['z'].copy() #ds_inv['ASurf']\n", " ds['z'].attrs['name'] = 'ERA5 geopotential surface'\n", " # as we use here only those gridpoints where glaciers are involved, need to put the mask on dsi as well!\n", " # dsi = ds_inv.where(ds_inv['glacier_mask'], drop = True) # this makes out of in total 6483600 points only XXX points!!!\n", " dsi = ds.where(ds_inv['glacier_mask'], drop = True)\n", " # we do not want any dependency on latitude and longitude\n", " dsif = dsi.stack(points=('latitude', 'longitude')).reset_index(('points'))\n", " #dsif # so far still many points \n", " \n", " # drop the non-glacierized points\n", " dsifs = dsif.where(np.isfinite(dsi[var].stack(points=('latitude',\n", " 'longitude')).reset_index(('time',\n", " 'points'))), drop=True)\n", " # I have to drop the 'time_' dimension, to be equal to the era5_land example, because the invariant file should not have any time dependence !\n", " dsifs = dsifs.drop_vars(var)\n", " dsifs = dsifs.drop_vars(['number','expver']) # valid_time\n", " #dsifs = dsifs.drop_vars('time')\n", " # I don't want to drop the dimension as \"z\" is dependent on the timestep, but I want to remove the time dependency\n", " dsifs = dsifs.squeeze('time')\n", " dsifs = dsifs.drop_vars('time')\n", " #dsifs = dsifs.drop('time_')\n", " dsifs.attrs['version'] = v\n", " dsifs.attrs['history'] = dsifs.history + '; only glacier gridpoints chosen and flattened latitude/longitude --> points'\n", " dsifs.attrs['postprocessing_date'] = str(np.datetime64('today','D'))\n", " dsifs.attrs['postprocessing_scientist'] = 'lilian.schuster@uibk.ac.at'\n", " dsifs.attrs['note'] = 'includes points from RGI6, RGI7C and RGI7G central longitude and latitude'\n", " utils.mkdir(path + f'flattened/{v[1:8]}')\n", " dsifs.to_netcdf(f'../flattened/{v[1:8]}/era5_glacier_invariant_flat_{v}.nc')\n", " \n", " #check if gridpoint nearest to hef is right!!!\n", " # happened once that something went wrong here ...\n", " lon, lat = (10.7584, 46.8003)\n", " geo = dsifs.z\n", " c = (geo.longitude - lon)**2 + (geo.latitude - lat)**2\n", " geo_hef = geo.isel(points=c.argmin())\n", " np.testing.assert_allclose(geo_hef/9.8, 2250, rtol=0.1)\n", " \n", " # check another glacier\n", " # RGI60-19.00124\n", " lon,lat = (-70.8931 +360, -72.4474)\n", " geo = dsifs.z\n", " c = (geo.longitude - lon)**2 + (geo.latitude - lat)**2\n", " surf_g = geo.isel(points=c.argmin())\n", " np.testing.assert_allclose(surf_g.longitude, lon, atol=res/2)\n", " np.testing.assert_allclose(surf_g.latitude, lat, atol=res/2)\n", " \n", " ## check another glacier RGI60-19.01497\n", " lon,lat = (-179.917000+360, -67.400600)\n", " geo = dsifs.z\n", " c = (geo.longitude - lon)**2 + (geo.latitude - lat)**2\n", " surf_g = geo.isel(points=c.argmin())\n", " np.testing.assert_allclose(surf_g.longitude, lon, atol=res/2)\n", " np.testing.assert_allclose(surf_g.latitude, lat, atol=res/2)\n", "\n", " ##RGI60-11.03228 \t11 \t11-02 \tTaillon \t\n", " lon,lat = (-0.039683+360, 42.695419) \t\n", " if lon>360-res/2:\n", " # in that case we actually want to select longitude zero!\n", " lon = 0\n", " geo = dsifs.z\n", " c = (geo.longitude - lon)**2 + (geo.latitude - lat)**2\n", " surf_g = geo.isel(points=c.argmin())\n", " np.testing.assert_allclose(surf_g.longitude, lon, atol=res/2)\n", " np.testing.assert_allclose(surf_g.latitude, lat, atol=res/2)\n", "\n", " # RGI60-11.03229 \t-0.057456 \t\n", " lon,lat = (-0.057456+360, 42.695129) \t\n", " if lon>360-res/2:\n", " # in that case we actually want to select longitude zero!\n", " lon = 0\n", " geo = dsifs.z\n", " c = (geo.longitude - lon)**2 + (geo.latitude - lat)**2\n", " surf_g = geo.isel(points=c.argmin())\n", " np.testing.assert_allclose(surf_g.longitude, lon, atol=res/2)\n", " np.testing.assert_allclose(surf_g.latitude, lat, atol=res/2)" ] }, { "cell_type": "code", "execution_count": 26, "id": "c95cb54d-cc4e-47cb-bf24-83d1868be478", "metadata": {}, "outputs": [], "source": [ "# Now run it over all years \n", "try:\n", " os.mkdir(path + '/_raw_files/tmp_{}'.format(resolution.lower()))\n", "except:\n", " pass\n", "\n", "for y in np.arange(y0,y1+1): \n", " for var in ['t2m','tp']:\n", " if resolution == 'Monthly':\n", " ds = xr.open_dataset(path_unflattened + f\"ERA5_{resolution}_{var}_{y}.nc\")\n", " else:\n", " ds = xr.open_mfdataset(path_unflattened + f\"ERA5_{resolution}_{var}_{y}*.nc\")\n", " ds = ds.load()\n", " # they are named already correctly, and are correctly going already from 0 to 360 \n", " # (so I removed all transformation steps)\n", " assert np.all(ds.longitude >= 0)\n", " assert np.all(ds.latitude <= 90)\n", " # this takes too long !!!\n", " # ds_merged_glaciers = xr_prcp.where(ds_inv['glacier_mask'], drop = True)\n", " ds = ds.drop_vars(['number'])\n", " try:\n", " ds = ds.drop_vars(['expver'])\n", " except: \n", " pass\n", " for t in ds.time.values[:]: \n", " # produce a temporary file for each month (if \"Monthly\" or day if \"Daily\")\n", " sel_l = ds.sel(time=[t])\n", " # don't do the dropping twice!!!!\n", " #sel = sel_l.where(ds_inv['glacier_mask'], drop = True)\n", " sel = sel_l.where(ds_inv['glacier_mask'])\n", " sel = sel.stack(points=('latitude', 'longitude')).reset_index(('time', 'points'))\n", " sel = sel.where(np.isfinite(sel[var]), drop=True)\n", " try: \n", " sel = sel.drop_vars(['valid_time'])\n", " except:\n", " pass\n", " sel.to_netcdf(path+'_raw_files/tmp_{}/tmp_{}_{}.nc'.format(resolution.lower(),var, str(t)[:10]))\n", " sel.close()\n" ] }, { "cell_type": "code", "execution_count": 27, "id": "ec3ca152-e07a-438c-b63d-ae0d28a92338", "metadata": {}, "outputs": [], "source": [ "# make yearly files\n", "for var in ['t2m', 'tp']:\n", " for y in np.arange(y0, y1+1):\n", " dso_y = xr.open_mfdataset(path+'_raw_files/tmp_{}/tmp_{}_{}-*.nc'.format(resolution.lower(),var, str(y)),\n", " concat_dim='time', combine='nested') # .rename_vars({'time_':'time'})\n", " dso_y.to_netcdf(path+'_raw_files/tmp_{}/tmp2_{}_{}.nc'.format(resolution.lower(),var, str(y)))\n", " dso_y.close()\n", " " ] }, { "cell_type": "code", "execution_count": 44, "id": "1052625d-391e-489e-9d8f-d34aa8fd8434", "metadata": {}, "outputs": [], "source": [ "for var in ['tp', 't2m']:\n", " files = sorted(glob.glob(path + f'/_raw_files/tmp_{resolution.lower()}/tmp2_{var}*.nc'))\n", " ds_list = []\n", "\n", " for f in files:\n", " ds = xr.open_dataset(f)\n", " # drop variables that could break append\n", " ds = ds.drop_vars(['expver','number','valid_time'], errors='ignore')\n", " ds = ds.reset_coords() # not drop =True, as I still need the latitude, longitudes\n", " ds.load()\n", " if resolution == 'Monthly':\n", " if ds.time.size < 12:\n", " if f == files[-1]: # only do this if it is the last file\n", " m_last = f'-{int(ds.time.size):02d}'\n", " # pad last file if monthly --> actually not necessary\n", " #ds = pad_months_after_last(ds, expected_n=12)\n", " else:\n", " raise ValueError(f\"File {f} has only {ds.time.size} months, but it is NOT the last file!\")\n", " else:\n", " m_last = ''\n", " if resolution == 'Daily':\n", " if ds.time.size <28:\n", " raise ValueError(f\"File {f} has less than 28 days, something wrong\")\n", " # I will do the complete test later ...\n", " if f == files[-1]:\n", " # this is only if some months are missing for the most recent year\n", " last_timestamp = ds.time.values[-1]\n", " _m = last_timestamp.astype('datetime64[M]').astype(int) % 12 + 1\n", " _d = (last_timestamp - last_timestamp.astype('datetime64[M]')).astype('timedelta64[D]').astype(int) + 1\n", " if _m==12 and _d==31:\n", " m_last = ''\n", " else:\n", " m_last = f'-{int(_m):02d}'\n", " ds_list.append(ds)\n", "\n", " # concatenate all along time\n", " ds_all = xr.concat(ds_list, dim='time', data_vars='all')\n", "\n", " # make lat/lon time-independent ... (I don't know at which step, lat/lon time-dependence got inside ...)\n", " lon_1d = ds_all['longitude'].isel(time=0).drop_vars('time') \n", " lat_1d = ds_all['latitude'].isel(time=0).drop_vars('time') \n", " # first check that they are really time-independent\n", " assert np.all(ds_all['latitude'].values == ds_all['latitude'].isel(time=0).values)\n", " assert np.all(ds_all['longitude'].values == ds_all['longitude'].isel(time=0).values)\n", " ds_all = ds_all.assign_coords(\n", " longitude=lon_1d,\n", " latitude=lat_1d)\n", "\n", " \n", " # update attributes\n", " ds_all.attrs['history'] = ds_all.history + '; only glacier gridpoints chosen and flattened latitude/longitude --> points'\n", " ds_all.attrs['postprocessing_date'] = str(np.datetime64('today','D'))\n", " ds_all.attrs['postprocessing_scientist'] = 'lilian.schuster@uibk.ac.at'\n", " ds_all.attrs['version'] = v\n", " ds_all.attrs['note'] = 'includes points from RGI6, RGI7C and RGI7G central longitude and latitude'\n", "\n", " # write once\n", " utils.mkdir(path + f'flattened/{v[1:8]}/{resolution.lower()}')\n", " out_path = path + f'flattened/{v[1:8]}/{resolution.lower()}/era5_{var}_global_{resolution.lower()}_{y0}_{y1}{m_last}_flat_glaciers_{v}.nc'\n", " # this got manually moved then to e.g.: '/home/www/oggm/climate/era5/daily/v1.2/flattened/era5_t2m_global_daily_1940_2025_flat_glaciers_v2026.01.14.nc'\n", " ds_all.to_netcdf(out_path, format='NETCDF4', engine='netcdf4', unlimited_dims=['time'])\n" ] }, { "cell_type": "code", "execution_count": null, "id": "dd07b61a-8b79-408a-b9bb-ea8acb1621bd", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 62, "id": "9f6dde4d-aaf6-4577-87de-a5d5e307ccfe", "metadata": {}, "outputs": [], "source": [ "adding_single_year = False\n", "if adding_single_year:\n", " for var in ['tp', 't2m']:\n", " out_path = path + f'flattened/{v[1:8]}/{resolution.lower()}/era5_{var}_global_{resolution.lower()}_{y0}_{y1}{m_last}_flat_glaciers_{v}.nc'\n", " ds_all = xr.open_dataset(out_path)\n", " \n", " ds = xr.open_dataset(f'/home/www/oggm/climate/era5/daily/v1.2/flattened/era5_{var}_global_daily_1940_2024_flat_glaciers_v2025.11.25.nc')\n", " mask = ~ds.time.isin(ds_all.time)\n", " ds_not_in_all = ds.sel(time=mask)\n", " ds_all.attrs['postprocessing_date'] = ds.attrs['postprocessing_date']\n", " ds_correct = xr.concat([ds_not_in_all, ds_all], dim='time')\n", " ds_correct.to_netcdf(f'/home/www/oggm/climate/era5/daily/v1.2/flattened/era5_{var}_global_daily_1940_{y1}_flat_glaciers_{v}.nc',\n", " format='NETCDF4', engine='netcdf4', unlimited_dims=['time'])\n" ] }, { "cell_type": "code", "execution_count": 94, "id": "65d8bac5-ec7d-4399-98ed-b1c77781ae78", "metadata": {}, "outputs": [], "source": [ "# check if all years / months / days are there ... \n", "_y0 = 1940 \n", "for var in ['tp', 't2m']: \n", " ds = xr.open_dataset(f'/home/www/oggm/climate/era5/daily/v1.2/flattened/era5_{var}_global_daily_{_y0}_{y1}_flat_glaciers_{v}.nc')\n", " \n", " # Expected full daily range\n", " expected_daily = pd.date_range(\n", " start=f\"{_y0}-01-01\",\n", " end=f\"{y1}-12-31\",\n", " freq=\"D\"\n", " )\n", " # actual time series \n", " actual = pd.to_datetime(ds_correct[\"time\"].values)\n", " # missing \n", " missing = expected.difference(actual)\n", " assert len(missing) == 0\n", "\n", " ds_m = xr.open_dataset(f'/home/www/oggm/climate/era5/monthly/v1.2/flattened/era5_{var}_global_monthly_{_y0}_{y1}_flat_glaciers_{v}.nc')\n", " assert len(ds_m.time)/12 == y1+1-_y0 #y1+1-y0\n", "\n", " assert ds_m.time[0]['time.year'] == _y0\n", " assert ds_m.time[0]['time.month'] == 1\n", " assert ds_m.time[-1]['time.year'] == y1\n", " assert ds_m.time[-1]['time.month'] == 12\n", "\n", " assert ds.time[0] == pd.Timestamp(f'{_y0}-01-01')\n", " assert ds.time[-1] == pd.Timestamp(f'{y1}-12-31')\n" ] }, { "cell_type": "code", "execution_count": 95, "id": "d6fad2b9-327b-4bb4-8006-d4b7773f4580", "metadata": {}, "outputs": [], "source": [ "for var in ['tp', 't2m']:\n", " files = glob.glob(path+'_raw_files/tmp_{}/tmp_{}*'.format(resolution.lower(), var))\n", " for f in files:\n", " os.remove(f)\n", " files = glob.glob(path+'_raw_files/tmp_{}/tmp2_{}*'.format(resolution.lower(), var))\n", " for f in files:\n", " os.remove(f)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "9dcc87e4-c61f-44c7-981c-d1a6bbe1ff79", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "2ec8ae41-1272-4030-951c-5aee0a616bc5", "metadata": {}, "source": [ "# Appendix" ] }, { "cell_type": "code", "execution_count": null, "id": "2dec896b-35ae-4b4e-b5ed-91152e7e6e0a", "metadata": {}, "outputs": [], "source": [ "# necessary if there is not a full year available, but actually when using normal xr.concat not anymore necessary ... \n", "def pad_months_after_last(ds, expected_n=12):\n", " \n", " n = ds.sizes['time']\n", " if n >= expected_n:\n", " return ds\n", " missing = expected_n - n\n", " # get last time\n", " last = ds.time.values[-1]\n", "\n", " # branch depending on dtype: datetime64 vs cftime\n", " if np.issubdtype(ds.time.dtype, np.datetime64):\n", " last_ts = pd.to_datetime(last)\n", " # compute first missing month: first day of next month\n", " start = (last_ts + pd.offsets.MonthBegin()).normalize()\n", " pad_times = pd.date_range(start=start, periods=missing, freq='MS')\n", " pad_time_values = pad_times.values # numpy.datetime64[ns]\n", " else:\n", " # use cftime: get a cftime datetime for the next month\n", " # handle different cftime calendars via cftime library defaults\n", " if isinstance(last, cftime.datetime):\n", " year = last.year\n", " month = last.month + 1\n", " year += (month - 1) // 12\n", " month = (month - 1) % 12 + 1\n", " pad_time_values = []\n", " for i in range(missing):\n", " m = month + i\n", " y = year + (m - 1) // 12\n", " mm = (m - 1) % 12 + 1\n", " pad_time_values.append(cftime.datetime(y, mm, 1))\n", " else:\n", " # fallback: convert to pandas and then to cftime datetimes\n", " last_ts = pd.to_datetime(last)\n", " start = (last_ts + pd.offsets.MonthBegin()).normalize()\n", " pad_times = pd.date_range(start=start, periods=missing, freq='MS')\n", " pad_time_values = [cftime.datetime(t.year, t.month, t.day) for t in pad_times]\n", "\n", " # create a pad dataset: copy single-time slice and expand with NaNs\n", " pad = ds.isel(time=slice(0, 1)).copy(deep=True) * np.nan\n", " # tile to required number of missing months\n", " pad = pad.isel(time=0).expand_dims(time=pad_time_values)\n", " # assign correct time coordinate\n", " pad = pad.assign_coords(time=('time', pad_time_values))\n", " # concatenate along time\n", " ds_padded = xr.concat([ds, pad], dim='time')\n", " return ds_padded\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:oggm_env_2025]", "language": "python", "name": "conda-env-oggm_env_2025-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.14" } }, "nbformat": 4, "nbformat_minor": 5 }