import xarray as xr
import numpy as np
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
from datetime import date
from oggm.utils import mkdir
import oggm
import geopandas as gpd
import sys

### start via: `sbatch postprocessing_fill_missing_glaciers_rounce.slurm '01' ` ...

remove_runaway_glaciers = True
today = '16/05/2023'
model_author = 'OGGM_v16'
gcms = ['gfdl-esm4', 'ipsl-cm6a-lr', 'mpi-esm1-2-hr', 'mri-esm2-0', 'ukesm1-0-ll']
scenarios = ['historical', 'ssp126', 'ssp370', 'ssp585']
y0_times = [1851, 1901, 1951, 1995, 2021, 2041, 2061, 2081]
#y0_times = [2021, 2041, 2061, 2081]
#y0_times = [2021, 2041]
#y0_times = [1995]
print(y0_times)

# get the RGI area / ITMIX volumes of the glaciers
df_itmix = pd.read_hdf(oggm.utils.get_demo_file('rgi62_itmix_df.h5'))

rgi_reg_sel = sys.argv[1]
rgi_regs = [rgi_reg_sel]
rgidf_dict = {}
for rgi_reg in rgi_regs:
    path_rgi = oggm.utils.get_rgi_region_file(rgi_reg, version='6')
    rgidf = gpd.read_file(path_rgi)
    # Greenland periphery: all glaciers with connectivity level 0 and 1 should be
    # included, level 2 glaciers should be excluded (as was the case for GlacierMIP2)
    # total RGI area: 89,651 km2
    if rgi_reg == '05':
        rgidf = rgidf.loc[(rgidf['Connect'] == 0) | (rgidf['Connect'] == 1)]
    rgidf_dict[rgi_reg] = rgidf.set_index('RGIId')

### get the per glacier volumes and areas for each rgi_region, period, scenario and gcm:
### then estimate which glaciers are missing and get the missing_vs_working rgi area
### we also have to enlarge our dataset with the missing glaciers
### (first just put NaN values inside, those will be filled later)
### then fill the missing glaciers with the mean statistics from glaciers with a
### relative glacier area difference of less than 1%
#folder_path = '/home/www/drounce/per_glacier/'
folder_path = '/home/www/lschuster/runs_glacierMIP3_oggm_v16/output/'  # + f'RGI{rgi_reg}/{gcm}_historical_1851-1870.nc'

for rgi_reg in rgi_regs:
    print(f'rgi_reg: {rgi_reg} started')
    for y0_time in y0_times:
        period = f'{y0_time}-{y0_time+19}'
        if y0_time in [1851, 1901, 1951, 1995]:
            scenarios_sel = ['historical']
        else:
            scenarios_sel = scenarios[1:]  # just because I have to repeat the 1995 again !!!
        for scenario in scenarios_sel:
            for gcm in gcms:
                #path = folder_path + f'{rgi_reg}/{model_author}_rgi{rgi_reg}_glaciers_{period}_{gcm}_{scenario}*.nc'
                path = folder_path + f'RGI{rgi_reg}/{gcm}_{scenario}_{period}.nc'
                try:
                    ds = xr.open_dataset(path)
                except FileNotFoundError:
                    # if not found, use the '_stop' variant of the file
                    path = folder_path + f'RGI{rgi_reg}/{gcm}_{scenario}_{period}_stop.nc'
                    ds = xr.open_dataset(path)
                ds = ds.rename_vars({'volume': 'volume_m3', 'area': 'area_m2'})
                ds = ds.rename_dims({'time': 'simulation_year'})
                ds = ds[['volume_m3', 'area_m2']]
                ### very IMPORTANT --> THIS is much faster!!!
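                # note on the .load() below (added explanation): xr.open_dataset()
                # only creates lazy views on the netCDF file, so every later
                # .isel()/.sel() would read from disk again; .load() pulls
                # volume_m3 and area_m2 into memory once, which makes the
                # per-glacier filling loop further down much faster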
                ds = ds.load()
                # IMPORTANT: just to be sure, only select the RGIs that are working (May 12th 2023)
                rgi_reg_working = ds.volume_m3.isel(simulation_year=0).dropna(dim='rgi_id').rgi_id.values
                ds = ds.sel(rgi_id=rgi_reg_working)

                if remove_runaway_glaciers:
                    condition = ('final volume 5 times larger than after 0yrs\n'
                                 '& final volume 2 times larger than after 1000yrs\n'
                                 '& final volume 1.1 times larger than 300 years before\n'
                                 '& in total a final volume that is larger than 1% of total RGI-reg volume')
                    #condition_short = 'V_end > 5*V_0 & V_end > 2*V_1000 & V_end > 1.1*V_end-300 & V_end/V_init_rgi_reg.sum() > 1%'
                    #runaway_glac = (ds.isel(time=-1)/ds.isel(time=1000) > 5) & (ds.isel(time=-1)/ds.isel(time=0) > 5) & (ds.isel(time=-1)/ds.isel(time=0).sum() > 0.01)
                    # we want to make sure that the glacier has not stabilised, so also in the
                    # last 300 years the volume should still increase (-> to be a "runaway" glacier)
                    condi_runaway = (ds.volume_m3.isel(simulation_year=-1) / ds.volume_m3.isel(simulation_year=-300) > 1.1)
                    condi_growing_after1000yrs = (ds.volume_m3.isel(simulation_year=-1) / ds.volume_m3.isel(simulation_year=1000) > 2)
                    condi_growing_strongly = (ds.volume_m3.isel(simulation_year=-1) / ds.volume_m3.isel(simulation_year=0) > 5)
                    condi_large_final_glacier = (ds.volume_m3.isel(simulation_year=-1) / ds.volume_m3.isel(simulation_year=0).sum() > 0.01)
                    runaway_glac_mask = (condi_runaway & condi_growing_after1000yrs &
                                         condi_growing_strongly & condi_large_final_glacier)
                    runaway_glaciers = ds.where(runaway_glac_mask).dropna(dim='rgi_id').rgi_id.values
                    runaway_ratio = (rgidf_dict[rgi_reg].loc[runaway_glaciers].Area.sum() /
                                     rgidf_dict[rgi_reg].loc[rgi_reg_working].Area.sum())
                    print(period, scenario, gcm, 'runaway vs total initial area ratio:', runaway_ratio)
                    # only keep the non-runaway glaciers
                    non_runaway_glac_mask = ~runaway_glac_mask
                    ds = ds.where(non_runaway_glac_mask).dropna(dim='rgi_id')
                    # important: overwrite rgi_reg_working
                    rgi_reg_working = ds.rgi_id.values

                #bad_rgi = []
                #for r in rgidf_dict[rgi_reg].index:
                #    if r not in rgi_reg_working:
                #        bad_rgi.append(r)
                # this set difference is much faster than the loop above !!!
                bad_rgi = list(set(rgidf_dict[rgi_reg].index).difference(rgi_reg_working))
                bad = rgidf_dict[rgi_reg].loc[bad_rgi]  # missing glaciers
                good = rgidf_dict[rgi_reg].loc[rgi_reg_working]  # working glaciers
                bad_ratio = bad.Area.sum() / good.Area.sum()
                #bad_glacier_count_perc = (len(bad)/len(good))
                #print('Correcting for {:.2f}% bad area'.format(bad_ratio * 100))
                assert len(bad) + len(good) == len(rgidf_dict[rgi_reg].index)

                # create a dummy xarray dataset with empty data for the missing glaciers
                # that will be filled afterwards
                ds_dummy = ds.isel(rgi_id=0).load().copy(deep=True)
                ds_dummy['volume_m3'].data[...] = np.nan
                ds_dummy['area_m2'].data[...] = np.nan
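                # how the dummy-dataset trick below works (added explanation): one
                # existing glacier serves as a template whose data is overwritten
                # with NaN; expand_dims() then broadcasts it along a new 'rgi_id'
                # dimension holding the ids of the missing glaciers, so that
                # xr.concat() can stitch missing and working glaciers together on
                # identical simulation_year coordinates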
                #ds_dummy = ds_dummy.drop('rgi_id')
                #ds_dummy = ds_dummy.expand_dims({'rgi_id': bad.index}).transpose()
                ds_dummy = ds_dummy.expand_dims({'rgi_id': bad.index.values}).transpose()
                # NEW: also need to add the coords values
                ds_dummy.coords['rgi_id'] = bad.index.values
                # add the dummy xarray dataset to the existing glaciers
                ds_sel_full = xr.concat([ds, ds_dummy], dim='rgi_id')

                # repair the missing glaciers
                ds_rep_sel = ds_sel_full.copy(deep=True)
                ds_rep_sel = ds_rep_sel.squeeze()
                ds_index = ds_rep_sel.rgi_id.data
                # need to loop over every missing glacier
                for rid, g in bad.iterrows():
                    area_diff = np.abs(good.Area - g.Area) / g.Area
                    # if possible, take all glaciers of that RGI region that have a
                    # relative area difference of less than 1 %
                    sel = area_diff.loc[area_diff < 0.01]
                    if len(sel) < 3:
                        # if fewer than 3 glaciers have a similar area, take instead
                        # the 9 most area-similar glaciers
                        sel = area_diff.sort_values().iloc[:9]
                    # now we take the average over the selected area-similar glaciers
                    # and add it as fake statistics for that missing glacier (in our
                    # case, area and volume are estimated like that for each
                    # simulation year)
                    new = ds.sel(rgi_id=sel.index).mean(dim='rgi_id').squeeze()
                    rep_id = np.nonzero(ds_index == rid)[0][0]
                    for vn in ds_rep_sel.data_vars:
                        ds_rep_sel[vn].data[..., rep_id] = new[vn].data

                # convert to f64 for exact summing
                ds_rep_sel = ds_rep_sel.astype(np.float64)

                # add some statistics about how much we had to fill up this RGI region
                ds_rep_sel.attrs['ratio_area_missing_filled_vs_working'] = bad_ratio
                ds_rep_sel.attrs['n_glaciers_missing_filled'] = len(bad)
                if remove_runaway_glaciers:
                    ds_rep_sel.attrs['n_glaciers_of_missing_removed_runaway_glaciers_filled'] = len(runaway_glaciers)
                    ds_rep_sel.attrs['runaway_glaciers_removed_filled'] = str(runaway_glaciers)
                    ds_rep_sel.attrs['ratio_area_runaway_glaciers_vs_non_runaway'] = runaway_ratio
                    ds_rep_sel.attrs['runaway_removed_glacier_condition'] = condition

                # create a dataset that has the same form as the "raw" uncorrected
                # dataset, but add the information that the data was corrected
                ds_rep_sel.attrs['contributor'] = model_author
                ds_rep_sel.attrs['contributor_email'] = 'fabien.maussion@uibk.ac.at'
                ds_rep_sel.attrs['creation_date'] = str(today)
                ds_rep_sel.attrs['rgi-region'] = f'rgi{rgi_reg}'
                ds_rep_sel.attrs['aggregation-level'] = 'sum'
                ds_rep_sel.attrs['period'] = period
                ds_rep_sel.attrs['gcm'] = gcm
                ds_rep_sel.attrs['ssp'] = scenario
                # for gfdl I used the unofficial dev-branch, that's why the version can
                # differ in the raw files, but the code should be the same
                #ds_rep_sel.attrs['oggm_version'] = '1.6'
                if remove_runaway_glaciers:
                    add = ('\n(missing glaciers were filled by Lilian Schuster with average glacier '
                           'statistics from similar-area glaciers of that RGI region, and runaway '
                           'glaciers have been removed)')
                else:
                    add = ('\n(missing glaciers were filled by Lilian Schuster with average glacier '
                           'statistics from similar-area glaciers of that RGI region)')
                ds_rep_sel.attrs['information'] = 'OGGM test run w. stop criterion 1' + add
                ds_rep_sel.attrs['stop_criterion'] = 'stop criterion 1: at least 5 years with zero volume in 20 years'
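                # note on the consistency checks below (added explanation): the
                # regional sum computed afterwards skips NaNs by default, so a
                # glacier that is still NaN (or absent from the index) would
                # silently bias the regional volume and area low; both conditions
                # are therefore asserted explicitly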
                # check that there are no NaN values left (should not be the case,
                # all glaciers should have been filled!)
                assert set(ds_rep_sel.dropna(dim='rgi_id', how='any').rgi_id.values) == set(rgidf_dict[rgi_reg].index)
                # and that we have the correct amount of rgi_ids
                assert len(ds_rep_sel.rgi_id) == len(rgidf_dict[rgi_reg].index)

                # we only keep the regional summed-up filled files for the moment
                ds_rep_sel = ds_rep_sel.sum(dim='rgi_id', keep_attrs=True)

                ## save the data
                out_dir = '/home/users/lschuster/www_lschuster/glacierMIP3_analysis/OGGM_v16/regional_filled/'
                out_path = out_dir + f'{rgi_reg}/{model_author}_rgi{rgi_reg}_sum_{period}_{gcm}_{scenario}_filled.nc'
                mkdir(out_dir + rgi_reg)
                ds_rep_sel.to_netcdf(out_path)

                # close the datasets to be sure ...
                ds_rep_sel.close()
                ds_sel_full.close()
                ds.close()
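                # optional post-write sanity check (a minimal addition, not part of
                # the original workflow): re-open the file that was just written and
                # confirm that the fill statistics survived the sum(keep_attrs=True)
                with xr.open_dataset(out_path) as ds_check:
                    assert 'ratio_area_missing_filled_vs_working' in ds_check.attrs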