import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import time
from oggm import utils
import sys
import pandas as pd
import os

# two variables: bias-adjusted precipitation and near-surface temperature
typs = ['tas', 'pr']

# create monthly files out of the daily files
run = False
if run:
    start = time.time()
    ybegin = 1901
    yend = 2019
    for typ in typs:
        print(typ)
        # folder_output = 'isimip3a_{}_monthly'.format(typ)
        path_input = 'gswp3-w5e5_obsclim_{}_global_daily_*.nc'.format(typ)
        # isimip3a/gswp3-w5e5_obsclim_pr_global_daily_1901_1910.nc
        path_output = '../monthly/gswp3-w5e5_obsclim_{}_global_monthly_{}_{}.nc'.format(typ, ybegin, yend)
        # path_output = 'isimip3b/{}/{}_{}_w5e5_{}_{}_global_monthly_{}_{}.nc'.format(folder_output, gcm, ensemble, scenario, typ, ybegin, yend)

        # open all daily files at once
        ds = xr.open_mfdataset(path_input, concat_dim='time')
        # resample to monthly means, the time stamp is the first day of the month
        ds_monthly = ds.resample(time='MS', keep_attrs=True).mean(keep_attrs=True)

        # add some postprocessing attributes
        ds_monthly.attrs['postprocessing_date'] = str(np.datetime64('today', 'D'))
        ds_monthly.attrs['postprocessing_scientist'] = 'lilian.schuster@student.uibk.ac.at'
        ds_monthly.attrs['postprocessing_actions'] = (
            "using xarray: ds = xr.open_mfdataset({}, concat_dim='time') \n"
            "ds_monthly = ds.resample(time='MS', keep_attrs=True).mean(keep_attrs=True)\n"
            "ds_monthly.to_netcdf({})\n".format(path_input, path_output))
        # save the file
        ds_monthly.to_netcdf(path_output)

        # if temperature is the variable, also compute the daily temperature std for each month and save it
        if typ == 'tas':
            path_output_std = '../monthly/gswp3-w5e5_obsclim_temp_std_global_monthly_{}_{}.nc'.format(ybegin, yend)
            ds_tas_daily_std = ds.resample(time='MS', keep_attrs=True).std(keep_attrs=True)
            # rename the variable tas to temp_std and adapt its attributes
            ds_tas_daily_std = ds_tas_daily_std.rename_vars(dict(tas='temp_std'))
            ds_tas_daily_std.temp_std.attrs['standard_name'] = 'air_temperature_daily_std'
            ds_tas_daily_std.temp_std.attrs['long_name'] = 'Near-Surface Air Temperature daily standard deviation'
            ds_tas_daily_std.attrs['postprocessing_date'] = str(np.datetime64('today', 'D'))
            ds_tas_daily_std.attrs['postprocessing_scientist'] = 'lilian.schuster@student.uibk.ac.at'
            ds_tas_daily_std.attrs['postprocessing_actions'] = (
                "using xarray: ds = xr.open_mfdataset({}, concat_dim='time') \n"
                "ds_tas_daily_std = ds.resample(time='MS', keep_attrs=True).std(keep_attrs=True)\n"
                "ds_tas_daily_std = ds_tas_daily_std.rename_vars(dict(tas='temp_std'))\n"
                "ds_tas_daily_std.temp_std.attrs['standard_name'] = 'air_temperature_daily_std'\n"
                "ds_tas_daily_std.temp_std.attrs['long_name'] = 'Near-Surface Air Temperature daily standard deviation'\n"
                "ds_tas_daily_std.to_netcdf({})\n".format(path_input, path_output_std))
            ds_tas_daily_std.to_netcdf(path_output_std)

    end = time.time()
    print('elapsed time: {} minutes'.format((end - start) / 60))

### this is the same for W5E5 / ISIMIP3a / ISIMIP3b
ds_inv = xr.open_dataset('orog_W5E5v2.0.nc')
ds_inv = ds_inv.rename({'lat': 'latitude'})
ds_inv = ds_inv.rename({'lon': 'longitude'})
ds_inv.coords['longitude'] = np.where(ds_inv.longitude.values < 0,
                                      ds_inv.longitude.values + 360,
                                      ds_inv.longitude.values)
ds_inv = ds_inv.sortby(ds_inv.longitude)
ds_inv.longitude.attrs['units'] = 'degrees_onlypositive'

# get the dataset where the coordinates of the glaciers are stored
frgi = utils.file_downloader('https://cluster.klima.uni-bremen.de/~oggm/rgi/rgi62_stats.h5')
# frgi = '/home/users/lschuster/glacierMIP/rgi62_stats.h5'
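# Added sanity check (a sketch, not part of the original notebook): after shifting the
# longitudes to the 0 -> 360 scheme and sorting, the coordinate should be strictly
# increasing on the nominal 0.5 deg W5E5 grid. The 0.5 deg spacing is an assumption,
# consistent with the +/- 0.25 deg bin half-width used below.
assert np.all(np.diff(ds_inv.longitude.values) > 0)
np.testing.assert_allclose(np.diff(ds_inv.longitude.values), 0.5)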
odf = pd.read_hdf(frgi, index_col=0)

nx, ny = ds_inv.dims['longitude'], ds_inv.dims['latitude']

# bring the glacier center longitudes into the 0 -> 360 scheme as well
cenlon_for_bins = np.where(odf['CenLon'] < 0, odf['CenLon'] + 360, odf['CenLon'])

# Nearest neighbor lookup
lon_bins = np.linspace(ds_inv.longitude.data[0] - 0.25, ds_inv.longitude.data[-1] + 0.25, nx + 1)
# !!! attention: W5E5 is sorted from 90 to -90 !!!
lat_bins = np.linspace(ds_inv.latitude.data[0] + 0.25, ds_inv.latitude.data[-1] - 0.25, ny + 1)
# before, this was wrongly done as
# lon_bins = np.linspace(0, 360, nx), lat_bins = np.linspace(90, -90, ny),
# which created non-aligned bins; in addition, one bin was missing, which gave a slightly
# larger bin width that accumulated over the globe and became problematic:
# in the end it resulted in 19 glaciers for which the nearest grid point was not found
odf['lon_id'] = np.digitize(cenlon_for_bins, lon_bins) - 1
odf['lat_id'] = np.digitize(odf['CenLat'], lat_bins) - 1
odf['lon_val'] = ds_inv.longitude.data[odf.lon_id]
odf['lat_val'] = ds_inv.latitude.data[odf.lat_id]

# Use unique grid points as index and compute the glacier area per location
odf['unique_id'] = ['{:03d}_{:03d}'.format(lon, lat) for (lon, lat) in zip(odf['lon_id'], odf['lat_id'])]
mdf = odf.drop_duplicates(subset='unique_id').set_index('unique_id')
mdf['Area'] = odf.groupby('unique_id').sum()['Area']
print('Total number of glaciers: {} and number of W5E5 gridpoints with glaciers in them: {}'.format(len(odf), len(mdf)))

# this is the mask that we need to remove all non-glacierized gridpoints
mask = np.full((ny, nx), np.nan)
mask[mdf['lat_id'], mdf['lon_id']] = 1
ds_inv['glacier_mask'] = (('latitude', 'longitude'), np.isfinite(mask))

# check the distance to the chosen gridpoints -> it should never be larger than
# half the grid diagonal, i.e. (0.25**2 + 0.25**2)**0.5
diff_lon = ds_inv.longitude.data[odf.lon_id] - odf.CenLon
# a longitude difference of 360 is the same as 0
diff_lon = np.where(np.abs(diff_lon - 360) < 170, diff_lon - 360, diff_lon)
odf['ll_dist_to_point'] = (diff_lon**2 + (ds_inv.latitude.data[odf.lat_id] - odf.CenLat)**2)**0.5
assert odf['ll_dist_to_point'].max() < (0.25**2 + 0.25**2)**0.5

#####
for var in ['pr', 'tas', 'temp_std']:
    try:
        os.mkdir('tmp_monthly_{}'.format(var))
    except FileExistsError:
        pass
    with xr.open_dataset(f'../monthly/gswp3-w5e5_obsclim_{var}_global_monthly_1901_2019.nc') as xr_prcp:
        xr_prcp = xr_prcp.rename({'lon': 'longitude'}).rename({'lat': 'latitude'})
        xr_prcp.coords['longitude'] = np.where(xr_prcp.longitude.values < 0,
                                               xr_prcp.longitude.values + 360,
                                               xr_prcp.longitude.values)
        xr_prcp = xr_prcp.sortby(xr_prcp.longitude)
        xr_prcp.longitude.attrs['units'] = 'degrees_onlypositive'
        # this takes too long (if we use daily data)!!!
        # ds_merged_glaciers = xr_prcp.where(ds_inv['glacier_mask'], drop=True)
        xr_prcp['ASurf'] = ds_inv['orog']  # ds_inv['ASurf']
        # as we only use the gridpoints where glaciers are involved, we need to put the mask on dsi as well!
        # dsi = ds_inv.where(ds_inv['glacier_mask'], drop=True)
        # this makes out of in total 6483600 points only 116280 points!!!
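        # Added check (a sketch, not in the original notebook): the monthly climate file and
        # the W5E5 orography are assumed to share exactly the same 0 -> 360 grid;
        # join='exact' raises a ValueError if the coordinates differ, which would otherwise
        # silently turn ASurf into NaN in the assignment above.
        xr.align(xr_prcp[var].isel(time=0), ds_inv['orog'], join='exact')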
        dsi = xr_prcp.isel(time=[0]).where(ds_inv['glacier_mask'], drop=True)
        # we do not want any dependency on latitude and longitude
        dsif = dsi.stack(points=('latitude', 'longitude')).reset_index(('points'))
        # so far still many points:
        # drop the non-glacierized points
        dsifs = dsif.where(np.isfinite(dsi[var].stack(points=('latitude', 'longitude')).reset_index(('time', 'points'))),
                           drop=True)
        # I have to drop the 'time_' dimension, to be equal to the era5_land example,
        # because the invariant file should not have any time dependence!
        dsifs = dsifs.drop_vars(var)
        dsifs = dsifs.drop('time')
        dsifs = dsifs.drop('time_')

        ### test
        # check HEF
        lon, lat = (10.7584, 46.8003)
        orog = dsifs.ASurf
        c = (orog.longitude - lon)**2 + (orog.latitude - lat)**2
        surf_hef = orog.isel(points=c.argmin())
        np.testing.assert_allclose(surf_hef, 2250, rtol=0.1)
        np.testing.assert_allclose(surf_hef.longitude, lon, rtol=0.2)
        np.testing.assert_allclose(surf_hef.latitude, lat, rtol=0.2)

        # check the glacier that was wrongly assigned in the previous version
        # RGI60-19.00124
        lon, lat = (-70.8931 + 360, -72.4474)
        orog = dsifs.ASurf
        c = (orog.longitude - lon)**2 + (orog.latitude - lat)**2
        surf_g = orog.isel(points=c.argmin())
        np.testing.assert_allclose(surf_g.longitude, lon, rtol=0.2)
        np.testing.assert_allclose(surf_g.latitude, lat, rtol=0.2)
        ####

        if var == 'pr':
            dsifs.attrs['version'] = '2023.2'
            dsifs.to_netcdf('../flattened/2023.2/gswp3-w5e5_glacier_invariant_flat.nc')

        xr_prcp = xr_prcp.drop_vars('ASurf')
        for t in xr_prcp.time.values[:]:
            # ds_merged_glaciers.time.values: .sel(time=slice('1901-04','2016-12'))
            # produce a temporary file for each month
            sel_l = xr_prcp.sel(time=[t])
            sel = sel_l.where(ds_inv['glacier_mask'])
            sel = sel.stack(points=('latitude', 'longitude')).reset_index(('time', 'points'))
            sel = sel.where(np.isfinite(sel[var]), drop=True)
            sel.to_netcdf('tmp_monthly_{}/tmp_{}.nc'.format(var, str(t)[:10]))

    with xr.open_mfdataset('tmp_monthly_{}/tmp_*-*.nc'.format(var), concat_dim='time',
                           combine='nested', parallel=True).rename_vars({'time_': 'time'}) as dso_all2:
        dso_all2.attrs['history'] = ('longitudes to 0 -> 360, only glacier gridpoints chosen '
                                     'and flattened latitude/longitude --> points')
        dso_all2.attrs['version'] = '2023.2'
        dso_all2.attrs['postprocessing_date'] = str(np.datetime64('today', 'D'))
        dso_all2.attrs['postprocessing_scientist'] = 'lilian.schuster@uibk.ac.at'
        dso_all2.to_netcdf('../flattened/2023.2/monthly/gswp3-w5e5_obsclim_{}_global_monthly_1901_2019_flat_glaciers.nc'.format(var))

    import glob
    files = glob.glob('tmp_monthly_{}/tmp_*'.format(var))
    for f in files:
        os.remove(f)
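# Minimal usage sketch (added; not part of the processing above): open one of the
# flattened monthly files and select the gridpoint nearest to a glacier, in the same
# way as the HEF test above. The variable name 'tas', the output path and the HEF
# coordinates are taken from this notebook; everything else is an assumption.
with xr.open_dataset('../flattened/2023.2/monthly/'
                     'gswp3-w5e5_obsclim_tas_global_monthly_1901_2019_flat_glaciers.nc') as ds_flat:
    lon, lat = (10.7584, 46.8003)  # HEF
    c = (ds_flat.longitude - lon)**2 + (ds_flat.latitude - lat)**2
    tas_hef = ds_flat.tas.isel(points=c.argmin())
    print(tas_hef.mean().values)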