import os
import json
import logging
import warnings

# External libs
from packaging.version import Version
import cftime
import netCDF4
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
# import salem

# Locals
from oggm import cfg, utils, workflow, tasks, graphics
from oggm import entity_task
from oggm.exceptions import InvalidParamsError
# normally we would use `from oggm.shop.gcm_climate import process_cmip_data`,
# but the changes are not yet updated upstream, so a patched version is
# defined below
from oggm.shop.gcm_climate import process_gcm_data

# Module logger
log = logging.getLogger(__name__)
@entity_task(log, writes=['gcm_data'])
def process_monthly_isimip_data(gdir, output_filesuffix='',
                                member='mri-esm2-0_r1i1p1f1',
                                ssp='ssp126',
                                year_range=('1979', '2014'),
                                apply_bias_correction=False,
                                testing=False,
                                y0=None, y1=None,
                                **kwargs):
    """Read, process and store the ISIMIP3b GCM data for this glacier.

    It stores the data in a format that can be used by the OGGM mass
    balance model and in the glacier directory. Currently, this function
    is built for the ISIMIP3b simulations that are on the OGGM servers.

    Parameters
    ----------
    output_filesuffix : str
        append a suffix to the filename (useful for ensemble experiments).
        If not set, a filesuffix is created from the ensemble member
        and ssp.
    member : str
        ensemble member gcm that you want to process
    ssp : str
        ssp scenario to process (only 'ssp126', 'ssp370' or 'ssp585'
        are available)
    year_range : tuple of str
        the year range for which the anomalies are computed (passed to
        `process_gcm_data`). Default for ISIMIP3b is ('1979', '2014')
    apply_bias_correction : bool
        whether a bias correction is applied (default is False). As we use
        internally bias-corrected GCMs already, it defaults to False!
    testing : bool
        Default is False. If set to True, the smaller test ISIMIP3b gcm
        files are downloaded instead (only useful for pytest)
    y0 : int
        start year of the ISIMIP3b data processing. Default is None, which
        processes the entire timeseries. Set this to the beginning of your
        bias correction/projection period minus half of the bc period to
        make processing faster.
    y1 : int
        end year of the ISIMIP3b data processing. Set this to the end of
        your projection period plus half of the bc period. Default is None
        to process the entire time series, same as y0.
    **kwargs : any kwarg to be passed to ref:`process_gcm_data`
    """
    if output_filesuffix == '':
        # recognize the gcm climate file for later
        output_filesuffix = '_monthly_ISIMIP3b_{}_{}'.format(member, ssp)

    # Glacier location
    glon = gdir.cenlon
    glat = gdir.cenlat

    if y0 is not None:
        assert y0 < 2014, 'y0 has to be below 2014'
    if y1 is not None:
        assert y1 > 2014, 'y1 has to be above 2014 at the moment'

    if testing:
        gcm_server = 'https://cluster.klima.uni-bremen.de/~oggm/test_climate'
    else:
        gcm_server = 'https://cluster.klima.uni-bremen.de/~oggm'

    path = f'{gcm_server}/cmip6/isimip3b/flat/2023.2/monthly/'
    add = '_global_monthly_flat_glaciers.nc'
    fpath_spec = path + '{}_w5e5_'.format(member) + '{ssp}_{var}' + add
    fpath_temp = fpath_spec.format(var='tasAdjust', ssp=ssp)
    fpath_temp_h = fpath_spec.format(var='tasAdjust', ssp='historical')
    fpath_precip = fpath_spec.format(var='prAdjust', ssp=ssp)
    fpath_precip_h = fpath_spec.format(var='prAdjust', ssp='historical')

    with utils.get_lock():
        fpath_temp = utils.file_downloader(fpath_temp)
        fpath_temp_h = utils.file_downloader(fpath_temp_h)
        fpath_precip = utils.file_downloader(fpath_precip)
        fpath_precip_h = utils.file_downloader(fpath_precip_h)

    # Read the GCM files
    with xr.open_dataset(fpath_temp_h, use_cftime=True) as tempds_hist, \
            xr.open_dataset(fpath_temp, use_cftime=True) as tempds_gcm:

        # make processing faster
        if y0 is not None:
            tempds_hist = tempds_hist.sel(time=slice(str(y0), None))
        if y1 is not None:
            tempds_gcm = tempds_gcm.sel(time=slice(None, str(y1)))

        # Check longitude conventions
        if tempds_gcm.longitude.min() >= 0 and glon <= 0:
            glon += 360

        assert tempds_gcm.attrs['experiment'] == ssp

        # Take the closest to the glacier.
        # Should we consider GCM interpolation?
        # The files are flattened: compute all distances and choose the
        # nearest gridpoint
        c = ((tempds_gcm.longitude - glon) ** 2 +
             (tempds_gcm.latitude - glat) ** 2)
        # first select the gridpoint, then merge - this is faster!
        temp_a_gcm = tempds_gcm.isel(points=np.argmin(c.data))
        temp_a_hist = tempds_hist.isel(points=np.argmin(c.data))

        # merge historical and gcm data together
        # TODO: change to drop_conflicts when xarray version v0.17.0 can
        # be used with salem
        temp_a = xr.merge([temp_a_gcm, temp_a_hist],
                          combine_attrs='override')
        temp = temp_a.tasAdjust
        temp['lon'] = temp_a.longitude
        temp['lat'] = temp_a.latitude
        # Back to [-180, 180] for OGGM
        temp.lon.values = temp.lon if temp.lon <= 180 else temp.lon - 360

    with xr.open_dataset(fpath_precip_h, use_cftime=True) as precipds_hist, \
            xr.open_dataset(fpath_precip, use_cftime=True) as precipds_gcm:

        # make processing faster
        if y0 is not None:
            precipds_hist = precipds_hist.sel(time=slice(str(y0), None))
        if y1 is not None:
            precipds_gcm = precipds_gcm.sel(time=slice(None, str(y1)))

        c = ((precipds_gcm.longitude - glon) ** 2 +
             (precipds_gcm.latitude - glat) ** 2)
        precip_a_gcm = precipds_gcm.isel(points=np.argmin(c.data))
        precip_a_hist = precipds_hist.isel(points=np.argmin(c.data))
        precip_a = xr.merge([precip_a_gcm, precip_a_hist],
                            combine_attrs='override')
        precip = precip_a.prAdjust
        precip['lon'] = precip_a.longitude
        precip['lat'] = precip_a.latitude
        # Back to [-180, 180] for OGGM
        precip.lon.values = (precip.lon if precip.lon <= 180
                             else precip.lon - 360)

        # Convert kg m-2 s-1 to mm mth-1 => 1 kg m-2 = 1 mm!
        assert 'kg m-2 s-1' in precip.units, 'Precip units not understood'
        ny, r = divmod(len(temp), 12)
        assert r == 0
        dimo = [cfg.DAYS_IN_MONTH[m - 1] for m in temp['time.month']]
        precip = precip * dimo * (60 * 60 * 24)

    process_gcm_data(gdir, filesuffix=output_filesuffix, prcp=precip,
                     temp=temp, year_range=year_range,
                     source=output_filesuffix,
                     apply_bias_correction=apply_bias_correction,
                     **kwargs)
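# --- Usage sketch (not part of the original module) --------------------------
# A minimal example of how `process_monthly_isimip_data` could be dispatched
# over a list of preprocessed glacier directories with OGGM's multiprocessing
# helper. The variable `gdirs` is an assumption for illustration.
def _example_process_isimip3b(gdirs):
    """Hypothetical driver: process all ISIMIP3b ssps for one member."""
    for ssp in ['ssp126', 'ssp370', 'ssp585']:
        workflow.execute_entity_task(process_monthly_isimip_data, gdirs,
                                     member='mri-esm2-0_r1i1p1f1', ssp=ssp)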
@entity_task(log, writes=['gcm_data'])
def process_cmip_data(gdir, filesuffix='', fpath_temp=None,
                      fpath_precip=None, y0=None, y1=None, **kwargs):
    """Read, process and store the CMIP5 and CMIP6 climate data for this
    glacier.

    It stores the data in a format that can be used by the OGGM mass
    balance model and in the glacier directory. Currently, this function
    is built for the CMIP5 and CMIP6 projection simulations that are on
    the OGGM servers.

    Parameters
    ----------
    filesuffix : str
        append a suffix to the filename (useful for ensemble experiments).
    fpath_temp : str
        path to the temp file
    fpath_precip : str
        path to the precip file
    y0 : int
        start year of the CMIP data processing. Default is None, which
        processes the entire timeseries. Set this to the beginning of your
        bias correction/projection period minus half of the bc period to
        make process_cmip_data faster.
    y1 : int
        end year of the CMIP data processing. Set this to the end of your
        projection period plus half of the bc period. Default is None to
        process the entire time series, same as y0.
    **kwargs : any kwarg to be passed to ref:`process_gcm_data`
    """
    # Glacier location
    glon = gdir.cenlon
    glat = gdir.cenlat

    if y0 is not None:
        y0 = str(y0)
    if y1 is not None:
        y1 = str(y1)

    # Read the GCM files
    with xr.open_dataset(fpath_temp, use_cftime=True) as tempds, \
            xr.open_dataset(fpath_precip, use_cftime=True) as precipds:

        # only process and save the selected gcm data -> saves some time!
        if (y0 is not None) or (y1 is not None):
            tempds = tempds.sel(time=slice(y0, y1))
            precipds = precipds.sel(time=slice(y0, y1))

        # Check longitude conventions
        if tempds.lon.min() >= 0 and glon <= 0:
            glon += 360

        # Take the closest to the glacier.
        # Should we consider GCM interpolation?
        try:
            # if the gcms are not flattened (the default), try this first
            temp = tempds.tas.sel(lat=glat, lon=glon, method='nearest')
            precip = precipds.pr.sel(lat=glat, lon=glon, method='nearest')
        except Exception:
            # the gcms are flattened: compute all the distances and choose
            # the nearest gridpoint
            c_tempds = ((tempds.lon - glon) ** 2 +
                        (tempds.lat - glat) ** 2)
            c_precipds = ((precipds.lon - glon) ** 2 +
                          (precipds.lat - glat) ** 2)
            temp_0 = tempds.isel(points=np.argmin(c_tempds.data))
            precip_0 = precipds.isel(points=np.argmin(c_precipds.data))
            temp = temp_0.tas
            temp['lon'] = temp_0.lon
            temp['lat'] = temp_0.lat
            precip = precip_0.pr
            precip['lon'] = precip_0.lon
            precip['lat'] = precip_0.lat

        # Back to [-180, 180] for OGGM
        temp.lon.values = temp.lon if temp.lon <= 180 else temp.lon - 360
        precip.lon.values = (precip.lon if precip.lon <= 180
                             else precip.lon - 360)

        # Convert kg m-2 s-1 to mm mth-1 => 1 kg m-2 = 1 mm!
        assert 'kg m-2 s-1' in precip.units, 'Precip units not understood'
        ny, r = divmod(len(temp), 12)
        assert r == 0
        dimo = [cfg.DAYS_IN_MONTH[m - 1] for m in temp['time.month']]
        precip = precip * dimo * (60 * 60 * 24)

    process_gcm_data(gdir, filesuffix=filesuffix, prcp=precip, temp=temp,
                     source=filesuffix, **kwargs)
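# --- Usage sketch (not part of the original module) --------------------------
# How the patched `process_cmip_data` could be called for a downloaded CMIP6
# tas/pr file pair. The file paths, suffix and y0/y1 choices below are
# assumptions for illustration only.
def _example_process_cmip6(gdirs, fpath_temp, fpath_precip):
    """Hypothetical driver: process one CMIP6 GCM/ssp file pair."""
    workflow.execute_entity_task(process_cmip_data, gdirs,
                                 filesuffix='_CMIP6_example_ssp126',
                                 fpath_temp=fpath_temp,
                                 fpath_precip=fpath_precip,
                                 # restrict processing around the bc period
                                 y0=1990, y1=2110)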
@entity_task(log)
def run_hydro_from_2000_ref_area_2000_hist_gcm(gdir, end_yr=2100):
    """Runs historical and future hydro climate from 2000 until end_yr
    (2100 or 2300) by using the GCM data directly.

    The ref_area_yr is 2000. Here, the bias correction had been done over
    2000-2019.

    Parameters
    ----------
    end_yr : int
        default is 2100, can also be 2300
    """
    # very similar to run_hydro_from_2000_ref_area_2000_hist_w5e5, but we
    # directly start from the GCM run, so run_with_hydro with the gcm takes
    # the spinup at year 2000
    if end_yr == 2100:
        # ISIMIP3b
        members = ['gfdl-esm4_r1i1p1f1', 'mpi-esm1-2-hr_r1i1p1f1',
                   'mri-esm2-0_r1i1p1f1', 'ipsl-cm6a-lr_r1i1p1f1',
                   'ukesm1-0-ll_r1i1p1f2']

        # only this part needs to be recomputed for every member and ssp...
        for member in members:
            for ssp in ['ssp126', 'ssp370', 'ssp585']:
                rid = f'_ISIMIP3b_{member}_{ssp}'

                # No additional bias correction from 2000-2019
                tasks.run_with_hydro(
                    gdir,
                    run_task=tasks.run_from_climate_data,
                    # this is important! Start from the year-2000 glacier
                    ys=2000,
                    init_model_yr=2000,
                    # use gcm_data, not climate_historical
                    climate_filename='gcm_data',
                    ye=end_yr,
                    # use the chosen scenario
                    climate_input_filesuffix=rid,
                    # we start from the previous run
                    init_model_filesuffix='_spinup_historical',
                    ref_geometry_filesuffix='_spinup_historical',
                    ref_area_yr=2000,
                    # recognize the run for later -> we directly have the
                    # full necessary run
                    output_filesuffix=f'_gcm_from_2000_run{rid}',
                    # add monthly diagnostics
                    store_monthly_hydro=True)

                # with additional bias correction from 2000-2019
                tasks.run_with_hydro(
                    gdir,
                    run_task=tasks.run_from_climate_data,
                    # this is important! Start from the year-2000 glacier
                    ys=2000,
                    init_model_yr=2000,
                    # use gcm_data, not climate_historical
                    climate_filename='gcm_data',
                    ye=end_yr,
                    # use the chosen scenario
                    climate_input_filesuffix=rid + '_bc_2000_2019',
                    # we start from the previous run
                    init_model_filesuffix='_spinup_historical',
                    ref_geometry_filesuffix='_spinup_historical',
                    ref_area_yr=2000,
                    # recognize the run for later
                    output_filesuffix=f'_gcm_from_2000_run{rid}_bc_2000_2019',
                    # add monthly diagnostics
                    store_monthly_hydro=True)

    # CMIP6
    gcms_cmip6 = pd.read_csv('/home/www/oggm/cmip6/all_gcm_list.csv',
                             index_col=0)
    if end_yr == 2300:
        # only select those that go until 2300
        # (updated with Rebekka's downloads)
        gcms_cmip6 = pd.read_csv('/home/www/lschuster/runs_oggm_v16/'
                                 'all_gcm_list_2300_update.csv',
                                 index_col='Unnamed: 0')

    for gcm in gcms_cmip6.gcm.unique():
        df1 = gcms_cmip6.loc[gcms_cmip6.gcm == gcm]
        for ssp in df1.ssp.unique():
            df2 = df1.loc[df1.ssp == ssp]
            assert len(df2) == 2
            ft = df2.loc[df2['var'] == 'tas'].iloc[0]
            rid = ft.fname.replace('_r1i1p1f1_tas.nc', '')
            rid = '_CMIP6_' + rid

            tasks.run_with_hydro(
                gdir,
                run_task=tasks.run_from_climate_data,
                # this is important! Start from the year-2000 glacier
                ys=2000,
                init_model_yr=2000,
                ye=end_yr,
                # use gcm_data, not climate_historical
                climate_filename='gcm_data',
                # use the chosen scenario
                climate_input_filesuffix=rid + '_bc_2000_2019',
                # we start from the previous run
                init_model_filesuffix='_spinup_historical',
                ref_geometry_filesuffix='_spinup_historical',
                ref_area_yr=2000,
                # recognize the run for later
                output_filesuffix=(f'_gcm_from_2000_run{rid}'
                                   f'_endyr{end_yr}_bc_2000_2019'),
                # add monthly diagnostics
                store_monthly_hydro=True)

            if end_yr != 2300:
                # only for comparison with the common ssps of isimip3b
                if gcm.lower() in ['gfdl-esm4', 'mpi-esm1-2-hr',
                                   'mri-esm2-0']:
                    if ssp in ['ssp126', 'ssp370', 'ssp585']:
                        tasks.run_with_hydro(
                            gdir,
                            run_task=tasks.run_from_climate_data,
                            # this is important! Start from the year-2000
                            # glacier
                            ys=2000,
                            init_model_yr=2000,
                            ye=end_yr,
                            # use gcm_data, not climate_historical
                            climate_filename='gcm_data',
                            # use the chosen scenario
                            climate_input_filesuffix=rid + '_bc_1979_2014',
                            # we start from the previous run
                            init_model_filesuffix='_spinup_historical',
                            ref_geometry_filesuffix='_spinup_historical',
                            ref_area_yr=2000,
                            # recognize the run for later
                            output_filesuffix=(f'_gcm_from_2000_run{rid}'
                                               f'_endyr{end_yr}_bc_1979_2014'),
                            # add monthly diagnostics
                            store_monthly_hydro=True)
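# --- Usage sketch (not part of the original module) --------------------------
# The task above is typically dispatched over many glaciers at once. The call
# below is a hypothetical illustration; it assumes the '_spinup_historical'
# run and all gcm_data files referenced above already exist in each gdir.
def _example_run_hydro_gcm(gdirs):
    """Hypothetical driver: GCM-based hydro projections until 2100."""
    workflow.execute_entity_task(run_hydro_from_2000_ref_area_2000_hist_gcm,
                                 gdirs, end_yr=2100)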
@entity_task(log)
def run_hydro_from_2000_ref_area_2000_hist_w5e5(gdir, end_yr=2100):
    """Runs historical hydro climate from 2000 until 2020 using W5E5, then
    the future projections for the three SSPs by using ISIMIP3b or the GCMs.

    The ref_area_yr is 2000. Here, the bias correction had been done over
    2000-2019.

    Parameters
    ----------
    end_yr : int
        default is 2100, can also be 2300
    """
    tasks.run_with_hydro(gdir,
                         run_task=tasks.run_from_climate_data,
                         climate_filename='climate_historical',
                         ys=2000, init_model_yr=2000,
                         store_monthly_hydro=True,
                         init_model_filesuffix='_spinup_historical',
                         ref_geometry_filesuffix='_spinup_historical',
                         ref_area_yr=2000,
                         output_filesuffix='_historical_from_2000_run')

    if end_yr == 2100:
        # ISIMIP3b
        members = ['gfdl-esm4_r1i1p1f1', 'mpi-esm1-2-hr_r1i1p1f1',
                   'mri-esm2-0_r1i1p1f1', 'ipsl-cm6a-lr_r1i1p1f1',
                   'ukesm1-0-ll_r1i1p1f2']

        # only this part needs to be recomputed for every member and ssp...
        for member in members:
            for ssp in ['ssp126', 'ssp370', 'ssp585']:
                rid = f'_ISIMIP3b_{member}_{ssp}'

                # No additional bias correction from 2000-2019
                tasks.run_with_hydro(
                    gdir,
                    run_task=tasks.run_from_climate_data,
                    # this is important! Start from the year-2020 glacier
                    ys=2020,
                    # use gcm_data, not climate_historical
                    climate_filename='gcm_data',
                    ye=end_yr,
                    # use the chosen scenario
                    climate_input_filesuffix=rid,
                    # we start from the previous run
                    init_model_filesuffix='_historical_from_2000_run',
                    ref_geometry_filesuffix='_historical_from_2000_run',
                    ref_area_yr=2000,
                    # recognize the run for later
                    output_filesuffix=f'_future_run{rid}',
                    # add monthly diagnostics
                    store_monthly_hydro=True)

                utils.merge_consecutive_run_outputs(
                    gdir,
                    input_filesuffix_1='_historical_from_2000_run',
                    input_filesuffix_2=f'_future_run{rid}',
                    output_filesuffix=f'_merged_from_2000_run{rid}',
                    delete_input=False)  # we will delete that later

                # with additional bias correction from 2000-2019
                tasks.run_with_hydro(
                    gdir,
                    run_task=tasks.run_from_climate_data,
                    # this is important! Start from the year-2020 glacier
                    ys=2020,
                    # use gcm_data, not climate_historical
                    climate_filename='gcm_data',
                    ye=end_yr,
                    # use the chosen scenario
                    climate_input_filesuffix=rid + '_bc_2000_2019',
                    # we start from the previous run
                    init_model_filesuffix='_historical_from_2000_run',
                    ref_geometry_filesuffix='_historical_from_2000_run',
                    ref_area_yr=2000,
                    # recognize the run for later
                    output_filesuffix=f'_future_run{rid}_bc_2000_2019',
                    # add monthly diagnostics
                    store_monthly_hydro=True)

                utils.merge_consecutive_run_outputs(
                    gdir,
                    input_filesuffix_1='_historical_from_2000_run',
                    input_filesuffix_2=f'_future_run{rid}_bc_2000_2019',
                    output_filesuffix=f'_merged_from_2000_run{rid}_bc_2000_2019',
                    delete_input=False)  # we will delete that later

    # CMIP6
    gcms_cmip6 = pd.read_csv('/home/www/oggm/cmip6/all_gcm_list.csv',
                             index_col=0)
    if end_yr == 2300:
        # only select those that go until 2300
        # (updated with Rebekka's downloads)
        gcms_cmip6 = pd.read_csv('/home/www/lschuster/runs_oggm_v16/'
                                 'all_gcm_list_2300_update.csv',
                                 index_col='Unnamed: 0')

    for gcm in gcms_cmip6.gcm.unique():
        df1 = gcms_cmip6.loc[gcms_cmip6.gcm == gcm]
        for ssp in df1.ssp.unique():
            df2 = df1.loc[df1.ssp == ssp]
            assert len(df2) == 2
            ft = df2.loc[df2['var'] == 'tas'].iloc[0]
            rid = ft.fname.replace('_r1i1p1f1_tas.nc', '')
            rid = '_CMIP6_' + rid

            tasks.run_with_hydro(
                gdir,
                run_task=tasks.run_from_climate_data,
                # this is important! Start from the year-2020 glacier
                ys=2020,
                ye=end_yr,
                # use gcm_data, not climate_historical
                climate_filename='gcm_data',
                # use the chosen scenario
                climate_input_filesuffix=rid + '_bc_2000_2019',
                # we start from the previous run
                init_model_filesuffix='_historical_from_2000_run',
                ref_geometry_filesuffix='_historical_from_2000_run',
                ref_area_yr=2000,
                # recognize the run for later
                output_filesuffix=f'_future_run{rid}_bc_2000_2019',
                # add monthly diagnostics
                store_monthly_hydro=True)

            utils.merge_consecutive_run_outputs(
                gdir,
                input_filesuffix_1='_historical_from_2000_run',
                input_filesuffix_2=f'_future_run{rid}_bc_2000_2019',
                output_filesuffix=(f'_merged_from_2000_run{rid}'
                                   f'_endyr{end_yr}_bc_2000_2019'),
                delete_input=False)  # we will delete that later

            if end_yr != 2300:
                # only for comparison with the common ssps of isimip3b
                if gcm.lower() in ['gfdl-esm4', 'mpi-esm1-2-hr',
                                   'mri-esm2-0']:
                    if ssp in ['ssp126', 'ssp370', 'ssp585']:
                        tasks.run_with_hydro(
                            gdir,
                            run_task=tasks.run_from_climate_data,
                            # this is important! Start from the year-2020
                            # glacier
                            ys=2020,
                            ye=end_yr,
                            # use gcm_data, not climate_historical
                            climate_filename='gcm_data',
                            # use the chosen scenario
                            climate_input_filesuffix=rid + '_bc_1979_2014',
                            # we start from the previous run
                            init_model_filesuffix='_historical_from_2000_run',
                            ref_geometry_filesuffix='_historical_from_2000_run',
                            ref_area_yr=2000,
                            # recognize the run for later
                            output_filesuffix=f'_future_run{rid}_bc_1979_2014',
                            # add monthly diagnostics
                            store_monthly_hydro=True)

                        utils.merge_consecutive_run_outputs(
                            gdir,
                            input_filesuffix_1='_historical_from_2000_run',
                            input_filesuffix_2=f'_future_run{rid}_bc_1979_2014',
                            output_filesuffix=(f'_merged_from_2000_run{rid}'
                                               f'_endyr{end_yr}_bc_1979_2014'),
                            delete_input=False)  # we will delete that later
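# --- Usage sketch (not part of the original module) --------------------------
# A hypothetical end-to-end illustration: run the W5E5-based task for all
# glaciers, then compile one merged output file for a chosen scenario. The
# suffix mirrors the naming scheme used above.
def _example_run_hydro_w5e5(gdirs):
    """Hypothetical driver: W5E5 historical run + projections, compiled."""
    workflow.execute_entity_task(
        run_hydro_from_2000_ref_area_2000_hist_w5e5, gdirs, end_yr=2100)
    rid = '_ISIMIP3b_mri-esm2-0_r1i1p1f1_ssp126'
    utils.compile_run_output(gdirs,
                             input_filesuffix=f'_merged_from_2000_run{rid}')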