{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## GlaThiDa to RGI, step 1: attribute an RGI region to each point"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In the end, we have an HDF file of GlaThiDa per RGI sub-region."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import geopandas as gpd\n",
    "import shapely.geometry as shpg\n",
    "import numpy as np\n",
    "import os\n",
    "import progressbar\n",
    "import multiprocessing\n",
    "import tables\n",
    "import warnings\n",
    "import time\n",
    "from collections import OrderedDict\n",
    "from oggm import utils, cfg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "gtd_dir = './glathida-v3.1.0/data'\n",
    "# gtd_dir = './GlaThiDa_2016'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read the files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    df = pd.read_csv(os.path.join(gtd_dir, 'TTT.csv'), dtype={'SURVEY_DATE': 'str'}, low_memory=False)\n",
    "except FileNotFoundError:\n",
    "    df = pd.read_csv(os.path.join(gtd_dir, 'TTT_2016_corr.csv'), dtype={'SURVEY_DATE': 'str'}, low_memory=False, skiprows=[0, 1, 3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GlaThiDa_ID int64\n",
      "POLITICAL_UNIT object\n",
      "GLACIER_NAME object\n",
      "SURVEY_DATE object\n",
      "PROFILE_ID object\n",
      "POINT_ID object\n",
      "POINT_LAT float64\n",
      "POINT_LON float64\n",
      "ELEVATION float64\n",
      "THICKNESS int64\n",
      "THICKNESS_UNCERTAINTY float64\n",
      "DATA_FLAG float64\n",
      "REMARKS object\n"
     ]
    }
   ],
   "source": [
    "for c in df:\n",
    "    print(c, df[c].dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3854279"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.04538540152386478, 174928)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df.loc[df.THICKNESS == 0]) / len(df), len(df.loc[df.THICKNESS == 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace with 62 when available\n",
    "rgi_reg = gpd.read_file(os.path.join(utils.get_rgi_dir('62'), '00_rgi62_regions/00_rgi62_O1Regions.shp'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert lon, lat to Point geometries "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100% (3854279 of 3854279) |##############| Elapsed Time: 0:00:41 Time:  0:00:41\n"
     ]
    }
   ],
   "source": [
    "geoms = []\n",
    "for lon, lat in progressbar.progressbar(zip(df.POINT_LON, df.POINT_LAT), max_value=len(df)):\n",
    "    geoms.append(shpg.Point(lon, lat))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['geometry'] = geoms"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Add the RGI Region attribute "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100% (3854279 of 3854279) |##############| Elapsed Time: 0:01:44 Time:  0:01:44\n"
     ]
    }
   ],
   "source": [
    "reg = np.ones(len(df), dtype=int) * -1\n",
    "prev_reg = None\n",
    "for i, p in progressbar.progressbar(enumerate(df.geometry), max_value=len(df)):\n",
    "    if prev_reg is not None and prev_reg.contains(p):\n",
    "        reg[i] = reg[i-1]\n",
    "        continue\n",
    "    try:\n",
    "        sel = rgi_reg.loc[rgi_reg.contains(p)].iloc[0]\n",
    "        reg[i] = sel.RGI_CODE\n",
    "        prev_reg = sel.geometry\n",
    "    except:\n",
    "        prev_reg = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 16, 17, 18, 19]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['RGI_REG'] = reg\n",
    "sorted(df['RGI_REG'].unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Separate the data in RGI Regions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfs = OrderedDict()\n",
    "for r in sorted(df['RGI_REG'].unique()):\n",
    "    dfs[r] = df.loc[df.RGI_REG == r].copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare for writing and write to file "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i, (k, d) in enumerate(dfs.items()):\n",
    "    d.drop(['geometry'], axis=1, inplace=True)\n",
    "    d['RGI_REG'] = d['RGI_REG'].astype(str)\n",
    "    d['ELEVATION'] = d['ELEVATION'].astype(str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GlaThiDa_ID int64\n",
      "POLITICAL_UNIT object\n",
      "GLACIER_NAME object\n",
      "SURVEY_DATE object\n",
      "PROFILE_ID object\n",
      "POINT_ID object\n",
      "POINT_LAT float64\n",
      "POINT_LON float64\n",
      "ELEVATION object\n",
      "THICKNESS int64\n",
      "THICKNESS_UNCERTAINTY float64\n",
      "DATA_FLAG float64\n",
      "REMARKS object\n",
      "RGI_REG object\n"
     ]
    }
   ],
   "source": [
    "for c in d:\n",
    "    print(c, d[c].dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing 01 87157\n",
      "Writing 02 3406\n",
      "Writing 03 868346\n",
      "Writing 04 309453\n",
      "Writing 05 557136\n",
      "Writing 07 966408\n",
      "Writing 08 10801\n",
      "Writing 10 7726\n",
      "Writing 11 478312\n",
      "Writing 12 2278\n",
      "Writing 13 15327\n",
      "Writing 16 1287\n",
      "Writing 17 8463\n",
      "Writing 18 619\n",
      "Writing 19 537560\n"
     ]
    }
   ],
   "source": [
    "outf = os.path.join(gtd_dir, 'TTT_per_reg.h5')\n",
    "if os.path.exists(outf):\n",
    "    os.remove(outf)\n",
    "count = 0\n",
    "for i, (k, d) in enumerate(dfs.items()):\n",
    "    key = '{:02d}'.format(int(k))\n",
    "    print('Writing', key, len(d))\n",
    "    with warnings.catch_warnings():\n",
    "        warnings.simplefilter('ignore', tables.NaturalNameWarning)\n",
    "        d.to_hdf(outf, key, append=True, complevel=5)\n",
    "        count += len(d)\n",
    "assert count == len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}