{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## New attempt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In the end, we have an HDF file of GlaThiDa per RGI sub-region."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import geopandas as gpd\n",
    "import shapely.geometry as shpg\n",
    "import numpy as np\n",
    "import os\n",
    "import progressbar\n",
    "import time\n",
    "from oggm import utils, cfg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "gtd_dir = './glathida-v3.1.0/data'\n",
    "# gtd_dir = './GlaThiDa_2016'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read the files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(os.path.join(gtd_dir, 'TTT.csv'), dtype={'SURVEY_DATE': 'str'}, low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GlaThiDa_ID int64\n",
      "POLITICAL_UNIT object\n",
      "GLACIER_NAME object\n",
      "SURVEY_DATE object\n",
      "PROFILE_ID object\n",
      "POINT_ID object\n",
      "POINT_LAT float64\n",
      "POINT_LON float64\n",
      "ELEVATION float64\n",
      "THICKNESS int64\n",
      "THICKNESS_UNCERTAINTY float64\n",
      "DATA_FLAG float64\n",
      "REMARKS object\n"
     ]
    }
   ],
   "source": [
    "for c in df:\n",
    "    print(c, df[c].dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3854279"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.04538540152386478, 174928)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df.loc[df.THICKNESS == 0]) / len(df), len(df.loc[df.THICKNESS == 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace with 62 when available\n",
    "rgi_reg = gpd.read_file(os.path.join(utils.get_rgi_dir('62'), '00_rgi62_regions/00_rgi62_O1Regions.shp'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert lon, lat to Point geometries "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100% (3854279 of 3854279) |##############| Elapsed Time: 0:00:25 Time:  0:00:25\n"
     ]
    }
   ],
   "source": [
    "geoms = []\n",
    "for lon, lat in progressbar.progressbar(zip(df.POINT_LON, df.POINT_LAT), max_value=len(df)):\n",
    "    geoms.append(shpg.Point(lon, lat))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['geometry'] = geoms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = gpd.GeoDataFrame(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read RGI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "rdf = []\n",
    "for reg in range(1, 20):\n",
    "    rdf.append(gpd.read_file(utils.get_rgi_region_file(f'{reg:02d}', version='70G')))\n",
    "rdf = pd.concat(rdf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.crs = rdf.crs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3854279, 274531)"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df), len(rdf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "joined = gpd.sjoin(df, rdf, how='left', predicate='within')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3854279, 265675)"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(joined), len(joined.loc[joined.rgi_id.isnull()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "no_join = joined.loc[joined.rgi_id.isnull()]\n",
    "ok_join = joined.loc[~joined.rgi_id.isnull()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9310701171347482"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(ok_join) / len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>GlaThiDa_ID</th>\n",
       "      <td>33</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>POLITICAL_UNIT</th>\n",
       "      <td>US</td>\n",
       "      <td>US</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GLACIER_NAME</th>\n",
       "      <td>EASTON</td>\n",
       "      <td>EASTON</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SURVEY_DATE</th>\n",
       "      <td>19929999</td>\n",
       "      <td>19929999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PROFILE_ID</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>POINT_ID</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>POINT_LAT</th>\n",
       "      <td>48.76738</td>\n",
       "      <td>48.764904</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>POINT_LON</th>\n",
       "      <td>-121.819644</td>\n",
       "      <td>-121.821909</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ELEVATION</th>\n",
       "      <td>2962.0</td>\n",
       "      <td>2813.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>THICKNESS</th>\n",
       "      <td>0</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>THICKNESS_UNCERTAINTY</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DATA_FLAG</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>REMARKS</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>geometry</th>\n",
       "      <td>POINT (-121.819644 48.7673801)</td>\n",
       "      <td>POINT (-121.8219093 48.7649042)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index_right</th>\n",
       "      <td>15002.0</td>\n",
       "      <td>15001.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>rgi_id</th>\n",
       "      <td>RGI2000-v7.0-G-02-15003</td>\n",
       "      <td>RGI2000-v7.0-G-02-15002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>o1region</th>\n",
       "      <td>02</td>\n",
       "      <td>02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>o2region</th>\n",
       "      <td>02-04</td>\n",
       "      <td>02-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glims_id</th>\n",
       "      <td>G238158E48759N</td>\n",
       "      <td>G238169E48750N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>anlys_id</th>\n",
       "      <td>761915.0</td>\n",
       "      <td>761926.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>subm_id</th>\n",
       "      <td>744.0</td>\n",
       "      <td>744.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>src_date</th>\n",
       "      <td>1975-09-09T00:00:00</td>\n",
       "      <td>1974-09-09T00:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cenlon</th>\n",
       "      <td>-121.844147</td>\n",
       "      <td>-121.830812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cenlat</th>\n",
       "      <td>48.758784</td>\n",
       "      <td>48.749909</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>utm_zone</th>\n",
       "      <td>10.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>area_km2</th>\n",
       "      <td>4.995923</td>\n",
       "      <td>2.887898</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>primeclass</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>conn_lvl</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>surge_type</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>term_type</th>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glac_name</th>\n",
       "      <td>Deming Glacier</td>\n",
       "      <td>Easton Glacier</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>is_rgi6</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>termlon</th>\n",
       "      <td>-121.868505</td>\n",
       "      <td>-121.837315</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>termlat</th>\n",
       "      <td>48.741924</td>\n",
       "      <td>48.7325</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zmin_m</th>\n",
       "      <td>1179.0857</td>\n",
       "      <td>1578.7251</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zmax_m</th>\n",
       "      <td>3273.7449</td>\n",
       "      <td>2971.7751</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zmed_m</th>\n",
       "      <td>2251.0093</td>\n",
       "      <td>2154.4375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zmean_m</th>\n",
       "      <td>2254.844</td>\n",
       "      <td>2160.2273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>slope_deg</th>\n",
       "      <td>23.269547</td>\n",
       "      <td>18.759949</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aspect_deg</th>\n",
       "      <td>217.05867</td>\n",
       "      <td>201.118759</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aspect_sec</th>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dem_source</th>\n",
       "      <td>COPDEM30</td>\n",
       "      <td>COPDEM30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lmax_m</th>\n",
       "      <td>5346.0</td>\n",
       "      <td>4251.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    0  \\\n",
       "GlaThiDa_ID                                        33   \n",
       "POLITICAL_UNIT                                     US   \n",
       "GLACIER_NAME                                   EASTON   \n",
       "SURVEY_DATE                                  19929999   \n",
       "PROFILE_ID                                        NaN   \n",
       "POINT_ID                                            1   \n",
       "POINT_LAT                                    48.76738   \n",
       "POINT_LON                                 -121.819644   \n",
       "ELEVATION                                      2962.0   \n",
       "THICKNESS                                           0   \n",
       "THICKNESS_UNCERTAINTY                             NaN   \n",
       "DATA_FLAG                                         NaN   \n",
       "REMARKS                                           NaN   \n",
       "geometry               POINT (-121.819644 48.7673801)   \n",
       "index_right                                   15002.0   \n",
       "rgi_id                        RGI2000-v7.0-G-02-15003   \n",
       "o1region                                           02   \n",
       "o2region                                        02-04   \n",
       "glims_id                               G238158E48759N   \n",
       "anlys_id                                     761915.0   \n",
       "subm_id                                         744.0   \n",
       "src_date                          1975-09-09T00:00:00   \n",
       "cenlon                                    -121.844147   \n",
       "cenlat                                      48.758784   \n",
       "utm_zone                                         10.0   \n",
       "area_km2                                     4.995923   \n",
       "primeclass                                        0.0   \n",
       "conn_lvl                                          0.0   \n",
       "surge_type                                        0.0   \n",
       "term_type                                         9.0   \n",
       "glac_name                              Deming Glacier   \n",
       "is_rgi6                                           1.0   \n",
       "termlon                                   -121.868505   \n",
       "termlat                                     48.741924   \n",
       "zmin_m                                      1179.0857   \n",
       "zmax_m                                      3273.7449   \n",
       "zmed_m                                      2251.0093   \n",
       "zmean_m                                      2254.844   \n",
       "slope_deg                                   23.269547   \n",
       "aspect_deg                                  217.05867   \n",
       "aspect_sec                                        6.0   \n",
       "dem_source                                   COPDEM30   \n",
       "lmax_m                                         5346.0   \n",
       "\n",
       "                                                     1  \n",
       "GlaThiDa_ID                                         33  \n",
       "POLITICAL_UNIT                                      US  \n",
       "GLACIER_NAME                                    EASTON  \n",
       "SURVEY_DATE                                   19929999  \n",
       "PROFILE_ID                                         NaN  \n",
       "POINT_ID                                             2  \n",
       "POINT_LAT                                    48.764904  \n",
       "POINT_LON                                  -121.821909  \n",
       "ELEVATION                                       2813.0  \n",
       "THICKNESS                                           29  \n",
       "THICKNESS_UNCERTAINTY                              NaN  \n",
       "DATA_FLAG                                          NaN  \n",
       "REMARKS                                            NaN  \n",
       "geometry               POINT (-121.8219093 48.7649042)  \n",
       "index_right                                    15001.0  \n",
       "rgi_id                         RGI2000-v7.0-G-02-15002  \n",
       "o1region                                            02  \n",
       "o2region                                         02-04  \n",
       "glims_id                                G238169E48750N  \n",
       "anlys_id                                      761926.0  \n",
       "subm_id                                          744.0  \n",
       "src_date                           1974-09-09T00:00:00  \n",
       "cenlon                                     -121.830812  \n",
       "cenlat                                       48.749909  \n",
       "utm_zone                                          10.0  \n",
       "area_km2                                      2.887898  \n",
       "primeclass                                         0.0  \n",
       "conn_lvl                                           0.0  \n",
       "surge_type                                         0.0  \n",
       "term_type                                          9.0  \n",
       "glac_name                               Easton Glacier  \n",
       "is_rgi6                                            1.0  \n",
       "termlon                                    -121.837315  \n",
       "termlat                                        48.7325  \n",
       "zmin_m                                       1578.7251  \n",
       "zmax_m                                       2971.7751  \n",
       "zmed_m                                       2154.4375  \n",
       "zmean_m                                      2160.2273  \n",
       "slope_deg                                    18.759949  \n",
       "aspect_deg                                  201.118759  \n",
       "aspect_sec                                         5.0  \n",
       "dem_source                                    COPDEM30  \n",
       "lmax_m                                          4251.0  "
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ok_join.iloc[:2].T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "ok_join = ok_join[np.append(ok_join.columns[:13], ok_join.columns[15:16])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4126691/2178455800.py:1: PerformanceWarning: \n",
      "your performance may suffer as PyTables will pickle object types that it cannot\n",
      "map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['POLITICAL_UNIT', 'GLACIER_NAME', 'SURVEY_DATE', 'PROFILE_ID',\n",
      "       'POINT_ID', 'REMARKS', 'rgi_id'],\n",
      "      dtype='object')]\n",
      "\n",
      "  ok_join.to_hdf('glathida-v3.1.0/data/TTT_RGI_v70G.h5', key='data')\n"
     ]
    }
   ],
   "source": [
    "ok_join.to_hdf('glathida-v3.1.0/data/TTT_RGI_v70G.h5', key='data')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "import tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "file = 'glathida-v3.1.0/data/TTT_RGI_v70G_per_id.h5'\n",
    "if os.path.exists(file):\n",
    "    os.remove(file)\n",
    "\n",
    "rids = ok_join.rgi_id.unique()\n",
    "for rid in rids:\n",
    "    tt = ok_join.loc[ok_join.rgi_id == rid]\n",
    "    with warnings.catch_warnings():\n",
    "        warnings.simplefilter('ignore', tables.NaturalNameWarning)\n",
    "        tt.to_hdf(file, key=rid, append=True, complevel=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_rids():\n",
    "    with pd.HDFStore('glathida-v3.1.0/data/TTT_RGI_v70C_per_id.h5') as store:\n",
    "        rgi_ids = list(store.keys())\n",
    "        return np.array([s[1:] for s in rgi_ids])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "946 ms ± 10.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "%timeit get_rids()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_data(rid):\n",
    "    out = None\n",
    "    try:\n",
    "        out = pd.read_hdf('glathida-v3.1.0/data/TTT_RGI_v70C_per_id.h5', key=rid)\n",
    "    except KeyError:\n",
    "        pass\n",
    "    return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "16.5 ms ± 218 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit read_data('RGI2000-v7.0-C-02-10810')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "503 µs ± 11.3 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit read_data('RGI2000-v7.0-C-02-10812')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = pd.read_hdf('glathida-v3.1.0/data/TTT_RGI_v70C.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.96 s ± 99.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "%timeit  pd.read_hdf('glathida-v3.1.0/data/TTT_RGI_v70C.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "191 ms ± 8.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit t.loc[t.rgi_id == 'RGI2000-v7.0-C-02-10810']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "192 ms ± 5.84 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit t.loc[t.rgi_id == 'RGI2000-v7.0-C-02-10210']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>GlaThiDa_ID</th>\n",
       "      <th>POLITICAL_UNIT</th>\n",
       "      <th>GLACIER_NAME</th>\n",
       "      <th>SURVEY_DATE</th>\n",
       "      <th>PROFILE_ID</th>\n",
       "      <th>POINT_ID</th>\n",
       "      <th>POINT_LAT</th>\n",
       "      <th>POINT_LON</th>\n",
       "      <th>ELEVATION</th>\n",
       "      <th>THICKNESS</th>\n",
       "      <th>THICKNESS_UNCERTAINTY</th>\n",
       "      <th>DATA_FLAG</th>\n",
       "      <th>REMARKS</th>\n",
       "      <th>rgi_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>33</td>\n",
       "      <td>US</td>\n",
       "      <td>EASTON</td>\n",
       "      <td>19929999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>48.767380</td>\n",
       "      <td>-121.819644</td>\n",
       "      <td>2962.0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RGI2000-v7.0-C-02-10810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>33</td>\n",
       "      <td>US</td>\n",
       "      <td>EASTON</td>\n",
       "      <td>19929999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>48.764904</td>\n",
       "      <td>-121.821909</td>\n",
       "      <td>2813.0</td>\n",
       "      <td>29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RGI2000-v7.0-C-02-10810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>33</td>\n",
       "      <td>US</td>\n",
       "      <td>EASTON</td>\n",
       "      <td>19929999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>48.761662</td>\n",
       "      <td>-121.825264</td>\n",
       "      <td>2598.0</td>\n",
       "      <td>41</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RGI2000-v7.0-C-02-10810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>US</td>\n",
       "      <td>EASTON</td>\n",
       "      <td>19929999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>48.757063</td>\n",
       "      <td>-121.829107</td>\n",
       "      <td>2383.0</td>\n",
       "      <td>71</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RGI2000-v7.0-C-02-10810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>33</td>\n",
       "      <td>US</td>\n",
       "      <td>EASTON</td>\n",
       "      <td>19929999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>48.753715</td>\n",
       "      <td>-121.832006</td>\n",
       "      <td>2284.0</td>\n",
       "      <td>82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RGI2000-v7.0-C-02-10810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19089</th>\n",
       "      <td>502</td>\n",
       "      <td>US</td>\n",
       "      <td>SHERMAN CRATER</td>\n",
       "      <td>20109999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>76</td>\n",
       "      <td>48.768840</td>\n",
       "      <td>-121.816270</td>\n",
       "      <td>2931.0</td>\n",
       "      <td>59</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RGI2000-v7.0-C-02-10810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19090</th>\n",
       "      <td>502</td>\n",
       "      <td>US</td>\n",
       "      <td>SHERMAN CRATER</td>\n",
       "      <td>20109999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77</td>\n",
       "      <td>48.768892</td>\n",
       "      <td>-121.816151</td>\n",
       "      <td>2928.0</td>\n",
       "      <td>54</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RGI2000-v7.0-C-02-10810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19091</th>\n",
       "      <td>502</td>\n",
       "      <td>US</td>\n",
       "      <td>SHERMAN CRATER</td>\n",
       "      <td>20109999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>78</td>\n",
       "      <td>48.768944</td>\n",
       "      <td>-121.816032</td>\n",
       "      <td>2926.0</td>\n",
       "      <td>51</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RGI2000-v7.0-C-02-10810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19092</th>\n",
       "      <td>502</td>\n",
       "      <td>US</td>\n",
       "      <td>SHERMAN CRATER</td>\n",
       "      <td>20109999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>79</td>\n",
       "      <td>48.768990</td>\n",
       "      <td>-121.815914</td>\n",
       "      <td>2923.0</td>\n",
       "      <td>49</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RGI2000-v7.0-C-02-10810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19093</th>\n",
       "      <td>502</td>\n",
       "      <td>US</td>\n",
       "      <td>SHERMAN CRATER</td>\n",
       "      <td>20109999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>80</td>\n",
       "      <td>48.769043</td>\n",
       "      <td>-121.815795</td>\n",
       "      <td>2921.0</td>\n",
       "      <td>43</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RGI2000-v7.0-C-02-10810</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2533 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       GlaThiDa_ID POLITICAL_UNIT    GLACIER_NAME SURVEY_DATE PROFILE_ID  \\\n",
       "0               33             US          EASTON    19929999        NaN   \n",
       "1               33             US          EASTON    19929999        NaN   \n",
       "2               33             US          EASTON    19929999        NaN   \n",
       "3               33             US          EASTON    19929999        NaN   \n",
       "4               33             US          EASTON    19929999        NaN   \n",
       "...            ...            ...             ...         ...        ...   \n",
       "19089          502             US  SHERMAN CRATER    20109999        NaN   \n",
       "19090          502             US  SHERMAN CRATER    20109999        NaN   \n",
       "19091          502             US  SHERMAN CRATER    20109999        NaN   \n",
       "19092          502             US  SHERMAN CRATER    20109999        NaN   \n",
       "19093          502             US  SHERMAN CRATER    20109999        NaN   \n",
       "\n",
       "      POINT_ID  POINT_LAT   POINT_LON  ELEVATION  THICKNESS  \\\n",
       "0            1  48.767380 -121.819644     2962.0          0   \n",
       "1            2  48.764904 -121.821909     2813.0         29   \n",
       "2            3  48.761662 -121.825264     2598.0         41   \n",
       "3            4  48.757063 -121.829107     2383.0         71   \n",
       "4            5  48.753715 -121.832006     2284.0         82   \n",
       "...        ...        ...         ...        ...        ...   \n",
       "19089       76  48.768840 -121.816270     2931.0         59   \n",
       "19090       77  48.768892 -121.816151     2928.0         54   \n",
       "19091       78  48.768944 -121.816032     2926.0         51   \n",
       "19092       79  48.768990 -121.815914     2923.0         49   \n",
       "19093       80  48.769043 -121.815795     2921.0         43   \n",
       "\n",
       "       THICKNESS_UNCERTAINTY  DATA_FLAG REMARKS                   rgi_id  \n",
       "0                        NaN        NaN     NaN  RGI2000-v7.0-C-02-10810  \n",
       "1                        NaN        NaN     NaN  RGI2000-v7.0-C-02-10810  \n",
       "2                        NaN        NaN     NaN  RGI2000-v7.0-C-02-10810  \n",
       "3                        NaN        NaN     NaN  RGI2000-v7.0-C-02-10810  \n",
       "4                        NaN        NaN     NaN  RGI2000-v7.0-C-02-10810  \n",
       "...                      ...        ...     ...                      ...  \n",
       "19089                    5.0        NaN     NaN  RGI2000-v7.0-C-02-10810  \n",
       "19090                    5.0        NaN     NaN  RGI2000-v7.0-C-02-10810  \n",
       "19091                    5.0        NaN     NaN  RGI2000-v7.0-C-02-10810  \n",
       "19092                    5.0        NaN     NaN  RGI2000-v7.0-C-02-10810  \n",
       "19093                    5.0        NaN     NaN  RGI2000-v7.0-C-02-10810  \n",
       "\n",
       "[2533 rows x 14 columns]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t.loc[t.rgi_id == 'RGI2000-v7.0-C-02-10810']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Add the RGI Region attribute "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100% (3854279 of 3854279) |##############| Elapsed Time: 0:01:44 Time:  0:01:44\n"
     ]
    }
   ],
   "source": [
    "reg = np.ones(len(df), dtype=int) * -1\n",
    "prev_reg = None\n",
    "for i, p in progressbar.progressbar(enumerate(df.geometry), max_value=len(df)):\n",
    "    if prev_reg is not None and prev_reg.contains(p):\n",
    "        reg[i] = reg[i-1]\n",
    "        continue\n",
    "    try:\n",
    "        sel = rgi_reg.loc[rgi_reg.contains(p)].iloc[0]\n",
    "        reg[i] = sel.RGI_CODE\n",
    "        prev_reg = sel.geometry\n",
    "    except:\n",
    "        prev_reg = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 16, 17, 18, 19]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['RGI_REG'] = reg\n",
    "sorted(df['RGI_REG'].unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Separate the data in RGI Regions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfs = OrderedDict()\n",
    "for r in sorted(df['RGI_REG'].unique()):\n",
    "    dfs[r] = df.loc[df.RGI_REG == r].copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare for writing and write to file "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i, (k, d) in enumerate(dfs.items()):\n",
    "    d.drop(['geometry'], axis=1, inplace=True)\n",
    "    d['RGI_REG'] = d['RGI_REG'].astype(str)\n",
    "    d['ELEVATION'] = d['ELEVATION'].astype(str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GlaThiDa_ID int64\n",
      "POLITICAL_UNIT object\n",
      "GLACIER_NAME object\n",
      "SURVEY_DATE object\n",
      "PROFILE_ID object\n",
      "POINT_ID object\n",
      "POINT_LAT float64\n",
      "POINT_LON float64\n",
      "ELEVATION object\n",
      "THICKNESS int64\n",
      "THICKNESS_UNCERTAINTY float64\n",
      "DATA_FLAG float64\n",
      "REMARKS object\n",
      "RGI_REG object\n"
     ]
    }
   ],
   "source": [
    "for c in d:\n",
    "    print(c, d[c].dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing 01 87157\n",
      "Writing 02 3406\n",
      "Writing 03 868346\n",
      "Writing 04 309453\n",
      "Writing 05 557136\n",
      "Writing 07 966408\n",
      "Writing 08 10801\n",
      "Writing 10 7726\n",
      "Writing 11 478312\n",
      "Writing 12 2278\n",
      "Writing 13 15327\n",
      "Writing 16 1287\n",
      "Writing 17 8463\n",
      "Writing 18 619\n",
      "Writing 19 537560\n"
     ]
    }
   ],
   "source": [
    "outf = os.path.join(gtd_dir, 'TTT_per_reg.h5')\n",
    "if os.path.exists(outf):\n",
    "    os.remove(outf)\n",
    "count = 0\n",
    "for i, (k, d) in enumerate(dfs.items()):\n",
    "    key = '{:02d}'.format(int(k))\n",
    "    print('Writing', key, len(d))\n",
    "    with warnings.catch_warnings():\n",
    "        warnings.simplefilter('ignore', tables.NaturalNameWarning)\n",
    "        d.to_hdf(outf, key, append=True, complevel=5)\n",
    "        count += len(d)\n",
    "assert count == len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}