{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## GlaThiDa to RGI, step 3: check results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We assume the per-region HDF file (`TTT_per_reg_with_id.h5`, one key per region) is already available in `gtd_dir`." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", "import numpy as np\n", "import shapely.geometry as shpg\n", "import os\n", "import glob\n", "import time\n", "import progressbar\n", "from collections import OrderedDict\n", "from oggm import utils, cfg\n", "import matplotlib.pyplot as plt\n", "import warnings\n", "import tables" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "gtd_dir = 'glathida-v3.1.0/data'\n", "# gtd_dir = 'GlaThiDa_2016'" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "gtd_f = os.path.join(gtd_dir, 'TTT_per_reg_with_id.h5')\n", "out_f = os.path.join(gtd_dir, 'TTT_per_rgi_id.h5')\n", "out_f_all = os.path.join(gtd_dir, 'TTT_all_with_id.h5')" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['01' '02' '03' '04' '05' '07' '08' '10' '11' '12' '13' '16' '17' '18'\n", " '19']\n" ] } ], "source": [ "with pd.HDFStore(gtd_f) as store:\n", " sub_regs = list(store.keys())\n", "sub_regs = np.array([s[1:] for s in sub_regs])\n", "print(sub_regs)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "odf = []\n", "for k in sub_regs:\n", " odf.append(pd.read_hdf(gtd_f, k))\n", "odf = pd.concat(odf).sort_index()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "nodf = odf.loc[odf.RGI_ID.isna()].copy()\n", "okdf = odf.loc[~odf.RGI_ID.isna()].copy()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.004422357592691137" ] }, "execution_count": 
23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(nodf) / len(odf)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "rids = okdf.RGI_ID.unique()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2770" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(rids)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GlaThiDa_IDPOLITICAL_UNITGLACIER_NAMESURVEY_DATEPROFILE_IDPOINT_IDPOINT_LATPOINT_LONELEVATIONTHICKNESSTHICKNESS_UNCERTAINTYDATA_FLAGREMARKSRGI_ID
RGI_REG
1871218712184947871218091887121871218712180918871218710307656187121
1077107710771077107051771077107710771077100007710
11464884464884464884464884804648844648844648844633624648842320610464884
1220702070207020700207020702070207020702070002070
131487814878148781487833461487814878148781487814878102340014878
161287128712871287012871287128712871287208001287
1772527252725272520725272527252072520007252
186146146146140614614614614614000614
195361495361493402595361495361495361495361495361495361495361494770670481717536149
2334933493349330525643349334933493037334980003349
386834686834639563186834686834686834686834686834686834686834600746640868346
43094533094536894530945330945330945330945330945330945330945300308684309453
555713655713666990557136546842557136557136557136553838557136431170521073557136
796619796619796557496619735572966197966197966197504590966197874523033561966197
81078810788107881078879107881078810788107881078827120010788
\n", "
" ], "text/plain": [ " GlaThiDa_ID POLITICAL_UNIT GLACIER_NAME SURVEY_DATE PROFILE_ID \\\n", "RGI_REG \n", "1 87121 87121 84947 87121 80918 \n", "10 7710 7710 7710 7710 7051 \n", "11 464884 464884 464884 464884 80 \n", "12 2070 2070 2070 2070 0 \n", "13 14878 14878 14878 14878 3346 \n", "16 1287 1287 1287 1287 0 \n", "17 7252 7252 7252 7252 0 \n", "18 614 614 614 614 0 \n", "19 536149 536149 340259 536149 536149 \n", "2 3349 3349 3349 3305 2564 \n", "3 868346 868346 395631 868346 868346 \n", "4 309453 309453 68945 309453 309453 \n", "5 557136 557136 66990 557136 546842 \n", "7 966197 966197 965574 966197 35572 \n", "8 10788 10788 10788 10788 79 \n", "\n", " POINT_ID POINT_LAT POINT_LON ELEVATION THICKNESS \\\n", "RGI_REG \n", "1 87121 87121 87121 80918 87121 \n", "10 7710 7710 7710 7710 7710 \n", "11 464884 464884 464884 463362 464884 \n", "12 2070 2070 2070 2070 2070 \n", "13 14878 14878 14878 14878 14878 \n", "16 1287 1287 1287 1287 1287 \n", "17 7252 7252 7252 0 7252 \n", "18 614 614 614 614 614 \n", "19 536149 536149 536149 536149 536149 \n", "2 3349 3349 3349 3037 3349 \n", "3 868346 868346 868346 868346 868346 \n", "4 309453 309453 309453 309453 309453 \n", "5 557136 557136 557136 553838 557136 \n", "7 966197 966197 966197 504590 966197 \n", "8 10788 10788 10788 10788 10788 \n", "\n", " THICKNESS_UNCERTAINTY DATA_FLAG REMARKS RGI_ID \n", "RGI_REG \n", "1 87103 0 76561 87121 \n", "10 0 0 0 7710 \n", "11 23206 1 0 464884 \n", "12 2070 0 0 2070 \n", "13 10234 0 0 14878 \n", "16 208 0 0 1287 \n", "17 0 0 0 7252 \n", "18 0 0 0 614 \n", "19 477067 0 481717 536149 \n", "2 80 0 0 3349 \n", "3 0 0 746640 868346 \n", "4 0 0 308684 309453 \n", "5 43117 0 521073 557136 \n", "7 874523 0 33561 966197 \n", "8 2712 0 0 10788 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "okdf.groupby(okdf.RGI_REG).count()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GlaThiDa_IDPOLITICAL_UNITGLACIER_NAMESURVEY_DATEPOINT_IDPOINT_LATPOINT_LONELEVATIONTHICKNESSTHICKNESS_UNCERTAINTYDATA_FLAGREMARKSRGI_ID
RGI_REG
136363636203636136350190
1016161616161616161600160
114071407140714071407140714071407140713462028180
1220820820820820820820820820820802080
133333333333333333330000
171211121112111211121112111211012110012110
185555555550000
191922192214111922192219221922192219220018190
253535353535353535300490
31263126301263126312631263126312630012630
41111110111111111111111111001110
55845840584584584584584584005840
7151515151515151515150150
81010101010101010100000
\n", "
" ], "text/plain": [ " GlaThiDa_ID POLITICAL_UNIT GLACIER_NAME SURVEY_DATE POINT_ID \\\n", "RGI_REG \n", "1 36 36 36 36 20 \n", "10 16 16 16 16 16 \n", "11 4071 4071 4071 4071 4071 \n", "12 208 208 208 208 208 \n", "13 33 33 33 33 33 \n", "17 1211 1211 1211 1211 1211 \n", "18 5 5 5 5 5 \n", "19 1922 1922 1411 1922 1922 \n", "2 53 53 53 53 53 \n", "3 1263 1263 0 1263 1263 \n", "4 111 111 0 111 111 \n", "5 584 584 0 584 584 \n", "7 15 15 15 15 15 \n", "8 10 10 10 10 10 \n", "\n", " POINT_LAT POINT_LON ELEVATION THICKNESS THICKNESS_UNCERTAINTY \\\n", "RGI_REG \n", "1 36 36 1 36 35 \n", "10 16 16 16 16 0 \n", "11 4071 4071 4071 4071 3462 \n", "12 208 208 208 208 208 \n", "13 33 33 33 33 0 \n", "17 1211 1211 0 1211 0 \n", "18 5 5 5 5 0 \n", "19 1922 1922 1922 1922 0 \n", "2 53 53 53 53 0 \n", "3 1263 1263 1263 1263 0 \n", "4 111 111 111 111 0 \n", "5 584 584 584 584 0 \n", "7 15 15 15 15 15 \n", "8 10 10 10 10 0 \n", "\n", " DATA_FLAG REMARKS RGI_ID \n", "RGI_REG \n", "1 0 19 0 \n", "10 0 16 0 \n", "11 0 2818 0 \n", "12 0 208 0 \n", "13 0 0 0 \n", "17 0 1211 0 \n", "18 0 0 0 \n", "19 0 1819 0 \n", "2 0 49 0 \n", "3 0 1263 0 \n", "4 0 111 0 \n", "5 0 584 0 \n", "7 0 15 0 \n", "8 0 0 0 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nodf.groupby(nodf.RGI_REG).count()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "nodf['geometry'] = [shpg.Point(lon, lat) for lon, lat in zip(nodf.POINT_LON, nodf.POINT_LAT)]\n", "nodf = gpd.GeoDataFrame(nodf)\n", "nodf.to_file('gtd_not_found_oldgtd.shp')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100% (871 of 871) |######################| Elapsed Time: 0:00:45 Time: 0:00:45\n" ] } ], "source": [ "if os.path.exists(out_f):\n", " os.remove(out_f)\n", "\n", "for rid, df in progressbar.progressbar(okdf.groupby('RGI_ID')):\n", " with 
warnings.catch_warnings():\n", "        # The store keys are RGI ids; the PyTables NaturalNameWarning (presumably\n", "        # triggered by those key names) is silenced on purpose.\n", "        warnings.simplefilter('ignore', tables.NaturalNameWarning)\n", "        # Pass `key` by keyword: positional use is deprecated in recent pandas.\n", "        df.to_hdf(out_f, key=rid, append=True, complevel=5)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/users/fmaussion/.py3/lib/python3.8/site-packages/pandas/core/generic.py:2434: PerformanceWarning: \n", "your performance may suffer as PyTables will pickle object types that it cannot\n", "map directly to c-types [inferred_type->mixed,key->block2_values] [items->Index(['POLITICAL_UNIT', 'GLACIER_NAME', 'SURVEY_DATE', 'POINT_ID',\n", " 'ELEVATION', 'REMARKS', 'RGI_REG', 'RGI_ID'],\n", " dtype='object')]\n", "\n", " pytables.to_hdf(\n" ] } ], "source": [ "# Start from a clean file, then store all rows that have an RGI_ID\n", "# in a single table under the key 'df'.\n", "if os.path.exists(out_f_all):\n", "    os.remove(out_f_all)\n", "\n", "okdf.to_hdf(out_f_all, key='df', complevel=5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }