{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## GlaThiDa to RGI, step 2: attribute an RGI glacier to each point" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We assume the HDF file is ready." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", "import shapely.geometry as shpg\n", "import numpy as np\n", "import os\n", "import progressbar\n", "import multiprocessing\n", "import glob\n", "import time\n", "from collections import OrderedDict\n", "from oggm import utils, cfg\n", "import matplotlib.pyplot as plt\n", "import warnings\n", "import tables" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read the file" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "tags": [ "parameters" ] }, "outputs": [], "source": [ "gtd_dir = 'glathida-v3.1.0/data'\n", "# gtd_dir = 'GlaThiDa_2016'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GTD Dir: glathida-v3.1.0/data\n" ] } ], "source": [ "print('GTD Dir: ', gtd_dir, flush=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "gtd_f = os.path.join(gtd_dir, 'TTT_per_reg.h5')\n", "out_f = os.path.join(gtd_dir, 'TTT_per_reg_with_id.h5')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "regions: ['01' '02' '03' '04' '05' '07' '08' '10' '11' '12' '13' '16' '17' '18'\n", " '19']\n" ] } ], "source": [ "with pd.HDFStore(gtd_f) as store:\n", " regs = list(store.keys())\n", "regs = np.array([s[1:] for s in regs])\n", "print('regions: ', regs, flush=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def find_glaciers(arg):\n", " \n", " gtd, sh = arg\n", " \n", " # loop over the points\n", " rgi_links = np.empty_like(gtd.REMARKS.values)\n", " prev_geom = None\n", " for i, (lon, lat) in enumerate(zip(gtd.POINT_LON, gtd.POINT_LAT)):\n", "\n", " # Check if the previous one makes it\n", " p = shpg.Point(lon, lat)\n", " if prev_geom is not None and prev_geom.contains(p):\n", " rgi_links[i] = rgi_links[i-1]\n", " continue\n", "\n", " # Select candidates by radius\n", " candidates = sh.loc[(lon <= sh.max_lon) & (lon >= sh.min_lon) & (lat <= sh.max_lat) & (lat >= sh.min_lat)]\n", " if len(candidates) == 0:\n", " prev_geom = None\n", " continue\n", "\n", " # Check geometries now\n", " candidates = candidates.loc[candidates.contains(p)]\n", " if len(candidates) == 0:\n", " prev_geom = None\n", " continue\n", "\n", " # Ok found it\n", " fin = candidates.iloc[0]\n", " rgi_links[i] = fin.name\n", " prev_geom = fin.geometry \n", " return rgi_links" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mpp pool: 64\n" ] } ], "source": [ "mpp = multiprocessing.cpu_count()\n", "mp_pool = multiprocessing.Pool(mpp) \n", "print('mpp pool: ', mpp, flush=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Read RGI Reg 01...\n", "Start attribution RGI Reg 01...\n", "RGI Reg 01 done! Total time: 13m 9s\n", "Read RGI Reg 02...\n", "Start attribution RGI Reg 02...\n", "RGI Reg 02 done! Total time: 13m 41s\n", "Read RGI Reg 03...\n", "Start attribution RGI Reg 03...\n", "RGI Reg 03 done! Total time: 23m 22s\n", "Read RGI Reg 04...\n", "Start attribution RGI Reg 04...\n", "RGI Reg 04 done! Total time: 25m 41s\n", "Read RGI Reg 05...\n", "Start attribution RGI Reg 05...\n", "RGI Reg 05 done! Total time: 85m 36s\n", "Read RGI Reg 07...\n", "Start attribution RGI Reg 07...\n", "RGI Reg 07 done! Total time: 88m 39s\n", "Read RGI Reg 08...\n", "Start attribution RGI Reg 08...\n", "RGI Reg 08 done! Total time: 88m 46s\n", "Read RGI Reg 10...\n", "Start attribution RGI Reg 10...\n", "RGI Reg 10 done! Total time: 88m 55s\n", "Read RGI Reg 11...\n", "Start attribution RGI Reg 11...\n", "RGI Reg 11 done! Total time: 91m 0s\n", "Read RGI Reg 12...\n", "Start attribution RGI Reg 12...\n", "RGI Reg 12 done! Total time: 91m 4s\n", "Read RGI Reg 13...\n", "Start attribution RGI Reg 13...\n", "RGI Reg 13 done! Total time: 92m 35s\n", "Read RGI Reg 16...\n", "Start attribution RGI Reg 16...\n", "RGI Reg 16 done! Total time: 92m 41s\n", "Read RGI Reg 17...\n", "Start attribution RGI Reg 17...\n", "RGI Reg 17 done! Total time: 93m 13s\n", "Read RGI Reg 18...\n", "Start attribution RGI Reg 18...\n", "RGI Reg 18 done! Total time: 93m 19s\n", "Read RGI Reg 19...\n", "Start attribution RGI Reg 19...\n", "RGI Reg 19 done! Total time: 95m 47s\n" ] } ], "source": [ "# Actual heavy stuff\n", "if os.path.exists(out_f):\n", " os.remove(out_f)\n", "\n", "start_time = time.time()\n", "\n", "for reg in np.unique(regs):\n", " \n", " print('Read RGI Reg {}...'.format(reg), flush=True)\n", " \n", " # read rgi region file\n", " sh = gpd.read_file(utils.get_rgi_region_file(reg, version='62')).set_index('RGIId')\n", " \n", " # compute the influence radius of each glacier\n", " min_lon = sh.CenLon.values * np.NaN\n", " min_lat = sh.CenLon.values * np.NaN\n", " max_lon = sh.CenLon.values * np.NaN\n", " max_lat = sh.CenLon.values * np.NaN\n", " for i, g in enumerate(sh.geometry):\n", " bounds = g.bounds\n", " min_lon[i] = bounds[0]\n", " min_lat[i] = bounds[1]\n", " max_lon[i] = bounds[2]\n", " max_lat[i] = bounds[3]\n", " sh['min_lon'] = min_lon\n", " sh['min_lat'] = min_lat\n", " sh['max_lon'] = max_lon\n", " sh['max_lat'] = max_lat\n", "\n", " # get ready for the loop\n", " gtd = pd.read_hdf(gtd_f, reg)\n", "\n", " # We split the problem in at least mpp otherwise 1000\n", " gtds = np.array_split(gtd, utils.clip_min(len(gtd) // 1000, mpp))\n", "\n", " # Actual stuff\n", " print('Start attribution RGI Reg {}...'.format(reg), flush=True)\n", " args = [(gt, sh) for gt in gtds]\n", " out = mp_pool.map(find_glaciers, args);\n", "\n", " # done\n", " gtd['RGI_ID'] = np.concatenate(out)\n", " with warnings.catch_warnings():\n", " warnings.simplefilter('ignore', tables.NaturalNameWarning)\n", " gtd.to_hdf(out_f, reg, append=True, complevel=5)\n", "\n", " # Log\n", " m, s = divmod(time.time() - start_time, 60)\n", " print('RGI Reg {} done! Total time: {}m {}s'.format(reg, int(m), int(s)), flush=True)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }