{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Convert GLIMS polygons to RGI polygons: Level 1 files" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "GLIMS outlines are shared in a format where each polygon is either a glacier outline or a rock outcrop (nunatak). RGI outlines are provided as polygons with exterior (glacier outlines) and interior (outcrops) boundaries. GLIMS does offer download in either format, but it is buggy and [silently fails on some glaciers](https://github.com/GLIMS-RGI/glims_issue_tracker/issues/2).\n", "\n", "Therefore, this script is there to convert the GLIMS format to RGI format. L1 files are simply the same data as L0 but with the new format. Additionally, we kept a list of \"orphaned\" outcrops which hint at errors in either the GLIMS database or this script." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import geopandas as gpd\n", "import pandas as pd\n", "import numpy as np\n", "import shutil\n", "import glob\n", "import os\n", "import subprocess\n", "import tarfile\n", "import shapely.geometry as shpg\n", "import progressbar\n", "import warnings\n", "import utils" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# go down from rgi7_scripts/workflow/preprocessing\n", "data_dir = '../../../rgi7_data/'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "reg_file = os.path.join(data_dir, '00_rgi70_regions', '00_rgi70_O1Regions')\n", "reg_f = gpd.read_file(reg_file)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../../../rgi7_data/l0_from_glims/00_RGI01.tgz\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100% (60787 of 60787) |##################| Elapsed Time: 0:06:04 Time: 0:06:04\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Writing...\n", "Taring...\n", "RGI01/\n", "RGI01/RGI01.dbf\n", "RGI01/RGI01.shp\n", "RGI01/RGI01.shx\n", "RGI01/RGI01.prj\n", "RGI01/RGI01.cpg\n", "CompletedProcess(args=['tar', '-zcvf', '../../../rgi7_data/l1_with_interiors_tars/RGI01.tar.gz', '-C', '../../../rgi7_data/l1_with_interiors', 'RGI01'], returncode=0)\n", "Writing orphans (N=374)...\n", "Taring orphans...\n", "RGI01/\n", "RGI01/RGI01.shx\n", "RGI01/RGI01.prj\n", "RGI01/RGI01.cpg\n", "RGI01/RGI01.shp\n", "RGI01/RGI01.dbf\n", "CompletedProcess(args=['tar', '-zcvf', '../../../rgi7_data/l1_orphan_interiors_tars/RGI01.tar.gz', '-C', '../../../rgi7_data/l1_orphan_interiors', 'RGI01'], returncode=0)\n", "../../../rgi7_data/l0_from_glims/01_RGI02.tgz\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100% (34207 of 34207) |##################| Elapsed Time: 0:02:46 Time: 0:02:46\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Writing...\n", "Taring...\n", "RGI02/\n", "RGI02/RGI02.prj\n", "RGI02/RGI02.cpg\n", "RGI02/RGI02.shx\n", "RGI02/RGI02.shp\n", "RGI02/RGI02.dbf\n", "CompletedProcess(args=['tar', '-zcvf', '../../../rgi7_data/l1_with_interiors_tars/RGI02.tar.gz', '-C', '../../../rgi7_data/l1_with_interiors', 'RGI02'], returncode=0)\n", "Writing orphans (N=10)...\n", "Taring orphans...\n", "RGI02/\n", "RGI02/RGI02.shx\n", "RGI02/RGI02.prj\n", "RGI02/RGI02.cpg\n", "RGI02/RGI02.shp\n", "RGI02/RGI02.dbf\n", "CompletedProcess(args=['tar', '-zcvf', '../../../rgi7_data/l1_orphan_interiors_tars/RGI02.tar.gz', '-C', '../../../rgi7_data/l1_orphan_interiors', 'RGI02'], 
returncode=0)\n", "../../../rgi7_data/l0_from_glims/02_RGI03.tgz\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100% (15418 of 15418) |##################| Elapsed Time: 0:01:21 Time: 0:01:21\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Writing...\n", "Taring...\n", "RGI03/\n", "RGI03/RGI03.cpg\n", "RGI03/RGI03.prj\n", "RGI03/RGI03.shx\n", "RGI03/RGI03.dbf\n", "RGI03/RGI03.shp\n", "CompletedProcess(args=['tar', '-zcvf', '../../../rgi7_data/l1_with_interiors_tars/RGI03.tar.gz', '-C', '../../../rgi7_data/l1_with_interiors', 'RGI03'], returncode=0)\n", "Writing orphans (N=4)...\n", "Taring orphans...\n", "RGI03/\n", "RGI03/RGI03.cpg\n", "RGI03/RGI03.prj\n", "RGI03/RGI03.shx\n", "RGI03/RGI03.shp\n", "RGI03/RGI03.dbf\n", "CompletedProcess(args=['tar', '-zcvf', '../../../rgi7_data/l1_orphan_interiors_tars/RGI03.tar.gz', '-C', '../../../rgi7_data/l1_orphan_interiors', 'RGI03'], returncode=0)\n", "../../../rgi7_data/l0_from_glims/03_RGI04.tgz\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100% (18336 of 18336) |##################| Elapsed Time: 0:01:46 Time: 0:01:46\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Writing...\n", "Taring...\n", "RGI04/\n", "RGI04/RGI04.dbf\n", "RGI04/RGI04.shp\n", "RGI04/RGI04.shx\n", "RGI04/RGI04.prj\n", "RGI04/RGI04.cpg\n", "CompletedProcess(args=['tar', '-zcvf', '../../../rgi7_data/l1_with_interiors_tars/RGI04.tar.gz', '-C', '../../../rgi7_data/l1_with_interiors', 'RGI04'], returncode=0)\n", "../../../rgi7_data/l0_from_glims/04_RGI05.tgz\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 38% (16410 of 43157) |###### | Elapsed Time: 0:01:26 ETA: 0:02:10" ] } ], "source": [ "odir = utils.mkdir(os.path.join(data_dir, 'l1_with_interiors'))\n", "odir_tar = utils.mkdir(os.path.join(data_dir, 'l1_with_interiors_tars'))\n", "\n", "odir_failed = utils.mkdir(os.path.join(data_dir, 'l1_orphan_interiors'))\n", "odir_failed_tar = utils.mkdir(os.path.join(data_dir, 'l1_orphan_interiors_tars'))\n", "\n", "for reg in range(1, 20):\n", "\n", " # Read the shapes for one or more boxes\n", " shp = [] \n", " for i, sreg in reg_f.loc[reg_f.o1region == f'{reg:02d}'].iterrows():\n", " \n", " filebasename = os.path.join(data_dir, 'l0_from_glims', f'{i:02d}_RGI{reg:02d}.tgz')\n", " print(filebasename)\n", " with tarfile.open(filebasename, \"r:gz\") as tar:\n", " fn = tar.getnames()[0]\n", " fp = 'tar://' + filebasename + '/{}/glims_polygons.shp'.format(fn)\n", " shp.append(gpd.read_file(fp))\n", " \n", " # Merge them together\n", " shp = pd.concat(shp) \n", " \n", " # Compute areas\n", " shp['area'] = shp.to_crs({'proj':'cea'}).area\n", " \n", " # Now the interior things\n", " \n", " # We group per anlys_id which is the smalles unit of glacier in GLIMS \n", " # (a single GLIMSID can have several dates and/or contributor, for example)\n", " grouped = shp.groupby('anlys_id')\n", " \n", " # We loop over anlys_id\n", " odf = []\n", " odf_rocks = []\n", " for name, group in progressbar.progressbar(grouped):\n", "\n", " # Basic sanity checks\n", " assert len(group.glac_id.unique()) == 1\n", " assert len(group.anlys_time.unique()) == 1\n", " assert len(group.src_date.unique()) == 1\n", " assert len(group.release_dt.unique()) == 1\n", " assert len(group.submitters.unique()) == 1\n", " assert len(group.analysts.unique()) == 1\n", "\n", " # Select the various (1 - more) main rocks\n", " mains = group.loc[group.line_type == 'glac_bound'].copy()\n", "\n", " # And all rocks\n", " rocks = group.loc[group.line_type == 
'intrnl_rock']\n", "\n", " # We sort areas to avoid misclassifications\n", " mains = mains.sort_values('area')\n", "\n", " # Now loop over the mains and check which rocks belong\n", " for i, main in mains.iterrows():\n", "\n", " try:\n", " # Check where the rocks belong\n", " isin = rocks.geometry.within(main.geometry)\n", " except:\n", " # An error occurred\n", " # The ids below have been manually checked - that's not many in light of the thousands of entries\n", " if main.glac_id in ['G282321E08973S', 'G286431E47184S', 'G293140E68314S', \n", " 'G294835E68160S', 'G295727E65746S']:\n", " mains = mains.drop(i)\n", " isin = []\n", " elif main.glac_id in ['G286765E46650S']:\n", " main['geometry'] = main.geometry.buffer(0).geoms[1]\n", " isin = rocks.geometry.within(main.geometry)\n", " elif main.glac_id in ['G282313E08974S', 'G286697E46659S', 'G286765E46650S', \n", " 'G292617E67510S', 'G294132E67719S', 'G012723E47107N']:\n", " rocks = rocks[rocks.is_valid]\n", " isin = rocks.geometry.within(main.geometry)\n", " else:\n", " raise\n", " exterior = main.geometry.exterior\n", " interiors = [p.exterior for p in rocks.loc[isin].geometry]\n", " if len(interiors) > 0:\n", " mains.loc[i, 'geometry'] = shpg.Polygon(exterior, interiors)\n", " rocks = rocks[~isin]\n", "\n", " odf.append(mains)\n", " odf_rocks.append(rocks)\n", "\n", " odf = pd.concat(odf)\n", " odf_rocks = pd.concat(odf_rocks)\n", " \n", " dd = utils.mkdir(f'{odir}/RGI{reg:02d}/', reset=True)\n", " print('Writing...')\n", " odf.to_file(dd + f'RGI{reg:02d}.shp')\n", " \n", " print('Taring...')\n", " print(subprocess.run(['tar', '-zcvf', f'{odir_tar}/RGI{reg:02d}.tar.gz', '-C', odir, f'RGI{reg:02d}']))\n", " \n", " if len(odf_rocks) > 0:\n", " dd = utils.mkdir(f'{odir_failed}/RGI{reg:02d}/', reset=True)\n", " print(f'Writing orphans (N={len(odf_rocks)})...')\n", " odf_rocks.to_file(dd + f'RGI{reg:02d}.shp')\n", "\n", " print(f'Taring orphans...')\n", " print(subprocess.run(['tar', '-zcvf', f'{odir_failed_tar}/RGI{reg:02d}.tar.gz', '-C', odir_failed, f'RGI{reg:02d}']))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Done\n" ] } ], "source": [ "print('Done')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 4 }
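{ "cell_type": "markdown", "metadata": {}, "source": [ "As an optional, illustrative sanity check (assuming the loop above completed for region 01, which is used as an example here), one can re-read a converted region and verify that interior rings were indeed written:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Illustrative sanity check: re-read one converted region (RGI01 as an example)\n", "# and count how many outlines carry at least one interior ring\n", "check = gpd.read_file(os.path.join(odir, 'RGI01', 'RGI01.shp'))\n", "n_int = sum(len(g.interiors) > 0 for g in check.geometry if g.geom_type == 'Polygon')\n", "print(f'{n_int} of {len(check)} outlines have at least one interior ring')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Done\n" ] } ], "source": [ "print('Done')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 4 }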