{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## New attempt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook matches the GlaThiDa point thickness data (`TTT.csv`) to RGI glacier outlines and RGI regions. In the end, we have HDF files of the matched points: one combined table, one table per RGI glacier id, and one table per first-order (O1) RGI region." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", "import shapely.geometry as shpg\n", "import numpy as np\n", "import os\n", "import progressbar\n", "import time\n", "from oggm import utils, cfg" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "gtd_dir = './glathida-v3.1.0/data'\n", "# gtd_dir = './GlaThiDa_2016'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read the files" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(os.path.join(gtd_dir, 'TTT.csv'), dtype={'SURVEY_DATE': 'str'}, low_memory=False)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GlaThiDa_ID int64\n", "POLITICAL_UNIT object\n", "GLACIER_NAME object\n", "SURVEY_DATE object\n", "PROFILE_ID object\n", "POINT_ID object\n", "POINT_LAT float64\n", "POINT_LON float64\n", "ELEVATION float64\n", "THICKNESS int64\n", "THICKNESS_UNCERTAINTY float64\n", "DATA_FLAG float64\n", "REMARKS object\n" ] } ], "source": [ "for c in df:\n", " print(c, df[c].dtype)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3854279" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.04538540152386478, 174928)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df.loc[df.THICKNESS == 0]) / len(df), len(df.loc[df.THICKNESS == 0])" ] }, { "cell_type": 
"code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Replace with 62 when available\n", "rgi_reg = gpd.read_file(os.path.join(utils.get_rgi_dir('62'), '00_rgi62_regions/00_rgi62_O1Regions.shp'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Convert lon, lat to Point geometries " ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100% (3854279 of 3854279) |##############| Elapsed Time: 0:00:25 Time: 0:00:25\n" ] } ], "source": [ "geoms = []\n", "for lon, lat in progressbar.progressbar(zip(df.POINT_LON, df.POINT_LAT), max_value=len(df)):\n", " geoms.append(shpg.Point(lon, lat))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "df['geometry'] = geoms" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "df = gpd.GeoDataFrame(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read RGI" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "rdf = []\n", "for reg in range(1, 20):\n", " rdf.append(gpd.read_file(utils.get_rgi_region_file(f'{reg:02d}', version='70G')))\n", "rdf = pd.concat(rdf)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "df.crs = rdf.crs" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3854279, 274531)" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df), len(rdf)" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "joined = gpd.sjoin(df, rdf, how='left', predicate='within')" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3854279, 265675)" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "len(joined), len(joined.loc[joined.rgi_id.isnull()])" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "no_join = joined.loc[joined.rgi_id.isnull()]\n", "ok_join = joined.loc[~joined.rgi_id.isnull()]" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9310701171347482" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ok_join) / len(df)" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
GlaThiDa_ID3333
POLITICAL_UNITUSUS
GLACIER_NAMEEASTONEASTON
SURVEY_DATE1992999919929999
PROFILE_IDNaNNaN
POINT_ID12
POINT_LAT48.7673848.764904
POINT_LON-121.819644-121.821909
ELEVATION2962.02813.0
THICKNESS029
THICKNESS_UNCERTAINTYNaNNaN
DATA_FLAGNaNNaN
REMARKSNaNNaN
geometryPOINT (-121.819644 48.7673801)POINT (-121.8219093 48.7649042)
index_right15002.015001.0
rgi_idRGI2000-v7.0-G-02-15003RGI2000-v7.0-G-02-15002
o1region0202
o2region02-0402-04
glims_idG238158E48759NG238169E48750N
anlys_id761915.0761926.0
subm_id744.0744.0
src_date1975-09-09T00:00:001974-09-09T00:00:00
cenlon-121.844147-121.830812
cenlat48.75878448.749909
utm_zone10.010.0
area_km24.9959232.887898
primeclass0.00.0
conn_lvl0.00.0
surge_type0.00.0
term_type9.09.0
glac_nameDeming GlacierEaston Glacier
is_rgi61.01.0
termlon-121.868505-121.837315
termlat48.74192448.7325
zmin_m1179.08571578.7251
zmax_m3273.74492971.7751
zmed_m2251.00932154.4375
zmean_m2254.8442160.2273
slope_deg23.26954718.759949
aspect_deg217.05867201.118759
aspect_sec6.05.0
dem_sourceCOPDEM30COPDEM30
lmax_m5346.04251.0
\n", "
" ], "text/plain": [ " 0 \\\n", "GlaThiDa_ID 33 \n", "POLITICAL_UNIT US \n", "GLACIER_NAME EASTON \n", "SURVEY_DATE 19929999 \n", "PROFILE_ID NaN \n", "POINT_ID 1 \n", "POINT_LAT 48.76738 \n", "POINT_LON -121.819644 \n", "ELEVATION 2962.0 \n", "THICKNESS 0 \n", "THICKNESS_UNCERTAINTY NaN \n", "DATA_FLAG NaN \n", "REMARKS NaN \n", "geometry POINT (-121.819644 48.7673801) \n", "index_right 15002.0 \n", "rgi_id RGI2000-v7.0-G-02-15003 \n", "o1region 02 \n", "o2region 02-04 \n", "glims_id G238158E48759N \n", "anlys_id 761915.0 \n", "subm_id 744.0 \n", "src_date 1975-09-09T00:00:00 \n", "cenlon -121.844147 \n", "cenlat 48.758784 \n", "utm_zone 10.0 \n", "area_km2 4.995923 \n", "primeclass 0.0 \n", "conn_lvl 0.0 \n", "surge_type 0.0 \n", "term_type 9.0 \n", "glac_name Deming Glacier \n", "is_rgi6 1.0 \n", "termlon -121.868505 \n", "termlat 48.741924 \n", "zmin_m 1179.0857 \n", "zmax_m 3273.7449 \n", "zmed_m 2251.0093 \n", "zmean_m 2254.844 \n", "slope_deg 23.269547 \n", "aspect_deg 217.05867 \n", "aspect_sec 6.0 \n", "dem_source COPDEM30 \n", "lmax_m 5346.0 \n", "\n", " 1 \n", "GlaThiDa_ID 33 \n", "POLITICAL_UNIT US \n", "GLACIER_NAME EASTON \n", "SURVEY_DATE 19929999 \n", "PROFILE_ID NaN \n", "POINT_ID 2 \n", "POINT_LAT 48.764904 \n", "POINT_LON -121.821909 \n", "ELEVATION 2813.0 \n", "THICKNESS 29 \n", "THICKNESS_UNCERTAINTY NaN \n", "DATA_FLAG NaN \n", "REMARKS NaN \n", "geometry POINT (-121.8219093 48.7649042) \n", "index_right 15001.0 \n", "rgi_id RGI2000-v7.0-G-02-15002 \n", "o1region 02 \n", "o2region 02-04 \n", "glims_id G238169E48750N \n", "anlys_id 761926.0 \n", "subm_id 744.0 \n", "src_date 1974-09-09T00:00:00 \n", "cenlon -121.830812 \n", "cenlat 48.749909 \n", "utm_zone 10.0 \n", "area_km2 2.887898 \n", "primeclass 0.0 \n", "conn_lvl 0.0 \n", "surge_type 0.0 \n", "term_type 9.0 \n", "glac_name Easton Glacier \n", "is_rgi6 1.0 \n", "termlon -121.837315 \n", "termlat 48.7325 \n", "zmin_m 1578.7251 \n", "zmax_m 2971.7751 \n", "zmed_m 2154.4375 \n", "zmean_m 
2160.2273 \n", "slope_deg 18.759949 \n", "aspect_deg 201.118759 \n", "aspect_sec 5.0 \n", "dem_source COPDEM30 \n", "lmax_m 4251.0 " ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ok_join.iloc[:2].T" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [], "source": [ "# Keep the GlaThiDa columns plus the matched RGI id. Selecting by name (instead\n", "# of by position) does not depend on the sjoin column layout and can be re-run.\n", "keep_cols = [c for c in df.columns if c != 'geometry' and c in ok_join.columns] + ['rgi_id']\n", "ok_join = ok_join[keep_cols]" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_4126691/2178455800.py:1: PerformanceWarning: \n", "your performance may suffer as PyTables will pickle object types that it cannot\n", "map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['POLITICAL_UNIT', 'GLACIER_NAME', 'SURVEY_DATE', 'PROFILE_ID',\n", " 'POINT_ID', 'REMARKS', 'rgi_id'],\n", " dtype='object')]\n", "\n", " ok_join.to_hdf('glathida-v3.1.0/data/TTT_RGI_v70G.h5', key='data')\n" ] } ], "source": [ "ok_join.to_hdf('glathida-v3.1.0/data/TTT_RGI_v70G.h5', key='data')" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "import tables" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "file = 'glathida-v3.1.0/data/TTT_RGI_v70G_per_id.h5'\n", "if os.path.exists(file):\n", "    os.remove(file)\n", "\n", "# One HDF key per glacier. groupby splits the table in a single pass instead of\n", "# re-scanning every row once per glacier id; sort=False keeps the ids in order\n", "# of first appearance, as unique() did.\n", "with warnings.catch_warnings():\n", "    warnings.simplefilter('ignore', tables.NaturalNameWarning)\n", "    for rid, tt in ok_join.groupby('rgi_id', sort=False):\n", "        tt.to_hdf(file, key=rid, append=True, complevel=5)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "def get_rids(path='glathida-v3.1.0/data/TTT_RGI_v70C_per_id.h5'):\n", "    \"\"\"Return the RGI ids that are stored as keys in a per-glacier HDF file.\n", "\n", "    Parameters\n", "    ----------\n", "    path : str\n", "        HDF file with one key per RGI id. Defaults to the v70C file; pass the\n", "        v70G path to list the file written by this notebook.\n", "\n", "    Returns\n", "    -------\n", "    numpy.ndarray\n", "        The store keys without their leading '/'.\n", "    \"\"\"\n", "    # Read-only: the default mode ('a') would silently create a missing file\n", "    with pd.HDFStore(path, mode='r') as store:\n", "        return np.array([key[1:] for key in store.keys()])" ] }, { "cell_type": "code", 
"execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "946 ms ± 10.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], "source": [ "%timeit get_rids()" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [], "source": [ "def read_data(rid):\n", " out = None\n", " try:\n", " out = pd.read_hdf('glathida-v3.1.0/data/TTT_RGI_v70C_per_id.h5', key=rid)\n", " except KeyError:\n", " pass\n", " return out" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "16.5 ms ± 218 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], "source": [ "%timeit read_data('RGI2000-v7.0-C-02-10810')" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "503 µs ± 11.3 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" ] } ], "source": [ "%timeit read_data('RGI2000-v7.0-C-02-10812')" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "t = pd.read_hdf('glathida-v3.1.0/data/TTT_RGI_v70C.h5')" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.96 s ± 99.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], "source": [ "%timeit pd.read_hdf('glathida-v3.1.0/data/TTT_RGI_v70C.h5')" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "191 ms ± 8.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%timeit t.loc[t.rgi_id == 'RGI2000-v7.0-C-02-10810']" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "192 ms ± 5.84 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" ] } ], "source": [ "%timeit t.loc[t.rgi_id == 'RGI2000-v7.0-C-02-10210']" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GlaThiDa_IDPOLITICAL_UNITGLACIER_NAMESURVEY_DATEPROFILE_IDPOINT_IDPOINT_LATPOINT_LONELEVATIONTHICKNESSTHICKNESS_UNCERTAINTYDATA_FLAGREMARKSrgi_id
033USEASTON19929999NaN148.767380-121.8196442962.00NaNNaNNaNRGI2000-v7.0-C-02-10810
133USEASTON19929999NaN248.764904-121.8219092813.029NaNNaNNaNRGI2000-v7.0-C-02-10810
233USEASTON19929999NaN348.761662-121.8252642598.041NaNNaNNaNRGI2000-v7.0-C-02-10810
333USEASTON19929999NaN448.757063-121.8291072383.071NaNNaNNaNRGI2000-v7.0-C-02-10810
433USEASTON19929999NaN548.753715-121.8320062284.082NaNNaNNaNRGI2000-v7.0-C-02-10810
.............................................
19089502USSHERMAN CRATER20109999NaN7648.768840-121.8162702931.0595.0NaNNaNRGI2000-v7.0-C-02-10810
19090502USSHERMAN CRATER20109999NaN7748.768892-121.8161512928.0545.0NaNNaNRGI2000-v7.0-C-02-10810
19091502USSHERMAN CRATER20109999NaN7848.768944-121.8160322926.0515.0NaNNaNRGI2000-v7.0-C-02-10810
19092502USSHERMAN CRATER20109999NaN7948.768990-121.8159142923.0495.0NaNNaNRGI2000-v7.0-C-02-10810
19093502USSHERMAN CRATER20109999NaN8048.769043-121.8157952921.0435.0NaNNaNRGI2000-v7.0-C-02-10810
\n", "

2533 rows × 14 columns

\n", "
" ], "text/plain": [ " GlaThiDa_ID POLITICAL_UNIT GLACIER_NAME SURVEY_DATE PROFILE_ID \\\n", "0 33 US EASTON 19929999 NaN \n", "1 33 US EASTON 19929999 NaN \n", "2 33 US EASTON 19929999 NaN \n", "3 33 US EASTON 19929999 NaN \n", "4 33 US EASTON 19929999 NaN \n", "... ... ... ... ... ... \n", "19089 502 US SHERMAN CRATER 20109999 NaN \n", "19090 502 US SHERMAN CRATER 20109999 NaN \n", "19091 502 US SHERMAN CRATER 20109999 NaN \n", "19092 502 US SHERMAN CRATER 20109999 NaN \n", "19093 502 US SHERMAN CRATER 20109999 NaN \n", "\n", " POINT_ID POINT_LAT POINT_LON ELEVATION THICKNESS \\\n", "0 1 48.767380 -121.819644 2962.0 0 \n", "1 2 48.764904 -121.821909 2813.0 29 \n", "2 3 48.761662 -121.825264 2598.0 41 \n", "3 4 48.757063 -121.829107 2383.0 71 \n", "4 5 48.753715 -121.832006 2284.0 82 \n", "... ... ... ... ... ... \n", "19089 76 48.768840 -121.816270 2931.0 59 \n", "19090 77 48.768892 -121.816151 2928.0 54 \n", "19091 78 48.768944 -121.816032 2926.0 51 \n", "19092 79 48.768990 -121.815914 2923.0 49 \n", "19093 80 48.769043 -121.815795 2921.0 43 \n", "\n", " THICKNESS_UNCERTAINTY DATA_FLAG REMARKS rgi_id \n", "0 NaN NaN NaN RGI2000-v7.0-C-02-10810 \n", "1 NaN NaN NaN RGI2000-v7.0-C-02-10810 \n", "2 NaN NaN NaN RGI2000-v7.0-C-02-10810 \n", "3 NaN NaN NaN RGI2000-v7.0-C-02-10810 \n", "4 NaN NaN NaN RGI2000-v7.0-C-02-10810 \n", "... ... ... ... ... 
\n", "19089 5.0 NaN NaN RGI2000-v7.0-C-02-10810 \n", "19090 5.0 NaN NaN RGI2000-v7.0-C-02-10810 \n", "19091 5.0 NaN NaN RGI2000-v7.0-C-02-10810 \n", "19092 5.0 NaN NaN RGI2000-v7.0-C-02-10810 \n", "19093 5.0 NaN NaN RGI2000-v7.0-C-02-10810 \n", "\n", "[2533 rows x 14 columns]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t.loc[t.rgi_id == 'RGI2000-v7.0-C-02-10810']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Add the RGI Region attribute " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100% (3854279 of 3854279) |##############| Elapsed Time: 0:01:44 Time: 0:01:44\n" ] } ], "source": [ "reg = np.full(len(df), -1, dtype=int)  # -1 = not inside any RGI region\n", "prev_reg = None\n", "for i, p in progressbar.progressbar(enumerate(df.geometry), max_value=len(df)):\n", "    # Cheap path: re-test the polygon that matched the previous point\n", "    # before searching all regions\n", "    if prev_reg is not None and prev_reg.contains(p):\n", "        reg[i] = reg[i-1]\n", "        continue\n", "    hits = rgi_reg.loc[rgi_reg.contains(p)]\n", "    if hits.empty:\n", "        # No region contains this point: leave -1 and forget the cached polygon\n", "        prev_reg = None\n", "        continue\n", "    sel = hits.iloc[0]\n", "    reg[i] = sel.RGI_CODE\n", "    prev_reg = sel.geometry" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 16, 17, 18, 19]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['RGI_REG'] = reg\n", "sorted(df['RGI_REG'].unique())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Separate the data in RGI Regions" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# One independent copy of the points per region code, in ascending code order.\n", "# A plain dict keeps insertion order, so OrderedDict (never imported) is not needed.\n", "dfs = {r: df.loc[df.RGI_REG == r].copy() for r in sorted(df['RGI_REG'].unique())}" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "### Prepare for writing and write to file " ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "for i, (k, d) in enumerate(dfs.items()):\n", " d.drop(['geometry'], axis=1, inplace=True)\n", " d['RGI_REG'] = d['RGI_REG'].astype(str)\n", " d['ELEVATION'] = d['ELEVATION'].astype(str)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GlaThiDa_ID int64\n", "POLITICAL_UNIT object\n", "GLACIER_NAME object\n", "SURVEY_DATE object\n", "PROFILE_ID object\n", "POINT_ID object\n", "POINT_LAT float64\n", "POINT_LON float64\n", "ELEVATION object\n", "THICKNESS int64\n", "THICKNESS_UNCERTAINTY float64\n", "DATA_FLAG float64\n", "REMARKS object\n", "RGI_REG object\n" ] } ], "source": [ "for c in d:\n", " print(c, d[c].dtype)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing 01 87157\n", "Writing 02 3406\n", "Writing 03 868346\n", "Writing 04 309453\n", "Writing 05 557136\n", "Writing 07 966408\n", "Writing 08 10801\n", "Writing 10 7726\n", "Writing 11 478312\n", "Writing 12 2278\n", "Writing 13 15327\n", "Writing 16 1287\n", "Writing 17 8463\n", "Writing 18 619\n", "Writing 19 537560\n" ] } ], "source": [ "outf = os.path.join(gtd_dir, 'TTT_per_reg.h5')\n", "if os.path.exists(outf):\n", " os.remove(outf)\n", "count = 0\n", "for i, (k, d) in enumerate(dfs.items()):\n", " key = '{:02d}'.format(int(k))\n", " print('Writing', key, len(d))\n", " with warnings.catch_warnings():\n", " warnings.simplefilter('ignore', tables.NaturalNameWarning)\n", " d.to_hdf(outf, key, append=True, complevel=5)\n", " count += len(d)\n", "assert count == len(df)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3 (ipykernel)", 
"language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }