{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Attribute GlaThiDa point thickness data to RGI glacier outlines" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In the end, we have an HDF file of GlaThiDa point measurements per RGI version (plus per-glacier-ID and per-region variants)." ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", "import shapely.geometry as shpg\n", "import numpy as np\n", "import os\n", "import glob\n", "import progressbar\n", "import time\n", "from oggm import utils, cfg\n", "import warnings\n", "import tables\n", "from collections import OrderedDict" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [], "source": [ "gtd_dir = './glathida-main/data'\n", "# gtd_dir = './GlaThiDa_2016'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read the GTD files" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "./glathida-main/data/lippl-2019-gourdon/point.csv\n", "./glathida-main/data/ncdc-2023/point.csv\n", "./glathida-main/data/ben-pelto-columbia-basin/point.csv\n", "./glathida-main/data/oneel-2011-columbia-bering/point.csv\n", "./glathida-main/data/braun-2018-antarctic-peninsula/point.csv\n", "./glathida-main/data/luyendyk-2003-marie-byrd-land/point.csv\n", "./glathida-main/data/corr-2020-antarctic-peninsula/point.csv\n", "./glathida-main/data/oberreuter-2021-artesonraju/point.csv\n", "./glathida-main/data/benham-2020-canadian-arctic/point.csv\n", "./glathida-main/data/franke-2020-dronning-maud-land/point.csv\n", "./glathida-main/data/goel-2019-ice-rises/point.csv\n", "./glathida-main/data/lambrecht-2019-fedchenko/point.csv\n", "./glathida-main/data/24k-glacier-2019/point.csv\n", "./glathida-main/data/swiss-glacier-thickness-r2020/point.csv\n", "./glathida-main/data/heinrichs-1995-black-rapids/point.csv\n", "./glathida-main/data/conway-2016-black-rapids/point.csv\n", "./glathida-main/data/gacitua-2020-schiaparelli/point.csv\n" ] } ], 
"source": [ "dfs = [pd.read_csv(os.path.join(gtd_dir, 'point.csv'), dtype={'date': 'str', 'elevation_date': 'str', 'flag': 'str'}, low_memory=False)]\n", "\n", "for path in glob.glob(os.path.join(gtd_dir, '*', 'point.csv')):\n", " print(path)\n", " dfs.append(pd.read_csv(path, dtype={'date': 'str', 'elevation_date': 'str', 'flag': 'str'}, low_memory=False))\n", "\n", "df = pd.concat(dfs, ignore_index=True)\n", "\n", "df = gpd.GeoDataFrame(\n", " df, geometry=gpd.points_from_xy(df['longitude'], df['latitude'], crs=4326)\n", ")" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "survey_id int64\n", "glacier_id float64\n", "profile_id object\n", "point_id object\n", "date object\n", "max_date object\n", "elevation_date object\n", "max_elevation_date object\n", "latitude float64\n", "longitude float64\n", "elevation float64\n", "thickness int64\n", "thickness_uncertainty float64\n", "flag object\n", "remarks object\n", "date_max object\n", "geometry geometry\n" ] } ], "source": [ "for c in df:\n", " print(c, df[c].dtype)" ] }, { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1432760" ] }, "execution_count": 139, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df) - 3854279" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5287039" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df)" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.03687640662382101, 194967)" ] }, "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df.loc[df.thickness == 0]) / len(df), len(df.loc[df.thickness == 0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read RGI, assign and write" ] }, { "cell_type": 
"code", "execution_count": 145, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "62 0.8385355205437297\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_2029233/185255322.py:26: PerformanceWarning: \n", "your performance may suffer as PyTables will pickle object types that it cannot\n", "map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['date', 'elevation_date', 'flag', 'rgi_id'], dtype='object')]\n", "\n", " to_write.to_hdf(gtd_dir + f'/glathida_2023-11-16_rgi_{rgi_version}.h5', key='data', complevel=5)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "70G 0.8368438364082429\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_2029233/185255322.py:26: PerformanceWarning: \n", "your performance may suffer as PyTables will pickle object types that it cannot\n", "map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['date', 'elevation_date', 'flag', 'rgi_id'], dtype='object')]\n", "\n", " to_write.to_hdf(gtd_dir + f'/glathida_2023-11-16_rgi_{rgi_version}.h5', key='data', complevel=5)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "70C 0.8368438364082429\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_2029233/185255322.py:26: PerformanceWarning: \n", "your performance may suffer as PyTables will pickle object types that it cannot\n", "map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['date', 'elevation_date', 'flag', 'rgi_id'], dtype='object')]\n", "\n", " to_write.to_hdf(gtd_dir + f'/glathida_2023-11-16_rgi_{rgi_version}.h5', key='data', complevel=5)\n" ] } ], "source": [ "for rgi_version in ['62', '70G', '70C']:\n", " \n", " rdf = []\n", " for reg in range(1, 20):\n", " rdf.append(gpd.read_file(utils.get_rgi_region_file(f'{reg:02d}', version=rgi_version)))\n", " rdf = pd.concat(rdf)\n", " \n", " if rgi_version == '62':\n", " rdf 
= rdf.loc[rdf['Connect'] != 2]\n", " rdf['rgi_id'] = rdf['RGIId']\n", " \n", " joined = gpd.sjoin(df, rdf, how='left', predicate='within')\n", " \n", " no_join = joined.loc[joined.rgi_id.isnull()]\n", " ok_join = joined.loc[~joined.rgi_id.isnull()]\n", " \n", " print(rgi_version, len(ok_join) / len(df))\n", " \n", " to_write = ok_join[['survey_id', 'date', 'elevation_date', \n", " 'latitude', 'longitude', 'elevation', 'thickness', \n", " 'thickness_uncertainty', 'flag',\n", " 'rgi_id']]\n", " \n", " with warnings.catch_warnings():\n", " warnings.simplefilter('ignore', tables.PerformanceWarning)\n", " to_write.to_hdf(gtd_dir + f'/glathida_2023-11-16_rgi_{rgi_version}.h5', key='data', complevel=5)\n", " \n", " file = gtd_dir + f'/glathida_2023-11-16_rgi_{rgi_version}_per_id.h5'\n", " if os.path.exists(file):\n", " os.remove(file)\n", "\n", " rids = to_write.rgi_id.unique()\n", " for rid in rids:\n", " tt = to_write.loc[to_write.rgi_id == rid].reset_index(drop=True)\n", " with warnings.catch_warnings():\n", " warnings.simplefilter('ignore', tables.NaturalNameWarning)\n", " tt.to_hdf(file, key=rid, append=True, complevel=5)" ] }, { "cell_type": "code", "execution_count": 153, "metadata": {}, "outputs": [], "source": [ "with pd.HDFStore('glathida-main/data/glathida_2023-11-16_rgi_70C_per_id.h5') as store:\n", " rgi_ids = list(store.keys())\n", " rgi_ids = np.array([s[1:] for s in rgi_ids])" ] }, { "cell_type": "code", "execution_count": 154, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1278" ] }, "execution_count": 154, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(rgi_ids)" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [], "source": [ "dfa = pd.read_hdf('glathida-main/data/glathida_2023-11-16_rgi_70C.h5')" ] }, { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [], "source": [ "dfa = dfa.loc[['-03-' in c for c in dfa.rgi_id]]" ] }, { "cell_type": "code", "execution_count": 164, 
"metadata": {}, "outputs": [], "source": [ "dfa = gpd.GeoDataFrame(\n", " dfa, geometry=gpd.points_from_xy(dfa['longitude'], dfa['latitude'], crs=4326)\n", ")" ] }, { "cell_type": "code", "execution_count": 165, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_2029233/2911545548.py:1: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", " dfa.to_file(filename='gtd_reg03.shp.zip', driver='ESRI Shapefile')\n" ] } ], "source": [ "dfa.to_file(filename='gtd_reg03.shp.zip', driver='ESRI Shapefile')" ] }, { "cell_type": "code", "execution_count": 166, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 166, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t = 1\n", "t - 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 146, "metadata": {}, "outputs": [], "source": [ "t = 1" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 127, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "survey_id int64\n", "date object\n", "elevation_date object\n", "latitude float64\n", "longitude float64\n", "elevation float64\n", "thickness int64\n", "thickness_uncertainty float64\n", "flag object\n", "rgi_id object\n" ] } ], "source": [ "\n", "for c in to_write:\n", " print(c, to_write[c].dtype)" ] }, { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [], "source": [ "to_write = to_write.reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_2029233/271614285.py:1: PerformanceWarning: \n", "your 
performance may suffer as PyTables will pickle object types that it cannot\n", "map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['date', 'elevation_date', 'flag', 'rgi_id'], dtype='object')]\n", "\n", " to_write.to_hdf(gtd_dir + '/glathida_2023-11-16_rgi_70G.h5', key='data')\n" ] } ], "source": [] }, { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "def get_rids():\n", " with pd.HDFStore('glathida-v3.1.0/data/TTT_RGI_v70C_per_id.h5') as store:\n", " rgi_ids = list(store.keys())\n", " return np.array([s[1:] for s in rgi_ids])" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "946 ms ± 10.5 ms per loop 
(mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], "source": [ "%timeit get_rids()" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [], "source": [ "def read_data(rid):\n", " out = None\n", " try:\n", " out = pd.read_hdf('glathida-v3.1.0/data/TTT_RGI_v70C_per_id.h5', key=rid)\n", " except KeyError:\n", " pass\n", " return out" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "16.5 ms ± 218 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], "source": [ "%timeit read_data('RGI2000-v7.0-C-02-10810')" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "503 µs ± 11.3 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" ] } ], "source": [ "%timeit read_data('RGI2000-v7.0-C-02-10812')" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "t = pd.read_hdf('glathida-v3.1.0/data/TTT_RGI_v70C.h5')" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.96 s ± 99.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], "source": [ "%timeit pd.read_hdf('glathida-v3.1.0/data/TTT_RGI_v70C.h5')" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "191 ms ± 8.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%timeit t.loc[t.rgi_id == 'RGI2000-v7.0-C-02-10810']" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "192 ms ± 5.84 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" ] } ], "source": [ "%timeit t.loc[t.rgi_id == 'RGI2000-v7.0-C-02-10210']" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GlaThiDa_IDPOLITICAL_UNITGLACIER_NAMESURVEY_DATEPROFILE_IDPOINT_IDPOINT_LATPOINT_LONELEVATIONTHICKNESSTHICKNESS_UNCERTAINTYDATA_FLAGREMARKSrgi_id
033USEASTON19929999NaN148.767380-121.8196442962.00NaNNaNNaNRGI2000-v7.0-C-02-10810
133USEASTON19929999NaN248.764904-121.8219092813.029NaNNaNNaNRGI2000-v7.0-C-02-10810
233USEASTON19929999NaN348.761662-121.8252642598.041NaNNaNNaNRGI2000-v7.0-C-02-10810
333USEASTON19929999NaN448.757063-121.8291072383.071NaNNaNNaNRGI2000-v7.0-C-02-10810
433USEASTON19929999NaN548.753715-121.8320062284.082NaNNaNNaNRGI2000-v7.0-C-02-10810
.............................................
19089502USSHERMAN CRATER20109999NaN7648.768840-121.8162702931.0595.0NaNNaNRGI2000-v7.0-C-02-10810
19090502USSHERMAN CRATER20109999NaN7748.768892-121.8161512928.0545.0NaNNaNRGI2000-v7.0-C-02-10810
19091502USSHERMAN CRATER20109999NaN7848.768944-121.8160322926.0515.0NaNNaNRGI2000-v7.0-C-02-10810
19092502USSHERMAN CRATER20109999NaN7948.768990-121.8159142923.0495.0NaNNaNRGI2000-v7.0-C-02-10810
19093502USSHERMAN CRATER20109999NaN8048.769043-121.8157952921.0435.0NaNNaNRGI2000-v7.0-C-02-10810
\n", "

2533 rows × 14 columns

\n", "
" ], "text/plain": [ " GlaThiDa_ID POLITICAL_UNIT GLACIER_NAME SURVEY_DATE PROFILE_ID \\\n", "0 33 US EASTON 19929999 NaN \n", "1 33 US EASTON 19929999 NaN \n", "2 33 US EASTON 19929999 NaN \n", "3 33 US EASTON 19929999 NaN \n", "4 33 US EASTON 19929999 NaN \n", "... ... ... ... ... ... \n", "19089 502 US SHERMAN CRATER 20109999 NaN \n", "19090 502 US SHERMAN CRATER 20109999 NaN \n", "19091 502 US SHERMAN CRATER 20109999 NaN \n", "19092 502 US SHERMAN CRATER 20109999 NaN \n", "19093 502 US SHERMAN CRATER 20109999 NaN \n", "\n", " POINT_ID POINT_LAT POINT_LON ELEVATION THICKNESS \\\n", "0 1 48.767380 -121.819644 2962.0 0 \n", "1 2 48.764904 -121.821909 2813.0 29 \n", "2 3 48.761662 -121.825264 2598.0 41 \n", "3 4 48.757063 -121.829107 2383.0 71 \n", "4 5 48.753715 -121.832006 2284.0 82 \n", "... ... ... ... ... ... \n", "19089 76 48.768840 -121.816270 2931.0 59 \n", "19090 77 48.768892 -121.816151 2928.0 54 \n", "19091 78 48.768944 -121.816032 2926.0 51 \n", "19092 79 48.768990 -121.815914 2923.0 49 \n", "19093 80 48.769043 -121.815795 2921.0 43 \n", "\n", " THICKNESS_UNCERTAINTY DATA_FLAG REMARKS rgi_id \n", "0 NaN NaN NaN RGI2000-v7.0-C-02-10810 \n", "1 NaN NaN NaN RGI2000-v7.0-C-02-10810 \n", "2 NaN NaN NaN RGI2000-v7.0-C-02-10810 \n", "3 NaN NaN NaN RGI2000-v7.0-C-02-10810 \n", "4 NaN NaN NaN RGI2000-v7.0-C-02-10810 \n", "... ... ... ... ... 
\n", "19089 5.0 NaN NaN RGI2000-v7.0-C-02-10810 \n", "19090 5.0 NaN NaN RGI2000-v7.0-C-02-10810 \n", "19091 5.0 NaN NaN RGI2000-v7.0-C-02-10810 \n", "19092 5.0 NaN NaN RGI2000-v7.0-C-02-10810 \n", "19093 5.0 NaN NaN RGI2000-v7.0-C-02-10810 \n", "\n", "[2533 rows x 14 columns]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t.loc[t.rgi_id == 'RGI2000-v7.0-C-02-10810']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Add the RGI Region attribute " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100% (3854279 of 3854279) |##############| Elapsed Time: 0:01:44 Time: 0:01:44\n" ] } ], "source": [ "reg = np.ones(len(df), dtype=int) * -1\n", "prev_reg = None\n", "for i, p in progressbar.progressbar(enumerate(df.geometry), max_value=len(df)):\n", " if prev_reg is not None and prev_reg.contains(p):\n", " reg[i] = reg[i-1]\n", " continue\n", " try:\n", " sel = rgi_reg.loc[rgi_reg.contains(p)].iloc[0]\n", " reg[i] = sel.RGI_CODE\n", " prev_reg = sel.geometry\n", " except:\n", " prev_reg = None" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 16, 17, 18, 19]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['RGI_REG'] = reg\n", "sorted(df['RGI_REG'].unique())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Separate the data in RGI Regions" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "dfs = OrderedDict()\n", "for r in sorted(df['RGI_REG'].unique()):\n", " dfs[r] = df.loc[df.RGI_REG == r].copy()" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "### Prepare for writing and write to file " ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "for i, (k, d) in enumerate(dfs.items()):\n", " d.drop(['geometry'], axis=1, inplace=True)\n", " d['RGI_REG'] = d['RGI_REG'].astype(str)\n", " d['ELEVATION'] = d['ELEVATION'].astype(str)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GlaThiDa_ID int64\n", "POLITICAL_UNIT object\n", "GLACIER_NAME object\n", "SURVEY_DATE object\n", "PROFILE_ID object\n", "POINT_ID object\n", "POINT_LAT float64\n", "POINT_LON float64\n", "ELEVATION object\n", "THICKNESS int64\n", "THICKNESS_UNCERTAINTY float64\n", "DATA_FLAG float64\n", "REMARKS object\n", "RGI_REG object\n" ] } ], "source": [ "for c in d:\n", " print(c, d[c].dtype)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing 01 87157\n", "Writing 02 3406\n", "Writing 03 868346\n", "Writing 04 309453\n", "Writing 05 557136\n", "Writing 07 966408\n", "Writing 08 10801\n", "Writing 10 7726\n", "Writing 11 478312\n", "Writing 12 2278\n", "Writing 13 15327\n", "Writing 16 1287\n", "Writing 17 8463\n", "Writing 18 619\n", "Writing 19 537560\n" ] } ], "source": [ "outf = os.path.join(gtd_dir, 'TTT_per_reg.h5')\n", "if os.path.exists(outf):\n", " os.remove(outf)\n", "count = 0\n", "for i, (k, d) in enumerate(dfs.items()):\n", " key = '{:02d}'.format(int(k))\n", " print('Writing', key, len(d))\n", " with warnings.catch_warnings():\n", " warnings.simplefilter('ignore', tables.NaturalNameWarning)\n", " d.to_hdf(outf, key, append=True, complevel=5)\n", " count += len(d)\n", "assert count == len(df)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3 (ipykernel)", 
"language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }