{ "cells": [
  { "cell_type": "markdown", "metadata": {}, "source": [ "## New attempt" ] },
  { "cell_type": "markdown", "metadata": {}, "source": [ "In the end, we have an HDF file of GlaThiDa per RGI sub-region." ] },
  { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", "import shapely.geometry as shpg\n", "import numpy as np\n", "import os\n", "import progressbar\n", "import time\n", "from oggm import utils, cfg" ] },
  { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Directory containing the GlaThiDa CSV tables read below.\n", "gtd_dir = './glathida-v3.1.0/data'\n", "# gtd_dir = './GlaThiDa_2016'" ] },
  { "cell_type": "markdown", "metadata": {}, "source": [ "## Read the files" ] },
  { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# SURVEY_DATE is forced to text so pandas does not infer a numeric dtype for it.\n", "df = pd.read_csv(os.path.join(gtd_dir, 'TTT.csv'), dtype={'SURVEY_DATE': 'str'}, low_memory=False)" ] },
  { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GlaThiDa_ID int64\n", "POLITICAL_UNIT object\n", "GLACIER_NAME object\n", "SURVEY_DATE object\n", "PROFILE_ID object\n", "POINT_ID object\n", "POINT_LAT float64\n", "POINT_LON float64\n", "ELEVATION float64\n", "THICKNESS int64\n", "THICKNESS_UNCERTAINTY float64\n", "DATA_FLAG float64\n", "REMARKS object\n" ] } ], "source": [ "# List every column together with the dtype pandas assigned to it.\n", "for c in df:\n", "    print(c, df[c].dtype)" ] },
  { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3854279" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df)" ] },
  { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.04538540152386478, 174928)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fraction and count of points with zero thickness, counted once from a boolean mask.\n", "n_zero = int((df.THICKNESS == 0).sum())\n", "n_zero / len(df), n_zero" ] },
  { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Region outlines read from the RGI '62' directory (00_rgi62_O1Regions.shp).\n", "rgi_reg = gpd.read_file(os.path.join(utils.get_rgi_dir('62'), '00_rgi62_regions/00_rgi62_O1Regions.shp'))" ] },
  { "cell_type": "markdown", "metadata": {}, "source": [ "## Convert lon, lat to Point geometries " ] },
  { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Build all Point geometries in one vectorized call instead of a per-row Python loop.\n", "geoms = gpd.points_from_xy(df.POINT_LON, df.POINT_LAT)" ] },
  { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "df['geometry'] = geoms" ] },
  { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Promote the table to a GeoDataFrame; no CRS is attached at this point.\n", "df = gpd.GeoDataFrame(df)" ] },
  { "cell_type": "markdown", "metadata": {}, "source": [ "## Read RGI" ] },
  { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "# Read the RGI '70G' file of regions 01 to 19 and stack them into one frame.\n", "# The index is not reset by concat, so index labels repeat across regions.\n", "rdf = pd.concat([\n", "    gpd.read_file(utils.get_rgi_region_file(f'{reg:02d}', version='70G'))\n", "    for reg in range(1, 20)\n", "])" ] },
  { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "# The points were built without a CRS; tag them with the RGI CRS so the spatial join compares like with like.\n", "# NOTE(review): assumes POINT_LON/POINT_LAT are expressed in that CRS - confirm.\n", "df.crs = rdf.crs" ] },
  { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3854279, 274531)" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df), len(rdf)" ] },
  { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "# Left join: every GlaThiDa point is kept; the RGI columns stay empty when a point lies within no outline.\n", "joined = gpd.sjoin(df, rdf, how='left', predicate='within')" ] },
  { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3854279, 265675)" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows after the join, and rows for which the join found no RGI outline.\n", "len(joined), int(joined.rgi_id.isnull().sum())" ] },
  { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "# Split the joined table on whether an RGI outline was found for the point.\n", "no_join = joined.loc[joined.rgi_id.isnull()]\n", "ok_join = joined.loc[joined.rgi_id.notnull()]" ] },
  { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9310701171347482" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Share of GlaThiDa points that were matched to an RGI outline.\n", "len(ok_join) / len(df)" ] },
  { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 0 | \n", "1 | \n", "
---|---|---|
GlaThiDa_ID | \n", "33 | \n", "33 | \n", "
POLITICAL_UNIT | \n", "US | \n", "US | \n", "
GLACIER_NAME | \n", "EASTON | \n", "EASTON | \n", "
SURVEY_DATE | \n", "19929999 | \n", "19929999 | \n", "
PROFILE_ID | \n", "NaN | \n", "NaN | \n", "
POINT_ID | \n", "1 | \n", "2 | \n", "
POINT_LAT | \n", "48.76738 | \n", "48.764904 | \n", "
POINT_LON | \n", "-121.819644 | \n", "-121.821909 | \n", "
ELEVATION | \n", "2962.0 | \n", "2813.0 | \n", "
THICKNESS | \n", "0 | \n", "29 | \n", "
THICKNESS_UNCERTAINTY | \n", "NaN | \n", "NaN | \n", "
DATA_FLAG | \n", "NaN | \n", "NaN | \n", "
REMARKS | \n", "NaN | \n", "NaN | \n", "
geometry | \n", "POINT (-121.819644 48.7673801) | \n", "POINT (-121.8219093 48.7649042) | \n", "
index_right | \n", "15002.0 | \n", "15001.0 | \n", "
rgi_id | \n", "RGI2000-v7.0-G-02-15003 | \n", "RGI2000-v7.0-G-02-15002 | \n", "
o1region | \n", "02 | \n", "02 | \n", "
o2region | \n", "02-04 | \n", "02-04 | \n", "
glims_id | \n", "G238158E48759N | \n", "G238169E48750N | \n", "
anlys_id | \n", "761915.0 | \n", "761926.0 | \n", "
subm_id | \n", "744.0 | \n", "744.0 | \n", "
src_date | \n", "1975-09-09T00:00:00 | \n", "1974-09-09T00:00:00 | \n", "
cenlon | \n", "-121.844147 | \n", "-121.830812 | \n", "
cenlat | \n", "48.758784 | \n", "48.749909 | \n", "
utm_zone | \n", "10.0 | \n", "10.0 | \n", "
area_km2 | \n", "4.995923 | \n", "2.887898 | \n", "
primeclass | \n", "0.0 | \n", "0.0 | \n", "
conn_lvl | \n", "0.0 | \n", "0.0 | \n", "
surge_type | \n", "0.0 | \n", "0.0 | \n", "
term_type | \n", "9.0 | \n", "9.0 | \n", "
glac_name | \n", "Deming Glacier | \n", "Easton Glacier | \n", "
is_rgi6 | \n", "1.0 | \n", "1.0 | \n", "
termlon | \n", "-121.868505 | \n", "-121.837315 | \n", "
termlat | \n", "48.741924 | \n", "48.7325 | \n", "
zmin_m | \n", "1179.0857 | \n", "1578.7251 | \n", "
zmax_m | \n", "3273.7449 | \n", "2971.7751 | \n", "
zmed_m | \n", "2251.0093 | \n", "2154.4375 | \n", "
zmean_m | \n", "2254.844 | \n", "2160.2273 | \n", "
slope_deg | \n", "23.269547 | \n", "18.759949 | \n", "
aspect_deg | \n", "217.05867 | \n", "201.118759 | \n", "
aspect_sec | \n", "6.0 | \n", "5.0 | \n", "
dem_source | \n", "COPDEM30 | \n", "COPDEM30 | \n", "
lmax_m | \n", "5346.0 | \n", "4251.0 | \n", "
\n", " | GlaThiDa_ID | \n", "POLITICAL_UNIT | \n", "GLACIER_NAME | \n", "SURVEY_DATE | \n", "PROFILE_ID | \n", "POINT_ID | \n", "POINT_LAT | \n", "POINT_LON | \n", "ELEVATION | \n", "THICKNESS | \n", "THICKNESS_UNCERTAINTY | \n", "DATA_FLAG | \n", "REMARKS | \n", "rgi_id | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "33 | \n", "US | \n", "EASTON | \n", "19929999 | \n", "NaN | \n", "1 | \n", "48.767380 | \n", "-121.819644 | \n", "2962.0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "RGI2000-v7.0-C-02-10810 | \n", "
1 | \n", "33 | \n", "US | \n", "EASTON | \n", "19929999 | \n", "NaN | \n", "2 | \n", "48.764904 | \n", "-121.821909 | \n", "2813.0 | \n", "29 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "RGI2000-v7.0-C-02-10810 | \n", "
2 | \n", "33 | \n", "US | \n", "EASTON | \n", "19929999 | \n", "NaN | \n", "3 | \n", "48.761662 | \n", "-121.825264 | \n", "2598.0 | \n", "41 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "RGI2000-v7.0-C-02-10810 | \n", "
3 | \n", "33 | \n", "US | \n", "EASTON | \n", "19929999 | \n", "NaN | \n", "4 | \n", "48.757063 | \n", "-121.829107 | \n", "2383.0 | \n", "71 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "RGI2000-v7.0-C-02-10810 | \n", "
4 | \n", "33 | \n", "US | \n", "EASTON | \n", "19929999 | \n", "NaN | \n", "5 | \n", "48.753715 | \n", "-121.832006 | \n", "2284.0 | \n", "82 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "RGI2000-v7.0-C-02-10810 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
19089 | \n", "502 | \n", "US | \n", "SHERMAN CRATER | \n", "20109999 | \n", "NaN | \n", "76 | \n", "48.768840 | \n", "-121.816270 | \n", "2931.0 | \n", "59 | \n", "5.0 | \n", "NaN | \n", "NaN | \n", "RGI2000-v7.0-C-02-10810 | \n", "
19090 | \n", "502 | \n", "US | \n", "SHERMAN CRATER | \n", "20109999 | \n", "NaN | \n", "77 | \n", "48.768892 | \n", "-121.816151 | \n", "2928.0 | \n", "54 | \n", "5.0 | \n", "NaN | \n", "NaN | \n", "RGI2000-v7.0-C-02-10810 | \n", "
19091 | \n", "502 | \n", "US | \n", "SHERMAN CRATER | \n", "20109999 | \n", "NaN | \n", "78 | \n", "48.768944 | \n", "-121.816032 | \n", "2926.0 | \n", "51 | \n", "5.0 | \n", "NaN | \n", "NaN | \n", "RGI2000-v7.0-C-02-10810 | \n", "
19092 | \n", "502 | \n", "US | \n", "SHERMAN CRATER | \n", "20109999 | \n", "NaN | \n", "79 | \n", "48.768990 | \n", "-121.815914 | \n", "2923.0 | \n", "49 | \n", "5.0 | \n", "NaN | \n", "NaN | \n", "RGI2000-v7.0-C-02-10810 | \n", "
19093 | \n", "502 | \n", "US | \n", "SHERMAN CRATER | \n", "20109999 | \n", "NaN | \n", "80 | \n", "48.769043 | \n", "-121.815795 | \n", "2921.0 | \n", "43 | \n", "5.0 | \n", "NaN | \n", "NaN | \n", "RGI2000-v7.0-C-02-10810 | \n", "
2533 rows × 14 columns
\n", "