import logging import os import glob import numpy as np import pandas as pd import xarray as xr from oggm import cfg, utils from oggm.core import massbalance log = logging.getLogger(__name__) _MB_ATTRS = {"name": "Specific mass balance (fixed geometry)", "unit": "mwe"} _AREA_ATTRS = {"name": "Reference glacier area from OGGM model flowlines", "unit": "m2"} def _batch_outdir(glacier_dir, rgi_id): """Return (and create) the batch subfolder for a given RGI ID. Glaciers are grouped by the numeric suffix of their RGI ID in batches of 100: RGI60-01.00001 to RGI60-01.00100 → batch_0000, etc. """ glacier_num = int(rgi_id.split(".")[-1]) batch_num = (glacier_num - 1) // 100 batch_dir = os.path.join(glacier_dir, f"batch_{batch_num:04d}") os.makedirs(batch_dir, exist_ok=True) return batch_dir @utils.entity_task(log) def fixed_geometry_specific_mb_ts(gdir, y0=1975, y1=2025, outdir=None, glacier_dir=None): """Compute glacier-wide monthly specific MB from OGGM fixed geometry. Recalibrates MB parameters from geodetic MB (overwriting the spinup calibration) before computing the time series. Always writes an output file; writes NaN values with task_success=0 on failure. Parameters ---------- gdir : oggm.GlacierDirectory Glacier directory. y0 : int First year included. y1 : int Last year included. outdir : str If provided, writes one netcdf file per glacier directly into this dir. glacier_dir : str If provided, writes into a batch subfolder under this dir (batches of 100 glaciers). Takes precedence over outdir. Returns ------- xarray.Dataset Dataset with monthly specific MB and reference area. """ if glacier_dir is not None: outdir = _batch_outdir(glacier_dir, gdir.rgi_id) dates = pd.date_range(f"{y0:04d}-01-01", f"{y1:04d}-12-01", freq="MS") try: fls = gdir.read_pickle("model_flowlines") mb_model = massbalance.MultipleFlowlineMassBalance(gdir) float_years = np.array([utils.date_to_floatyear(d.year, d.month) for d in dates]) area_m2 = sum(np.sum(fl.bin_area_m2) for fl in fls) ice_to_mwe = cfg.SEC_IN_MONTH * cfg.PARAMS["ice_density"] / 1000.0 mb_mwe = np.array([ sum( np.sum(mb_model.get_monthly_mb(fl.surface_h, year=yr, fl_id=i) * fl.bin_area_m2) for i, fl in enumerate(fls) ) * ice_to_mwe / area_m2 for yr in float_years ]) out = xr.Dataset( data_vars={ "specific_mb_mwe": (("time",), mb_mwe), "reference_area_m2": area_m2, "rgi_area_km2": gdir.rgi_area_km2, "task_success": 1, }, coords={"time": dates}, ) except Exception as e: log.warning(f"({gdir.rgi_id}) failed: {e} — writing NaN fallback") out = xr.Dataset( data_vars={ "specific_mb_mwe": (("time",), np.full(len(dates), np.nan)), "reference_area_m2": gdir.rgi_area_km2 * 1e6, "rgi_area_km2": gdir.rgi_area_km2, "task_success": 0, }, coords={"time": dates}, ) out.attrs["rgi_id"] = gdir.rgi_id out["specific_mb_mwe"].attrs = _MB_ATTRS out["reference_area_m2"].attrs = _AREA_ATTRS if outdir is not None: out.to_netcdf(os.path.join(outdir, f"{gdir.rgi_id}.nc")) return out def compile_fixed_geometry_specific_mb_region(y0=1975, y1=2025, outdir=None, glacier_dir=None, filesuffix=""): """Aggregate glacier MB from per-glacier netcdf files into a regional series. RGI catalogue area is used consistently for both MB weighting and completion rate so numerator and denominator are on the same basis. Coverage is treated as binary per glacier (fixed geometry: a glacier either has a full series or all-NaN). Parameters ---------- outdir : str Directory where the regional netcdf is written. glacier_dir : str Directory containing per-glacier netcdf files (may be organized into batch subfolders). If omitted, files are searched directly in outdir. """ if outdir is None: raise ValueError("outdir is required") dates = pd.date_range(f"{y0:04d}-01-01", f"{y1:04d}-12-01", freq="MS") if glacier_dir is not None: glacier_files = sorted(glob.glob(os.path.join(glacier_dir, "**", "RGI*.nc"), recursive=True)) else: glacier_files = sorted(glob.glob(os.path.join(outdir, "RGI*.nc"))) if not glacier_files: search_root = glacier_dir or outdir raise RuntimeError(f"No glacier files found under {search_root}") n_time = len(dates) weighted_sum = np.zeros(n_time) total_area_m2 = 0.0 covered_area_m2 = 0.0 n_success = 0 for fp in glacier_files: with xr.open_dataset(fp) as ds: mb = ds["specific_mb_mwe"].values rgi_area_m2 = float(ds["rgi_area_km2"].values) * 1e6 total_area_m2 += rgi_area_m2 if np.all(np.isfinite(mb)): weighted_sum += mb * rgi_area_m2 covered_area_m2 += rgi_area_m2 n_success += 1 region_mb = weighted_sum / covered_area_m2 if covered_area_m2 > 0 else np.full(n_time, np.nan) completion_rate_pct = covered_area_m2 / total_area_m2 * 100.0 if total_area_m2 > 0 else np.nan # Region identifier from first RGI ID (e.g. "RGI60-01.00001" → "01") with xr.open_dataset(glacier_files[0]) as ds: first_rid = ds.attrs.get("rgi_id", "") region_id = first_rid.split("-")[1].split(".")[0] if "-" in first_rid else "" reg = xr.Dataset( data_vars={ "region_specific_mb_mwe": (("time",), region_mb), "total_region_area_m2": total_area_m2, "covered_area_m2": covered_area_m2, "completion_rate_pct": completion_rate_pct, "n_glaciers_success": n_success, "n_glaciers_total": len(glacier_files), }, coords={"time": dates}, ) reg["region_specific_mb_mwe"].attrs = { "name": "Regional area-weighted specific mass balance", "unit": "mwe", } reg["total_region_area_m2"].attrs = {"unit": "m2"} reg["covered_area_m2"].attrs = {"unit": "m2"} reg["completion_rate_pct"].attrs = {"unit": "%"} reg.attrs["region_id"] = region_id reg.to_netcdf(os.path.join(outdir, f"region_specific_mb_RGI{region_id}{filesuffix}.nc")) return reg