From fa6e757b4985cc26395548839876825bb936e55a Mon Sep 17 00:00:00 2001
From: Matthias Dusch
Date: Fri, 29 Apr 2022 15:43:35 +0200
Subject: [PATCH] add postprocessing functions

---
 rgitools/statistics/dem_post_quality.py        | 187 +++++++++++++++++
 .../statistics/dem_post_quality_per_region.py  | 134 ++++++++++++
 rgitools/statistics/dem_post_roughness.py      | 188 +++++++++++++++++
 rgitools/statistics/my_dem_funcs.py            | 198 ++++++++++++++++++
 rgitools/statistics/post_all_dems.py           | 177 ++++++++++++++++
 5 files changed, 884 insertions(+)
 create mode 100644 rgitools/statistics/dem_post_quality.py
 create mode 100644 rgitools/statistics/dem_post_quality_per_region.py
 create mode 100644 rgitools/statistics/dem_post_roughness.py
 create mode 100644 rgitools/statistics/my_dem_funcs.py
 create mode 100644 rgitools/statistics/post_all_dems.py

diff --git a/rgitools/statistics/dem_post_quality.py b/rgitools/statistics/dem_post_quality.py
new file mode 100644
index 0000000..7714a5a
--- /dev/null
+++ b/rgitools/statistics/dem_post_quality.py
@@ -0,0 +1,187 @@
+import os
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import matplotlib.pyplot as plt
+
+from oggm import utils, cfg
+
+from my_dem_funcs import dem_barplot
+
+
+wd = '/home/matthias/rgi/wd'
+cfg.initialize()
+cfg.PATHS['working_dir'] = wd
+
+# gdirs storage path
+path = '/home/matthias/rgi/dems_v1/default/RGI62/b_010/L1'
+sfx = '_v1'
+
+# dataframe for all areas
+dfall = pd.DataFrame()
+
+# dataframe for statistic
+cols = utils.DEM_SOURCES.copy()
+cols.sort()
+cols = ['RGI region', '# total'] + cols
+dfstat = pd.DataFrame([], columns=cols)
+
+# statistic on subregions
+dfsub = dfstat.copy()
+
+# rgi region file
+regions = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O1Regions.shp'))
+subregs = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O2Regions.shp'))
+
+fig, axs = plt.subplots(5, 4, figsize=[20, 22])
+
+for reg in np.arange(1, 20):
+    regstr = '{:02.0f}'.format(reg)
+
+    quality = pd.read_hdf(os.path.join(wd, 'rgi_{}.h5'.format(regstr + sfx)),
+                          'quality')
+
+    regname = regions.loc[regions['RGI_CODE'] == reg, 'FULL_NAME'].iloc[0]
+
+    dem_barplot(quality, fig.get_axes()[reg],
+                title='RGI region {}: {} ({:.0f} glaciers)'.
+                format(regstr, regname, len(quality)))
+
+    dfall = dfall.append(quality)
+
+    # FULL REGION
+    total = len(quality)
+    good = (quality > 0.9).sum()
+
+    # out = good / total
+    out = (good / total * 100).dropna().astype(int)
+    outstr = out.astype(str)
+    outstr.loc[out != 0] += '%'
+    outstr.loc[out == 0] = '--'
+    outstr['# total'] = total
+
+    dfstat.loc['{}: {}'.format(regstr, regname)] = outstr
+
+    # take care of subregions
+    regdf = gpd.read_file(utils.get_rgi_region_file(regstr))
+    sregs = np.unique(regdf.O2Region)
+
+    for sreg in sregs:
+        ids = regdf.loc[regdf.O2Region == sreg, 'RGIId'].values
+        subq = quality.loc[ids]
+
+        # SUBREGIONS
+        total = len(subq)
+        good = (subq > 0.9).sum()
+        out = (good / total * 100).dropna().astype(int)
+        outstr = out.astype(str)
+        outstr.loc[out != 0] += '%'
+        outstr.loc[out == 0] = '--'
+        outstr['# total'] = total
+
+        subregstr = '-{:02.0f}'.format(int(sreg))
+        subregname = subregs.loc[subregs.RGI_CODE == regstr + subregstr].\
+            FULL_NAME.iloc[0]
+
+        dfsub.loc['{}: {}'.format(regstr + subregstr, subregname)] = outstr
+
+# FULL RGI
+total = len(dfall)
+good = (dfall > 0.9).sum()
+out = (good / total * 100).dropna().astype(int)
+outstr = out.astype(str)
+outstr.loc[out != 0] += '%'
+outstr.loc[out == 0] = '--'
+outstr['# total'] = total
+
+dfstat.loc['All RGI regions'] = outstr
+
+dfsub.sort_index(inplace=True)
+
+# integer for number of glaciers
+dfstat['# total'] = dfstat['# total'].astype(int)
+dfstat['RGI region'] = dfstat.index
+dfsub['# total'] = dfsub['# total'].astype(int)
+dfsub['RGI region'] = dfsub.index
+
+# save one with and one without specific DEMs
+spc = ['REMA', 'RAMP', 'ARCTICDEM', 'GIMP', 'ALASKA']
+
+# general table
+tbl1 = dfstat.loc[:, ~dfstat.columns.isin(spc)].to_latex(
+    na_rep='--', index=False, longtable=True,
+    column_format='l' + 7*'r')
+# add title
+ti1 = ('\n\\caption{Summary of all RGI regions. The first two columns give '
+       'the RGI region and the total number of glaciers per region. The '
+       'consecutive columns specify the availability of a particular DEM '
+       'for an RGI region in percent of the total glaciers per region. '
+       'Values are not rounded but truncated, so 99\\% could be just one '
+       'missing glacier. '
+       'Only DEMs with less than 10\\% missing values are considered.}\\\\\n'
+       '\\label{tbl_general}\\\\\n')
+tbl1 = tbl1.replace('\n', ti1, 1)
+with open('/home/matthias/rgi/report/dem_rgi_general{}.tex'.format(sfx), 'w') as tf:
+    tf.write(tbl1)
+
+# specific table
+tbl2 = dfstat.iloc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18]].\
+    loc[:, dfstat.columns.isin(['RGI region', '# total'] + spc)].to_latex(
+        na_rep='--', index=False, longtable=True,
+        column_format='l' + 5*'r')
+# add title
+ti2 = ('\n\\caption{Same as Table \\ref{tbl_general} but for specific '
+       'regional DEMs only.}\\\\\n'
+       '\\label{tbl_specific}\\\\\n')
+tbl2 = tbl2.replace('\n', ti2, 1)
+
+with open('/home/matthias/rgi/report/dem_rgi_spec{}.tex'.format(sfx), 'w') as tf:
+    tf.write(tbl2)
+
+#
+# subregions
+# general table
+tbl3 = dfsub.loc[:, ~dfsub.columns.isin(spc)].to_latex(
+    na_rep='--', index=False, longtable=True,
+    column_format='l' + 7*'r')
+# add title
+ti3 = ('\n\\caption{Same as Table \\ref{tbl_general} but split into RGI '
+       'subregions.}\\\\\n'
+       '\\label{tbl_general_sub}\\\\\n')
+tbl3 = tbl3.replace('\n', ti3, 1)
+with open('/home/matthias/rgi/report/dem_subrgi_general.tex'.format(sfx), 'w') as tf:
+    tf.write(tbl3)
+
+# specific table
+tbl4 = dfsub.iloc[np.append(np.arange(0, 43), np.arange(69, 87))].\
+    loc[:, dfsub.columns.isin(['RGI region', '# total'] + spc)].to_latex(
+        na_rep='--', index=False, longtable=True,
+        column_format='l' + 5*'r')
+# add title
+ti4 = ('\n\\caption{Same as Table \\ref{tbl_specific} but split into '
+       'RGI subregions.}\\\\\n'
+       '\\label{tbl_specific_sub}\\\\\n')
+tbl4 = tbl4.replace('\n', ti4, 1)
+
+with open('/home/matthias/rgi/report/dem_subrgi_spec.tex'.format(sfx), 'w') as tf:
+    tf.write(tbl4)
+
+# write csv files for RST readthedocs
+dfstat.to_csv('/home/matthias/rgi/rgitools/docs/_static/tables/dem_rgi.csv'.format(sfx),
+              index=False)
+dfsub.to_csv('/home/matthias/rgi/rgitools/docs/_static/tables/dem_subrgi.csv'.format(sfx),
+             index=False)
+
+# make and save plots
+dem_barplot(dfall, axs[0, 0],
+            title='All RGI regions ({:.0f} glaciers)'.format(len(dfall)))
+
+fig.tight_layout()
+fig.savefig('/home/matthias/rgi/rgitools/docs/_static/images/' +
+            'dem_all_regions.png'.format(sfx))
+fig.savefig('/home/matthias/rgi/report/dem_all_regions.pdf'.format(sfx))
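
The availability percentages written into dfstat and dfsub above are built from the per-region quality tables with the same few lines in all three dem_post_* scripts. A minimal, self-contained sketch of that transform, using made-up values and hypothetical RGI ids (illustration only, not part of the patch):

    import numpy as np
    import pandas as pd

    # toy per-glacier quality table: fraction of valid DEM pixels per source
    quality = pd.DataFrame({'SRTM': [0.99, 0.95, np.nan],
                            'DEM3': [1.00, 0.20, 0.97]},
                           index=['RGI60-11.00001', 'RGI60-11.00002',
                                  'RGI60-11.00003'])

    total = len(quality)                 # 3 glaciers
    good = (quality > 0.9).sum()         # DEMs with > 90% valid pixels
    out = (good / total * 100).dropna().astype(int)
    outstr = out.astype(str)
    outstr.loc[out != 0] += '%'          # e.g. SRTM -> '66%' (2 of 3, truncated)
    outstr.loc[out == 0] = '--'          # sources with no usable DEM at all
    outstr['# total'] = total            # becomes one row of dfstat

A source only ends up as '--' when not a single glacier of the region has a DEM from that source with more than 90% valid pixels.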
diff --git a/rgitools/statistics/dem_post_quality_per_region.py b/rgitools/statistics/dem_post_quality_per_region.py
new file mode 100644
index 0000000..2461cdc
--- /dev/null
+++ b/rgitools/statistics/dem_post_quality_per_region.py
@@ -0,0 +1,134 @@
+import os
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import matplotlib.pyplot as plt
+
+from oggm import utils, cfg
+
+from my_dem_funcs import dem_barplot
+
+
+wd = '/home/matthias/rgi/wd'
+cfg.initialize()
+cfg.PATHS['working_dir'] = wd
+
+# gdirs storage path
+path = '/home/matthias/rgi/dems_v1/default/RGI62/b_010/L1'
+sfx = '_v1'
+
+# dataframe for all areas
+dfall = pd.DataFrame()
+
+# dataframe for statistic
+cols = utils.DEM_SOURCES.copy()
+cols.sort()
+cols = ['RGI region', '# total'] + cols
+dfstat = pd.DataFrame([], columns=cols)
+
+# statistic on subregions
+dfsub = dfstat.copy()
+
+# rgi region file
+regions = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O1Regions.shp'))
+subregs = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O2Regions.shp'))
+
+fig0, ax0 = plt.subplots(1, 1, figsize=[10, 10])
+
+for reg in np.arange(1, 20):
+    fig, ax = plt.subplots(1, 1, figsize=[10, 10])
+    regstr = '{:02.0f}'.format(reg)
+
+    quality = pd.read_hdf(os.path.join(wd, 'rgi_{}.h5'.format(regstr + sfx)),
+                          'quality')
+
+    regname = regions.loc[regions['RGI_CODE'] == reg, 'FULL_NAME'].iloc[0]
+
+    dem_barplot(quality, ax,
+                title='RGI region {}: {} ({:.0f} glaciers)'.
+                format(regstr, regname, len(quality)))
+    fig.tight_layout()
+    fig.savefig('/home/matthias/rgi/out/images/' +
+                'barplot_rgi{}.png'.format(regstr + sfx))
+
+    dfall = dfall.append(quality)
+
+    # FULL REGION
+    total = len(quality)
+    good = (quality > 0.9).sum()
+
+    # out = good / total
+    out = (good / total * 100).dropna().astype(int)
+    outstr = out.astype(str)
+    outstr.loc[out != 0] += '%'
+    outstr.loc[out == 0] = '--'
+    outstr['# total'] = total
+
+    dfstat.loc[':ref:`{0}: {1}`'.format(regstr, regname)] = outstr
+
+    # take care of subregions
+    regdf = gpd.read_file(utils.get_rgi_region_file(regstr))
+    sregs = np.unique(regdf.O2Region)
+
+    for sreg in sregs:
+        ids = regdf.loc[regdf.O2Region == sreg, 'RGIId'].values
+        subq = quality.loc[ids]
+
+        # SUBREGIONS
+        total = len(subq)
+        good = (subq > 0.9).sum()
+        out = (good / total * 100).dropna().astype(int)
+        outstr = out.astype(str)
+        outstr.loc[out != 0] += '%'
+        outstr.loc[out == 0] = '--'
+        outstr['# total'] = total
+
+        subregstr = '-{:02.0f}'.format(int(sreg))
+        subregname = subregs.loc[subregs.RGI_CODE == regstr + subregstr].\
+            FULL_NAME.iloc[0]
+
+        dfsub.loc['{}: {}'.format(regstr + subregstr, subregname)] = outstr
+
+# FULL RGI
+total = len(dfall)
+good = (dfall > 0.9).sum()
+out = (good / total * 100).dropna().astype(int)
+outstr = out.astype(str)
+outstr.loc[out != 0] += '%'
+outstr.loc[out == 0] = '--'
+outstr['# total'] = total
+
+dfstat.loc['All RGI regions'] = outstr
+
+dfsub.sort_index(inplace=True)
+
+# integer for number of glaciers
+dfstat['# total'] = dfstat['# total'].astype(int)
+dfstat['RGI region'] = dfstat.index
+dfsub['# total'] = dfsub['# total'].astype(int)
+dfsub['RGI region'] = dfsub.index
+
+
+# write csv files for RST readthedocs
+dfstat.to_csv('/home/matthias/rgi/out/tables/dem_allrgi{}.csv'.format(sfx),
+              index=False)
+
+# write subregion tables:
+for reg in np.arange(1, 20):
+    regstr = '{:02.0f}'.format(reg)
+    sub = dfsub.loc[dfsub.index.str.contains('{}-'.format(regstr))]
+    sub.to_csv('/home/matthias/rgi/out/tables/dem_rgi{}.csv'.format(regstr + sfx),
+               index=False)
+
+# make and save plots
+dem_barplot(dfall, ax0,
+            title='All RGI regions ({:.0f} glaciers)'.format(len(dfall)))
+
+fig0.tight_layout()
+fig0.savefig('/home/matthias/rgi/out/images/' +
+             'barplot_allregions{}.png'.format(sfx))
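
dem_barplot, imported from my_dem_funcs.py further down in this patch, expects exactly this kind of glacier-by-source quality table and plots, per DEM source, the number of glaciers with more than 90% valid pixels. A rough usage sketch with random toy data (column names are arbitrary placeholders, not a statement about real coverage):

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    from my_dem_funcs import dem_barplot

    # made-up quality table: rows are glaciers, columns are DEM sources
    quality = pd.DataFrame(np.random.uniform(0.5, 1.0, size=(100, 3)),
                           columns=['SRTM', 'DEM3', 'TANDEM'])

    fig, ax = plt.subplots(1, 1, figsize=[10, 10])
    dem_barplot(quality, ax, title='toy region (100 glaciers)')
    fig.tight_layout()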
diff --git a/rgitools/statistics/dem_post_roughness.py b/rgitools/statistics/dem_post_roughness.py
new file mode 100644
index 0000000..579bc39
--- /dev/null
+++ b/rgitools/statistics/dem_post_roughness.py
@@ -0,0 +1,188 @@
+import os
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import matplotlib.pyplot as plt
+
+from oggm import utils, cfg
+
+from my_dem_funcs import dem_barplot
+
+
+wd = '/home/matthias/rgi/wd'
+cfg.initialize()
+cfg.PATHS['working_dir'] = wd
+
+# gdirs storage path
+path = '/home/matthias/rgi/dems_v1/default/RGI62/b_010/L1'
+# do I need this?
+sfx = '_v1'
+
+# dataframe for all areas
+dfall = pd.DataFrame()
+
+# dataframe for statistic
+cols = utils.DEM_SOURCES.copy()
+cols.sort()
+cols = ['RGI region', '# total'] + cols
+dfstat = pd.DataFrame([], columns=cols)
+
+# statistic on subregions
+dfsub = dfstat.copy()
+
+# rgi region file
+regions = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O1Regions.shp'))
+subregs = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O2Regions.shp'))
+
+fig, axs = plt.subplots(5, 4, figsize=[20, 22])
+
+for reg in np.arange(1, 20):
+    regstr = '{:02.0f}'.format(reg)
+
+    quality = pd.read_hdf(os.path.join(wd, 'rgi_{}.h5'.format(regstr)),
+                          'quality')
+
+    regname = regions.loc[regions['RGI_CODE'] == reg, 'FULL_NAME'].iloc[0]
+
+    dem_barplot(quality, fig.get_axes()[reg],
+                title='RGI region {}: {} ({:.0f} glaciers)'.
+                format(regstr, regname, len(quality)))
+
+    dfall = dfall.append(quality)
+
+    # FULL REGION
+    total = len(quality)
+    good = (quality > 0.9).sum()
+
+    # out = good / total
+    out = (good / total * 100).dropna().astype(int)
+    outstr = out.astype(str)
+    outstr.loc[out != 0] += '%'
+    outstr.loc[out == 0] = '--'
+    outstr['# total'] = total
+
+    dfstat.loc['{}: {}'.format(regstr, regname)] = outstr
+
+    # take care of subregions
+    regdf = gpd.read_file(utils.get_rgi_region_file(regstr))
+    sregs = np.unique(regdf.O2Region)
+
+    for sreg in sregs:
+        ids = regdf.loc[regdf.O2Region == sreg, 'RGIId'].values
+        subq = quality.loc[ids]
+
+        # SUBREGIONS
+        total = len(subq)
+        good = (subq > 0.9).sum()
+        out = (good / total * 100).dropna().astype(int)
+        outstr = out.astype(str)
+        outstr.loc[out != 0] += '%'
+        outstr.loc[out == 0] = '--'
+        outstr['# total'] = total
+
+        subregstr = '-{:02.0f}'.format(int(sreg))
+        subregname = subregs.loc[subregs.RGI_CODE == regstr + subregstr].\
+            FULL_NAME.iloc[0]
+
+        dfsub.loc['{}: {}'.format(regstr + subregstr, subregname)] = outstr
+
+# FULL RGI
+total = len(dfall)
+good = (dfall > 0.9).sum()
+out = (good / total * 100).dropna().astype(int)
+outstr = out.astype(str)
+outstr.loc[out != 0] += '%'
+outstr.loc[out == 0] = '--'
+outstr['# total'] = total
+
+dfstat.loc['All RGI regions'] = outstr
+
+dfsub.sort_index(inplace=True)
+
+# integer for number of glaciers
+dfstat['# total'] = dfstat['# total'].astype(int)
+dfstat['RGI region'] = dfstat.index
+dfsub['# total'] = dfsub['# total'].astype(int)
+dfsub['RGI region'] = dfsub.index
+
+# save one with and one without specific DEMs
+spc = ['REMA', 'RAMP', 'ARCTICDEM', 'GIMP']
+
+# general table
+tbl1 = dfstat.loc[:, ~dfstat.columns.isin(spc)].to_latex(
+    na_rep='--', index=False, longtable=True,
+    column_format='l' + 7*'r')
+# add title
+ti1 = ('\n\\caption{Summary of all RGI regions. The first two columns give '
+       'the RGI region and the total number of glaciers per region. The '
+       'consecutive columns specify the availability of a particular DEM '
+       'for an RGI region in percent of the total glaciers per region. '
+       'Values are not rounded but truncated, so 99\\% could be just one '
+       'missing glacier. '
+       'Only DEMs with less than 10\\% missing values are considered.}\\\\\n'
+       '\\label{tbl_general}\\\\\n')
+tbl1 = tbl1.replace('\n', ti1, 1)
+with open('/home/matthias/rgi/report/dem_rgi_general.tex', 'w') as tf:
+    tf.write(tbl1)
+
+# specific table
+tbl2 = dfstat.iloc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18]].\
+    loc[:, dfstat.columns.isin(['RGI region', '# total'] + spc)].to_latex(
+        na_rep='--', index=False, longtable=True,
+        column_format='l' + 5*'r')
+# add title
+ti2 = ('\n\\caption{Same as Table \\ref{tbl_general} but for specific '
+       'regional DEMs only.}\\\\\n'
+       '\\label{tbl_specific}\\\\\n')
+tbl2 = tbl2.replace('\n', ti2, 1)
+
+with open('/home/matthias/rgi/report/dem_rgi_spec.tex', 'w') as tf:
+    tf.write(tbl2)
+
+#
+# subregions
+# general table
+tbl3 = dfsub.loc[:, ~dfsub.columns.isin(spc)].to_latex(
+    na_rep='--', index=False, longtable=True,
+    column_format='l' + 7*'r')
+# add title
+ti3 = ('\n\\caption{Same as Table \\ref{tbl_general} but split into RGI '
+       'subregions.}\\\\\n'
+       '\\label{tbl_general_sub}\\\\\n')
+tbl3 = tbl3.replace('\n', ti3, 1)
+with open('/home/matthias/rgi/report/dem_subrgi_general.tex', 'w') as tf:
+    tf.write(tbl3)
+
+# specific table
+tbl4 = dfsub.iloc[np.append(np.arange(0, 43), np.arange(69, 87))].\
+    loc[:, dfsub.columns.isin(['RGI region', '# total'] + spc)].to_latex(
+        na_rep='--', index=False, longtable=True,
+        column_format='l' + 5*'r')
+# add title
+ti4 = ('\n\\caption{Same as Table \\ref{tbl_specific} but split into '
+       'RGI subregions.}\\\\\n'
+       '\\label{tbl_specific_sub}\\\\\n')
+tbl4 = tbl4.replace('\n', ti4, 1)
+
+with open('/home/matthias/rgi/report/dem_subrgi_spec.tex', 'w') as tf:
+    tf.write(tbl4)
+
+# write csv files for RST readthedocs
+dfstat.to_csv('/home/matthias/rgi/rgitools/docs/_static/tables/dem_rgi.csv',
+              index=False)
+dfsub.to_csv('/home/matthias/rgi/rgitools/docs/_static/tables/dem_subrgi.csv',
+             index=False)
+
+# make and save plots
+dem_barplot(dfall, axs[0, 0],
+            title='All RGI regions ({:.0f} glaciers)'.format(len(dfall)))
+
+fig.tight_layout()
+fig.savefig('/home/matthias/rgi/rgitools/docs/_static/images/' +
+            'dem_all_regions.png')
+fig.savefig('/home/matthias/rgi/report/dem_all_regions.pdf')
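
The roughness metric tabulated by this script is computed in my_dem_funcs.dem_quality below as the standard deviation of the slope angle derived from the DEM gradient. A minimal numeric illustration, assuming a synthetic DEM and a 100 m grid spacing (illustration only, not part of the patch):

    import numpy as np

    dx = 100.0                                          # grid spacing in metres
    dem = np.outer(np.arange(50), np.ones(50)) * 5.0    # plane rising 5 m per cell

    sy, sx = np.gradient(dem, dx)                       # elevation change per metre
    slope = np.arctan(np.sqrt(sy**2 + sx**2))           # slope angle in radians
    roughness = np.nanstd(slope)                        # 0.0 for a perfectly planar DEM

A perfectly planar DEM has constant slope and therefore zero roughness; voids and artefacts widen the spread of slope angles and increase the value.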
diff --git a/rgitools/statistics/my_dem_funcs.py b/rgitools/statistics/my_dem_funcs.py
new file mode 100644
index 0000000..20a8bba
--- /dev/null
+++ b/rgitools/statistics/my_dem_funcs.py
@@ -0,0 +1,198 @@
+import os
+import tarfile
+import logging
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import rasterio
+
+from oggm import utils, GlacierDirectory, entity_task
+
+# Module logger
+log = logging.getLogger(__name__)
+
+
+def dem_quality(gdir, demfile):
+    """Quality check based on oggm.simple_glacier_masks.
+
+    Parameters
+    ----------
+    gdir : :py:class:`oggm.GlacierDirectory`
+        the glacier in question
+    demfile : str
+        path to a specific DEM tif-file
+
+    Returns
+    -------
+    nanpercent : float
+        fraction of valid (non-NaN) grid points among all grid points
+    nanpercent_glc : float
+        fraction of valid (non-NaN) grid points among all glaciated grid points
+    meanhgt : float
+        mean elevation of all grid points
+    meanhgt_glc : float
+        mean elevation of glaciated grid points
+    roughness : float
+        standard deviation of the 2d slope of all grid points
+    roughness_glc : float
+        standard deviation of the 2d slope of all glaciated grid points
+    """
+
+    # open tif-file:
+    with rasterio.open(demfile, 'r', driver='GTiff') as ds:
+        dem = ds.read(1).astype(rasterio.float32)
+        nx = ds.width
+        ny = ds.height
+        dx = ds.transform[0]
+        # assert some basics
+        assert nx == gdir.grid.nx
+        assert ny == gdir.grid.ny
+        assert dx == gdir.grid.dx
+
+    # open glacier mask
+    with rasterio.open(gdir.get_filepath('glacier_mask'),
+                       'r', driver='GTiff') as ds:
+        mask = ds.read(1).astype(rasterio.int16)
+
+    # set nodata values to NaN
+    min_z = -999.
+    dem[dem <= min_z] = np.nan
+    isfinite = np.isfinite(dem)
+    isfinite_glc = np.isfinite(dem[np.where(mask)])
+
+    # calculate fraction of valid (non-NaN) points in the full and glaciated area
+    nanpercent = np.sum(isfinite) / (nx * ny)
+    nanpercent_glc = np.sum(isfinite_glc) / mask.sum()
+
+    # calculate mean elevation of the full and glaciated area
+    meanhgt = np.nanmean(dem)
+    meanhgt_glc = np.nanmean(dem[np.where(mask)])
+
+    # calculate roughness of the full area
+    sy, sx = np.gradient(dem, dx)
+    slope = np.arctan(np.sqrt(sy**2 + sx**2))
+    roughness = np.nanstd(slope)
+
+    # calculate roughness of the glaciated area
+    dem_glc = np.where(mask, dem, np.nan)
+    sy, sx = np.gradient(dem_glc, dx)
+    slope = np.arctan(np.sqrt(sy**2 + sx**2))
+    roughness_glc = np.nanstd(slope)
+
+    return (nanpercent, nanpercent_glc, meanhgt, meanhgt_glc, roughness,
+            roughness_glc)
+
+
+@entity_task(log)
+def get_dem_area(gdir):
+    """Read the glacier_mask.tif and calculate the glacier area from it.
+
+    Parameters
+    ----------
+    gdir : GlacierDirectory
+        the glacier in question
+
+    Returns
+    -------
+    float
+        glacier area in km2
+    """
+
+    # read dem mask
+    with rasterio.open(gdir.get_filepath('glacier_mask'),
+                       'r', driver='GTiff') as ds:
+        profile = ds.profile
+        data = ds.read(1).astype(profile['dtype'])
+
+    # calculate dem_mask area in km2 (can be tested against the RGI area)
+    mask_area_km2 = data.sum() * gdir.grid.dx**2 * 1e-6
+
+    return mask_area_km2
+
+
+def gdirs_from_tar_files(path, rgi_region=None):
+
+    gdirs = []
+    for regdir in os.listdir(path):
+
+        # only do required rgi_region
+        if (rgi_region is not None) and (regdir[-2:] != rgi_region):
+            continue
+
+        rdpath = os.path.join(path, regdir)
+
+        for file in os.listdir(rdpath):
+
+            with tarfile.open(os.path.join(rdpath, file), 'r') as tfile:
+                for member in tfile:
+                    if member.isdir():
+                        continue
+                    tar_base = os.path.join(rdpath, member.path)
+                    gdirs.append(GlacierDirectory(member.name[-21:-7],
+                                                  from_tar=tar_base))
+
+    return gdirs
+
+
+@entity_task(log)
+def check_all_dems_per_gdir(gdir):
+    """Go through all available DEMs and create some metrics.
+
+    DEMs must be in GDir subfolders.
+
+    :param gdir: the glacier in question
+    :return: pd.DataFrame with one row per metric and one column per
+        DEM source
+    """
+    # dataframe for results
+    df = pd.DataFrame([], index=[gdir.rgi_id]*6,  # np.arange(3),
+                      columns=['metric'] + utils.DEM_SOURCES)
+    df.iloc[0]['metric'] = 'quality'
+    df.iloc[1]['metric'] = 'quality_glc'
+    df.iloc[2]['metric'] = 'meanhgt'
+    df.iloc[3]['metric'] = 'meanhgt_glc'
+    df.iloc[4]['metric'] = 'roughness'
+    df.iloc[5]['metric'] = 'roughness_glc'
+
+    logfile = (os.path.join(gdir.dir, 'log.txt'))
+
+    # read logfile, specify names because log entries have different sizes
+    lfdf = pd.read_csv(logfile, delimiter=';', header=None,
+                       skipinitialspace=True, names=[0, 1, 2, 3])
+
+    # loop over dems and save existing ones to test
+    dem2test = []
+    for _, line in lfdf.iterrows():
+        if ('DEM SOURCE' in line[1]) and ('SUCCESS' in line[2]):
+            rgi = line[1].split(',')[0]
+            dem = line[1].split(',')[2]
+            dem2test.append(dem)
+
+    # loop over DEMs
+    for dem in dem2test:
+        demfile = os.path.join(gdir.dir, dem) + '/dem.tif'
+        qual, qualglc, hgt, hgt_glc, rgh, rgh_glc = dem_quality(gdir, demfile)
+        df.loc[df.metric == 'quality', dem] = qual
+        df.loc[df.metric == 'quality_glc', dem] = qualglc
+        df.loc[df.metric == 'meanhgt', dem] = hgt
+        df.loc[df.metric == 'meanhgt_glc', dem] = hgt_glc
+        df.loc[df.metric == 'roughness', dem] = rgh
+        df.loc[df.metric == 'roughness_glc', dem] = rgh_glc
+
+    return df
+
+
+def dem_barplot(df, ax, title=''):
+
+    # dfexist = (df > 0).sum().sort_index()
+    dfgood = (df > 0.9).sum().sort_index()
+
+    # ax.bar(dfexist.index, dfexist.values, width=-0.4, align='edge',
+    #        label='DEM exists')
+    # ax.bar(dfgood.index, dfgood.values, width=0.4, align='edge', color='C2',
+    #        label='DEM with >= 90% valid pixels')
+    ax.bar(dfgood.index, dfgood.values, width=0.8, align='center', color='C0',
+           label='DEM with >= 90% valid pixels')
+
+    ax.set_ylabel('# number of glaciers')
+    # ax.set_ylim([0, np.ceil(len(df)/50)*50])
+    ax.set_ylim([0, len(df)])
+    ax.set_xticklabels(dfgood.index, rotation=75)
+    ax.set_title(title)
+    # ax.legend(loc=3)
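
A possible single-glacier usage sketch for the module above; the working directory path and the RGI id are placeholders, and it assumes a pre-processed glacier directory that already contains glacier_mask.tif, per-source DEM subfolders and the log.txt parsed by check_all_dems_per_gdir:

    import os

    from oggm import cfg, GlacierDirectory
    from my_dem_funcs import dem_quality, check_all_dems_per_gdir

    cfg.initialize()
    cfg.PATHS['working_dir'] = '/path/to/wd'      # placeholder

    gdir = GlacierDirectory('RGI60-11.00897')     # placeholder RGI id

    # one DEM: returns the six metrics that check_all_dems_per_gdir stores
    demfile = os.path.join(gdir.dir, 'SRTM', 'dem.tif')
    nanp, nanp_glc, hgt, hgt_glc, rgh, rgh_glc = dem_quality(gdir, demfile)

    # all DEMs listed as SUCCESS in the glacier's log.txt,
    # called directly here for a single glacier
    df = check_all_dems_per_gdir(gdir)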
diff --git a/rgitools/statistics/post_all_dems.py b/rgitools/statistics/post_all_dems.py
new file mode 100644
index 0000000..0482dc6
--- /dev/null
+++ b/rgitools/statistics/post_all_dems.py
@@ -0,0 +1,177 @@
+import os
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import matplotlib.pyplot as plt
+
+from oggm.cli.prepro_levels import run_prepro_levels
+from oggm import utils, cfg, GlacierDirectory
+from oggm.workflow import execute_entity_task
+
+from my_dem_funcs import (check_all_dems_per_gdir, gdirs_from_tar_files,
+                          get_dem_area, dem_barplot)
+
+
+def parse_logfile(path, df=None):
+
+    # df passed or new one?
+    if df is None:
+        df = pd.DataFrame([], columns=utils.DEM_SOURCES)
+
+    for lf in os.listdir(path):
+        # get rgi id from file name
+        if '.ERROR' in lf:
+            rgi = lf.split('.ERROR')[0]
+        else:
+            raise RuntimeError
+
+        # read logfile
+        lfdf = pd.read_csv(os.path.join(path, lf), delimiter=';', header=None,
+                           skipinitialspace=True)
+
+        # set all DEMs to True
+        df.loc[rgi, :] = True
+
+        # loop over dems and set erroneous ones to False
+        for _, dem in lfdf.iterrows():
+            print(dem[3])
+            if dem[2] == 'InvalidDEMError':
+                df.loc[rgi, dem[3].split()[1]] = False
+            if 'HTTPSConnect' in dem[3]:
+                print(rgi)
+
+    return df
+
+
+def parse_logfiles(path):
+
+    df = pd.DataFrame([], columns=utils.DEM_SOURCES)
+
+    for root, dirs, files in os.walk(path):
+        if 'log.txt' in files:
+            logfile = (os.path.join(root, 'log.txt'))
+
+            # read logfile
+            lfdf = pd.read_csv(logfile, delimiter=';', header=None,
+                               skipinitialspace=True)
+
+            # loop over dems and set erroneous ones to False
+            for _, line in lfdf.iterrows():
+                if 'DEM SOURCE' in line[1]:
+                    rgi = line[1].split(',')[0]
+                    dem = line[1].split(',')[2]
+                    df.loc[rgi, dem] = True
+                # elif 'InvalidDEMError' in line[2]:
+                #     rgi = line[2].split()[-1]
+                #     assert rgi[:3] == 'RGI'
+                #     dem = line[2].split()[2]
+                #     assert dem in df.columns
+                #     df.loc[rgi, dem] = 0
+
+    return df
+
+
+def hgt_barplot(df1, df2, title='', savepath=None):
+
+    fig, ax = plt.subplots(figsize=[10, 7])
+
+    ax.bar(df1.index, df1.values, width=-0.4, align='edge',
+           label='glaciated area (all DEMs >0.9 quality)', color='C0')
+    ax.bar(df2.index, df2.values, width=0.4, align='edge', color='C1',
+           label='full area (all DEMs >0.9 quality)')
+
+    ax.set_ylabel('elevation [m]')
+    # ax.set_ylim([0, np.ceil(len(df)/5)*5])
+
+    ax.set_title(title)
+    ax.legend(loc=3)
+
+    fig.tight_layout()
+    if savepath is not None:
+        fig.savefig(savepath)
+
+
+wd = '/home/users/mdusch/rgidems/wd'
+# wd = os.environ.get('WORKDIR')
+post = '/home/users/mdusch/rgidems/post'
+
+cfg.initialize()
+cfg.PATHS['working_dir'] = wd
+
+# path = '/home/users/mdusch/rgidems/out/dems_v1/default/RGI62/b_010/L1'
+path = '/home/users/mdusch/rgidems/out/dems_v1/highres/RGI62/b_020/L1'
+# sfx = '_v1'
+sfx = '_v1_highres'
+
+
+dfarea = pd.DataFrame([], index=np.arange(1, 20), columns=['demarea'])
+
+for reg in np.arange(1, 20):
+    regstr = '{:02.0f}'.format(reg)
+
+    try:
+        rgidf = gpd.read_file(utils.get_rgi_region_file(regstr, version='6'))
+        gdirs = [GlacierDirectory(rgiid) for rgiid in rgidf.RGIId]
+        print('from gdir')
+    except OSError:
+        gdirs = gdirs_from_tar_files(path, rgi_region=regstr)
+        print('from tar')
+
+    dfreg = execute_entity_task(check_all_dems_per_gdir, gdirs)
+    dfreg = pd.concat(dfreg)
+
+    quality = dfreg.loc[dfreg['metric'] == 'quality',
+                        dfreg.columns != 'metric']
+
+    hgt = dfreg.loc[dfreg['metric'] == 'meanhgt',
+                    dfreg.columns != 'metric']
+
+    qualityglc = dfreg.loc[dfreg['metric'] == 'quality_glc',
+                           dfreg.columns != 'metric']
+    hgtglc = dfreg.loc[dfreg['metric'] == 'meanhgt_glc',
+                       dfreg.columns != 'metric']
+
+    rgh = dfreg.loc[dfreg['metric'] == 'roughness',
+                    dfreg.columns != 'metric']
+    rghglc = dfreg.loc[dfreg['metric'] == 'roughness_glc',
+                       dfreg.columns != 'metric']
+
+    hgt_good = (hgt[(quality > 0.9)].dropna(axis=1, how='all').
+                dropna(axis=0, how='any'))
+    hgtglc_good = (hgtglc[(qualityglc > 0.9)].dropna(axis=1, how='all').
+                   dropna(axis=0, how='any'))
+
+    hgt_barplot(hgt_good.mean(), hgtglc_good.mean(),
+                title=('Mean height of RGI region {} (#{:.0f} full area, ' +
+                       '#{:.0f} glaciated area)').format(regstr,
+                                                         len(hgt_good),
+                                                         len(hgtglc_good)),
+                savepath=os.path.join(post, 'rgi_hgt_%s.png' % (regstr + sfx)))
+
+    rgi_area = np.sum([gd.rgi_area_km2 for gd in gdirs])
+
+    dem_area = np.sum(execute_entity_task(get_dem_area, gdirs))
+
+    dfarea.loc[reg, 'demarea'] = dem_area
+
+    quality.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+                   mode='a', key='quality')
+
+    qualityglc.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+                      mode='a', key='quality_glc')
+
+    hgt.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+               mode='a', key='mhgt')
+
+    hgtglc.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+                  mode='a', key='mhgt_glc')
+
+    rgh.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+               mode='a', key='roughness')
+
+    rghglc.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+                  mode='a', key='roughness_glc')
+
+
+dfarea.to_hdf(os.path.join(post, 'dem_area{}.h5'.format(sfx)), key='demarea')
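
For reference, the per-region HDF5 stores written above are what the dem_post_*.py scripts read back; a short sketch of inspecting one of them (the file name is assembled from post, regstr and sfx exactly as in the code above):

    import pandas as pd

    # per-region store written by post_all_dems.py; available keys are
    # quality, quality_glc, mhgt, mhgt_glc, roughness and roughness_glc
    quality = pd.read_hdf('/home/users/mdusch/rgidems/post/rgi_11_v1_highres.h5',
                          'quality')

    # fraction of glaciers per DEM source with more than 90% valid pixels
    print((quality > 0.9).sum() / len(quality))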