From fa6e757b4985cc26395548839876825bb936e55a Mon Sep 17 00:00:00 2001
From: Matthias Dusch
Date: Fri, 29 Apr 2022 15:43:35 +0200
Subject: [PATCH] add postprocessing functions

---
 rgitools/statistics/dem_post_quality.py        | 187 +++++++++++++++++
 .../statistics/dem_post_quality_per_region.py  | 134 ++++++++++++
 rgitools/statistics/dem_post_roughness.py      | 188 +++++++++++++++++
 rgitools/statistics/my_dem_funcs.py            | 198 ++++++++++++++++++
 rgitools/statistics/post_all_dems.py           | 177 ++++++++++++++++
 5 files changed, 884 insertions(+)
 create mode 100644 rgitools/statistics/dem_post_quality.py
 create mode 100644 rgitools/statistics/dem_post_quality_per_region.py
 create mode 100644 rgitools/statistics/dem_post_roughness.py
 create mode 100644 rgitools/statistics/my_dem_funcs.py
 create mode 100644 rgitools/statistics/post_all_dems.py

diff --git a/rgitools/statistics/dem_post_quality.py b/rgitools/statistics/dem_post_quality.py
new file mode 100644
index 0000000..7714a5a
--- /dev/null
+++ b/rgitools/statistics/dem_post_quality.py
@@ -0,0 +1,187 @@
+import os
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import matplotlib.pyplot as plt
+
+from oggm import utils, cfg
+
+from my_dem_funcs import dem_barplot
+
+
+wd = '/home/matthias/rgi/wd'
+cfg.initialize()
+cfg.PATHS['working_dir'] = wd
+
+# gdirs storage path
+path = '/home/matthias/rgi/dems_v1/default/RGI62/b_010/L1'
+sfx = '_v1'
+
+# dataframe for all areas
+dfall = pd.DataFrame()
+
+# dataframe for statistic
+cols = utils.DEM_SOURCES.copy()
+cols.sort()
+cols = ['RGI region', '# total'] + cols
+dfstat = pd.DataFrame([], columns=cols)
+
+# statistic on subregions
+dfsub = dfstat.copy()
+
+# rgi region file
+regions = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O1Regions.shp'))
+subregs = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O2Regions.shp'))
+
+fig, axs = plt.subplots(5, 4, figsize=[20, 22])
+
+for reg in np.arange(1, 20):
+    regstr = '{:02.0f}'.format(reg)
+
+    quality = pd.read_hdf(os.path.join(wd, 'rgi_{}.h5'.format(regstr + sfx)),
+                          'quality')
+
+    regname = regions.loc[regions['RGI_CODE'] == reg, 'FULL_NAME'].iloc[0]
+
+    dem_barplot(quality, fig.get_axes()[reg],
+                title='RGI region {}: {} ({:.0f} glaciers)'.
+                format(regstr, regname, len(quality)))
+
+    dfall = dfall.append(quality)
+
+    # FULL REGION
+    total = len(quality)
+    good = (quality > 0.9).sum()
+
+    # out = good / total
+    out = (good / total * 100).dropna().astype(int)
+    outstr = out.astype(str)
+    outstr.loc[out != 0] += '%'
+    outstr.loc[out == 0] = '--'
+    outstr['# total'] = total
+
+    dfstat.loc['{}: {}'.format(regstr, regname)] = outstr
+
+    # take care of subregions
+    regdf = gpd.read_file(utils.get_rgi_region_file(regstr))
+    sregs = np.unique(regdf.O2Region)
+
+    for sreg in sregs:
+        ids = regdf.loc[regdf.O2Region == sreg, 'RGIId'].values
+        subq = quality.loc[ids]
+
+        # SUBREGIONS
+        total = len(subq)
+        good = (subq > 0.9).sum()
+        out = (good / total * 100).dropna().astype(int)
+        outstr = out.astype(str)
+        outstr.loc[out != 0] += '%'
+        outstr.loc[out == 0] = '--'
+        outstr['# total'] = total
+
+        subregstr = '-{:02.0f}'.format(int(sreg))
+        subregname = subregs.loc[subregs.RGI_CODE == regstr + subregstr].\
+            FULL_NAME.iloc[0]
+
+        dfsub.loc['{}: {}'.format(regstr + subregstr, subregname)] = outstr
+
+# FULL RGI
+total = len(dfall)
+good = (dfall > 0.9).sum()
+out = (good / total * 100).dropna().astype(int)
+outstr = out.astype(str)
+outstr.loc[out != 0] += '%'
+outstr.loc[out == 0] = '--'
+outstr['# total'] = total
+
+dfstat.loc['All RGI regions'] = outstr
+
+dfsub.sort_index(inplace=True)
+
+# integer for number of glaciers
+dfstat['# total'] = dfstat['# total'].astype(int)
+dfstat['RGI region'] = dfstat.index
+dfsub['# total'] = dfsub['# total'].astype(int)
+dfsub['RGI region'] = dfsub.index
+
+# save one with and one without specific DEMs
+spc = ['REMA', 'RAMP', 'ARCTICDEM', 'GIMP', 'ALASKA']
+
+# general table
+tbl1 = dfstat.loc[:, ~dfstat.columns.isin(spc)].to_latex(
+    na_rep='--', index=False, longtable=True,
+    column_format='l' + 7*'r')
+# add title
+ti1 = ('\n\\caption{Summary of all RGI regions. The first two columns give '
+       'the RGI region and the total number of glaciers per region. The '
+       'consecutive columns specify the availability of a particular DEM '
+       'for an RGI region in percent of the total glaciers per region. '
+       'Values are not rounded but truncated, so 99\\% could be just one '
+       'missing glacier. '
+       'Only DEMs with less than 10\\% missing values are considered.}\\\\\n'
+       '\\label{tbl_general}\\\\\n')
+tbl1 = tbl1.replace('\n', ti1, 1)
+with open('/home/matthias/rgi/report/dem_rgi_general{}.tex'.format(sfx), 'w') as tf:
+    tf.write(tbl1)
+
+# specific table
+tbl2 = dfstat.iloc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18]].\
+    loc[:, dfstat.columns.isin(['RGI region', '# total'] + spc)].to_latex(
+        na_rep='--', index=False, longtable=True,
+        column_format='l' + 5*'r')
+# add title
+ti2 = ('\n\\caption{Same as Table \\ref{tbl_general} but for specific '
+       'regional DEMs only.}\\\\\n'
+       '\\label{tbl_specific}\\\\\n')
+tbl2 = tbl2.replace('\n', ti2, 1)
+
+with open('/home/matthias/rgi/report/dem_rgi_spec{}.tex'.format(sfx), 'w') as tf:
+    tf.write(tbl2)
+
+#
+# subregions
+# general table
+tbl3 = dfsub.loc[:, ~dfsub.columns.isin(spc)].to_latex(
+    na_rep='--', index=False, longtable=True,
+    column_format='l' + 7*'r')
+# add title
+ti3 = ('\n\\caption{Same as Table \\ref{tbl_general} but split into RGI '
+       'subregions.}\\\\\n'
+       '\\label{tbl_general_sub}\\\\\n')
+tbl3 = tbl3.replace('\n', ti3, 1)
+with open('/home/matthias/rgi/report/dem_subrgi_general.tex'.format(sfx), 'w') as tf:
+    tf.write(tbl3)
+
+# specific table
+tbl4 = dfsub.iloc[np.append(np.arange(0, 43), np.arange(69, 87))].\
+    loc[:, dfsub.columns.isin(['RGI region', '# total'] + spc)].to_latex(
+        na_rep='--', index=False, longtable=True,
+        column_format='l' + 5*'r')
+# add title
+ti4 = ('\n\\caption{Same as Table \\ref{tbl_specific} but split into '
+       'RGI subregions.}\\\\\n'
+       '\\label{tbl_specific_sub}\\\\\n')
+tbl4 = tbl4.replace('\n', ti4, 1)
+
+with open('/home/matthias/rgi/report/dem_subrgi_spec.tex'.format(sfx), 'w') as tf:
+    tf.write(tbl4)
+
+# write csv files for RST readthedocs
+dfstat.to_csv('/home/matthias/rgi/rgitools/docs/_static/tables/dem_rgi.csv'.format(sfx),
+              index=False)
+dfsub.to_csv('/home/matthias/rgi/rgitools/docs/_static/tables/dem_subrgi.csv'.format(sfx),
+             index=False)
+
+# make and save plots
+dem_barplot(dfall, axs[0, 0],
+            title='All RGI regions ({:.0f} glaciers)'.format(len(dfall)))
+
+fig.tight_layout()
+fig.savefig('/home/matthias/rgi/rgitools/docs/_static/images/' +
+            'dem_all_regions.png'.format(sfx))
+fig.savefig('/home/matthias/rgi/report/dem_all_regions.pdf'.format(sfx))
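
The availability percentages written into dfstat and dfsub above are built from the per-region quality tables with the same few lines in all three dem_post_* scripts. A minimal, self-contained sketch of that transform, using made-up values and hypothetical RGI ids (illustration only, not part of the patch):

    import numpy as np
    import pandas as pd

    # toy per-glacier quality table: fraction of valid DEM pixels per source
    quality = pd.DataFrame({'SRTM': [0.99, 0.95, np.nan],
                            'DEM3': [1.00, 0.20, 0.97]},
                           index=['RGI60-11.00001', 'RGI60-11.00002',
                                  'RGI60-11.00003'])

    total = len(quality)                 # 3 glaciers
    good = (quality > 0.9).sum()         # DEMs with > 90% valid pixels
    out = (good / total * 100).dropna().astype(int)
    outstr = out.astype(str)
    outstr.loc[out != 0] += '%'          # e.g. SRTM -> '66%' (2 of 3, truncated)
    outstr.loc[out == 0] = '--'          # sources with no usable DEM at all
    outstr['# total'] = total            # becomes one row of dfstat

A source only ends up as '--' when not a single glacier of the region has a DEM from that source with more than 90% valid pixels.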
diff --git a/rgitools/statistics/dem_post_quality_per_region.py b/rgitools/statistics/dem_post_quality_per_region.py
new file mode 100644
index 0000000..2461cdc
--- /dev/null
+++ b/rgitools/statistics/dem_post_quality_per_region.py
@@ -0,0 +1,134 @@
+import os
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import matplotlib.pyplot as plt
+
+from oggm import utils, cfg
+
+from my_dem_funcs import dem_barplot
+
+
+wd = '/home/matthias/rgi/wd'
+cfg.initialize()
+cfg.PATHS['working_dir'] = wd
+
+# gdirs storage path
+path = '/home/matthias/rgi/dems_v1/default/RGI62/b_010/L1'
+sfx = '_v1'
+
+# dataframe for all areas
+dfall = pd.DataFrame()
+
+# dataframe for statistic
+cols = utils.DEM_SOURCES.copy()
+cols.sort()
+cols = ['RGI region', '# total'] + cols
+dfstat = pd.DataFrame([], columns=cols)
+
+# statistic on subregions
+dfsub = dfstat.copy()
+
+# rgi region file
+regions = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O1Regions.shp'))
+subregs = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O2Regions.shp'))
+
+fig0, ax0 = plt.subplots(1, 1, figsize=[10, 10])
+
+for reg in np.arange(1, 20):
+    fig, ax = plt.subplots(1, 1, figsize=[10, 10])
+    regstr = '{:02.0f}'.format(reg)
+
+    quality = pd.read_hdf(os.path.join(wd, 'rgi_{}.h5'.format(regstr + sfx)),
+                          'quality')
+
+    regname = regions.loc[regions['RGI_CODE'] == reg, 'FULL_NAME'].iloc[0]
+
+    dem_barplot(quality, ax,
+                title='RGI region {}: {} ({:.0f} glaciers)'.
+                format(regstr, regname, len(quality)))
+    fig.tight_layout()
+    fig.savefig('/home/matthias/rgi/out/images/' +
+                'barplot_rgi{}.png'.format(regstr + sfx))
+
+    dfall = dfall.append(quality)
+
+    # FULL REGION
+    total = len(quality)
+    good = (quality > 0.9).sum()
+
+    # out = good / total
+    out = (good / total * 100).dropna().astype(int)
+    outstr = out.astype(str)
+    outstr.loc[out != 0] += '%'
+    outstr.loc[out == 0] = '--'
+    outstr['# total'] = total
+
+    dfstat.loc[':ref:`{0}: {1}`'.format(regstr, regname)] = outstr
+
+    # take care of subregions
+    regdf = gpd.read_file(utils.get_rgi_region_file(regstr))
+    sregs = np.unique(regdf.O2Region)
+
+    for sreg in sregs:
+        ids = regdf.loc[regdf.O2Region == sreg, 'RGIId'].values
+        subq = quality.loc[ids]
+
+        # SUBREGIONS
+        total = len(subq)
+        good = (subq > 0.9).sum()
+        out = (good / total * 100).dropna().astype(int)
+        outstr = out.astype(str)
+        outstr.loc[out != 0] += '%'
+        outstr.loc[out == 0] = '--'
+        outstr['# total'] = total
+
+        subregstr = '-{:02.0f}'.format(int(sreg))
+        subregname = subregs.loc[subregs.RGI_CODE == regstr + subregstr].\
+            FULL_NAME.iloc[0]
+
+        dfsub.loc['{}: {}'.format(regstr + subregstr, subregname)] = outstr
+
+# FULL RGI
+total = len(dfall)
+good = (dfall > 0.9).sum()
+out = (good / total * 100).dropna().astype(int)
+outstr = out.astype(str)
+outstr.loc[out != 0] += '%'
+outstr.loc[out == 0] = '--'
+outstr['# total'] = total
+
+dfstat.loc['All RGI regions'] = outstr
+
+dfsub.sort_index(inplace=True)
+
+# integer for number of glaciers
+dfstat['# total'] = dfstat['# total'].astype(int)
+dfstat['RGI region'] = dfstat.index
+dfsub['# total'] = dfsub['# total'].astype(int)
+dfsub['RGI region'] = dfsub.index
+
+
+# write csv files for RST readthedocs
+dfstat.to_csv('/home/matthias/rgi/out/tables/dem_allrgi{}.csv'.format(sfx),
+              index=False)
+
+# write subregion tables:
+for reg in np.arange(1, 20):
+    regstr = '{:02.0f}'.format(reg)
+    sub = dfsub.loc[dfsub.index.str.contains('{}-'.format(regstr))]
+    sub.to_csv('/home/matthias/rgi/out/tables/dem_rgi{}.csv'.format(regstr + sfx),
+               index=False)
+
+# make and save plots
+dem_barplot(dfall, ax0,
+            title='All RGI regions ({:.0f} glaciers)'.format(len(dfall)))
+
+fig0.tight_layout()
+fig0.savefig('/home/matthias/rgi/out/images/' +
+             'barplot_allregions{}.png'.format(sfx))
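
dem_barplot, imported from my_dem_funcs.py further down in this patch, expects exactly this kind of glacier-by-source quality table and plots, per DEM source, the number of glaciers with more than 90% valid pixels. A rough usage sketch with random toy data (column names are arbitrary placeholders, not a statement about real coverage):

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    from my_dem_funcs import dem_barplot

    # made-up quality table: rows are glaciers, columns are DEM sources
    quality = pd.DataFrame(np.random.uniform(0.5, 1.0, size=(100, 3)),
                           columns=['SRTM', 'DEM3', 'TANDEM'])

    fig, ax = plt.subplots(1, 1, figsize=[10, 10])
    dem_barplot(quality, ax, title='toy region (100 glaciers)')
    fig.tight_layout()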
diff --git a/rgitools/statistics/dem_post_roughness.py b/rgitools/statistics/dem_post_roughness.py
new file mode 100644
index 0000000..579bc39
--- /dev/null
+++ b/rgitools/statistics/dem_post_roughness.py
@@ -0,0 +1,188 @@
+import os
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import matplotlib.pyplot as plt
+
+from oggm import utils, cfg
+
+from my_dem_funcs import dem_barplot
+
+
+wd = '/home/matthias/rgi/wd'
+cfg.initialize()
+cfg.PATHS['working_dir'] = wd
+
+# gdirs storage path
+path = '/home/matthias/rgi/dems_v1/default/RGI62/b_010/L1'
+# do I need this?
+sfx = '_v1'
+
+# dataframe for all areas
+dfall = pd.DataFrame()
+
+# dataframe for statistic
+cols = utils.DEM_SOURCES.copy()
+cols.sort()
+cols = ['RGI region', '# total'] + cols
+dfstat = pd.DataFrame([], columns=cols)
+
+# statistic on subregions
+dfsub = dfstat.copy()
+
+# rgi region file
+regions = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O1Regions.shp'))
+subregs = gpd.read_file(os.path.join(cfg.PATHS['rgi_dir'], 'RGIV60',
+                                     '00_rgi60_regions',
+                                     '00_rgi60_O2Regions.shp'))
+
+fig, axs = plt.subplots(5, 4, figsize=[20, 22])
+
+for reg in np.arange(1, 20):
+    regstr = '{:02.0f}'.format(reg)
+
+    quality = pd.read_hdf(os.path.join(wd, 'rgi_{}.h5'.format(regstr)),
+                          'quality')
+
+    regname = regions.loc[regions['RGI_CODE'] == reg, 'FULL_NAME'].iloc[0]
+
+    dem_barplot(quality, fig.get_axes()[reg],
+                title='RGI region {}: {} ({:.0f} glaciers)'.
+                format(regstr, regname, len(quality)))
+
+    dfall = dfall.append(quality)
+
+    # FULL REGION
+    total = len(quality)
+    good = (quality > 0.9).sum()
+
+    # out = good / total
+    out = (good / total * 100).dropna().astype(int)
+    outstr = out.astype(str)
+    outstr.loc[out != 0] += '%'
+    outstr.loc[out == 0] = '--'
+    outstr['# total'] = total
+
+    dfstat.loc['{}: {}'.format(regstr, regname)] = outstr
+
+    # take care of subregions
+    regdf = gpd.read_file(utils.get_rgi_region_file(regstr))
+    sregs = np.unique(regdf.O2Region)
+
+    for sreg in sregs:
+        ids = regdf.loc[regdf.O2Region == sreg, 'RGIId'].values
+        subq = quality.loc[ids]
+
+        # SUBREGIONS
+        total = len(subq)
+        good = (subq > 0.9).sum()
+        out = (good / total * 100).dropna().astype(int)
+        outstr = out.astype(str)
+        outstr.loc[out != 0] += '%'
+        outstr.loc[out == 0] = '--'
+        outstr['# total'] = total
+
+        subregstr = '-{:02.0f}'.format(int(sreg))
+        subregname = subregs.loc[subregs.RGI_CODE == regstr + subregstr].\
+            FULL_NAME.iloc[0]
+
+        dfsub.loc['{}: {}'.format(regstr + subregstr, subregname)] = outstr
+
+# FULL RGI
+total = len(dfall)
+good = (dfall > 0.9).sum()
+out = (good / total * 100).dropna().astype(int)
+outstr = out.astype(str)
+outstr.loc[out != 0] += '%'
+outstr.loc[out == 0] = '--'
+outstr['# total'] = total
+
+dfstat.loc['All RGI regions'] = outstr
+
+dfsub.sort_index(inplace=True)
+
+# integer for number of glaciers
+dfstat['# total'] = dfstat['# total'].astype(int)
+dfstat['RGI region'] = dfstat.index
+dfsub['# total'] = dfsub['# total'].astype(int)
+dfsub['RGI region'] = dfsub.index
+
+# save one with and one without specific DEMs
+spc = ['REMA', 'RAMP', 'ARCTICDEM', 'GIMP']
+
+# general table
+tbl1 = dfstat.loc[:, ~dfstat.columns.isin(spc)].to_latex(
+    na_rep='--', index=False, longtable=True,
+    column_format='l' + 7*'r')
+# add title
+ti1 = ('\n\\caption{Summary of all RGI regions. The first two columns give '
+       'the RGI region and the total number of glaciers per region. The '
+       'consecutive columns specify the availability of a particular DEM '
+       'for an RGI region in percent of the total glaciers per region. '
+       'Values are not rounded but truncated, so 99\\% could be just one '
+       'missing glacier. '
+       'Only DEMs with less than 10\\% missing values are considered.}\\\\\n'
+       '\\label{tbl_general}\\\\\n')
+tbl1 = tbl1.replace('\n', ti1, 1)
+with open('/home/matthias/rgi/report/dem_rgi_general.tex', 'w') as tf:
+    tf.write(tbl1)
+
+# specific table
+tbl2 = dfstat.iloc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18]].\
+    loc[:, dfstat.columns.isin(['RGI region', '# total'] + spc)].to_latex(
+        na_rep='--', index=False, longtable=True,
+        column_format='l' + 5*'r')
+# add title
+ti2 = ('\n\\caption{Same as Table \\ref{tbl_general} but for specific '
+       'regional DEMs only.}\\\\\n'
+       '\\label{tbl_specific}\\\\\n')
+tbl2 = tbl2.replace('\n', ti2, 1)
+
+with open('/home/matthias/rgi/report/dem_rgi_spec.tex', 'w') as tf:
+    tf.write(tbl2)
+
+#
+# subregions
+# general table
+tbl3 = dfsub.loc[:, ~dfsub.columns.isin(spc)].to_latex(
+    na_rep='--', index=False, longtable=True,
+    column_format='l' + 7*'r')
+# add title
+ti3 = ('\n\\caption{Same as Table \\ref{tbl_general} but split into RGI '
+       'subregions.}\\\\\n'
+       '\\label{tbl_general_sub}\\\\\n')
+tbl3 = tbl3.replace('\n', ti3, 1)
+with open('/home/matthias/rgi/report/dem_subrgi_general.tex', 'w') as tf:
+    tf.write(tbl3)
+
+# specific table
+tbl4 = dfsub.iloc[np.append(np.arange(0, 43), np.arange(69, 87))].\
+    loc[:, dfsub.columns.isin(['RGI region', '# total'] + spc)].to_latex(
+        na_rep='--', index=False, longtable=True,
+        column_format='l' + 5*'r')
+# add title
+ti4 = ('\n\\caption{Same as Table \\ref{tbl_specific} but split into '
+       'RGI subregions.}\\\\\n'
+       '\\label{tbl_specific_sub}\\\\\n')
+tbl4 = tbl4.replace('\n', ti4, 1)
+
+with open('/home/matthias/rgi/report/dem_subrgi_spec.tex', 'w') as tf:
+    tf.write(tbl4)
+
+# write csv files for RST readthedocs
+dfstat.to_csv('/home/matthias/rgi/rgitools/docs/_static/tables/dem_rgi.csv',
+              index=False)
+dfsub.to_csv('/home/matthias/rgi/rgitools/docs/_static/tables/dem_subrgi.csv',
+             index=False)
+
+# make and save plots
+dem_barplot(dfall, axs[0, 0],
+            title='All RGI regions ({:.0f} glaciers)'.format(len(dfall)))
+
+fig.tight_layout()
+fig.savefig('/home/matthias/rgi/rgitools/docs/_static/images/' +
+            'dem_all_regions.png')
+fig.savefig('/home/matthias/rgi/report/dem_all_regions.pdf')
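
The roughness metric tabulated by this script is computed in my_dem_funcs.dem_quality below as the standard deviation of the slope angle derived from the DEM gradient. A minimal numeric illustration, assuming a synthetic DEM and a 100 m grid spacing (illustration only, not part of the patch):

    import numpy as np

    dx = 100.0                                          # grid spacing in metres
    dem = np.outer(np.arange(50), np.ones(50)) * 5.0    # plane rising 5 m per cell

    sy, sx = np.gradient(dem, dx)                       # elevation change per metre
    slope = np.arctan(np.sqrt(sy**2 + sx**2))           # slope angle in radians
    roughness = np.nanstd(slope)                        # 0.0 for a perfectly planar DEM

A perfectly planar DEM has constant slope and therefore zero roughness; voids and artefacts widen the spread of slope angles and increase the value.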
diff --git a/rgitools/statistics/my_dem_funcs.py b/rgitools/statistics/my_dem_funcs.py
new file mode 100644
index 0000000..20a8bba
--- /dev/null
+++ b/rgitools/statistics/my_dem_funcs.py
@@ -0,0 +1,198 @@
+import os
+import tarfile
+import logging
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import rasterio
+
+from oggm import utils, GlacierDirectory, entity_task
+
+# Module logger
+log = logging.getLogger(__name__)
+
+
+def dem_quality(gdir, demfile):
+    """Quality check based on oggm.simple_glacier_masks.
+
+    Parameters
+    ----------
+    gdir : :py:class:`oggm.GlacierDirectory`
+        the glacier in question
+    demfile : str
+        path to a specific DEM tif-file
+
+    Returns
+    -------
+    nanpercent : float
+        fraction of valid (non-NaN) grid points among all grid points
+    nanpercent_glc : float
+        fraction of valid (non-NaN) grid points among all glaciated grid points
+    meanhgt : float
+        mean elevation of all grid points
+    meanhgt_glc : float
+        mean elevation of glaciated grid points
+    roughness : float
+        standard deviation of the 2d slope of all grid points
+    roughness_glc : float
+        standard deviation of the 2d slope of all glaciated grid points
+    """
+
+    # open tif-file:
+    with rasterio.open(demfile, 'r', driver='GTiff') as ds:
+        dem = ds.read(1).astype(rasterio.float32)
+        nx = ds.width
+        ny = ds.height
+        dx = ds.transform[0]
+        # assert some basics
+        assert nx == gdir.grid.nx
+        assert ny == gdir.grid.ny
+        assert dx == gdir.grid.dx
+
+    # open glacier mask
+    with rasterio.open(gdir.get_filepath('glacier_mask'),
+                       'r', driver='GTiff') as ds:
+        mask = ds.read(1).astype(rasterio.int16)
+
+    # set nodata values to NaN
+    min_z = -999.
+    dem[dem <= min_z] = np.nan
+    isfinite = np.isfinite(dem)
+    isfinite_glc = np.isfinite(dem[np.where(mask)])
+
+    # calculate fraction of valid (non-NaN) points in the full and glaciated area
+    nanpercent = np.sum(isfinite) / (nx * ny)
+    nanpercent_glc = np.sum(isfinite_glc) / mask.sum()
+
+    # calculate mean elevation of the full and glaciated area
+    meanhgt = np.nanmean(dem)
+    meanhgt_glc = np.nanmean(dem[np.where(mask)])
+
+    # calculate roughness of the full area
+    sy, sx = np.gradient(dem, dx)
+    slope = np.arctan(np.sqrt(sy**2 + sx**2))
+    roughness = np.nanstd(slope)
+
+    # calculate roughness of the glaciated area
+    dem_glc = np.where(mask, dem, np.nan)
+    sy, sx = np.gradient(dem_glc, dx)
+    slope = np.arctan(np.sqrt(sy**2 + sx**2))
+    roughness_glc = np.nanstd(slope)
+
+    return (nanpercent, nanpercent_glc, meanhgt, meanhgt_glc, roughness,
+            roughness_glc)
+
+
+@entity_task(log)
+def get_dem_area(gdir):
+    """Read the glacier_mask.tif and calculate the glacier area from it.
+
+    Parameters
+    ----------
+    gdir : GlacierDirectory
+        the glacier in question
+
+    Returns
+    -------
+    float
+        glacier area in km2
+    """
+
+    # read dem mask
+    with rasterio.open(gdir.get_filepath('glacier_mask'),
+                       'r', driver='GTiff') as ds:
+        profile = ds.profile
+        data = ds.read(1).astype(profile['dtype'])
+
+    # calculate dem_mask area in km2 (can be tested against the RGI area)
+    mask_area_km2 = data.sum() * gdir.grid.dx**2 * 1e-6
+
+    return mask_area_km2
+
+
+def gdirs_from_tar_files(path, rgi_region=None):
+
+    gdirs = []
+    for regdir in os.listdir(path):
+
+        # only do required rgi_region
+        if (rgi_region is not None) and (regdir[-2:] != rgi_region):
+            continue
+
+        rdpath = os.path.join(path, regdir)
+
+        for file in os.listdir(rdpath):
+
+            with tarfile.open(os.path.join(rdpath, file), 'r') as tfile:
+                for member in tfile:
+                    if member.isdir():
+                        continue
+                    tar_base = os.path.join(rdpath, member.path)
+                    gdirs.append(GlacierDirectory(member.name[-21:-7],
+                                                  from_tar=tar_base))
+
+    return gdirs
+
+
+@entity_task(log)
+def check_all_dems_per_gdir(gdir):
+    """Go through all available DEMs and create some metrics.
+
+    DEMs must be in GDir subfolders.
+
+    :param gdir: the glacier in question
+    :return: pd.DataFrame with one row per metric and one column per
+        DEM source
+    """
+    # dataframe for results
+    df = pd.DataFrame([], index=[gdir.rgi_id]*6,  # np.arange(3),
+                      columns=['metric'] + utils.DEM_SOURCES)
+    df.iloc[0]['metric'] = 'quality'
+    df.iloc[1]['metric'] = 'quality_glc'
+    df.iloc[2]['metric'] = 'meanhgt'
+    df.iloc[3]['metric'] = 'meanhgt_glc'
+    df.iloc[4]['metric'] = 'roughness'
+    df.iloc[5]['metric'] = 'roughness_glc'
+
+    logfile = (os.path.join(gdir.dir, 'log.txt'))
+
+    # read logfile, specify names because log entries have different sizes
+    lfdf = pd.read_csv(logfile, delimiter=';', header=None,
+                       skipinitialspace=True, names=[0, 1, 2, 3])
+
+    # loop over dems and save existing ones to test
+    dem2test = []
+    for _, line in lfdf.iterrows():
+        if ('DEM SOURCE' in line[1]) and ('SUCCESS' in line[2]):
+            rgi = line[1].split(',')[0]
+            dem = line[1].split(',')[2]
+            dem2test.append(dem)
+
+    # loop over DEMs
+    for dem in dem2test:
+        demfile = os.path.join(gdir.dir, dem) + '/dem.tif'
+        qual, qualglc, hgt, hgt_glc, rgh, rgh_glc = dem_quality(gdir, demfile)
+        df.loc[df.metric == 'quality', dem] = qual
+        df.loc[df.metric == 'quality_glc', dem] = qualglc
+        df.loc[df.metric == 'meanhgt', dem] = hgt
+        df.loc[df.metric == 'meanhgt_glc', dem] = hgt_glc
+        df.loc[df.metric == 'roughness', dem] = rgh
+        df.loc[df.metric == 'roughness_glc', dem] = rgh_glc
+
+    return df
+
+
+def dem_barplot(df, ax, title=''):
+
+    # dfexist = (df > 0).sum().sort_index()
+    dfgood = (df > 0.9).sum().sort_index()
+
+    # ax.bar(dfexist.index, dfexist.values, width=-0.4, align='edge',
+    #        label='DEM exists')
+    # ax.bar(dfgood.index, dfgood.values, width=0.4, align='edge', color='C2',
+    #        label='DEM with >= 90% valid pixels')
+    ax.bar(dfgood.index, dfgood.values, width=0.8, align='center', color='C0',
+           label='DEM with >= 90% valid pixels')
+
+    ax.set_ylabel('# number of glaciers')
+    # ax.set_ylim([0, np.ceil(len(df)/50)*50])
+    ax.set_ylim([0, len(df)])
+    ax.set_xticklabels(dfgood.index, rotation=75)
+    ax.set_title(title)
+    # ax.legend(loc=3)
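
A possible single-glacier usage sketch for the module above; the working directory path and the RGI id are placeholders, and it assumes a pre-processed glacier directory that already contains glacier_mask.tif, per-source DEM subfolders and the log.txt parsed by check_all_dems_per_gdir:

    import os

    from oggm import cfg, GlacierDirectory
    from my_dem_funcs import dem_quality, check_all_dems_per_gdir

    cfg.initialize()
    cfg.PATHS['working_dir'] = '/path/to/wd'      # placeholder

    gdir = GlacierDirectory('RGI60-11.00897')     # placeholder RGI id

    # one DEM: returns the six metrics that check_all_dems_per_gdir stores
    demfile = os.path.join(gdir.dir, 'SRTM', 'dem.tif')
    nanp, nanp_glc, hgt, hgt_glc, rgh, rgh_glc = dem_quality(gdir, demfile)

    # all DEMs listed as SUCCESS in the glacier's log.txt,
    # called directly here for a single glacier
    df = check_all_dems_per_gdir(gdir)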
diff --git a/rgitools/statistics/post_all_dems.py b/rgitools/statistics/post_all_dems.py
new file mode 100644
index 0000000..0482dc6
--- /dev/null
+++ b/rgitools/statistics/post_all_dems.py
@@ -0,0 +1,177 @@
+import os
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import matplotlib.pyplot as plt
+
+from oggm.cli.prepro_levels import run_prepro_levels
+from oggm import utils, cfg, GlacierDirectory
+from oggm.workflow import execute_entity_task
+
+from my_dem_funcs import (check_all_dems_per_gdir, gdirs_from_tar_files,
+                          get_dem_area, dem_barplot)
+
+
+def parse_logfile(path, df=None):
+
+    # df passed or new one?
+    if df is None:
+        df = pd.DataFrame([], columns=utils.DEM_SOURCES)
+
+    for lf in os.listdir(path):
+        # get rgi id from file name
+        if '.ERROR' in lf:
+            rgi = lf.split('.ERROR')[0]
+        else:
+            raise RuntimeError
+
+        # read logfile
+        lfdf = pd.read_csv(os.path.join(path, lf), delimiter=';', header=None,
+                           skipinitialspace=True)
+
+        # set all DEMs to True
+        df.loc[rgi, :] = True
+
+        # loop over dems and set erroneous ones to False
+        for _, dem in lfdf.iterrows():
+            print(dem[3])
+            if dem[2] == 'InvalidDEMError':
+                df.loc[rgi, dem[3].split()[1]] = False
+            if 'HTTPSConnect' in dem[3]:
+                print(rgi)
+
+    return df
+
+
+def parse_logfiles(path):
+
+    df = pd.DataFrame([], columns=utils.DEM_SOURCES)
+
+    for root, dirs, files in os.walk(path):
+        if 'log.txt' in files:
+            logfile = (os.path.join(root, 'log.txt'))
+
+            # read logfile
+            lfdf = pd.read_csv(logfile, delimiter=';', header=None,
+                               skipinitialspace=True)
+
+            # loop over dems and set erroneous ones to False
+            for _, line in lfdf.iterrows():
+                if 'DEM SOURCE' in line[1]:
+                    rgi = line[1].split(',')[0]
+                    dem = line[1].split(',')[2]
+                    df.loc[rgi, dem] = True
+                # elif 'InvalidDEMError' in line[2]:
+                #     rgi = line[2].split()[-1]
+                #     assert rgi[:3] == 'RGI'
+                #     dem = line[2].split()[2]
+                #     assert dem in df.columns
+                #     df.loc[rgi, dem] = 0
+
+    return df
+
+
+def hgt_barplot(df1, df2, title='', savepath=None):
+
+    fig, ax = plt.subplots(figsize=[10, 7])
+
+    ax.bar(df1.index, df1.values, width=-0.4, align='edge',
+           label='glaciated area (all DEMs >0.9 quality)', color='C0')
+    ax.bar(df2.index, df2.values, width=0.4, align='edge', color='C1',
+           label='full area (all DEMs >0.9 quality)')
+
+    ax.set_ylabel('elevation [m]')
+    # ax.set_ylim([0, np.ceil(len(df)/5)*5])
+
+    ax.set_title(title)
+    ax.legend(loc=3)
+
+    fig.tight_layout()
+    if savepath is not None:
+        fig.savefig(savepath)
+
+
+wd = '/home/users/mdusch/rgidems/wd'
+# wd = os.environ.get('WORKDIR')
+post = '/home/users/mdusch/rgidems/post'
+
+cfg.initialize()
+cfg.PATHS['working_dir'] = wd
+
+# path = '/home/users/mdusch/rgidems/out/dems_v1/default/RGI62/b_010/L1'
+path = '/home/users/mdusch/rgidems/out/dems_v1/highres/RGI62/b_020/L1'
+# sfx = '_v1'
+sfx = '_v1_highres'
+
+
+dfarea = pd.DataFrame([], index=np.arange(1, 20), columns=['demarea'])
+
+for reg in np.arange(1, 20):
+    regstr = '{:02.0f}'.format(reg)
+
+    try:
+        rgidf = gpd.read_file(utils.get_rgi_region_file(regstr, version='6'))
+        gdirs = [GlacierDirectory(rgiid) for rgiid in rgidf.RGIId]
+        print('from gdir')
+    except OSError:
+        gdirs = gdirs_from_tar_files(path, rgi_region=regstr)
+        print('from tar')
+
+    dfreg = execute_entity_task(check_all_dems_per_gdir, gdirs)
+    dfreg = pd.concat(dfreg)
+
+    quality = dfreg.loc[dfreg['metric'] == 'quality',
+                        dfreg.columns != 'metric']
+
+    hgt = dfreg.loc[dfreg['metric'] == 'meanhgt',
+                    dfreg.columns != 'metric']
+
+    qualityglc = dfreg.loc[dfreg['metric'] == 'quality_glc',
+                           dfreg.columns != 'metric']
+    hgtglc = dfreg.loc[dfreg['metric'] == 'meanhgt_glc',
+                       dfreg.columns != 'metric']
+
+    rgh = dfreg.loc[dfreg['metric'] == 'roughness',
+                    dfreg.columns != 'metric']
+    rghglc = dfreg.loc[dfreg['metric'] == 'roughness_glc',
+                       dfreg.columns != 'metric']
+
+    hgt_good = (hgt[(quality > 0.9)].dropna(axis=1, how='all').
+                dropna(axis=0, how='any'))
+    hgtglc_good = (hgtglc[(qualityglc > 0.9)].dropna(axis=1, how='all').
+                   dropna(axis=0, how='any'))
+
+    hgt_barplot(hgt_good.mean(), hgtglc_good.mean(),
+                title=('Mean height of RGI region {} (#{:.0f} full area, ' +
+                       '#{:.0f} glaciated area)').format(regstr,
+                                                         len(hgt_good),
+                                                         len(hgtglc_good)),
+                savepath=os.path.join(post, 'rgi_hgt_%s.png' % (regstr + sfx)))
+
+    rgi_area = np.sum([gd.rgi_area_km2 for gd in gdirs])
+
+    dem_area = np.sum(execute_entity_task(get_dem_area, gdirs))
+
+    dfarea.loc[reg, 'demarea'] = dem_area
+
+    quality.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+                   mode='a', key='quality')
+
+    qualityglc.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+                      mode='a', key='quality_glc')
+
+    hgt.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+               mode='a', key='mhgt')
+
+    hgtglc.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+                  mode='a', key='mhgt_glc')
+
+    rgh.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+               mode='a', key='roughness')
+
+    rghglc.to_hdf(os.path.join(post, 'rgi_%s.h5' % (regstr + sfx)),
+                  mode='a', key='roughness_glc')
+
+
+dfarea.to_hdf(os.path.join(post, 'dem_area{}.h5'.format(sfx)), key='demarea')
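
For reference, the per-region HDF5 stores written above are what the dem_post_*.py scripts read back; a short sketch of inspecting one of them (the file name is assembled from post, regstr and sfx exactly as in the code above):

    import pandas as pd

    # per-region store written by post_all_dems.py; available keys are
    # quality, quality_glc, mhgt, mhgt_glc, roughness and roughness_glc
    quality = pd.read_hdf('/home/users/mdusch/rgidems/post/rgi_11_v1_highres.h5',
                          'quality')

    # fraction of glaciers per DEM source with more than 90% valid pixels
    print((quality > 0.9).sum() / len(quality))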