From 0ad7466ca98795e36fd5aaf4418147affce43369 Mon Sep 17 00:00:00 2001 From: Daniel Duke Date: Thu, 23 Feb 2017 14:40:30 -0600 Subject: [PATCH 1/5] Minor fixes to MDA2HDF5_Fluorescence No change to existing functionality - just fixed some minor bugs. Intend to improve command line interface in future commit. --- Library/MDA2HDF5_Fluorescence.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/Library/MDA2HDF5_Fluorescence.py b/Library/MDA2HDF5_Fluorescence.py index 6fa85df..5ffb21d 100644 --- a/Library/MDA2HDF5_Fluorescence.py +++ b/Library/MDA2HDF5_Fluorescence.py @@ -12,6 +12,21 @@ May 7, 2014: Save only the positioner and detector arrays up to current point. June 18, 2014: Minor change to check the existence of the input file. March 5, 2015: Add function frun_append to append MDA file data to an existing HDF file. + +Feb 23, 2017: Changes made by D Duke: + Set directory in frun_main to "" so that user can point to any directory from command line by default. + Using os.path functions to generate output filename in frun_main, more robust than splitting on "." + (allows writing to a different place with ".." in the path name for example) + Fixed some harmless typos. + +Dan's wish list for future features: + - built in HDF5 compression option + - cleaner command line interface (using OptionParser for example, to give standard *NIX cmd line switches) + - Move some hard coded features like mca_saving flag into the command line arguments. + - Add command line interface access to frun_append function. + - Allow multiple mca files to be intelligently merged into N-d arrays for each PV (i.e. a 2D scan made up + of a handful of 1D scans, that might each have MCAs in them). + """ # #Imports @@ -19,7 +34,7 @@ import xdrlib import numpy as np import os.path -mca_saving = False +mca_saving = True # def fparse_counted_string(data): '''Parses the data to get a counted string, @@ -313,19 +328,20 @@ def fread_extra_PVs(data,group): extra_PV_group.attrs[prefix + "_Value"] = pv_value return -def frun_main(input_file="7bmb1_0933.mda",directory="/data/Data/SprayData/Cycle_2012_3/Time_Averaged_ISU/",mca_saving_flag=False): +def frun_main(input_file="7bmb1_0933.mda",directory="",mca_saving_flag=False): '''Function that tests the working of the code. ''' #Parse the input file into the main part of the file name and the extension. global mca_saving mca_saving = mca_saving_flag - output_filename = input_file.split(".")[0] + ".hdf5" + output_filename = os.path.splitext(os.path.basename(input_file))[0] + ".hdf5" #Check whether the file exists if not os.path.isfile(directory+input_file): print "File " + input_file + " cannot be opened. Exiting." return try: with h5py.File(directory+output_filename,'w') as output_hdf: + print "Writing to",directory+output_filename data = fstart_unpacking(input_file,directory) extra_PV_position = fread_file_header(data,output_hdf) fread_scan(data,output_hdf) @@ -373,8 +389,8 @@ def frun_append(input_MDA = '7bmb1_0260.mda',MDA_directory = '/data/Data/SprayDa frun_main() elif len(sys.argv) == 2: if sys.argv[1] == "-h": - print """Program to convert sscan MDA files to HDF5. /n - If one argument is given, assume it is a path to an MDA file. /n + print """Program to convert sscan MDA files to HDF5. \n + If one argument is given, assume it is a path to an MDA file. \n The HDF5 file will be placed in the same directory. Alternatively, give file and a directory in which to place HDF file. """ @@ -384,4 +400,4 @@ def frun_append(input_MDA = '7bmb1_0260.mda',MDA_directory = '/data/Data/SprayDa frun_main(sys.argv[1],sys.argv[2]) elif len(sys.argv) == 4 and os.path.isdir(sys.argv[2]): frun_main(sys.argv[1],sys.argv[2],sys.argv[3]) - \ No newline at end of file + From 9d46ef3560992f3126e000c6c1f3589a6f46ac31 Mon Sep 17 00:00:00 2001 From: Daniel Duke Date: Thu, 23 Feb 2017 17:23:41 -0600 Subject: [PATCH 2/5] Merge multiple 1D scans Script can merge multiple 1D scans (performed using a pyEpics script loop for example), and generate 2D arrays in a single HDF5 file for each PV. For an MCA, it can take the 2D array produced by a ScanH for the array record wrapped in Scan1, and output a full 3D array. Very useful for merging fluorescence detector spectra for 2D raster scanning experiments. --- Library/Merge_MCA_HDF5_Scans.py | 228 ++++++++++++++++++++++++++++++++ 1 file changed, 228 insertions(+) create mode 100644 Library/Merge_MCA_HDF5_Scans.py diff --git a/Library/Merge_MCA_HDF5_Scans.py b/Library/Merge_MCA_HDF5_Scans.py new file mode 100644 index 0000000..0069b8e --- /dev/null +++ b/Library/Merge_MCA_HDF5_Scans.py @@ -0,0 +1,228 @@ +""" Module to merge sets of HDF5-converted MCA scans into a single set of 3-dimensional arrays. +Useful, for example, for merging a set of 1-D scans from a fluorescence detector into a single file. +The array record will end up being of size (N_outer (pyEpics loop), N_inner (scan1), N_array (scanH)). + +Daniel Duke, ES +Started February 23, 2017 + +""" + +import h5py, sys, os +import numpy as np + + +# Merge h5py dataset "ds" into h5py container "dest". +# Preserves variable attributes by indexing them with an integer if necessary. +def mergeDatasets(ds,dest): + + if not ds.name in dest: + # If nothing there, just blind copy + dest.copy(ds,ds.name) + + else: + # Get existing dataset & its attributes from destination, and delete old version. + ds_existing = dest[ds.name][...] + ds_existing_attrs = dict(dest[ds.name].attrs) + del dest[ds.name] + + # Merge to new version + ds_new = np.vstack((ds_existing,ds[...])) + + # Create new destination dataset with compression ON + ds_new_obj=dest.create_dataset(ds.name,data=ds_new,compression='gzip',compression_opts=4) + + # Preserve attributes from prior instance. + for a in ds_existing_attrs: + ds_new_obj.attrs[a] = ds_existing_attrs[a] + + # Merge in attributes from the new dataset. + # No fancy array indexing for dataset attributes (it's probably unnecessary). + # Just append indices to the attribute names if they aren't constant. + for a in ds.attrs: + # Copy new attr + if not a in ds_new_obj.attrs: ds_new_obj.attrs[a]=ds.attrs[a] + # Merge existing attr if not constant + elif ds_new_obj.attrs[a] != ds.attrs[a]: + ds_new_obj.attrs[a+"_%i" % ds_new_obj.shape[0]]=ds.attrs[a] + return + + + +# Merge group attributes by converting variable attributes into arrays. +def mergeGroupAttrs(obj,dest): + # Loop attributes + for key in obj.attrs.keys(): + + if 'attr_'+key in dest: + # Merge attribute into existing dataset + assert(isinstance(dest['attr_'+key],h5py.Dataset)) + merged_attr_data=np.array(np.hstack(( dest['attr_'+key][...].ravel(), obj.attrs[key] ))) + # Delete previous dataset + del dest['attr_'+key] + else: + merged_attr_data=np.array([obj.attrs[key]]) + + # Make attribute into dataset + new_attr_ds = dest.create_dataset('attr_'+key, data=merged_attr_data,\ + compression='gzip',compression_opts=4) + # Set a flag telling us that this dataset came from a group attribute. + # This will avoid problems in the recursive merging of other datasets. + new_attr_ds.attrs['from_group_attribute']=1 + return + + + + +# Recursive function which duplicates the structure of HDF5 files +def mergeStructure(source, dest): + for obj in source.values(): + if isinstance(obj,h5py.Dataset): + # Copy/merge dataset + mergeDatasets(obj,dest) + elif isinstance(obj,h5py.Group): + # Recurse into group + grpname = obj.name + # Check for pre-existing + if grpname in dest: destgrp=dest[grpname] + else: destgrp=dest.create_group(grpname) + mergeStructure(obj, destgrp) + # Merge group attributes - particularly useful for Extra_PVs group. + if len(obj.attrs.keys()) > 0: mergeGroupAttrs(obj,destgrp) + return + + + +# Post-merge cleanup : any group attributes that were constant will be arrays +# full of the same value. We can get rid of these and convert them back to 1D +# attributes to save space. +def collapseConstantAttrArrays(name, obj): + if 'from_group_attribute' in obj.attrs: + if obj.attrs['from_group_attribute']==1: + data=obj[...] + if np.all(data==data.ravel()[0]): # If all entries in array identical... + newname = ''.join(os.path.basename(name).split('attr_')) + if 'mca_' in name: + # If the attr array comes from the MCA record, put it with + # one of the MCA arrays. + p=[ mds for mds in obj.parent.values() if 'mca_' in mds.name\ + and not 'attr' in mds.name ] + if len(p)==0: p=obj.parent + else: p=p[0] + newname = ''.join(os.path.basename(newname).split('mca_')) + else: + # Otherwise store it at parent group level (ie Extra PVs group) + p=obj.parent + + p.attrs[newname]=data.ravel()[0] + obj.attrs['from_group_attribute']=-1 # flag for deletion + return + + + +# Post-merge cleanup : delete group attr arrays flagged with from_group_attribute==-1 +def deleteFlaggedAttrArrays(h5obj): + ndeleted=0 + for obj in h5obj.values(): + if isinstance(obj,h5py.Dataset): + if 'from_group_attribute' in obj.attrs: + if obj.attrs['from_group_attribute']==-1: + #print 'delete',obj.name + del h5obj[obj.name] + ndeleted+=1 + elif isinstance(obj,h5py.Group): + ndeleted+=deleteFlaggedAttrArrays(obj) + return ndeleted + + +# Merge groups containing 2D MCA arrays into 3D arrays. +def make3dMCAArrays(dest, groups): + + # Get the motor name and positioner values + motor = groups[0].split('.VAL=')[0] + print "The positioner for the scan was",motor + positionerValues=np.array([float(g.split('.VAL=')[1]) for g in groups]) + + # Record separate dataset with positioner values from group names, + # in case this varies from the detector value. + dsetname=motor+'.VAL from group names' + dest.create_dataset(dsetname,data=positionerValues,\ + compression='gzip',compression_opts=4) + print "Wrote",dsetname + + # Loop through arrays inside each group + for dsname in dest[groups[0]].keys(): + orig_shape = dest[groups[0]+'/'+dsname].shape + dtype = dest[groups[0]+'/'+dsname].dtype + new_shape = tuple(np.hstack((orig_shape[0],len(groups),orig_shape[1:])).astype(int)) + print "\tMerging",dsname,new_shape,dtype + ds_new = dest.create_dataset('mca_'+dsname,shape=new_shape,dtype=dtype,\ + compression='gzip',compression_opts=4) + + # Loop through source groups. Keep indexing same as for positionerValues! + for i in range(len(groups)): + copy_from = dest[groups[i]+'/'+dsname][...] + ds_new[:,i,...]=copy_from + + # Copy attribute + for a in dest[groups[i]+'/'+dsname].attrs: + if not a in ds_new.attrs: ds_new.attrs[a] = dest[groups[i]+'/'+dsname].attrs[a] + + + # Delete original groups + for g in groups: + del dest[g] + + + +if __name__=='__main__': + # Parse command line inputs + if len(sys.argv)<4: + print "Usage: %s hdf5-file hdf5-file [hdf5-file...] output-hdf5-file" % sys.argv[0] + exit() + elif sys.argv[1].strip().lower() == '-h': + print "Usage: %s hdf5-file hdf5-file [hdf5-file...] output-hdf5-file" % sys.argv[0] + exit() + + inputFiles = sys.argv[1:-1] + outputFile = sys.argv[-1] + print "\n=== Starting Merge_MCA_HDF5_Scans ===" + print "Reading %i input files -- writing to %s" % (len(inputFiles),outputFile) + + # Open output file - overwrite existing + hout = h5py.File(outputFile,'w') + + # Loop input files + print "\nMerging matching arrays and groups between files..." + for inputFilename in inputFiles: + print "Reading",inputFilename,"..." + hin = h5py.File(inputFilename,'r') + mergeStructure(hin, hout) + hin.close() + # End reading input + + print "\nDone merging top level arrays between files" + + # Force file sync! + hout.close() + hout = h5py.File(outputFile,'a') + + ''' Now we can check if there is another level to merge. + The first pass of merging will combine MCAs between the hdf5 files. + If each file is a 2-D record with an outer positioner, then those positions + will remain seperated into seperate groups i.e. 'MOTOR.VAL=POS'. + + This part of the code can combined these groups into a single 3-D array. + ''' + groups = [ key for key in hout.keys() if '.VAL=' in key ] + if len(groups) > 0: + print "\nMerging inner scans: %i positions detected" % len(groups) + make3dMCAArrays(hout, groups) + + # Now go through the file and collapse any attribute arrays that are constant. + # We do this mainly because the heirarchy could get very messy without it. + print "\nCleaning up..." + hout.visititems(collapseConstantAttrArrays) + print "Collapsed %i attribute arrays with constant value" % deleteFlaggedAttrArrays(hout) + + hout.close() + print "\nDone!" From cd783187b3deb8ba2840322213a3d0dc7c6cc247 Mon Sep 17 00:00:00 2001 From: Daniel Duke Date: Fri, 24 Feb 2017 15:49:53 -0600 Subject: [PATCH 3/5] Support for unequal array sizes This is a more powerful version of ALK's Combine_Scans script. Can now handle unequal array sizes by RHS-padding with NaN. This is useful when a scan gets aborted or scans of unequal length are merged. --- Library/Merge_MCA_HDF5_Scans.py | 39 ++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/Library/Merge_MCA_HDF5_Scans.py b/Library/Merge_MCA_HDF5_Scans.py index 0069b8e..5bf5540 100644 --- a/Library/Merge_MCA_HDF5_Scans.py +++ b/Library/Merge_MCA_HDF5_Scans.py @@ -5,11 +5,34 @@ Daniel Duke, ES Started February 23, 2017 +Changelog: + Feb 24, 2017: Added support for non matching array sizes to mergeDatasets & make3dMCAArrays. + It pads with NaN on the right side of the array. + This is useful when a scan gets aborted or scans of unequal length are merged. + """ import h5py, sys, os import numpy as np +# Add new array ds to array ds_existing. +# Expand or pad with NaN where sizes don't match. +def combineArrays(ds_existing, ds): + # Merge to new version + if ds_existing.shape[-1] == len(ds): + # Size matches! + ds_new = np.vstack((ds_existing,ds[...])) + elif ds_existing.shape[-1] > len(ds): + # new data smaller than expected + padding = np.tile(np.nan,np.abs(ds_existing.shape[-1] - len(ds))) + ds_new = np.vstack((ds_existing, np.hstack(( ds[...], padding )))) + elif ds_existing.shape[-1] < len(ds): + # new data larger than expected + padding = np.tile(np.nan,(ds_existing.shape[0],np.abs(ds_existing.shape[-1] - len(ds)))) + ds_existing = np.vstack(( ds_existing, padding )) + ds_new = np.vstack((ds_existing,ds[...])) + + return ds_new # Merge h5py dataset "ds" into h5py container "dest". # Preserves variable attributes by indexing them with an integer if necessary. @@ -25,8 +48,8 @@ def mergeDatasets(ds,dest): ds_existing_attrs = dict(dest[ds.name].attrs) del dest[ds.name] - # Merge to new version - ds_new = np.vstack((ds_existing,ds[...])) + # Merge arrays + ds_new = combineArrays(ds_existing, ds) # Create new destination dataset with compression ON ds_new_obj=dest.create_dataset(ds.name,data=ds_new,compression='gzip',compression_opts=4) @@ -151,17 +174,21 @@ def make3dMCAArrays(dest, groups): # Loop through arrays inside each group for dsname in dest[groups[0]].keys(): - orig_shape = dest[groups[0]+'/'+dsname].shape - dtype = dest[groups[0]+'/'+dsname].dtype + # Find group with biggest array + totalArrSz = [np.product(dest[groups[j]+'/'+dsname].shape) for j in range(len(groups))] + j = np.where(totalArrSz == np.max(totalArrSz))[0][0] + orig_shape = dest[groups[j]+'/'+dsname].shape + dtype = dest[groups[j]+'/'+dsname].dtype new_shape = tuple(np.hstack((orig_shape[0],len(groups),orig_shape[1:])).astype(int)) print "\tMerging",dsname,new_shape,dtype ds_new = dest.create_dataset('mca_'+dsname,shape=new_shape,dtype=dtype,\ compression='gzip',compression_opts=4) - + if not 'S' in str(dtype): ds_new[...]=np.nan # fill numeric arrays with NaN in empty places + # Loop through source groups. Keep indexing same as for positionerValues! for i in range(len(groups)): copy_from = dest[groups[i]+'/'+dsname][...] - ds_new[:,i,...]=copy_from + ds_new[:copy_from.shape[0],i,...]=copy_from # Allow variable number of points in scans in files, pad with NaN on RHS. # Copy attribute for a in dest[groups[i]+'/'+dsname].attrs: From cf7b36ac7fb1cd0f1c5995bb8c9563a233fc2f39 Mon Sep 17 00:00:00 2001 From: Daniel Duke Date: Sat, 25 Feb 2017 20:19:19 -0600 Subject: [PATCH 4/5] Bug fix in combineArrays --- Library/Merge_MCA_HDF5_Scans.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Library/Merge_MCA_HDF5_Scans.py b/Library/Merge_MCA_HDF5_Scans.py index 5bf5540..9f5596c 100644 --- a/Library/Merge_MCA_HDF5_Scans.py +++ b/Library/Merge_MCA_HDF5_Scans.py @@ -9,6 +9,7 @@ Feb 24, 2017: Added support for non matching array sizes to mergeDatasets & make3dMCAArrays. It pads with NaN on the right side of the array. This is useful when a scan gets aborted or scans of unequal length are merged. + Feb 25, 2017: Bug fix in combineArrays """ @@ -29,7 +30,7 @@ def combineArrays(ds_existing, ds): elif ds_existing.shape[-1] < len(ds): # new data larger than expected padding = np.tile(np.nan,(ds_existing.shape[0],np.abs(ds_existing.shape[-1] - len(ds)))) - ds_existing = np.vstack(( ds_existing, padding )) + ds_existing = np.hstack(( ds_existing, padding )) ds_new = np.vstack((ds_existing,ds[...])) return ds_new @@ -122,7 +123,7 @@ def collapseConstantAttrArrays(name, obj): if 'from_group_attribute' in obj.attrs: if obj.attrs['from_group_attribute']==1: data=obj[...] - if np.all(data==data.ravel()[0]): # If all entries in array identical... + if len(np.unique(data))<2: # If all entries in array identical... newname = ''.join(os.path.basename(name).split('attr_')) if 'mca_' in name: # If the attr array comes from the MCA record, put it with @@ -136,7 +137,8 @@ def collapseConstantAttrArrays(name, obj): # Otherwise store it at parent group level (ie Extra PVs group) p=obj.parent - p.attrs[newname]=data.ravel()[0] + if len(data.shape)==1: p.attrs[newname]=data[0] + else: p.attrs[newname]=data[0,0] obj.attrs['from_group_attribute']=-1 # flag for deletion return @@ -149,7 +151,7 @@ def deleteFlaggedAttrArrays(h5obj): if isinstance(obj,h5py.Dataset): if 'from_group_attribute' in obj.attrs: if obj.attrs['from_group_attribute']==-1: - #print 'delete',obj.name + #print '\tdeleting',obj.name del h5obj[obj.name] ndeleted+=1 elif isinstance(obj,h5py.Group): @@ -249,6 +251,12 @@ def make3dMCAArrays(dest, groups): # We do this mainly because the heirarchy could get very messy without it. print "\nCleaning up..." hout.visititems(collapseConstantAttrArrays) + + # Force file sync! + hout.close() + hout = h5py.File(outputFile,'a') + + # Delete superfluous arrays print "Collapsed %i attribute arrays with constant value" % deleteFlaggedAttrArrays(hout) hout.close() From 445cc5685fd267a29d242826e8a843e5ce06e06f Mon Sep 17 00:00:00 2001 From: Daniel Duke Date: Sat, 11 Mar 2017 23:26:33 +1100 Subject: [PATCH 5/5] Add sorting to group names Inner positioner group names generated by MDA2HDF5_Fluorescence are now sorted by a float conversion of everything that comes after the = sign in the HDF group name. This makes it easy to graph results using HDFView's Image mode because the scans are pre-sorted. --- Library/Merge_MCA_HDF5_Scans.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Library/Merge_MCA_HDF5_Scans.py b/Library/Merge_MCA_HDF5_Scans.py index 9f5596c..63a92c9 100644 --- a/Library/Merge_MCA_HDF5_Scans.py +++ b/Library/Merge_MCA_HDF5_Scans.py @@ -10,6 +10,7 @@ It pads with NaN on the right side of the array. This is useful when a scan gets aborted or scans of unequal length are merged. Feb 25, 2017: Bug fix in combineArrays + Mar 11, 2017: Update sorting algorithm for key names for inner positioner """ @@ -35,6 +36,7 @@ def combineArrays(ds_existing, ds): return ds_new + # Merge h5py dataset "ds" into h5py container "dest". # Preserves variable attributes by indexing them with an integer if necessary. def mergeDatasets(ds,dest): @@ -190,7 +192,8 @@ def make3dMCAArrays(dest, groups): # Loop through source groups. Keep indexing same as for positionerValues! for i in range(len(groups)): copy_from = dest[groups[i]+'/'+dsname][...] - ds_new[:copy_from.shape[0],i,...]=copy_from # Allow variable number of points in scans in files, pad with NaN on RHS. + # Allow variable number of points in scans in files, pad with NaN on RHS. + ds_new[:copy_from.shape[0],i,...]=copy_from.copy() # Copy attribute for a in dest[groups[i]+'/'+dsname].attrs: @@ -242,9 +245,13 @@ def make3dMCAArrays(dest, groups): This part of the code can combined these groups into a single 3-D array. ''' - groups = [ key for key in hout.keys() if '.VAL=' in key ] + groups = [ key for key in hout.keys() if '.VAL=' in key ] # positions to merge + if len(groups) > 0: print "\nMerging inner scans: %i positions detected" % len(groups) + # Sorting + grpsort = np.argsort([ float(nm.split('=')[-1]) for nm in groups ]) + groups = [ groups[i] for i in grpsort ] make3dMCAArrays(hout, groups) # Now go through the file and collapse any attribute arrays that are constant.