UWB-Biocomputing · marinarosenwald · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025
diff --git a/GNN/TemporalGAT/Data_Preprocessing/DataAquisitionPreprocessingPipeline.png b/GNN/TemporalGAT/Data_Preprocessing/DataAquisitionPreprocessingPipeline.png
diff --git a/GNN/TemporalGAT/Data_Preprocessing/getBinnedBurstInfo.py b/GNN/TemporalGAT/Data_Preprocessing/getBinnedBurstInfo.py
@@ -0,0 +1,118 @@
+'''
+GETBINNEDBURSTINFO Python implementation of GETBINNEDBURSTINFO.m:
+    GETBINNEDBURSTINFO Identify bursts using binned data and save burst info
+
+    This function reads the BrainGrid result (.h5 file), specifically
+    spikesHistory (contains spike Count for every 10ms bin; it is also the name of the dataset being read), 
+    identify burst using burst threshold of 0.5 spikes/sec/neuron,
+    calculate burst information from those bins above the threshold,
+    and store them in allBinnedBurstInfo.csv.
+
+    List below are those burst information and how they are calculated:
+    ID                  - the burst ID
+    startBin#           - The starting bin number is the bin that is above the threshold.
+    endBin#             - The ending bin number is the next bin that is above the threshold.
+    width(bins)         - With the starting/ending bin number, the difference is calculated as the width 
+                            representing the number of bins in between each burst above the threshold.
+    totalSpikeCount     - the sum of all the spikesPerBin between the starting and ending bin number
+    peakBin             - startBin# + peakHeightIndex(index of the bin with the peakHeight) + 1
+    peakHeight(spikes)  - calculated by finding the bin with the highest spike count between each burst
+    Interval(bins)      - difference between current peakHeight and previous peakHeight
+                        - previous peakHeight is initialized as 0
+
+
+    Author:   Jewel Y. Lee (jewel.yh.lee@gmail.com)
+    Last updated: 02/10/2022 added Documentation, cleaned redundant code
+    Last updated by: Vu T. Tieu (vttieu1995@gmail.com)
+
+Syntax: getBinnedBurstInfo(h5dir)
+
+Input:
+datasetName - Graphitti dataset the entire path can be used; for example
+              '/CSSDIV/research/biocomputing/data/2025/tR_1.0--fE_0.90_10000'
+
+Output:
+  - <allbinnedBurstInfo.csv> - burst metadata. The columns of the csv are:
+                               burst ID, startBin#, endBin#, width(bins),
+                               totalSpikeCount, peakBin,
+                               peakHeight(spikes), Interval(bins)
+
+Author: Marina Rosenwald (marinarosenwald@gmail.com)
+Last updated: 12/16/2025    
+'''
+
+
+import h5py
+import numpy as np
+import csv
+import os
+import sys
+import time
+
+def getBinnedBurstInfo(datasetName, adjustedBurstThreshold=0.005):
+
+    with h5py.File(f"{datasetName}.h5", "r") as f:
+        if '/neuronTypes' in f:
+            nNeurons = f['/neuronTypes'].shape[0]
+        elif '/spikesProbedNeurons' in f:
+            nNeurons = f['/spikesProbedNeurons'].shape[0]
+        else:
+            raise KeyError("No neuronTypes or spikesProbedNeurons dataset found")
+
+        spikesPerBin = f['/spikesHistory'][:].astype(float)
+    nNeurons=10000
+    spikesPerNeuronPerBin = spikesPerBin / nNeurons
+    binsAboveThreshold = np.where(spikesPerNeuronPerBin >= adjustedBurstThreshold)[0]
+
+    if len(binsAboveThreshold) == 0:
+        print("No bursts detected above threshold")
+        return
+
+    burstBoundaries = np.where(np.diff(binsAboveThreshold) > 1)[0]
+    burstBoundaries = np.concatenate(([0], burstBoundaries, [len(binsAboveThreshold) - 1]))
+    nBursts = len(burstBoundaries) - 1
+
+    print(nBursts)
+
+    previousPeak = 0
+    out_path = os.path.join(datasetName, "allBinnedBurstInfo.csv")
+    with open(out_path, "w", newline="") as csvfile:
+        writer = csv.writer(csvfile)
+        writer.writerow([
+            "ID", "startBin#", "endBin#", "width(bins)", "totalSpikeCount",
+            "peakBin", "peakHeight(spikes)", "Interval(bins)"
+        ])
+        for iBurst in range(nBursts):
+            startBinNum = binsAboveThreshold[burstBoundaries[iBurst]]
+            endBinNum   = binsAboveThreshold[burstBoundaries[iBurst + 1]]
+            burstSlice  = spikesPerBin[startBinNum:endBinNum+1]
+            width = (endBinNum - startBinNum) + 1
+
+            if burstSlice.size == 0:
+                continue 
+            totalSpikeCount = np.sum(burstSlice)
+            peakHeightIndex = np.argmax(burstSlice)
+            peakHeight = burstSlice[peakHeightIndex]
+            peakBin = startBinNum + peakHeightIndex
+            burstPeakInterval = peakBin - previousPeak
+            writer.writerow([
+                iBurst + 1, startBinNum, endBinNum, width,
+                int(totalSpikeCount), peakBin, int(peakHeight), burstPeakInterval
+            ])
+            previousPeak = peakBin
+
+
+    print(f"Saved burst info to {out_path}")
+
+if __name__ == "__main__": 
+    # example execution: python ./getBinnedBurstSpikes.py /CSSDIV/research/biocomputing/data/2025/tR_1.0--fE_0.90_10000
+    h5dir = sys.argv[1]
+
+    start = time.time()
+    getBinnedBurstInfo(h5dir)
+    end = time.time()
+
+    elapsed_time = end - start
+
+
+    print('Elapsed time: ' + str(elapsed_time) + ' seconds')
diff --git a/GNN/TemporalGAT/Data_Preprocessing/getBinnedBurstSpikes.py b/GNN/TemporalGAT/Data_Preprocessing/getBinnedBurstSpikes.py
@@ -0,0 +1,97 @@
+'''
+GETBINNEDBURSTSPIKES Python implementation of GETBINNEDBURSTSPIKES.m:
+    GETBINNEDBURSTSPIKES Get spikes within every burst and save as flattened images of arrays
+    of bursts. These are the results of "spike train binning", mentioned in lee-thesis18 Figure 4.1.
+    Spike train binning adds up the binary values for every. 
+    Read BrainGrid result dataset "spikesProbedNeurons" to retrieve location 
+    of each spike that happended within a burst, and save as flatten image arrays.
+
+    Example of flattened image arrays in a 5x5 matrix:
+        1 1 2 0 1                       
+        1 2 2 1 1
+        1 3 2 1 0
+        2 2 2 3 1
+        0 1 3 2 2
+    Each cell in a matrix this function output represents a pixel in which the number indicates how bright it is
+    with 5 being the highest value. The values in each cell is the result of "spike train binning" (this functions main algorithm)
+    This value can be understood as sum of all spike rate within 100 time step at that specific
+    x and y location.  
+
+    Author:   Jewel Y. Lee (jewel.yh.lee@gmail.com)
+    Updated: 02/22/2022  added documentation and changed output filetype to a single .mat file
+    Updated by: Vu T. Tieu (vttieu1995@gmail.com)
+    Updated: May 2023 minor tweaks
+    Updated by: Michael Stiber
+
+Syntax: getBinnedBurstSpikes(h5dir)
+
+Input:
+datasetName - Graphitti dataset the entire path can be used; for example
+              '/CSSDIV/research/biocomputing/data/2025/tR_1.0--fE_0.90_10000'
+
+Output:
+  - allFrames.npz - collection of flattened image arrays of a burst 
+
+Author: Marina Rosenwald (marinarosenwald@gmail.com)
+Last updated: 12/16/2025
+'''
+
+import h5py
+import numpy as np
+import os
+import sys
+import time
+
+def getBinnedBurstSpikes(h5dir, n_neurons=10000, head=10, tail=0, binSize=10.0, timeStepSize=0.1):
+
+    timeStepsPerBin = int(binSize / timeStepSize) 
+
+    print("Starting read of spikes from HDF5 file...")
+    with h5py.File(f"{h5dir}.h5", "r") as f:
+        spikesProbedNeurons = f['/spikesProbedNeurons'][:]  
+    print("done")
+
+    burstInfoPath = os.path.join(h5dir, "allBinnedBurstInfo.csv")
+    burstInfo = np.loadtxt(burstInfoPath, delimiter=",", skiprows=1, usecols=(1,2,3,4,5,6,7))
+    nBursts = burstInfo.shape[0]
+
+    allFrames = []
+
+    print("Starting burst analysis...")
+    for iBurst in range(nBursts):
+        burstStartBinNum = int(burstInfo[iBurst,0])
+        burstEndBinNum   = int(burstInfo[iBurst,1])
+        width            = int(burstInfo[iBurst,2])
+
+        startingTimeStep = (burstStartBinNum - head + 1) * timeStepsPerBin
+        endingTimeStep   = (burstEndBinNum + tail) * timeStepsPerBin
+
+        frame = np.zeros((n_neurons, width + head + tail), dtype=np.uint16)
+
+        for jNeuron in range(n_neurons):
+            neuronSpikes = spikesProbedNeurons[:, jNeuron]
+            spike_mask = (neuronSpikes >= startingTimeStep) & (neuronSpikes <= endingTimeStep)
+            spike_times = neuronSpikes[spike_mask]
+            for curSpikeTime in spike_times:
+                bin_idx = int((curSpikeTime - startingTimeStep) / timeStepsPerBin)
+                if 0 <= bin_idx < frame.shape[1]:
+                    frame[jNeuron, bin_idx] += 1
+
+        allFrames.append(frame)
+        print(f"\tdone with burst {iBurst+1}/{nBursts}")
+
+    np.savez(os.path.join(h5dir, "allFrames.npz"), *allFrames)
+    print(f"Saved allFrames.npz with {len(allFrames)} bursts and {n_neurons} neurons per frame.")
+
+if __name__ == "__main__": 
+    # example execution: python ./getBinnedBurstSpikes.py /CSSDIV/research/biocomputing/data/2025/tR_1.0--fE_0.90_10000
+    h5dir = sys.argv[1]
+
+    start = time.time()
+    getBinnedBurstSpikes(h5dir)
+    end = time.time()
+
+    elapsed_time = end - start
+
+
+    print('Elapsed time: ' + str(elapsed_time) + ' seconds')