Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 27 additions & 29 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Makefile for Arkouda
# Directory of the first makefile make read (normally this file), resolved to a real path.
ARKOUDA_PROJECT_DIR := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))
# $(dir ...) leaves a trailing slash; strip it so later paths are joined with an explicit '/'.
ARKOUDA_PROJECT_DIR := $(patsubst %/,%,$(ARKOUDA_PROJECT_DIR))

PROJECT_NAME := arkouda
# Source tree location, relative to the project directory computed above.
ARKOUDA_SOURCE_DIR := $(ARKOUDA_PROJECT_DIR)/src
Expand Down Expand Up @@ -114,7 +115,7 @@ deps-download-source: zmq-download-source hdf5-download-source arrow-download-so

# Dependency locations, both rooted at the project directory:
#   DEP_INSTALL_DIR - where dependencies are installed
#   DEP_BUILD_DIR   - where dependency sources are downloaded/unpacked and built
# ARKOUDA_PROJECT_DIR has no trailing slash, so every component is joined with '/'.
DEP_DIR := dep
DEP_INSTALL_DIR := $(ARKOUDA_PROJECT_DIR)/$(DEP_DIR)
DEP_BUILD_DIR := $(ARKOUDA_PROJECT_DIR)/$(DEP_DIR)/build

ZMQ_VER := 4.3.5
ZMQ_NAME_VER := zeromq-$(ZMQ_VER)
Expand All @@ -128,8 +129,8 @@ zmq-download-source:
ifeq (,$(wildcard ${ZMQ_BUILD_DIR}*/.*))
# If the tar.gz not found, download it
ifeq (,$(wildcard ${DEP_BUILD_DIR}/${ZMQ_NAME_VER}*.tar.gz))
cd $(DEP_BUILD_DIR) && curl -sL $(ZMQ_LINK) | tar xz
# Otherwise just unzip it
cd $(DEP_BUILD_DIR) && curl -sL $(ZMQ_LINK) | tar xz
# Otherwise just unzip it
else
cd $(DEP_BUILD_DIR) && tar -xzf $(ZMQ_NAME_VER)*.tar.gz
endif
Expand Down Expand Up @@ -166,18 +167,18 @@ hdf5-download-source:
ifeq (,$(wildcard ${HDF5_BUILD_DIR}*/.*))
# If the tar.gz not found, download it
ifeq (,$(wildcard ${DEP_BUILD_DIR}/$(HDF5_NAME_VER)*tar.gz))
cd $(DEP_BUILD_DIR) && curl -sL $(HDF5_LINK) | tar xz
# Otherwise just unzip it
cd $(DEP_BUILD_DIR) && curl -sL $(HDF5_LINK) | tar xz
# Otherwise just unzip it
else
cd $(DEP_BUILD_DIR) && tar -xzf $(HDF5_NAME_VER)*.tar.gz
endif
endif
endif

# Configure, build and install HDF5 from the directory matching $(HDF5_BUILD_DIR)*
# into $(HDF5_INSTALL_DIR), then append an add-path entry for it to Makefile.paths.
install-hdf5: hdf5-download-source
	@echo "Installing HDF5"
	rm -rf $(HDF5_INSTALL_DIR)
	mkdir -p $(DEP_INSTALL_DIR) $(DEP_BUILD_DIR)
	cd $(HDF5_BUILD_DIR)* \
	  && ./configure --prefix=$(HDF5_INSTALL_DIR) --enable-optimization=high --enable-hl \
	  && make \
	  && make install
	echo '$$(eval $$(call add-path,$(HDF5_INSTALL_DIR)))' >> Makefile.paths

Expand Down Expand Up @@ -218,9 +219,8 @@ arrow-download-source:
mkdir -p $(ARROW_DEP_DIR)
cd $(ARROW_BUILD_DIR)/cpp/thirdparty/ && ./download_dependencies.sh $(ARROW_DEP_DIR) > $(DEP_BUILD_DIR)/arrow_exports.sh
endif

rm -fr $(ARROW_BUILD_DIR)


install-arrow: arrow-download-source
@echo "Installing Apache Arrow/Parquet"
Expand All @@ -230,16 +230,16 @@ install-arrow: arrow-download-source

cd $(DEP_BUILD_DIR) && tar -xvf $(ARROW_NAME_VER).tar.gz
mkdir -p $(ARROW_BUILD_DIR)/cpp/build-release

cd $(DEP_BUILD_DIR) && . ./arrow_exports.sh && cd $(ARROW_BUILD_DIR)/cpp/build-release && cmake -S $(ARROW_BUILD_DIR)/cpp .. -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_INSTALL_PREFIX=$(ARROW_INSTALL_DIR) -DCMAKE_BUILD_TYPE=Release -DARROW_PARQUET=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_BROTLI=ON -DARROW_WITH_BZ2=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_ZLIB=ON -DARROW_WITH_ZSTD=ON -DARROW_DEPENDENCY_SOURCE=$(ARROW_DEPENDENCY_SOURCE) $(ARROW_OPTIONS) && make -j$(NUM_CORES)

cd $(ARROW_BUILD_DIR)/cpp/build-release && make install
echo '$$(eval $$(call add-path,$(ARROW_INSTALL_DIR)))' >> Makefile.paths

echo '$$(eval $$(call add-path,$(ARROW_INSTALL_DIR)))' >> Makefile.paths

# Remove Arrow artifacts: anything matching apache-arrow* or arrow-apache-arrow*
# under $(DEP_BUILD_DIR), the $(ARROW_DEP_DIR) directory, and the generated
# arrow_exports.sh. Each path is removed exactly once.
arrow-clean:
	rm -rf $(DEP_BUILD_DIR)/apache-arrow*
	rm -rf $(DEP_BUILD_DIR)/arrow-apache-arrow*
	rm -rf $(ARROW_DEP_DIR)
	rm -rf $(DEP_BUILD_DIR)/arrow_exports.sh

Expand Down Expand Up @@ -268,23 +268,23 @@ ICONV_LINK := https://ftp.gnu.org/pub/gnu/libiconv/libiconv-$(ICONV_VER).tar.gz

iconv-download-source:
mkdir -p $(DEP_BUILD_DIR)

#If the build directory does not exist, create it
ifeq (,$(wildcard ${ICONV_BUILD_DIR}*/.*))
# If the tar.gz not found, download it
ifeq (,$(wildcard ${DEP_BUILD_DIR}/libiconv-${ICONV_VER}.tar.gz))
cd $(DEP_BUILD_DIR) && curl -sL $(ICONV_LINK) | tar xz
# Otherwise just unzip it
cd $(DEP_BUILD_DIR) && curl -sL $(ICONV_LINK) | tar xz
# Otherwise just unzip it
else
cd $(DEP_BUILD_DIR) && tar -xzf libiconv-$(ICONV_VER).tar.gz
endif
endif
endif

# Configure, build and install libiconv from $(ICONV_BUILD_DIR) into
# $(ICONV_INSTALL_DIR), then append an add-path entry for it to Makefile.paths.
install-iconv: iconv-download-source
	@echo "Installing iconv"
	rm -rf $(ICONV_INSTALL_DIR)
	mkdir -p $(DEP_INSTALL_DIR) $(DEP_BUILD_DIR)
	cd $(ICONV_BUILD_DIR) \
	  && ./configure --prefix=$(ICONV_INSTALL_DIR) \
	  && make \
	  && make install
	echo '$$(eval $$(call add-path,$(ICONV_INSTALL_DIR)))' >> Makefile.paths

Expand All @@ -299,23 +299,23 @@ LIBIDN_LINK := https://ftp.gnu.org/gnu/libidn/libidn2-$(LIBIDN_VER).tar.gz

idn2-download-source:
mkdir -p $(DEP_BUILD_DIR)

#If the build directory does not exist, create it
ifeq (,$(wildcard $(LIBIDN_BUILD_DIR)*/.*))
# If the tar.gz is not found, download it
# If the tar.gz is not found, download it
ifeq (,$(wildcard ${DEP_BUILD_DIR}/libidn2-$(LIBIDN_VER)*.tar.gz))
cd $(DEP_BUILD_DIR) && curl -sL $(LIBIDN_LINK) | tar xz
# Otherwise just unzip it
cd $(DEP_BUILD_DIR) && curl -sL $(LIBIDN_LINK) | tar xz
# Otherwise just unzip it
else
cd $(DEP_BUILD_DIR) && tar -xzf libidn2-$(LIBIDN_VER)*.tar.gz
endif
endif
endif

# Configure, build and install libidn2 from $(LIBIDN_BUILD_DIR) into
# $(LIBIDN_INSTALL_DIR), then append an add-path entry for it to Makefile.paths.
install-idn2: idn2-download-source
	@echo "Installing libidn2"
	rm -rf $(LIBIDN_INSTALL_DIR)
	mkdir -p $(DEP_INSTALL_DIR) $(DEP_BUILD_DIR)
	cd $(LIBIDN_BUILD_DIR) && ./configure --prefix=$(LIBIDN_INSTALL_DIR) && make && make install
	echo '$$(eval $$(call add-path,$(LIBIDN_INSTALL_DIR)))' >> Makefile.paths

Expand All @@ -327,7 +327,7 @@ BLOSC_INSTALL_DIR := $(DEP_INSTALL_DIR)/c-blosc-install

blosc-download-source:
mkdir -p $(DEP_BUILD_DIR)

#If the build directory does not exist, create it
ifeq (,$(wildcard $(BLOSC_BUILD_DIR)/.*))
cd $(DEP_BUILD_DIR) && git clone https://github.com/Blosc/c-blosc2.git
Expand Down Expand Up @@ -760,8 +760,6 @@ benchmark:
python3 -m pytest -c benchmark.ini --benchmark-autosave --benchmark-storage=file://benchmark_v2/.benchmarks --size=$(size_bm) --benchmark-json=$(out)
python3 benchmark_v2/reformat_benchmark_results.py --benchmark-data $(out)



# Print the value of $(VERSION).
version:
	@echo $(VERSION)

Expand Down
98 changes: 87 additions & 11 deletions arkouda/numpy/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -1772,8 +1772,27 @@ def where(
return create_pdarray(type_cast(str, repMsg))


# histogram helper
def _pyrange(count):
"""Simply makes a range(count). For use in histogram* functions
that, like in numpy, have a 'range' parameter."""
return range(count)


# histogram helper, to avoid typechecker errors
def _conv_dim(sampleDim, rangeDim):
if rangeDim:
return (rangeDim[0], rangeDim[1])
else:
return (sampleDim.min(), sampleDim.max())


@typechecked
def histogram(pda: pdarray, bins: int_scalars = 10) -> Tuple[pdarray, pdarray]:
def histogram(
pda: pdarray,
bins: int_scalars = 10,
range: Optional[Tuple[numeric_scalars, numeric_scalars]] = None,
) -> Tuple[pdarray, pdarray]:
"""
Compute a histogram of evenly spaced bins over the range of an array.

Expand All @@ -1785,6 +1804,11 @@ def histogram(pda: pdarray, bins: int_scalars = 10) -> Tuple[pdarray, pdarray]:
bins : int_scalars, default=10
The number of equal-size bins to use (default: 10)

range : (minVal, maxVal), optional
The range of the values to count.
Values outside of this range are dropped.
By default, all values are counted.

Returns
-------
(pdarray, Union[pdarray, int64 or float64])
Expand All @@ -1807,6 +1831,7 @@ def histogram(pda: pdarray, bins: int_scalars = 10) -> Tuple[pdarray, pdarray]:
Notes
-----
The bins are evenly spaced in the interval [pda.min(), pda.max()].
If range parameter is provided, the interval is [range[0], range[1]].

Examples
--------
Expand All @@ -1828,15 +1853,25 @@ def histogram(pda: pdarray, bins: int_scalars = 10) -> Tuple[pdarray, pdarray]:
"""
if bins < 1:
raise ValueError("bins must be 1 or greater")
b = linspace(pda.min(), pda.max(), bins + 1)
repMsg = generic_msg(cmd="histogram", args={"array": pda, "bins": bins})

minVal, maxVal = _conv_dim(pda, range)

b = linspace(minVal, maxVal, bins + 1)
repMsg = generic_msg(
cmd="histogram", args={"array": pda, "bins": bins, "minVal": minVal, "maxVal": maxVal}
)
return create_pdarray(type_cast(str, repMsg)), b


# Typechecking removed due to circular dependencies with arrayview
# @typechecked
def histogram2d(
x: pdarray, y: pdarray, bins: Union[int_scalars, Sequence[int_scalars]] = 10
x: pdarray,
y: pdarray,
bins: Union[int_scalars, Sequence[int_scalars]] = 10,
range: Optional[
Tuple[Tuple[numeric_scalars, numeric_scalars], Tuple[numeric_scalars, numeric_scalars]]
] = None,
) -> Tuple[pdarray, pdarray, pdarray]:
"""
Compute the bi-dimensional histogram of two data samples with evenly spaced bins
Expand All @@ -1855,6 +1890,11 @@ def histogram2d(
If [int, int], the number of bins in each dimension (nx, ny = bins).
Defaults to 10

range : ((xMin, xMax), (yMin, yMax)), optional
The ranges of the values in x and y to count.
Values outside of these ranges are dropped.
By default, all values are counted.

Returns
-------
hist : pdarray
Expand Down Expand Up @@ -1887,6 +1927,8 @@ def histogram2d(
-----
The x bins are evenly spaced in the interval [x.min(), x.max()]
and y bins are evenly spaced in the interval [y.min(), y.max()].
If range parameter is provided, the intervals are given
by range[0] for x and range[1] for y.

Examples
--------
Expand All @@ -1909,11 +1951,28 @@ def histogram2d(
if len(bins) != 2:
raise ValueError("Sequences of bins must contain two elements (num_x_bins, num_y_bins)")
x_bins, y_bins = bins
x_bins, y_bins = int(x_bins), int(y_bins)
if x_bins < 1 or y_bins < 1:
raise ValueError("bins must be 1 or greater")
x_bin_boundaries = linspace(x.min(), x.max(), x_bins + 1)
y_bin_boundaries = linspace(y.min(), y.max(), y_bins + 1)
repMsg = generic_msg(cmd="histogram2D", args={"x": x, "y": y, "xBins": x_bins, "yBins": y_bins})

xMin, xMax = _conv_dim(x, range[0] if range else None)
yMin, yMax = _conv_dim(y, range[1] if range else None)

x_bin_boundaries = linspace(xMin, xMax, x_bins + 1)
y_bin_boundaries = linspace(yMin, yMax, y_bins + 1)
repMsg = generic_msg(
cmd="histogram2D",
args={
"x": x,
"y": y,
"xBins": x_bins,
"yBins": y_bins,
"xMin": xMin,
"xMax": xMax,
"yMin": yMin,
"yMax": yMax,
},
)
return (
create_pdarray(type_cast(str, repMsg)).reshape(x_bins, y_bins),
x_bin_boundaries,
Expand All @@ -1922,7 +1981,9 @@ def histogram2d(


def histogramdd(
sample: Sequence[pdarray], bins: Union[int_scalars, Sequence[int_scalars]] = 10
sample: Sequence[pdarray],
bins: Union[int_scalars, Sequence[int_scalars]] = 10,
range: Optional[Sequence[Optional[Tuple[numeric_scalars, numeric_scalars]]]] = None,
) -> Tuple[pdarray, Sequence[pdarray]]:
"""
Compute the multidimensional histogram of data in sample with evenly spaced bins.
Expand All @@ -1938,6 +1999,11 @@ def histogramdd(
If [int, int, ...], the number of bins in each dimension (nx, ny, ... = bins).
Defaults to 10

range : Sequence[optional (minVal, maxVal)], optional
The ranges of the values to count for each array in sample.
Values outside of these ranges are dropped.
By default, all values are counted.

Returns
-------
hist : pdarray
Expand All @@ -1964,6 +2030,7 @@ def histogramdd(
Notes
-----
The bins for each dimension, m, are evenly spaced in the interval [m.min(), m.max()]
or in the interval determined by range[dimension], if provided.

Examples
--------
Expand Down Expand Up @@ -1996,17 +2063,26 @@ def histogramdd(
if any(b < 1 for b in bins):
raise ValueError("bins must be 1 or greater")

if not range:
range = [None for pda in sample]
elif len(range) != num_dims:
raise ValueError("The range sequence contains a different number of elements than the sample")

range_list = [_conv_dim(sample[i], range[i]) for i in _pyrange(num_dims)]

bins = list(bins) if isinstance(bins, tuple) else bins
sample = list(sample) if isinstance(sample, tuple) else sample
bin_boundaries = [linspace(a.min(), a.max(), b + 1) for a, b in zip(sample, bins)]
bins_pda = array(bins)[::-1]
dim_prod = (cumprod(bins_pda) // bins_pda)[::-1]
bin_boundaries = [linspace(r[0], r[1], b + 1) for r, b in zip(range_list, bins)]
d_curr, d_next = 1, 1
dim_prod = [(d_curr := d_next, d_next := d_curr * int(v))[0] for v in bins[::-1]][::-1] # noqa: F841
repMsg = generic_msg(
cmd="histogramdD",
args={
"sample": sample,
"num_dims": num_dims,
"bins": bins,
"rangeMin": [r[0] for r in range_list],
"rangeMax": [r[1] for r in range_list],
"dim_prod": dim_prod,
"num_samples": sample[0].size,
},
Expand Down
Loading
Loading