diff --git a/jazzhands/data/test_vectorized_timing.py b/jazzhands/data/test_vectorized_timing.py new file mode 100644 index 0000000..f99ea97 --- /dev/null +++ b/jazzhands/data/test_vectorized_timing.py @@ -0,0 +1,43 @@ +# finds WWZ with and without vectorization +import numpy as np +from tqdm import tqdm +import time +# np.random.seed(sum(map(ord, "wavelets are cool!"))) + +from jazzhands import wavelets + +config = { + "num_steps" : 100, + "num_omegas" : 300, + "num_taus" : 100, + "tmax" : 1, + "xmax" : 100, + "omega_max" : 25 * np.pi, + "tau_max" : 2 * np.pi, + "num_unvec_runs" : 1, + "num_vec_runs" : 1 +} + +for k in config: + exec(k + " = " + str(config.get(k))) + +t = np.linspace(-tmax, tmax, num_steps) +x = np.random.uniform(-xmax, xmax, num_steps) +omegas = np.random.uniform(0, omega_max, (num_omegas,)) +taus = np.random.uniform(0, tau_max, (num_taus,)) + +transformer = wavelets.WaveletTransformer(t, x, omegas=omegas, taus=taus) +print("Starting unvectorized computation of WWT of {0} datapoints over {1} omega values and {2} tau values".format(num_steps, num_omegas, num_taus)) +t0 = time.time() +for _ in range(num_unvec_runs): + transformer.compute_wavelet(vectorized=False) +t1 = time.time() + +print("Starting vectorized computation of WWT of {0} datapoints over {1} omega values and {2} tau values".format(num_steps, num_omegas, num_taus)) +t2 = time.time() +for _ in range(num_vec_runs): + transformer.compute_wavelet(vectorized=True) +t3 = time.time() + +print("Unvectorized computation took {0} seconds for {1} computation(s), for an average of {2} seconds per computation.".format(t1 - t0, num_unvec_runs, (t1 - t0) / num_unvec_runs)) +print("Vectorized computation took {0} seconds for {1} computation(s), for an average of {2} seconds per computation.".format(t3 - t2, num_vec_runs, (t3 - t2) / num_vec_runs)) diff --git a/jazzhands/tests/test_wavelets.py b/jazzhands/tests/test_wavelets.py index a162ad0..9c51aed 100644 --- a/jazzhands/tests/test_wavelets.py +++ b/jazzhands/tests/test_wavelets.py @@ -1,6 +1,8 @@ import os import shutil import unittest +import numpy as np +from tqdm import tqdm from jazzhands import wavelets @@ -15,27 +17,25 @@ def tearDown(self): if os.path.exists(self.outdir): shutil.rmtree(self.outdir) - def test_correct_constructor(self): - wavelets.WaveletTransformer(func_list=[0, 0], f1=[0, 0], data=[0, 0], - time=[0, 0], omegas=[1, 2], taus=[0, 0.5], c=0.0125) - - def test_wavelet_transform_no_gap(self): - import numpy as np - from tqdm import tqdm - - def make_test_time_signal(): - # Can switch this out for something else later - num_steps = 100 - t = np.linspace(-1, 1, num_steps) - freq = 1 - phase = np.pi / 4 - x = np.cos(2 * np.pi * freq * t + phase) - return t, x - - t, x = make_test_time_signal() - omegas = [1.1, 1.4] + def make_test_time_signal(self): + # Can switch this out for something else later + num_steps = 100 + t = np.linspace(-1, 1, num_steps) + freq = 1 + phase = np.pi / 4 + x = np.cos(2 * np.pi * freq * t + phase) + return t, x + + def get_params_and_data(self): + t, x = self.make_test_time_signal() + omegas = [1.1, 1.4, 1.8, 2.0] taus = [0.0, 0.5] c = 1 / (8 * np.pi ** 2) + return t, x, omegas, taus, c + + def make_transformer(self): + # not its own test, but is executed by later tests + t, x, omegas, taus, c = self.get_params_and_data() transformer = wavelets.WaveletTransformer( time = t, @@ -44,6 +44,15 @@ def make_test_time_signal(): taus = taus, c = c ) + + return transformer + + def test_make_transformer(self): + self.make_transformer() + + def test_wavelet_transform_no_gap(self): + self.transformer = self.make_transformer() + t, x, omegas, taus, c = self.get_params_and_data() def slow_wwz_and_checks(t, x, omegas, taus, c): # naive/not well encapsulated/not parallelized implementation, just to check correctness @@ -53,37 +62,39 @@ def slow_wwz_and_checks(t, x, omegas, taus, c): for i, omega in enumerate(omegas): for j, tau in enumerate(taus): zs = omega * (t - tau) - basis_functions = [lambda z: np.ones(np.shape(z)), np.sin, np.cos] + basis_functions = [np.ones_like, np.sin, np.cos] basis_evals = np.array([f(zs) for f in basis_functions]) weights = np.exp(-c * zs ** 2) weights /= np.sum(weights) npoints = 1 / np.inner(weights, weights) - assert np.allclose(weights, transformer._weight_alpha(t, omega, tau, c)) - assert np.isclose(transformer._n_points(weights), npoints) + assert np.allclose(weights, self.transformer._weight_alpha(t, omega, tau, c)) + assert np.isclose(self.transformer._n_points(weights), npoints) S = np.array([[np.sum(basis_evals[k] * basis_evals[l] * weights) for k in range(3)] for l in range(3)]) - assert np.allclose(transformer._S_matrix(basis_evals, weights), S) + assert np.allclose(self.transformer._S_matrix(basis_evals, weights), S) sols = np.sum(basis_evals * weights * x, axis=1) yc = np.linalg.inv(S).dot(sols) y = yc.dot(basis_evals) - y_package, yc_package = transformer._y_fit(basis_evals, weights, x) + y_package, yc_package = self.transformer._y_fit(basis_evals, weights, x) assert np.allclose(yc, yc_package) assert np.allclose(y, y_package) vx = np.sum(weights * x * x) - np.dot(weights, x) ** 2 vy = np.sum(weights * y * y) - np.dot(weights, y) ** 2 - assert np.isclose(vx, transformer._weight_var_x(np.ones(len(x)), weights, x)) - assert np.isclose(vy, transformer._weight_var_y(basis_evals, np.ones(len(x)), weights, x)[0]) + + assert np.isclose(vx, self.transformer._weight_var_x(np.ones(len(x)), weights, x)) + assert np.isclose(vy, self.transformer._weight_var_y(basis_evals, np.ones(len(x)), weights, x)[0]) transforms[i][j] = (npoints - 3) * vy / (2 * (vx - vy)) - assert np.isclose(transforms[i][j], transformer._wavelet_transform(True, tau, omega)[0]) + assert np.isclose(transforms[i][j], self.transformer._wavelet_transform(True, tau, omega)[0]) return transforms - wwz, wwa = transformer.compute_wavelet() - wwz_check = slow_wwz_and_checks(t, x, omegas, taus, c) - - assert np.allclose(wwz, wwz_check) + for v in [False, True]: + wwz, wwa = self.transformer.compute_wavelet(vectorized=v) + wwz_check = slow_wwz_and_checks(t, x, omegas, taus, c) + + assert np.allclose(wwz, wwz_check) # def test_omegas_taus_from_min_max_nu(self): # wav = wavelets.WaveletTransformer(func_list=[0, 0], f1=[0, 0], data=[0, 0], time=[0, 0], omegas=[0, 0], taus=[0, 0], c=0.0125) diff --git a/jazzhands/wavelets.py b/jazzhands/wavelets.py index 0914764..2f2b4ce 100644 --- a/jazzhands/wavelets.py +++ b/jazzhands/wavelets.py @@ -187,7 +187,7 @@ def _weight_alpha(self, time, omega, tau, c): time : array-like times of observations - omega : float + omega : array-like angular frequency in radians per unit time. tau : float @@ -237,8 +237,8 @@ def _inner_product(self, *arrs): Inner product of func1 and func2 """ - einsum_arg = 'i,' * (len(arrs) - 1) + 'i' - return np.einsum(einsum_arg, *[a.flatten() for a in arrs]) + from functools import reduce + return np.sum(reduce(lambda a, b: a * b, arrs)) def _inner_product_vector(self, func_vals, weights, data): """ @@ -290,7 +290,7 @@ def _S_matrix(self, func_vals, weights): S = np.zeros((l, l)) for i in range(l): for j in range(i + 1): - S[i][j] = self._inner_product(func_vals[i], func_vals[j], weights) + S[i][j] = (func_vals[i] * func_vals[j]).dot(weights) S = S + S.T - np.diag(S.diagonal()) return S @@ -372,8 +372,8 @@ def _y_fit(self, func_vals, weights, data): The coefficients returned by `coeffs` """ y_coeffs = self._calc_coeffs(func_vals, weights, data) - - return y_coeffs.dot(func_vals), y_coeffs + y_fit = np.tensordot(y_coeffs, func_vals, axes=1) + return y_fit, y_coeffs def _weight_var_y(self, func_vals, f1_vals, weights, data): """ @@ -402,6 +402,7 @@ def _weight_var_y(self, func_vals, f1_vals, weights, data): Coefficients from `coeffs` """ + assert func_vals.shape == (3, len(data)) y_f, y_coeffs = self._y_fit(func_vals, weights, data) return self._inner_product(y_f, y_f, weights) - np.power( @@ -451,7 +452,7 @@ def _wavelet_transform(self, exclude, tau, omega): return ((num_pts - 3.0) * y_var) / (2.0 * (x_var - y_var)), np.sqrt( np.power(y_coeff_rows[1], 2.0) + np.power(y_coeff_rows[2], 2.0)) - def compute_wavelet(self, exclude=True, parallel=False, n_processes=False): + def compute_wavelet(self, exclude=True, parallel=False, n_processes=False, vectorized=False): """ Calculate the Weighted Wavelet Transform of the object. Note that this can be incredibly slow for a large enough data array and a dense @@ -486,6 +487,7 @@ def compute_wavelet(self, exclude=True, parallel=False, n_processes=False): raise ValueError("Please set omegas or nus or scales") from tqdm.autonotebook import tqdm + from functools import partial if parallel: import multiprocessing as mp @@ -506,14 +508,68 @@ def compute_wavelet(self, exclude=True, parallel=False, n_processes=False): wwz = transform[:, :, 0] wwa = transform[:, :, 1] - else: - transform = np.array([[ - self._wavelet_transform(exclude, tau, omega) - for omega in self._omegas - ] for tau in tqdm(self._taus)]) - - wwz = transform[:, :, 0].T - wwa = transform[:, :, 1].T + else: + if vectorized: + from scipy.special import softmax + omegas, taus, time = np.meshgrid(self._omegas, self._taus, self._time, indexing='ij') + zvals_grid = omegas * (time - taus) + weights_grid = softmax(-self._c * np.power(zvals_grid, 2.), axis=-1) + npoints = 1 / np.sum(weights_grid ** 2, axis=-1) + func_vals = np.array([func(zvals_grid) for func in [np.ones_like, np.sin, np.cos]]) + x_var = weights_grid.dot(self._data ** 2) - (weights_grid.dot(self._data)) ** 2 + + S_matrices = np.array([[self._S_matrix(func_vals[:,i,j,:], weights_grid[i,j]) for j in range(len(self._taus))] for i in range(len(self._omegas))]) + phis = np.array([[self._inner_product_vector(func_vals[:,i,j,:], weights_grid[i,j], self._data) for j in range(len(self._taus))] for i in range(len(self._omegas))]) + + for i, omega in enumerate(tqdm(self._omegas)): + for j, tau in enumerate(self._taus): + local_weights = self._weight_alpha(self._time, omega, tau, self._c) + local_z = omega * (self._time - tau) + local_func_vals = np.array([np.ones_like(local_z), np.sin(local_z), np.cos(local_z)]) + local_S = self._S_matrix(local_func_vals, local_weights) + try: + np.linalg.solve(local_S, phis[i,j]) + except np.linalg.LinAlgError: + print(i, j, omega, tau) + self._calc_coeffs(local_func_vals, local_weights, self._data) + + + y_coeffs = np.array([[np.linalg.solve(S_matrices[i,j], phis[i,j]).T[0] for j in range(len(self._taus))] for i in range(len(self._omegas))]) + y_fit = np.einsum('jki,ijkl->jkl', y_coeffs, func_vals) + + get_wvar_y = lambda weights, fit_vals: self._inner_product(fit_vals, fit_vals, weights) - np.power(self._inner_product(fit_vals, weights), 2.0) + y_var = np.array([[get_wvar_y(weights_grid[i,j], y_fit[i,j]) for j in range(len(self._taus))] for i in range(len(self._omegas))]) + + for i, omega in enumerate(self._omegas): + for j, tau in enumerate(self._taus): + assert np.isclose(self._n_points(weights_grid[i][j]), npoints[i][j]) + weight_vector = self._weight_alpha(self._time, omega, tau, self._c) + z = omega * (self._time - tau) + local_func_vals = np.array([np.ones_like(z), np.sin(z), np.cos(z)]) + assert np.allclose(func_vals[:,i,j,:], local_func_vals) + assert np.allclose(weight_vector, weights_grid[i][j]) + assert np.allclose(self._weight_var_x(np.ones_like(self._time), weight_vector, self._data), x_var[i][j]) + assert np.allclose(S_matrices[i,j], self._S_matrix(local_func_vals, weight_vector)) + local_coeffs = self._calc_coeffs(local_func_vals, weight_vector, self._data) + assert np.allclose(y_coeffs[i,j], local_coeffs) + local_fit = np.tensordot(local_coeffs, local_func_vals, axes=1) + local_yvar = self._weight_var_y(local_func_vals, np.ones_like(self._data), weight_vector, self._data)[0] + local_yvar_2 = self._inner_product(local_fit, local_fit, weight_vector) - np.power(self._inner_product(local_fit, weight_vector), 2.0) + assert np.allclose(local_fit, y_fit[i,j]) + assert np.isclose(local_yvar, local_yvar_2) + assert np.isclose(local_yvar, y_var[i,j]) + + wwz = ((npoints - 3.0) * y_var) / (2.0 * (x_var - y_var)) + wwa = np.sqrt(np.power(y_coeffs[:,:,1], 2.0) + np.power(y_coeffs[:,:,2], 2.0)) + + else: + transform = np.array([[ + self._wavelet_transform(exclude, tau, omega) + for omega in self._omegas + ] for tau in tqdm(self._taus)]) + + wwz = transform[:, :, 0].T + wwa = transform[:, :, 1].T return wwz, wwa diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 0000000..5618a85 --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,45 @@ +@ARTICLE{foster96, + author = {{Foster}, Grant}, + title = "{Wavelets for period analysis of unevenly sampled time series}", + journal = {\aj}, + keywords = {STARS: OSCILLATIONS, METHODS: NUMERICAL}, + year = "1996", + month = "Oct", + volume = {112}, + pages = {1709}, + doi = {10.1086/118137}, + adsurl = {https://ui.adsabs.harvard.edu/abs/1996AJ....112.1709F}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@ARTICLE{torrence98, + author = {{Torrence}, Christopher and {Compo}, Gilbert P.}, + title = "{A Practical Guide to Wavelet Analysis.}", + journal = {Bulletin of the American Meteorological Society}, + year = 1998, + month = jan, + volume = {79}, + number = {1}, + pages = {61-78}, + doi = {10.1175/1520-0477(1998)079<0061:APGTWA>2.0.CO;2}, + adsurl = {https://ui.adsabs.harvard.edu/abs/1998BAMS...79...61T}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@ARTICLE{dornwallenstein20b, + author = {{Dorn-Wallenstein}, Trevor Z. and {Levesque}, Emily M. and + {Neugent}, Kathryn F. and {Davenport}, James R.~A. and + {Morris}, Brett M. and {Gootkin}, Keyan}, + title = "{Short Term Variability of Evolved Massive Stars with TESS II: A New Class of Cool, Pulsating Supergiants}", + journal = {arXiv e-prints}, + keywords = {Astrophysics - Solar and Stellar Astrophysics}, + year = 2020, + month = aug, + eid = {arXiv:2008.11723}, + pages = {arXiv:2008.11723}, +archivePrefix = {arXiv}, + eprint = {2008.11723}, + primaryClass = {astro-ph.SR}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200811723D}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} \ No newline at end of file diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 0000000..9fa9fab --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,68 @@ +--- +title: 'JazzHands: A Python package for fast and accurate wavelet transformations on unevenly sampled data' +tags: + - Python + - astronomy + - time series + - wavelets + - light curves +authors: + - name: Trevor Z. Dorn-Wallenstein^[corresponding author.] + orcid: 0000-0003-3601-3180 + affiliation: "1" # + - name: Author 2 + orcid: XXXX-XXXX-XXXX-XXXX + affiliation: "2, 3" + - name: Author 3 + affiliation: 3 +affiliations: + - name: University of Washington + index: 1 + - name: Institution Name + index: 2 + - name: Independent Researcher + index: 3 +date: XX October 2020 +bibliography: paper.bib + +--- + +# Summary + +Time series photometry of astronomical objects encodes information about +these objects' physical state, and how they change on human timescales. Recent +rapid cadence observations from missions like TESS and *Kepler* have been used to +to discover exoplanets around nearby stars, measure stellar pulsations, +characterize variability in active galactic nuclei, and more. Wavelet analysis +is an extension of Fourier analysis that allows astronomers to examine the +frequency content of a light curve as a function of time. As such, it is an +ideal tool for studying real data, which often contain information that is +otherwise impossible to extract from the Fourier transform alone. + +# Statement of need + +`JazzHands` is a Python package for wavelet transformations. Typical wavelet +methods [@torrence98] assume that the data are regularly sampled and have +no gaps. This fact has hindered astronomical applications of wavelets. Data +taken from the ground are often irregularly sampled and have daily and seasonal +gaps due to the rotation and orbit of the Earth. While less problematic, space- +based data still have gaps due to downlink times, and time-stamp adjustments to +account for time-of-arrival differences along the spacecraft orbit induce +sampling irregularities. Interpolative methods may be used to offset these +effects, but introduce their own systematics into the wavelet transform. + +@foster96 introduced a mathematical formulation of the wavelet transform +using the Morlet wavelet that accounts for the effects of irregular sampling +and gaps, and avoids the need for interpolation. `JazzHands` is the first +open-source Python implementation of the @foster96 method, and has already +been used in scientific applications [@dornwallenstein20b]. The combination of +speed, customizability, and helpful features for users who are new to wavelet +analysis will enable exciting new science on the wealth of high precision light +curves that are currently being released. + +# Acknowledgements + +This project was developed in part at the online.tess.science meeting, which +took place globally in 2020 September. + +# References \ No newline at end of file