diff --git a/jazzhands/data/test_vectorized_timing.py b/jazzhands/data/test_vectorized_timing.py
new file mode 100644
index 0000000..f99ea97
--- /dev/null
+++ b/jazzhands/data/test_vectorized_timing.py
@@ -0,0 +1,43 @@
+# finds WWZ with and without vectorization
+import numpy as np
+from tqdm import tqdm
+import time
+# np.random.seed(sum(map(ord, "wavelets are cool!")))
+
+from jazzhands import wavelets
+
+config = {
+    "num_steps" : 100,
+    "num_omegas" : 300,
+    "num_taus" : 100,
+    "tmax" : 1,
+    "xmax" : 100,
+    "omega_max" : 25 * np.pi,
+    "tau_max" : 2 * np.pi,
+    "num_unvec_runs" : 1,
+    "num_vec_runs" : 1
+}
+
+for k in config:
+    exec(k + " = " + str(config.get(k)))
+
+t = np.linspace(-tmax, tmax, num_steps)
+x = np.random.uniform(-xmax, xmax, num_steps)
+omegas = np.random.uniform(0, omega_max, (num_omegas,))
+taus = np.random.uniform(0, tau_max, (num_taus,))
+
+transformer = wavelets.WaveletTransformer(t, x, omegas=omegas, taus=taus)
+print("Starting unvectorized computation of WWT of {0} datapoints over {1} omega values and {2} tau values".format(num_steps, num_omegas, num_taus))
+t0 = time.time()
+for _ in range(num_unvec_runs):
+    transformer.compute_wavelet(vectorized=False)
+t1 = time.time()
+
+print("Starting vectorized computation of WWT of {0} datapoints over {1} omega values and {2} tau values".format(num_steps, num_omegas, num_taus))
+t2 = time.time()
+for _ in range(num_vec_runs):
+    transformer.compute_wavelet(vectorized=True)
+t3 = time.time()
+
+print("Unvectorized computation took {0} seconds for {1} computation(s), for an average of {2} seconds per computation.".format(t1 - t0, num_unvec_runs, (t1 - t0) / num_unvec_runs))
+print("Vectorized computation took {0} seconds for {1} computation(s), for an average of {2} seconds per computation.".format(t3 - t2, num_vec_runs, (t3 - t2) / num_vec_runs))
diff --git a/jazzhands/tests/test_wavelets.py b/jazzhands/tests/test_wavelets.py
index a162ad0..9c51aed 100644
--- a/jazzhands/tests/test_wavelets.py
+++ b/jazzhands/tests/test_wavelets.py
@@ -1,6 +1,8 @@
 import os
 import shutil
 import unittest
+import numpy as np
+from tqdm import tqdm
 
 from jazzhands import wavelets
 
@@ -15,27 +17,25 @@ def tearDown(self):
         if os.path.exists(self.outdir):
             shutil.rmtree(self.outdir)
 
-    def test_correct_constructor(self):
-        wavelets.WaveletTransformer(func_list=[0, 0], f1=[0, 0], data=[0, 0],
-                                    time=[0, 0], omegas=[1, 2], taus=[0, 0.5], c=0.0125)
-
-    def test_wavelet_transform_no_gap(self):
-        import numpy as np
-        from tqdm import tqdm
-
-        def make_test_time_signal():
-            # Can switch this out for something else later
-            num_steps = 100
-            t = np.linspace(-1, 1, num_steps)
-            freq = 1
-            phase = np.pi / 4
-            x = np.cos(2 * np.pi * freq * t + phase)
-            return t, x
-
-        t, x = make_test_time_signal()
-        omegas = [1.1, 1.4]
+    def make_test_time_signal(self):
+        # Can switch this out for something else later
+        num_steps = 100
+        t = np.linspace(-1, 1, num_steps)
+        freq = 1
+        phase = np.pi / 4
+        x = np.cos(2 * np.pi * freq * t + phase)
+        return t, x
+
+    def get_params_and_data(self):
+        t, x = self.make_test_time_signal()
+        omegas = [1.1, 1.4, 1.8, 2.0]
         taus = [0.0, 0.5]
         c = 1 / (8 * np.pi ** 2)
+        return t, x, omegas, taus, c
+
+    def make_transformer(self):
+        # not its own test, but is executed by later tests
+        t, x, omegas, taus, c = self.get_params_and_data()
 
         transformer = wavelets.WaveletTransformer(
             time = t, 
@@ -44,6 +44,15 @@ def make_test_time_signal():
             taus = taus, 
             c = c
         )
+        
+        return transformer
+
+    def test_make_transformer(self):
+        self.make_transformer()
+
+    def test_wavelet_transform_no_gap(self):
+        self.transformer = self.make_transformer()
+        t, x, omegas, taus, c = self.get_params_and_data()
 
         def slow_wwz_and_checks(t, x, omegas, taus, c):
             # naive/not well encapsulated/not parallelized implementation, just to check correctness
@@ -53,37 +62,39 @@ def slow_wwz_and_checks(t, x, omegas, taus, c):
             for i, omega in enumerate(omegas):
                 for j, tau in enumerate(taus):
                     zs = omega * (t - tau)
-                    basis_functions = [lambda z: np.ones(np.shape(z)), np.sin, np.cos]
+                    basis_functions = [np.ones_like, np.sin, np.cos]
                     basis_evals = np.array([f(zs) for f in basis_functions])
                     weights = np.exp(-c * zs ** 2)
                     weights /= np.sum(weights)
                     npoints = 1 / np.inner(weights, weights)
-                    assert np.allclose(weights, transformer._weight_alpha(t, omega, tau, c))
-                    assert np.isclose(transformer._n_points(weights), npoints)
+                    assert np.allclose(weights, self.transformer._weight_alpha(t, omega, tau, c))
+                    assert np.isclose(self.transformer._n_points(weights), npoints)
 
                     S = np.array([[np.sum(basis_evals[k] * basis_evals[l] * weights) for k in range(3)] for l in range(3)])
-                    assert np.allclose(transformer._S_matrix(basis_evals, weights), S)
+                    assert np.allclose(self.transformer._S_matrix(basis_evals, weights), S)
 
                     sols = np.sum(basis_evals * weights * x, axis=1)
                     yc = np.linalg.inv(S).dot(sols)
                     y = yc.dot(basis_evals)
-                    y_package, yc_package = transformer._y_fit(basis_evals, weights, x)
+                    y_package, yc_package = self.transformer._y_fit(basis_evals, weights, x)
                     assert np.allclose(yc, yc_package)
                     assert np.allclose(y, y_package)
                     
                     vx = np.sum(weights * x * x) - np.dot(weights, x) ** 2
                     vy = np.sum(weights * y * y) - np.dot(weights, y) ** 2
-                    assert np.isclose(vx, transformer._weight_var_x(np.ones(len(x)), weights, x))
-                    assert np.isclose(vy, transformer._weight_var_y(basis_evals, np.ones(len(x)), weights, x)[0])
+                    
+                    assert np.isclose(vx, self.transformer._weight_var_x(np.ones(len(x)), weights, x))
+                    assert np.isclose(vy, self.transformer._weight_var_y(basis_evals, np.ones(len(x)), weights, x)[0])
 
                     transforms[i][j] = (npoints - 3) * vy / (2 * (vx - vy))
-                    assert np.isclose(transforms[i][j], transformer._wavelet_transform(True, tau, omega)[0])
+                    assert np.isclose(transforms[i][j], self.transformer._wavelet_transform(True, tau, omega)[0])
             return transforms
 
-        wwz, wwa = transformer.compute_wavelet()
-        wwz_check = slow_wwz_and_checks(t, x, omegas, taus, c)
-        
-        assert np.allclose(wwz, wwz_check)
+        for v in [False, True]:
+            wwz, wwa = self.transformer.compute_wavelet(vectorized=v)
+            wwz_check = slow_wwz_and_checks(t, x, omegas, taus, c)
+            
+            assert np.allclose(wwz, wwz_check)
 
     # def test_omegas_taus_from_min_max_nu(self):
     #     wav = wavelets.WaveletTransformer(func_list=[0, 0], f1=[0, 0], data=[0, 0], time=[0, 0], omegas=[0, 0], taus=[0, 0], c=0.0125)
diff --git a/jazzhands/wavelets.py b/jazzhands/wavelets.py
index 0914764..2f2b4ce 100644
--- a/jazzhands/wavelets.py
+++ b/jazzhands/wavelets.py
@@ -187,7 +187,7 @@ def _weight_alpha(self, time, omega, tau, c):
         time : array-like
             times of observations
 
-        omega : float
+        omega : array-like
             angular frequency in radians per unit time.
 
         tau : float
@@ -237,8 +237,8 @@ def _inner_product(self, *arrs):
             Inner product of func1 and func2
 
         """
-        einsum_arg = 'i,' * (len(arrs) - 1) + 'i'
-        return np.einsum(einsum_arg, *[a.flatten() for a in arrs])
+        from functools import reduce
+        return np.sum(reduce(lambda a, b: a * b, arrs))
 
     def _inner_product_vector(self, func_vals, weights, data):
         """
@@ -290,7 +290,7 @@ def _S_matrix(self, func_vals, weights):
         S = np.zeros((l, l))
         for i in range(l):
             for j in range(i + 1):
-                S[i][j] = self._inner_product(func_vals[i], func_vals[j], weights)
+                S[i][j] = (func_vals[i] * func_vals[j]).dot(weights)
 
         S = S + S.T - np.diag(S.diagonal())
         return S
@@ -372,8 +372,8 @@ def _y_fit(self, func_vals, weights, data):
             The coefficients returned by `coeffs`
         """
         y_coeffs = self._calc_coeffs(func_vals, weights, data)
-
-        return y_coeffs.dot(func_vals), y_coeffs
+        y_fit = np.tensordot(y_coeffs, func_vals, axes=1)
+        return y_fit, y_coeffs
 
     def _weight_var_y(self, func_vals, f1_vals, weights, data):
         """
@@ -402,6 +402,7 @@ def _weight_var_y(self, func_vals, f1_vals, weights, data):
             Coefficients from `coeffs`
 
         """
+        assert func_vals.shape == (3, len(data))
         y_f, y_coeffs = self._y_fit(func_vals, weights, data)
 
         return self._inner_product(y_f, y_f, weights) - np.power(
@@ -451,7 +452,7 @@ def _wavelet_transform(self, exclude, tau, omega):
         return ((num_pts - 3.0) * y_var) / (2.0 * (x_var - y_var)), np.sqrt(
             np.power(y_coeff_rows[1], 2.0) + np.power(y_coeff_rows[2], 2.0))
 
-    def compute_wavelet(self, exclude=True, parallel=False, n_processes=False):
+    def compute_wavelet(self, exclude=True, parallel=False, n_processes=False, vectorized=False):
         """
         Calculate the Weighted Wavelet Transform of the object. Note that this
         can be incredibly slow for a large enough data array and a dense
@@ -486,6 +487,7 @@ def compute_wavelet(self, exclude=True, parallel=False, n_processes=False):
             raise ValueError("Please set omegas or nus or scales")
 
         from tqdm.autonotebook import tqdm
+        from functools import partial
 
         if parallel:
             import multiprocessing as mp
@@ -506,14 +508,68 @@ def compute_wavelet(self, exclude=True, parallel=False, n_processes=False):
                 wwz = transform[:, :, 0]
                 wwa = transform[:, :, 1]
 
-        else:
-            transform = np.array([[
-                self._wavelet_transform(exclude, tau, omega)
-                for omega in self._omegas
-            ] for tau in tqdm(self._taus)])
-
-            wwz = transform[:, :, 0].T
-            wwa = transform[:, :, 1].T
+        else: 
+            if vectorized:
+                from scipy.special import softmax
+                omegas, taus, time = np.meshgrid(self._omegas, self._taus, self._time, indexing='ij')
+                zvals_grid = omegas * (time - taus)
+                weights_grid = softmax(-self._c * np.power(zvals_grid, 2.), axis=-1)
+                npoints = 1 / np.sum(weights_grid ** 2, axis=-1)
+                func_vals = np.array([func(zvals_grid) for func in [np.ones_like, np.sin, np.cos]])
+                x_var = weights_grid.dot(self._data ** 2) - (weights_grid.dot(self._data)) ** 2
+                
+                S_matrices = np.array([[self._S_matrix(func_vals[:,i,j,:], weights_grid[i,j]) for j in range(len(self._taus))] for i in range(len(self._omegas))])
+                phis = np.array([[self._inner_product_vector(func_vals[:,i,j,:], weights_grid[i,j], self._data) for j in range(len(self._taus))] for i in range(len(self._omegas))])
+
+                for i, omega in enumerate(tqdm(self._omegas)):
+                    for j, tau in enumerate(self._taus):
+                        local_weights = self._weight_alpha(self._time, omega, tau, self._c)
+                        local_z = omega * (self._time - tau)
+                        local_func_vals = np.array([np.ones_like(local_z), np.sin(local_z), np.cos(local_z)])
+                        local_S = self._S_matrix(local_func_vals, local_weights)
+                        try:
+                            np.linalg.solve(local_S, phis[i,j])
+                        except np.linalg.LinAlgError:
+                            print(i, j, omega, tau)
+                            self._calc_coeffs(local_func_vals, local_weights, self._data)
+                        
+                
+                y_coeffs = np.array([[np.linalg.solve(S_matrices[i,j], phis[i,j]).T[0] for j in range(len(self._taus))] for i in range(len(self._omegas))])
+                y_fit = np.einsum('jki,ijkl->jkl', y_coeffs, func_vals)
+                
+                get_wvar_y = lambda weights, fit_vals: self._inner_product(fit_vals, fit_vals, weights) - np.power(self._inner_product(fit_vals, weights), 2.0)
+                y_var = np.array([[get_wvar_y(weights_grid[i,j], y_fit[i,j]) for j in range(len(self._taus))] for i in range(len(self._omegas))])
+
+                for i, omega in enumerate(self._omegas):
+                    for j, tau in enumerate(self._taus):
+                        assert np.isclose(self._n_points(weights_grid[i][j]), npoints[i][j])
+                        weight_vector = self._weight_alpha(self._time, omega, tau, self._c)
+                        z = omega * (self._time - tau)
+                        local_func_vals = np.array([np.ones_like(z), np.sin(z), np.cos(z)])
+                        assert np.allclose(func_vals[:,i,j,:], local_func_vals)
+                        assert np.allclose(weight_vector, weights_grid[i][j])
+                        assert np.allclose(self._weight_var_x(np.ones_like(self._time), weight_vector, self._data), x_var[i][j])
+                        assert np.allclose(S_matrices[i,j], self._S_matrix(local_func_vals, weight_vector))
+                        local_coeffs = self._calc_coeffs(local_func_vals, weight_vector, self._data)
+                        assert np.allclose(y_coeffs[i,j], local_coeffs)
+                        local_fit = np.tensordot(local_coeffs, local_func_vals, axes=1)
+                        local_yvar = self._weight_var_y(local_func_vals, np.ones_like(self._data), weight_vector, self._data)[0]
+                        local_yvar_2 = self._inner_product(local_fit, local_fit, weight_vector) - np.power(self._inner_product(local_fit, weight_vector), 2.0)
+                        assert np.allclose(local_fit, y_fit[i,j])
+                        assert np.isclose(local_yvar, local_yvar_2)
+                        assert np.isclose(local_yvar, y_var[i,j])  
+                
+                wwz = ((npoints - 3.0) * y_var) / (2.0 * (x_var - y_var))
+                wwa = np.sqrt(np.power(y_coeffs[:,:,1], 2.0) + np.power(y_coeffs[:,:,2], 2.0))
+
+            else:
+                transform = np.array([[
+                    self._wavelet_transform(exclude, tau, omega)
+                    for omega in self._omegas
+                ] for tau in tqdm(self._taus)])
+
+                wwz = transform[:, :, 0].T
+                wwa = transform[:, :, 1].T
 
         return wwz, wwa
 
diff --git a/paper/paper.bib b/paper/paper.bib
new file mode 100644
index 0000000..5618a85
--- /dev/null
+++ b/paper/paper.bib
@@ -0,0 +1,45 @@
+@ARTICLE{foster96,
+       author = {{Foster}, Grant},
+        title = "{Wavelets for period analysis of unevenly sampled time series}",
+      journal = {\aj},
+     keywords = {STARS: OSCILLATIONS, METHODS: NUMERICAL},
+         year = "1996",
+        month = "Oct",
+       volume = {112},
+        pages = {1709},
+          doi = {10.1086/118137},
+       adsurl = {https://ui.adsabs.harvard.edu/abs/1996AJ....112.1709F},
+      adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+@ARTICLE{torrence98,
+       author = {{Torrence}, Christopher and {Compo}, Gilbert P.},
+        title = "{A Practical Guide to Wavelet Analysis.}",
+      journal = {Bulletin of the American Meteorological Society},
+         year = 1998,
+        month = jan,
+       volume = {79},
+       number = {1},
+        pages = {61-78},
+          doi = {10.1175/1520-0477(1998)079<0061:APGTWA>2.0.CO;2},
+       adsurl = {https://ui.adsabs.harvard.edu/abs/1998BAMS...79...61T},
+      adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+@ARTICLE{dornwallenstein20b,
+       author = {{Dorn-Wallenstein}, Trevor Z. and {Levesque}, Emily M. and
+         {Neugent}, Kathryn F. and {Davenport}, James R.~A. and
+         {Morris}, Brett M. and {Gootkin}, Keyan},
+        title = "{Short Term Variability of Evolved Massive Stars with TESS II: A New Class of Cool, Pulsating Supergiants}",
+      journal = {arXiv e-prints},
+     keywords = {Astrophysics - Solar and Stellar Astrophysics},
+         year = 2020,
+        month = aug,
+          eid = {arXiv:2008.11723},
+        pages = {arXiv:2008.11723},
+archivePrefix = {arXiv},
+       eprint = {2008.11723},
+ primaryClass = {astro-ph.SR},
+       adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200811723D},
+      adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
\ No newline at end of file
diff --git a/paper/paper.md b/paper/paper.md
new file mode 100644
index 0000000..9fa9fab
--- /dev/null
+++ b/paper/paper.md
@@ -0,0 +1,68 @@
+---
+title: 'JazzHands: A Python package for fast and accurate wavelet transformations on unevenly sampled data'
+tags:
+  - Python
+  - astronomy
+  - time series
+  - wavelets
+  - light curves
+authors:
+  - name: Trevor Z. Dorn-Wallenstein^[corresponding author.]
+    orcid: 0000-0003-3601-3180
+    affiliation: "1" # 
+  - name: Author 2
+    orcid: XXXX-XXXX-XXXX-XXXX
+    affiliation: "2, 3"
+  - name: Author 3
+    affiliation: 3
+affiliations:
+ - name: University of Washington
+   index: 1
+ - name: Institution Name
+   index: 2
+ - name: Independent Researcher
+   index: 3
+date: XX October 2020
+bibliography: paper.bib
+
+---
+
+# Summary
+
+Time series photometry of astronomical objects encodes information about 
+these objects' physical state, and how they change on human timescales. Recent
+rapid cadence observations from missions like TESS and *Kepler* have been used to
+to discover exoplanets around nearby stars, measure stellar pulsations, 
+characterize variability in active galactic nuclei, and more. Wavelet analysis 
+is an extension of Fourier analysis that allows astronomers to examine the 
+frequency content of a light curve as a function of time. As such, it is an 
+ideal tool for studying real data, which often contain information that is 
+otherwise impossible to extract from the Fourier transform alone. 
+
+# Statement of need
+
+`JazzHands` is a Python package for wavelet transformations. Typical wavelet 
+methods [@torrence98] assume that the data are regularly sampled and have
+no gaps. This fact has hindered astronomical applications of wavelets. Data 
+taken from the ground are often irregularly sampled and have daily and seasonal
+gaps due to the rotation and orbit of the Earth. While less problematic, space-
+based data still have gaps due to downlink times, and time-stamp adjustments to
+account for time-of-arrival differences along the spacecraft orbit induce 
+sampling irregularities. Interpolative methods may be used to offset these 
+effects, but introduce their own systematics into the wavelet transform.
+
+@foster96 introduced a mathematical formulation of the wavelet transform 
+using the Morlet wavelet that accounts for the effects of irregular sampling 
+and gaps, and avoids the need for interpolation. `JazzHands` is the first 
+open-source Python implementation of the @foster96 method, and has already
+been used in scientific applications [@dornwallenstein20b]. The combination of
+speed, customizability, and helpful features for users who are new to wavelet 
+analysis will enable exciting new science on the wealth of high precision light
+curves that are currently being released.
+
+# Acknowledgements
+
+This project was developed in part at the online.tess.science meeting, which 
+took place globally in 2020 September.
+
+# References
\ No newline at end of file