diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml deleted file mode 100644 index 61ae864..0000000 --- a/.github/workflows/draft-pdf.yml +++ /dev/null @@ -1,23 +0,0 @@ -on: [push] - -jobs: - paper: - runs-on: ubuntu-latest - name: Paper Draft - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Build draft PDF - uses: openjournals/openjournals-draft-action@master - with: - journal: joss - # This should be the path to the paper within your repo. - paper-path: paper.md - - name: Upload - uses: actions/upload-artifact@v1 - with: - name: paper - # This is the output path where Pandoc will write the compiled - # PDF. Note, this should be the same directory as the input - # paper.md - path: paper.pdf \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0fb1011..2ecdd14 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,3 @@ -name: Tests - on: push: branches: @@ -9,20 +7,14 @@ on: jobs: Linux: runs-on: ubuntu-latest - strategy: - max-parallel: 4 - matrix: - python-version: ["3.8", "3.9", "3.10"] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - pip install -e .[advanced,dev] - - name: Test with pytest - run: | - pytest pynumdiff + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: tests + run: | + pip install -e .[advanced,dev] + pytest pynumdiff + diff --git a/paper.bib b/paper.bib deleted file mode 100644 index bf66c9a..0000000 --- a/paper.bib +++ /dev/null @@ -1,227 +0,0 @@ -@article{van2020numerical, - title={Numerical differentiation of noisy data: A unifying multi-objective optimization framework}, - author={Van Breugel, Floris and Kutz, J Nathan and Brunton, Bingni W}, - journal={IEEE Access}, - volume={8}, - pages={196865--196877}, - year={2020}, - publisher={IEEE}, - doi={10.1109/access.2020.3034077}, -} - -@incollection{chambolle2010introduction, - title={An introduction to total variation for image analysis}, - author={Chambolle, Antonin and Caselles, Vicent and Cremers, Daniel and Novaga, Matteo and Pock, Thomas}, - booktitle={Theoretical foundations and numerical methods for sparse recovery}, - pages={263--340}, - year={2010}, - publisher={de Gruyter}, - doi={10.1515/9783110226157.263}, -} - -@article{brunton2016discovering, - title={Discovering governing equations from data by sparse identification of nonlinear dynamical systems}, - author={Brunton, Steven L and Proctor, Joshua L and Kutz, J Nathan}, - journal={Proceedings of the national academy of sciences}, - volume={113}, - number={15}, - pages={3932--3937}, - year={2016}, - publisher={National Acad Sciences}, - doi={10.1073/pnas.1517384113} -} - -@article{de2020pysindy, - title={Pysindy: a python package for the sparse identification of nonlinear dynamics from data}, - author={de Silva, Brian M and Champion, Kathleen and Quade, Markus and Loiseau, Jean-Christophe and Kutz, J Nathan and Brunton, Steven L}, - journal={arXiv preprint arXiv:2004.08424}, - year={2020} -} - -@article{ahnert2007numerical, - title={Numerical differentiation of experimental data: local versus global methods}, - author={Ahnert, Karsten and Abel, Markus}, - journal={Computer Physics Communications}, - volume={177}, - number={10}, - pages={764--774}, - year={2007}, - publisher={Elsevier}, - doi={10.1016/j.cpc.2007.03.009} -} - -@article{butterworth1930theory, - title={On the theory of filter amplifiers}, - author={Butterworth, Stephen}, - journal={Wireless Engineer}, - volume={7}, - number={6}, - pages={536--541}, - year={1930} -} - -@article{belytschko1996meshless, - title={Meshless methods: an overview and recent developments}, - author={Belytschko, Ted and Krongauz, Yury and Organ, Daniel and Fleming, Mark and Krysl, Petr}, - journal={Computer methods in applied mechanics and engineering}, - volume={139}, - number={1-4}, - pages={3--47}, - year={1996}, - publisher={Elsevier} -} - -@article{schafer2011savitzky, - title={What is a {Savitzky-Golay} filter? [lecture notes]}, - author={Schafer, Ronald W}, - journal={IEEE Signal processing magazine}, - volume={28}, - number={4}, - pages={111--117}, - year={2011}, - publisher={IEEE}, - doi={10.1109/msp.2011.941097} -} - -@article{savitzky1964smoothing, - title={Smoothing and differentiation of data by simplified least squares procedures}, - author={Savitzky, Abraham and Golay, Marcel J. E.}, - journal={Analytical chemistry}, - volume={36}, - number={8}, - pages={1627--1639}, - year={1964}, - publisher={ACS Publications}, - doi={10.1021/ac60214a047} -} - -@article{kalman1960new, - title={A new approach to linear filtering and prediction problems}, - author={Kalman, Rudolph Emil}, - year={1960}, - doi={10.1109/9780470544334.ch9} -} - -@article{henderson2010fundamentals, -title={Fundamentals of {Kalman} Filtering: A Practical Approach}, -volume={105}, -number={1049}, -journal={The Aeronautical Journal}, -publisher={Cambridge University Press}, -author={Henderson, Geoff. T.}, -year={2001}, -pages={400–400}, -doi={10.1017/S000192400001232X}, -} -} - -@article{aravkin2017generalized, - title={Generalized {Kalman} smoothing: Modeling and algorithms}, - author={Aravkin, Aleksandr and Burke, James V and Ljung, Lennart and Lozano, Aurelie and Pillonetto, Gianluigi}, - journal={Automatica}, - volume={86}, - pages={63--86}, - year={2017}, - publisher={Elsevier}, - doi={10.1016/j.automatica.2017.08.011} -} - -@book{crassidis2004optimal, - title={Optimal estimation of dynamic systems}, - author={Crassidis, John L and Junkins, John L}, - volume={2}, - year={2004}, - publisher={Chapman \& Hall/CRC Boca Raton, FL} -} - -@article{rudin1992nonlinear, - title={Nonlinear total variation based noise removal algorithms}, - author={Rudin, Leonid I and Osher, Stanley and Fatemi, Emad}, - journal={Physica D: nonlinear phenomena}, - volume={60}, - number={1-4}, - pages={259--268}, - year={1992}, - publisher={Elsevier}, - doi={10.1016/0167-2789(92)90242-f} -} - -@article{chartrand2011numerical, - title={Numerical differentiation of noisy, nonsmooth data}, - author={Chartrand, Rick}, - journal={International Scholarly Research Notices}, - volume={2011}, - year={2011}, - publisher={Hindawi}, - doi={10.5402/2011/164564} -} - -@Misc{scipy, - author = {Eric Jones and Travis Oliphant and Pearu Peterson}, - title = {{SciPy}: Open source scientific tools for {Python}}, - year = {2001}, - url = "http://www.scipy.org/" -} - -@article{Virtanen2020, - doi = {10.1038/s41592-019-0686-2}, - url = {https://doi.org/10.1038/s41592-019-0686-2}, - year = {2020}, - month = feb, - publisher = {Springer Science and Business Media {LLC}}, - volume = {17}, - number = {3}, - pages = {261--272}, - author = {Pauli Virtanen and Ralf Gommers and Travis E. Oliphant and Matt Haberland and Tyler Reddy and David Cournapeau and Evgeni Burovski and Pearu Peterson and Warren Weckesser and Jonathan Bright and St{\'{e}}fan J. van der Walt and Matthew Brett and Joshua Wilson and K. Jarrod Millman and Nikolay Mayorov and Andrew R. J. Nelson and Eric Jones and Robert Kern and Eric Larson and C J Carey and {\.{I}}lhan Polat and Yu Feng and Eric W. Moore and Jake VanderPlas and Denis Laxalde and Josef Perktold and Robert Cimrman and Ian Henriksen and E. A. Quintero and Charles R. Harris and Anne M. Archibald and Ant{\^{o}}nio H. Ribeiro and Fabian Pedregosa and Paul van Mulbregt and Aditya Vijaykumar and Alessandro Pietro Bardelli and Alex Rothberg and Andreas Hilboll and Andreas Kloeckner and Anthony Scopatz and Antony Lee and Ariel Rokem and C. Nathan Woods and Chad Fulton and Charles Masson and Christian H\"{a}ggstr\"{o}m and Clark Fitzgerald and David A. Nicholson and David R. Hagen and Dmitrii V. Pasechnik and Emanuele Olivetti and Eric Martin and Eric Wieser and Fabrice Silva and Felix Lenders and Florian Wilhelm and G. Young and Gavin A. Price and Gert-Ludwig Ingold and Gregory E. Allen and Gregory R. Lee and Herv{\'{e}} Audren and Irvin Probst and J\"{o}rg P. Dietrich and Jacob Silterra and James T Webber and Janko Slavi{\v{c}} and Joel Nothman and Johannes Buchner and Johannes Kulick and Johannes L. Sch\"{o}nberger and Jos{\'{e}} Vin{\'{\i}}cius de Miranda Cardoso and Joscha Reimer and Joseph Harrington and Juan Luis Cano Rodr{\'{\i}}guez and Juan Nunez-Iglesias and Justin Kuczynski and Kevin Tritz and Martin Thoma and Matthew Newville and Matthias K\"{u}mmerer and Maximilian Bolingbroke and Michael Tartre and Mikhail Pak and Nathaniel J. Smith and Nikolai Nowaczyk and Nikolay Shebanov and Oleksandr Pavlyk and Per A. Brodtkorb and Perry Lee and Robert T. McGibbon and Roman Feldbauer and Sam Lewis and Sam Tygier and Scott Sievert and Sebastiano Vigna and Stefan Peterson and Surhud More and Tadeusz Pudlik and Takuya Oshima and Thomas J. Pingel and Thomas P. Robitaille and Thomas Spura and Thouis R. Jones and Tim Cera and Tim Leslie and Tiziano Zito and Tom Krauss and Utkarsh Upadhyay and Yaroslav O. Halchenko and V{\'{a}}zquez-Baeza, Yoshiki}, - title = {{SciPy} 1.0: fundamental algorithms for scientific computing in {P}ython}, - journal = {Nature Methods} -} - -@Misc{pykalman, - author = {Daniel Duckworth and others}, - title = {pykalman, the dead-simple {Kalman} Filter, {Kalman} Smoother, and {EM} library for {P}ython}, - year = {2012}, - url = "https://github.com/pykalman/pykalman" -} - - -@Article{ harris2020array, - title = {Array programming with {NumPy}}, - author = {Charles R. Harris and K. Jarrod Millman and St{\'{e}}fan J. - van der Walt and Ralf Gommers and Pauli Virtanen and David - Cournapeau and Eric Wieser and Julian Taylor and Sebastian - Berg and Nathaniel J. Smith and Robert Kern and Matti Picus - and Stephan Hoyer and Marten H. van Kerkwijk and Matthew - Brett and Allan Haldane and Jaime Fern{\'{a}}ndez del - R{\'{i}}o and Mark Wiebe and Pearu Peterson and Pierre - G{\'{e}}rard-Marchant and Kevin Sheppard and Tyler Reddy and - Warren Weckesser and Hameer Abbasi and Christoph Gohlke and - Travis E. Oliphant}, - year = {2020}, - month = sep, - journal = {Nature}, - volume = {585}, - number = {7825}, - pages = {357--362}, - doi = {10.1038/s41586-020-2649-2}, - publisher = {Springer Science and Business Media {LLC}}, - url = {10.1038/s41586-020-2649-2} -} - -@article{demo18pydmd, - Author = {Demo, Nicola and Tezzele, Marco and Rozza, Gianluigi}, - Title = {{PyDMD: Python Dynamic Mode Decomposition}}, - Journal = {The Journal of Open Source Software}, - Volume = {3}, - Number = {22}, - Pages = {530}, - Year = {2018}, - Doi = {10.21105/joss.00530} -} - -@Misc{findiff, - author = {Matthias Baer and others}, - title = {findiff, A Python package for finite difference numerical derivatives and partial differential equations in any number of dimensions.}, - year = {2018}, - url = "https://github.com/maroba/findiff" -} diff --git a/paper.md b/paper.md deleted file mode 100644 index 142ca8f..0000000 --- a/paper.md +++ /dev/null @@ -1,66 +0,0 @@ ---- -title: 'PyNumDiff: A Python package for numerical differentiation of noisy time-series data' -tags: - - Python - - numerical differentiation - - denoising - - dynamics - - time series - - machine learning -authors: - - name: Floris Van Breugel^[corresponding author] - affiliation: 1 - - name: Yuying Liu - affiliation: 2 - - name: Bingni W. Brunton - affiliation: 3 - - name: J. Nathan Kutz - affiliation: 2 -affiliations: - - name: Department of Mechanical Engineering, University of Nevada at Reno - index: 1 - - name: Department of Applied Mathematics, University of Washington - index: 2 - - name: Department of Biology, University of Washington - index: 3 -date: 10 July 2021 -bibliography: paper.bib ---- - -# Statement of need - -The numerical computation of derivatives is ubiquitous in every scientific discipline and engineering application because derivatives express fundamental relationships among many quantities of interest. As a result, a large number of diverse algorithms have been developed to differentiate numerical data. These efforts are challenging because, in reality, practitioners often have sparse and noisy measurements and data, which undermine the ability to estimate accurate derivatives. Among the diversity of mathematical approaches that have been formulated, many are ad hoc in nature and require significant bespoke tuning of multiple parameters to produce reasonable results. Thus, at a practical level, it is often unclear which method should be used, how to choose parameters, and how to compare results from different methods. - -Regardless of application domain, scientists of various levels of mathematical expertise would benefit from a unified toolbox for differentiation techniques and parameter tuning. To address these needs, we built the open-source package `PyNumDiff`, with two primary goals in mind: (1) to develop a unified source for a diversity of differentiation methods using a common API, and (2) to provide an objective approach for choosing optimal parameters with a single universal hyperparameter (`gamma`) that functions similarly for all differentiation methods [@van2020numerical]. By filling these needs, `PyNumdiff` facilitates easy computations of derivatives on diverse time-series data sets. - -# State of the field - -Currently, practitioners in need of numerical differentiation tools must often implement a number of methods themselves, before selecting one that is appropriate for their application. High-quality data can leverage computationally efficient and algorithmically simple methods such as the finite-difference, as implemented by standard packages such as NumPy [@harris2020array], SciPy [@scipy; @Virtanen2020], or specialized packages like findiff [@findiff]. Data that are sparse and noisy, however, require more sophisticated algorithms that pracitioners must build themselves based on routines implemented across modules found in disparate packages such as SciPy, PyKalman [@pykalman], PyDMD [@demo18pydmd], or stand alone scripts such as these [implementations of total variation regularization](https://sites.google.com/site/dnartrahckcir/home/tvdiff-code) [@rudin1992nonlinear; @chartrand2011numerical]. At present, there is no centralized repository that offers a diverse range of vetted numerical differentiation tools under a unified API in Python, or other software languages. - - -# Summary - -`PyNumDiff` is a Python package that implements methods for computing numerical derivatives of noisy data. -In this package, we implement four commonly used families of differentiation methods whose mathematical formulations have different -underlying assumptions, including both global and local methods [@ahnert2007numerical]. The first family of methods usually start by -applying a smoothing filter to the data, followed by a finite difference calculation [@butterworth1930theory]. -The second family relies on building a local model of the data through linear regression, and then analytically -calculating the derivative based on the model [@belytschko1996meshless; @schafer2011savitzky; @savitzky1964smoothing]. -The third family we consider is the Kalman filter [@kalman1960new; @henderson2010fundamentals; @aravkin2017generalized; @crassidis2004optimal], -with unknown noise and process characteristics. The last family is an optimization approach based on total variation -regularization (TVR) method [@rudin1992nonlinear; @chartrand2011numerical]. For more technical details, -refer to @van2020numerical. Individual methods under each family are accessed through the API as `pynumdiff.family.method`. - -Applying `PyNumDiff` usually -takes three steps: (i) pick a differentiation method, (ii) obtain optimized parameters, and (iii) apply the differentiation. -Step (ii) can be skipped if one wants to manually assign the parameters, which is recommended when computation time is limited and the timeseries is long. Alternatively for long timeseries, optimal parameters can be chosen using a short but representative subset of the data. This optimization routine is provided as a sub-module (pynumdiff.optimize) with the same structure of differentiation families (i.e. `pynumdiff.optimize.family.method`). By default, the package performs the optimization using the open source CVXOPT package. Faster solutions can be achieved by using proprietary solvers such as MOSEK. - -The software package includes tutorials in the form of Jupyter notebooks. These tutorials demonstrate the usage of the aforementioned -features. For more detailed information, there is a more comprehensive Sphinx documentation associated with the repository. - -# Acknowledgements - -The work of J. Nathan Kutz was supported by the Air Force Office of Scientific Research under Grant FA9550-19-1-0011 and FA9550-19-1-0386. The work of F. van Breugel was supported by NIH grant P20GM103650, Air Force Research Lab award FA8651-20-1-0002 Airforce Office of Scientific Research FA9550-21-0122. BWB acknowledges support from the Air Force Office of Scientific Research award FA9550-19-1-0386. - -# References -