From bc910702ae3c3f21cc6006cb173a561c89ec304a Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Mon, 9 Dec 2024 15:51:20 +0000 Subject: [PATCH 01/16] feat: New impl --- src/FftComplex.cpp | 149 +++++++++++++++++++++++++++ src/FftComplex.hpp | 61 +++++++++++ src/FftComplexTest.cpp | 177 +++++++++++++++++++++++++++++++ src/complex.hpp | 92 +++++++++++++++++ src/fdct.cpp | 98 ------------------ src/fdct.hpp | 229 +++++++++++++++++++++++++++++++++++++++++ src/fdct_module.cpp | 15 +++ src/fft.cpp | 143 +++++++++++++++++++++++++ src/fft.hpp | 16 +++ test_dct.py | 28 +++-- 10 files changed, 903 insertions(+), 105 deletions(-) create mode 100644 src/FftComplex.cpp create mode 100644 src/FftComplex.hpp create mode 100644 src/FftComplexTest.cpp create mode 100644 src/complex.hpp delete mode 100644 src/fdct.cpp create mode 100644 src/fdct.hpp create mode 100644 src/fdct_module.cpp create mode 100644 src/fft.cpp create mode 100644 src/fft.hpp diff --git a/src/FftComplex.cpp b/src/FftComplex.cpp new file mode 100644 index 0000000..4f842a0 --- /dev/null +++ b/src/FftComplex.cpp @@ -0,0 +1,149 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include "FftComplex.hpp" + +using std::complex; +using std::size_t; +using std::uintmax_t; +using std::vector; + + +// Private function prototypes +static size_t reverseBits(size_t val, int width); + + +void Fft::transform(vector > &vec, bool inverse) { + size_t n = vec.size(); + if (n == 0) + return; + else if ((n & (n - 1)) == 0) // Is power of 2 + transformRadix2(vec, inverse); + else // More complicated algorithm for arbitrary sizes + transformBluestein(vec, inverse); +} + + +void Fft::transformRadix2(vector > &vec, bool inverse) { + // Length variables + size_t n = vec.size(); + int levels = 0; // Compute levels = floor(log2(n)) + for (size_t temp = n; temp > 1U; temp >>= 1) + levels++; + if (static_cast(1U) << levels != n) + throw std::domain_error("Length is not a power of 2"); + + // Trigonometric table + vector > expTable(n / 2); + for (size_t i = 0; i < n / 2; i++) + expTable[i] = std::polar(1.0, (inverse ? 2 : -2) * M_PI * i / n); + + // Bit-reversed addressing permutation + for (size_t i = 0; i < n; i++) { + size_t j = reverseBits(i, levels); + if (j > i) + std::swap(vec[i], vec[j]); + } + + // Cooley-Tukey decimation-in-time radix-2 FFT + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + complex temp = vec[j + halfsize] * expTable[k]; + vec[j + halfsize] = vec[j] - temp; + vec[j] += temp; + } + } + if (size == n) // Prevent overflow in 'size *= 2' + break; + } +} + + +void Fft::transformBluestein(vector > &vec, bool inverse) { + // Find a power-of-2 convolution length m such that m >= n * 2 + 1 + size_t n = vec.size(); + size_t m = 1; + while (m / 2 <= n) { + if (m > SIZE_MAX / 2) + throw std::length_error("Vector too large"); + m *= 2; + } + + // Trigonometric table + vector > expTable(n); + for (size_t i = 0; i < n; i++) { + uintmax_t temp = static_cast(i) * i; + temp %= static_cast(n) * 2; + double angle = (inverse ? M_PI : -M_PI) * temp / n; + expTable[i] = std::polar(1.0, angle); + } + + // Temporary vectors and preprocessing + vector > avec(m); + for (size_t i = 0; i < n; i++) + avec[i] = vec[i] * expTable[i]; + vector > bvec(m); + bvec[0] = expTable[0]; + for (size_t i = 1; i < n; i++) + bvec[i] = bvec[m - i] = std::conj(expTable[i]); + + // Convolution + vector > cvec = convolve(std::move(avec), std::move(bvec)); + + // Postprocessing + for (size_t i = 0; i < n; i++) + vec[i] = cvec[i] * expTable[i]; +} + + +vector > Fft::convolve( + vector > xvec, + vector > yvec) { + + size_t n = xvec.size(); + if (n != yvec.size()) + throw std::domain_error("Mismatched lengths"); + transform(xvec, false); + transform(yvec, false); + for (size_t i = 0; i < n; i++) + xvec[i] *= yvec[i]; + transform(xvec, true); + for (size_t i = 0; i < n; i++) // Scaling (because this FFT implementation omits it) + xvec[i] /= static_cast(n); + return xvec; +} + + +static size_t reverseBits(size_t val, int width) { + size_t result = 0; + for (int i = 0; i < width; i++, val >>= 1) + result = (result << 1) | (val & 1U); + return result; +} diff --git a/src/FftComplex.hpp b/src/FftComplex.hpp new file mode 100644 index 0000000..24eeabb --- /dev/null +++ b/src/FftComplex.hpp @@ -0,0 +1,61 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include +#include +using Complex = std::complex; + +namespace Fft { + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. The inverse transform does not perform scaling, so it is not a true inverse. + */ + void transform(std::vector > &vec, bool inverse); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. + */ + void transformRadix2(std::vector > &vec, bool inverse); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. + * Uses Bluestein's chirp z-transform algorithm. + */ + void transformBluestein(std::vector > &vec, bool inverse); + + + /* + * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. + */ + std::vector > convolve( + std::vector > xvec, + std::vector > yvec); + +} diff --git a/src/FftComplexTest.cpp b/src/FftComplexTest.cpp new file mode 100644 index 0000000..dd960d1 --- /dev/null +++ b/src/FftComplexTest.cpp @@ -0,0 +1,177 @@ +/* + * FFT and convolution test (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "FftComplex.hpp" + +using std::complex; +using std::cout; +using std::endl; +using std::vector; + + +// Private function prototypes +static void testFft(int n); +static void testConvolution(int n); +static vector > naiveDft(const vector > &input, bool inverse); +static vector > naiveConvolve(const vector > &xvec, const vector > &yvec); +static double log10RmsErr(const vector > &xvec, const vector > &yvec); +static vector > randomComplexes(int n); + +// Mutable global variable +static double maxLogError = -INFINITY; + +// Random number generation +std::default_random_engine randGen((std::random_device())()); + + +/*---- Main and test functions ----*/ + +int main() { + // Test power-of-2 size FFTs + for (int i = 0; i <= 12; i++) + testFft(1 << i); + + // Test small size FFTs + for (int i = 0; i < 30; i++) + testFft(i); + + // Test diverse size FFTs + for (int i = 0, prev = 0; i <= 100; i++) { + int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); + if (n > prev) { + testFft(n); + prev = n; + } + } + + // Test power-of-2 size convolutions + for (int i = 0; i <= 12; i++) + testConvolution(1 << i); + + // Test diverse size convolutions + for (int i = 0, prev = 0; i <= 100; i++) { + int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); + if (n > prev) { + testConvolution(n); + prev = n; + } + } + + cout << endl; + cout << "Max log err = " << std::setprecision(3) << maxLogError << endl; + cout << "Test " << (maxLogError < -10 ? "passed" : "failed") << endl; + return EXIT_SUCCESS; +} + + +static void testFft(int n) { + const vector > input = randomComplexes(n); + const vector > expect = naiveDft(input, false); + vector > actual = input; + Fft::transform(actual, false); + double err = log10RmsErr(expect, actual); + + for (auto it = actual.begin(); it != actual.end(); ++it) + *it /= n; + Fft::transform(actual, true); + err = std::max(log10RmsErr(input, actual), err); + cout << "fftsize=" << std::setw(4) << std::setfill(' ') << n << " " + << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) + << err << endl; +} + + +static void testConvolution(int n) { + const vector > input0 = randomComplexes(n); + const vector > input1 = randomComplexes(n); + const vector > expect = naiveConvolve(input0, input1); + const vector > actual = Fft::convolve(std::move(input0), std::move(input1)); + cout << "convsize=" << std::setw(4) << std::setfill(' ') << n << " " + << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) + << log10RmsErr(expect, actual) << endl; +} + + +/*---- Naive reference computation functions ----*/ + +static vector > naiveDft(const vector > &input, bool inverse) { + int n = static_cast(input.size()); + vector > output; + double coef = (inverse ? 2 : -2) * M_PI / n; + for (int k = 0; k < n; k++) { // For each output element + complex sum(0); + for (int t = 0; t < n; t++) { // For each input element + double angle = coef * (static_cast(t) * k % n); + sum += input[t] * std::polar(1.0, angle); + } + output.push_back(sum); + } + return output; +} + + +static vector > naiveConvolve( + const vector > &xvec, const vector > &yvec) { + int n = static_cast(xvec.size()); + vector > result(n); // All zeros + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + int k = (i + j) % n; + result[k] += xvec[i] * yvec[j]; + } + } + return result; +} + + +/*---- Utility functions ----*/ + +static double log10RmsErr(const vector > &xvec, const vector > &yvec) { + int n = static_cast(xvec.size()); + double err = std::pow(10, -99 * 2); + for (int i = 0; i < n; i++) + err += std::norm(xvec.at(i) - yvec.at(i)); + err /= n > 0 ? n : 1; + err = std::sqrt(err); // Now this is a root mean square (RMS) error + err = std::log10(err); + maxLogError = std::max(err, maxLogError); + return err; +} + + +static vector > randomComplexes(int n) { + std::uniform_real_distribution valueDist(-1.0, 1.0); + vector > result; + for (int i = 0; i < n; i++) + result.push_back(complex(valueDist(randGen), valueDist(randGen))); + return result; +} diff --git a/src/complex.hpp b/src/complex.hpp new file mode 100644 index 0000000..23a2b65 --- /dev/null +++ b/src/complex.hpp @@ -0,0 +1,92 @@ +#ifndef COMPLEX_ARRAY_HPP +#define COMPLEX_ARRAY_HPP +#include +#include +#include + + +using Complex = std::complex; + +class ComplexArray { + private: + double *real; + double *imag; + size_t array_size; + + public: + ComplexArray(size_t n) : array_size(n) { + if (n == 0) { + real = nullptr; + imag = nullptr; + } else { + real = new double[n]; + imag = new double[n]; + } + } + + ~ComplexArray() { + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + } + + ComplexArray(const ComplexArray &other) : array_size(other.array_size) { + real = new double[array_size]; + imag = new double[array_size]; + std::copy(other.real, other.real + array_size, real); + std::copy(other.imag, other.imag + array_size, imag); + } + + ComplexArray &operator=(const ComplexArray &other) { + if (this == &other) { + return *this; + } + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + + array_size = other.array_size; + real = new double[array_size]; + imag = new double[array_size]; + + for (size_t i = 0; i < array_size; i++) { + real[i] = other.real[i]; + imag[i] = other.imag[i]; + } + + return *this; + } + + Complex operator[](size_t index) const { + return Complex(real[index], imag[index]); + } + + size_t size() const { + return array_size; + } + + double* get_real_ptr() { + return real; + } + + const double* get_real_ptr() const { + return real; + } + + double* get_imag_ptr() { + return imag; + } + + const double* get_imag_ptr() const { + return imag; + } + +}; + +#endif diff --git a/src/fdct.cpp b/src/fdct.cpp deleted file mode 100644 index 5163348..0000000 --- a/src/fdct.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include -#include -#include -#include -#include - -namespace py = pybind11; -#define _USE_MATH_DEFINES -#include -#include - -using data_t = double; - -constexpr data_t SQRT2_1 = 0.7071067811865475; -constexpr data_t ONE_SUB_SQRT2_1 = 1 - 0.7071067811865475; - -static inline data_t e(int n) { - // if (n == 0) - // return SQRT2_1; - return 1.0f; -} - - -static inline data_t dct_naive_single(data_t *buf, size_t buf_size, size_t i) { - data_t sum = 0.0f; - for (size_t j = 0; j < buf_size; j++) { - // sum += buf[j] * cos((2 * j + 1) * i * M_PI / (2 * buf_size)); - sum += buf[j] * cos(M_PI / buf_size * i * (j + 0.5)); - } - return sum; -} - -static inline data_t idct_naive_single(data_t *buf, size_t buf_size, size_t i) { - data_t sum = 0.0; - for (size_t j = 0; j < buf_size; j++) { - sum += buf[j] * cos(M_PI * i * (2 * j + 1.0) / (2 * buf_size)); - } - return sum; -} - -void dct_naive(data_t *buf, size_t buf_size, data_t *out) { - - for (int i = 0; i < buf_size; i++) { - out[i] = 2.0 * e(i) * dct_naive_single(buf, buf_size, i); - } -} - -void idct_naive(data_t *buf, size_t buf_size, data_t *out) { - - for (int i = 0; i < buf_size; i++) { - out[i] = (buf[0] + 2 * idct_naive_single(buf, buf_size, i)) / buf_size; - } -} - -double err(data_t* buf1, data_t* buf2, size_t buf_size) { - double sum = 0.0; - for(int i = 0; i < buf_size; i++) - sum += abs(buf1[i] - buf2[i]); - return sum; -} - -py::array fct(py::array_t in_arr) { - py::buffer_info buf_info = in_arr.request(); - double *ptr = static_cast(buf_info.ptr); - size_t buf_size = buf_info.size; - - std::cout << "fct: Get array with " << buf_info.size << std::endl; - std::cout << "buf: " << ptr[0] << " " << ptr[1] << std::endl; - - py::array_t out_arr(buf_info.shape); - - double *out_buf = static_cast(out_arr.request(true).ptr); - dct_naive(ptr, buf_size, out_buf); - - return out_arr; -} - -py::array ifct(py::array_t in_arr) { - py::buffer_info buf_info = in_arr.request(); - double *ptr = static_cast(buf_info.ptr); - size_t buf_size = buf_info.size; - - std::cout << "ifct: Get array with " << buf_info.size << std::endl; - std::cout << "buf: " << ptr[0] << " " << ptr[1] << std::endl; - - py::array_t out_arr(buf_info.shape); - - double *out_buf = static_cast(out_arr.request(true).ptr); - idct_naive(ptr, buf_size, out_buf); - - return out_arr; - -} - -PYBIND11_MODULE(fdct, m) { - m.def("fct", &fct, "Compute fast discrete cosine transformation."); - m.def("ifct", &ifct, "Compute inverse fast discrete cosine transformation."); -} diff --git a/src/fdct.hpp b/src/fdct.hpp new file mode 100644 index 0000000..e83eaa5 --- /dev/null +++ b/src/fdct.hpp @@ -0,0 +1,229 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "FftComplex.hpp" +#include "complex.hpp" +#include "fft.hpp" + +namespace py = pybind11; +#define _USE_MATH_DEFINES + +using data_t = double; + +constexpr data_t SQRT2_1 = 0.7071067811865475; +constexpr data_t ONE_SUB_SQRT2_1 = 1 - 0.7071067811865475; + +static inline data_t e(int n) { return 1.0f; } + +ComplexArray reorderInput(const ComplexArray &x) { + size_t N = x.size(); + ComplexArray u(N); + const double *x_real = x.get_real_ptr(); + const double *x_imag = x.get_imag_ptr(); + double *u_real = u.get_real_ptr(); + double *u_imag = u.get_imag_ptr(); + + for (size_t n = 0; n < N / 2; ++n) { + u_real[n] = x_real[2 * n]; + u_imag[n] = x_imag[2 * n]; + u_real[N - n - 1] = x_real[2 * n + 1]; + u_imag[N - n - 1] = x_imag[2 * n + 1]; + } + return u; +} + +std::vector reorderOutput(const std::vector &x) { + size_t N = x.size(); + std::vector reordered(N); + + for (size_t n = 0; n < N / 2; ++n) { + reordered[2 * n] = x[n]; + reordered[2 * n + 1] = x[N - n - 1]; + } + return reordered; +} + +// Function to compute the Fast Cosine Transform +std::vector fct_inner(ComplexArray &x) { + size_t N = x.size(); + std::vector dct(N); + auto u = reorderInput(x); + ComplexArray fft_in(N); + std::vector > vc; + // Fill FFT input array + for (size_t n = 0; n < N; ++n) { + vc.push_back(std::complex(u.get_real_ptr()[n], u.get_imag_ptr()[n])); + fft_in.get_real_ptr()[n] = u.get_real_ptr()[n]; + fft_in.get_imag_ptr()[n] = u.get_imag_ptr()[n]; + } + + // Perform FFT + fft::transform(fft_in, false); + Fft::transform(vc, false); + + for (int n = 0; n < N; ++n) { + std::cout << fft_in.get_real_ptr()[n] << " " << vc[n].real() << std::endl; + } + + // Compute the DCT using the FFT output + for (size_t k = 0; k < N; ++k) { + double real = fft_in[k].real(); + double imag = fft_in[k].imag(); + dct[k] = + (real * cos(M_PI * k / (2 * N)) - imag * sin(-M_PI * k / (2 * N))) * 2; + } + return dct; +} + +// Wrapper function to accept and return NumPy arrays +py::array +fdct(py::array_t in_arr) { + // Request a buffer from the input array + py::buffer_info buf = in_arr.request(); + if (buf.ndim != 1) { + throw std::runtime_error("Input array must be 1-dimensional."); + } + + // Convert input NumPy array to std::vector + size_t N = buf.shape[0]; + ComplexArray input_data(N); + std::copy(static_cast(buf.ptr), static_cast(buf.ptr) + N, + input_data.get_real_ptr()); + std::fill(input_data.get_imag_ptr(), input_data.get_imag_ptr() + N, 0.0); + // Perform FCT + std::vector result = fct_inner(input_data); + + // Convert result back to NumPy array + return py::array_t(result.size(), result.data()); +} + +std::vector idct_inner(const std::vector &c) { + size_t N = c.size(); + std::vector x(N); + auto u = reorderOutput(c); + + // Allocate FFTW input/output arrays + fftw_complex *fft_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N); + fftw_complex *fft_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N); + + // Initialize FFTW plan + fftw_plan plan = + fftw_plan_dft_1d(N, fft_in, fft_out, FFTW_BACKWARD, FFTW_ESTIMATE); + + // Fill FFTW input array + for (size_t n = 0; n < N; ++n) { + fft_in[n][0] = u[n]; // Real part + fft_in[n][1] = 0.0; // Imaginary part + } + + // Execute IFFT + fftw_execute(plan); + + // Compute the IDCT using the IFFT output + for (size_t k = 0; k < N; ++k) { + double real = fft_out[k][0]; + double imag = fft_out[k][1]; + x[k] = + (real * cos(M_PI * k / (2.0 * N)) + imag * sin(M_PI * k / (2.0 * N))) / + N; + } + + // Free FFTW resources + fftw_destroy_plan(plan); + fftw_free(fft_in); + fftw_free(fft_out); + + return x; +} + +// Wrapper function to accept and return NumPy arrays +py::array +ifdct(py::array_t in_arr) { + // Request a buffer from the input array + py::buffer_info buf = in_arr.request(); + if (buf.ndim != 1) { + throw std::runtime_error("Input array must be 1-dimensional."); + } + + // Convert input NumPy array to std::vector + size_t N = buf.shape[0]; + std::vector input_data(static_cast(buf.ptr), + static_cast(buf.ptr) + N); + + // Perform IDCT + std::vector result = idct_inner(input_data); + + // Convert result back to NumPy array + return py::array_t(result.size(), result.data()); +} + +static inline data_t dct_naive_single(data_t *buf, size_t buf_size, size_t i) { + data_t sum = 0.0f; + for (size_t j = 0; j < buf_size; j++) { + // sum += buf[j] * cos((2 * j + 1) * i * M_PI / (2 * buf_size)); + sum += buf[j] * cos(M_PI / buf_size * i * (j + 0.5)); + } + return sum; +} + +static inline data_t idct_naive_single(data_t *buf, size_t buf_size, size_t i) { + data_t sum = 0.0; + for (size_t j = 0; j < buf_size; j++) { + sum += buf[j] * cos(M_PI * i * (2 * j + 1.0) / (2 * buf_size)); + } + return sum; +} + +void dct_naive_inner(data_t *buf, size_t buf_size, data_t *out) { + for (int i = 0; i < buf_size; i++) { + out[i] = 2.0 * e(i) * dct_naive_single(buf, buf_size, i); + } +} + +void idct_naive_inner(data_t *buf, size_t buf_size, data_t *out) { + for (int i = 0; i < buf_size; i++) { + out[i] = (buf[0] + 2 * idct_naive_single(buf, buf_size, i)) / buf_size; + } +} + +double err(data_t *buf1, data_t *buf2, size_t buf_size) { + double sum = 0.0; + for (int i = 0; i < buf_size; i++) + sum += abs(buf1[i] - buf2[i]); + return sum; +} + +py::array dct_naive( + py::array_t in_arr) { + py::buffer_info buf_info = in_arr.request(); + double *ptr = static_cast(buf_info.ptr); + size_t buf_size = buf_info.size; + + py::array_t out_arr(buf_info.shape); + + double *out_buf = static_cast(out_arr.request(true).ptr); + dct_naive_inner(ptr, buf_size, out_buf); + + return out_arr; +} + +py::array idct_naive( + py::array_t in_arr) { + py::buffer_info buf_info = in_arr.request(); + double *ptr = static_cast(buf_info.ptr); + size_t buf_size = buf_info.size; + + py::array_t out_arr(buf_info.shape); + + double *out_buf = static_cast(out_arr.request(true).ptr); + idct_naive_inner(ptr, buf_size, out_buf); + + return out_arr; +} diff --git a/src/fdct_module.cpp b/src/fdct_module.cpp new file mode 100644 index 0000000..9a0135a --- /dev/null +++ b/src/fdct_module.cpp @@ -0,0 +1,15 @@ +#include +#include +#include +#include "fdct.hpp" + +namespace py = pybind11; +#define _USE_MATH_DEFINES +#include + +PYBIND11_MODULE(fdct, m) { + m.def("dct_naive", &dct_naive, "Compute fast discrete cosine transformation."); + m.def("idct_naive", &idct_naive, "Compute inverse fast discrete cosine transformation."); + m.def("fdct", &fdct, ""); + m.def("ifdct", &ifdct, ""); +} diff --git a/src/fft.cpp b/src/fft.cpp new file mode 100644 index 0000000..40f0948 --- /dev/null +++ b/src/fft.cpp @@ -0,0 +1,143 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "complex.hpp" +#include "fft.hpp" + +static size_t reverseBits(size_t val, int width); + +void fft::transformRadix2(ComplexArray &x, bool inverse) { + int levels = 0; + size_t n = x.size(); + double *real = x.get_imag_ptr(); + double *imag = x.get_real_ptr(); + + for (size_t temp = n; temp > 1U; temp >>= 1) + levels++; + if (static_cast(1U) << levels != n) + throw std::domain_error("Length is not a power of 2"); + + double *real_expTable = new double[n / 2]; + double *imag_expTable = new double[n / 2]; + + for (size_t i = 0; i < n / 2; i++) { + double angle = (inverse ? 2 : -2) * M_PI * i / n; + real_expTable[i] = cos(angle); + imag_expTable[i] = sin(angle); + } + + for (size_t i = 0; i < n; i++) { + size_t j = reverseBits(i, levels); + if (j > i) { + std::swap(real[i], real[j]); + std::swap(imag[i], imag[j]); + } + } + + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + double temp_real = real[j + halfsize] * real_expTable[k] - + imag[j + halfsize] * imag_expTable[k]; + double temp_imag = real[j + halfsize] * imag_expTable[k] + + imag[j + halfsize] * real_expTable[k]; + real[j + halfsize] = real[j] - temp_real; + imag[j + halfsize] = imag[j] - temp_imag; + real[j] += temp_real; + imag[j] += temp_imag; + } + } + if (size == n) + break; + } +} + +void fft::transformBluestein(ComplexArray &x, bool inverse) { + size_t n = x.size(); + double *real = x.get_imag_ptr(); + double *imag = x.get_real_ptr(); + size_t m = 1; + while (m / 2 <= n) { + if (m > SIZE_MAX / 2) + throw std::length_error("Vector too large"); + m *= 2; + } + + // Trigonometric table + double *real_expTable = new double[n]; + double *imag_expTable = new double[n]; + for (size_t i = 0; i < n; i++) { + uintmax_t temp = static_cast(i) * i; + temp %= static_cast(n) * 2; + double angle = (inverse ? M_PI : -M_PI) * temp / n; + real_expTable[i] = cos(angle); + imag_expTable[i] = sin(angle); + } + + double *real_avec = new double[m]; + double *imag_avec = new double[m]; + for (size_t i = 0; i < n; i++) { + real_avec[i] = real[i] * real_expTable[i] - imag[i] * imag_expTable[i]; + imag_avec[i] = real[i] * imag_expTable[i] + imag[i] * real_expTable[i]; + } + double *real_bvec = new double[m]; + double *imag_bvec = new double[m]; + real_bvec[0] = real_expTable[0]; + imag_bvec[0] = imag_expTable[0]; + for (size_t i = 1; i < n; ++i) { + real_bvec[i] = real_bvec[m - i] = real_expTable[i]; + imag_bvec[i] = imag_bvec[m - i] = imag_expTable[i]; + } + + ComplexArray c = convolove(, y); +} + +ComplexArray fft::convolove(ComplexArray x, ComplexArray y) { + size_t n = x.size(); + if (n != y.size()) + throw std::domain_error("Mismatched lengths"); + transform(x, false); + transform(y, false); + + double *xr = x.get_real_ptr(); + double *yr = y.get_real_ptr(); + double *xi = x.get_imag_ptr(); + double *yi = y.get_imag_ptr(); + + for (size_t i = 0; i < n; ++i) { + double xrn = xr[i] * yr[i] - xi[i] * yi[i]; + double xin = xr[i] * yi[i] + xi[i] * yr[i]; + xr[i] = xrn; + xi[i] = xin; + } + transform(x, true); + for (size_t i = 0; i < n; ++i) { + xr[i] /= static_cast(n); + xi[i] /= static_cast(n); + } + return x; +} + +void fft::transform(ComplexArray &x, bool inverse) { + size_t n = x.size(); + if (n == 0) + return; + else if ((n & (n - 1)) == 0) // Is power of 2 + transformRadix2(x, inverse); + else // More complicated algorithm for arbitrary sizes + transformBluestein(x, inverse); +} + +static size_t reverseBits(size_t val, int width) { + size_t result = 0; + for (int i = 0; i < width; i++, val >>= 1) + result = (result << 1) | (val & 1U); + return result; +} diff --git a/src/fft.hpp b/src/fft.hpp new file mode 100644 index 0000000..8813af0 --- /dev/null +++ b/src/fft.hpp @@ -0,0 +1,16 @@ +#ifndef FFT_H +#define FFT_H + +#include "complex.hpp" + +namespace fft { + void transformRadix2(ComplexArray &x, bool inverse); + + void transformBluestein(ComplexArray &x, bool inverse); + + void transform(ComplexArray &x, bool inverse); + + ComplexArray convolove(const ComplexArray &x, const ComplexArray &y); +} + +#endif // FFT_H diff --git a/test_dct.py b/test_dct.py index a8707cc..3ed77ee 100644 --- a/test_dct.py +++ b/test_dct.py @@ -1,25 +1,39 @@ import numpy as np import fdct -import scipy +import scipy.fft as fft def test_dct(): inp = np.random.rand(1024) - gt = scipy.fft.dct(inp) - ans = fdct.fct(inp) + gt = fft.dct(inp) + ans = fdct.dct_naive(inp) s = sum((gt - ans) ** 2) / 1024 assert s < 0.1 def test_idct(): inp = np.random.rand(1024) - gt = scipy.fft.idct(inp) - ans = fdct.ifct(inp) - print(gt) - print(ans) + gt = fft.idct(inp) + ans = fdct.idct_naive(inp) + s = sum((gt - ans) ** 2) / 1024 assert s < 0.1 +def test_fdct(): + inp = np.random.rand(10) + gt = fft.dct(inp) + ans = fdct.fdct(inp) + s = sum((gt - ans) ** 2) / 10 + print(gt) + print(ans) + assert s < 0.1 +def test_ifdct(): + inp = np.random.rand(10) + gt = fft.idct(inp) + ans = fdct.ifdct(inp) + s = sum((gt - ans) ** 2) / 10 + assert s < 0.1 if __name__ == "__main__": test_dct() test_idct() + test_fdct() From 8fc4db4d2b3a0755ca09428ce4c26f966345a893 Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Wed, 11 Dec 2024 15:13:15 +0000 Subject: [PATCH 02/16] chore: rearrangement C++ files --- src/FftComplex.cpp | 149 ---------------------------------- src/FftComplex.hpp | 61 -------------- src/FftComplexTest.cpp | 177 ----------------------------------------- src/complex.hpp | 92 --------------------- src/complex_array.hpp | 148 ++++++++++++++++++++++++++++++++++ src/dct_naive.hpp | 72 +++++++++++++++++ src/fdct.hpp | 141 ++++---------------------------- src/fdct_inner.cpp | 83 +++++++++++++++++++ src/fdct_inner.hpp | 17 ++++ src/fdct_module.cpp | 5 +- src/fft.cpp | 49 +++++++----- src/fft.hpp | 17 ++-- 12 files changed, 376 insertions(+), 635 deletions(-) delete mode 100644 src/FftComplex.cpp delete mode 100644 src/FftComplex.hpp delete mode 100644 src/FftComplexTest.cpp delete mode 100644 src/complex.hpp create mode 100644 src/complex_array.hpp create mode 100644 src/dct_naive.hpp create mode 100644 src/fdct_inner.cpp create mode 100644 src/fdct_inner.hpp diff --git a/src/FftComplex.cpp b/src/FftComplex.cpp deleted file mode 100644 index 4f842a0..0000000 --- a/src/FftComplex.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Free FFT and convolution (C++) - * - * Copyright (c) 2021 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/free-small-fft-in-multiple-languages - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#include -#include -#include -#include -#include "FftComplex.hpp" - -using std::complex; -using std::size_t; -using std::uintmax_t; -using std::vector; - - -// Private function prototypes -static size_t reverseBits(size_t val, int width); - - -void Fft::transform(vector > &vec, bool inverse) { - size_t n = vec.size(); - if (n == 0) - return; - else if ((n & (n - 1)) == 0) // Is power of 2 - transformRadix2(vec, inverse); - else // More complicated algorithm for arbitrary sizes - transformBluestein(vec, inverse); -} - - -void Fft::transformRadix2(vector > &vec, bool inverse) { - // Length variables - size_t n = vec.size(); - int levels = 0; // Compute levels = floor(log2(n)) - for (size_t temp = n; temp > 1U; temp >>= 1) - levels++; - if (static_cast(1U) << levels != n) - throw std::domain_error("Length is not a power of 2"); - - // Trigonometric table - vector > expTable(n / 2); - for (size_t i = 0; i < n / 2; i++) - expTable[i] = std::polar(1.0, (inverse ? 2 : -2) * M_PI * i / n); - - // Bit-reversed addressing permutation - for (size_t i = 0; i < n; i++) { - size_t j = reverseBits(i, levels); - if (j > i) - std::swap(vec[i], vec[j]); - } - - // Cooley-Tukey decimation-in-time radix-2 FFT - for (size_t size = 2; size <= n; size *= 2) { - size_t halfsize = size / 2; - size_t tablestep = n / size; - for (size_t i = 0; i < n; i += size) { - for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { - complex temp = vec[j + halfsize] * expTable[k]; - vec[j + halfsize] = vec[j] - temp; - vec[j] += temp; - } - } - if (size == n) // Prevent overflow in 'size *= 2' - break; - } -} - - -void Fft::transformBluestein(vector > &vec, bool inverse) { - // Find a power-of-2 convolution length m such that m >= n * 2 + 1 - size_t n = vec.size(); - size_t m = 1; - while (m / 2 <= n) { - if (m > SIZE_MAX / 2) - throw std::length_error("Vector too large"); - m *= 2; - } - - // Trigonometric table - vector > expTable(n); - for (size_t i = 0; i < n; i++) { - uintmax_t temp = static_cast(i) * i; - temp %= static_cast(n) * 2; - double angle = (inverse ? M_PI : -M_PI) * temp / n; - expTable[i] = std::polar(1.0, angle); - } - - // Temporary vectors and preprocessing - vector > avec(m); - for (size_t i = 0; i < n; i++) - avec[i] = vec[i] * expTable[i]; - vector > bvec(m); - bvec[0] = expTable[0]; - for (size_t i = 1; i < n; i++) - bvec[i] = bvec[m - i] = std::conj(expTable[i]); - - // Convolution - vector > cvec = convolve(std::move(avec), std::move(bvec)); - - // Postprocessing - for (size_t i = 0; i < n; i++) - vec[i] = cvec[i] * expTable[i]; -} - - -vector > Fft::convolve( - vector > xvec, - vector > yvec) { - - size_t n = xvec.size(); - if (n != yvec.size()) - throw std::domain_error("Mismatched lengths"); - transform(xvec, false); - transform(yvec, false); - for (size_t i = 0; i < n; i++) - xvec[i] *= yvec[i]; - transform(xvec, true); - for (size_t i = 0; i < n; i++) // Scaling (because this FFT implementation omits it) - xvec[i] /= static_cast(n); - return xvec; -} - - -static size_t reverseBits(size_t val, int width) { - size_t result = 0; - for (int i = 0; i < width; i++, val >>= 1) - result = (result << 1) | (val & 1U); - return result; -} diff --git a/src/FftComplex.hpp b/src/FftComplex.hpp deleted file mode 100644 index 24eeabb..0000000 --- a/src/FftComplex.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Free FFT and convolution (C++) - * - * Copyright (c) 2021 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/free-small-fft-in-multiple-languages - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#pragma once - -#include -#include -using Complex = std::complex; - -namespace Fft { - - /* - * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. - * The vector can have any length. This is a wrapper function. The inverse transform does not perform scaling, so it is not a true inverse. - */ - void transform(std::vector > &vec, bool inverse); - - - /* - * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. - * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. - */ - void transformRadix2(std::vector > &vec, bool inverse); - - - /* - * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. - * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. - * Uses Bluestein's chirp z-transform algorithm. - */ - void transformBluestein(std::vector > &vec, bool inverse); - - - /* - * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. - */ - std::vector > convolve( - std::vector > xvec, - std::vector > yvec); - -} diff --git a/src/FftComplexTest.cpp b/src/FftComplexTest.cpp deleted file mode 100644 index dd960d1..0000000 --- a/src/FftComplexTest.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* - * FFT and convolution test (C++) - * - * Copyright (c) 2021 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/free-small-fft-in-multiple-languages - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "FftComplex.hpp" - -using std::complex; -using std::cout; -using std::endl; -using std::vector; - - -// Private function prototypes -static void testFft(int n); -static void testConvolution(int n); -static vector > naiveDft(const vector > &input, bool inverse); -static vector > naiveConvolve(const vector > &xvec, const vector > &yvec); -static double log10RmsErr(const vector > &xvec, const vector > &yvec); -static vector > randomComplexes(int n); - -// Mutable global variable -static double maxLogError = -INFINITY; - -// Random number generation -std::default_random_engine randGen((std::random_device())()); - - -/*---- Main and test functions ----*/ - -int main() { - // Test power-of-2 size FFTs - for (int i = 0; i <= 12; i++) - testFft(1 << i); - - // Test small size FFTs - for (int i = 0; i < 30; i++) - testFft(i); - - // Test diverse size FFTs - for (int i = 0, prev = 0; i <= 100; i++) { - int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); - if (n > prev) { - testFft(n); - prev = n; - } - } - - // Test power-of-2 size convolutions - for (int i = 0; i <= 12; i++) - testConvolution(1 << i); - - // Test diverse size convolutions - for (int i = 0, prev = 0; i <= 100; i++) { - int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); - if (n > prev) { - testConvolution(n); - prev = n; - } - } - - cout << endl; - cout << "Max log err = " << std::setprecision(3) << maxLogError << endl; - cout << "Test " << (maxLogError < -10 ? "passed" : "failed") << endl; - return EXIT_SUCCESS; -} - - -static void testFft(int n) { - const vector > input = randomComplexes(n); - const vector > expect = naiveDft(input, false); - vector > actual = input; - Fft::transform(actual, false); - double err = log10RmsErr(expect, actual); - - for (auto it = actual.begin(); it != actual.end(); ++it) - *it /= n; - Fft::transform(actual, true); - err = std::max(log10RmsErr(input, actual), err); - cout << "fftsize=" << std::setw(4) << std::setfill(' ') << n << " " - << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) - << err << endl; -} - - -static void testConvolution(int n) { - const vector > input0 = randomComplexes(n); - const vector > input1 = randomComplexes(n); - const vector > expect = naiveConvolve(input0, input1); - const vector > actual = Fft::convolve(std::move(input0), std::move(input1)); - cout << "convsize=" << std::setw(4) << std::setfill(' ') << n << " " - << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) - << log10RmsErr(expect, actual) << endl; -} - - -/*---- Naive reference computation functions ----*/ - -static vector > naiveDft(const vector > &input, bool inverse) { - int n = static_cast(input.size()); - vector > output; - double coef = (inverse ? 2 : -2) * M_PI / n; - for (int k = 0; k < n; k++) { // For each output element - complex sum(0); - for (int t = 0; t < n; t++) { // For each input element - double angle = coef * (static_cast(t) * k % n); - sum += input[t] * std::polar(1.0, angle); - } - output.push_back(sum); - } - return output; -} - - -static vector > naiveConvolve( - const vector > &xvec, const vector > &yvec) { - int n = static_cast(xvec.size()); - vector > result(n); // All zeros - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - int k = (i + j) % n; - result[k] += xvec[i] * yvec[j]; - } - } - return result; -} - - -/*---- Utility functions ----*/ - -static double log10RmsErr(const vector > &xvec, const vector > &yvec) { - int n = static_cast(xvec.size()); - double err = std::pow(10, -99 * 2); - for (int i = 0; i < n; i++) - err += std::norm(xvec.at(i) - yvec.at(i)); - err /= n > 0 ? n : 1; - err = std::sqrt(err); // Now this is a root mean square (RMS) error - err = std::log10(err); - maxLogError = std::max(err, maxLogError); - return err; -} - - -static vector > randomComplexes(int n) { - std::uniform_real_distribution valueDist(-1.0, 1.0); - vector > result; - for (int i = 0; i < n; i++) - result.push_back(complex(valueDist(randGen), valueDist(randGen))); - return result; -} diff --git a/src/complex.hpp b/src/complex.hpp deleted file mode 100644 index 23a2b65..0000000 --- a/src/complex.hpp +++ /dev/null @@ -1,92 +0,0 @@ -#ifndef COMPLEX_ARRAY_HPP -#define COMPLEX_ARRAY_HPP -#include -#include -#include - - -using Complex = std::complex; - -class ComplexArray { - private: - double *real; - double *imag; - size_t array_size; - - public: - ComplexArray(size_t n) : array_size(n) { - if (n == 0) { - real = nullptr; - imag = nullptr; - } else { - real = new double[n]; - imag = new double[n]; - } - } - - ~ComplexArray() { - if (real != nullptr) { - delete[] real; - } - if (imag != nullptr) { - delete[] imag; - } - } - - ComplexArray(const ComplexArray &other) : array_size(other.array_size) { - real = new double[array_size]; - imag = new double[array_size]; - std::copy(other.real, other.real + array_size, real); - std::copy(other.imag, other.imag + array_size, imag); - } - - ComplexArray &operator=(const ComplexArray &other) { - if (this == &other) { - return *this; - } - if (real != nullptr) { - delete[] real; - } - if (imag != nullptr) { - delete[] imag; - } - - array_size = other.array_size; - real = new double[array_size]; - imag = new double[array_size]; - - for (size_t i = 0; i < array_size; i++) { - real[i] = other.real[i]; - imag[i] = other.imag[i]; - } - - return *this; - } - - Complex operator[](size_t index) const { - return Complex(real[index], imag[index]); - } - - size_t size() const { - return array_size; - } - - double* get_real_ptr() { - return real; - } - - const double* get_real_ptr() const { - return real; - } - - double* get_imag_ptr() { - return imag; - } - - const double* get_imag_ptr() const { - return imag; - } - -}; - -#endif diff --git a/src/complex_array.hpp b/src/complex_array.hpp new file mode 100644 index 0000000..8d0762c --- /dev/null +++ b/src/complex_array.hpp @@ -0,0 +1,148 @@ +#ifndef COMPLEX_ARRAY_HPP +#define COMPLEX_ARRAY_HPP +#include +#include +#include + +using Complex = std::complex; + +class ComplexProxy { + + double *real_ptr; + double *imag_ptr; + +public: + + double &real() { return *real_ptr; } + double &imag() { return *imag_ptr; } + + ComplexProxy(double *real_ptr, double *imag_ptr) + : real_ptr(real_ptr), imag_ptr(imag_ptr) {} + + ComplexProxy(ComplexProxy &&rhs) noexcept + : real_ptr(rhs.real_ptr), imag_ptr(rhs.imag_ptr) { + rhs.real_ptr = nullptr; + rhs.imag_ptr = nullptr; + } + + operator Complex() const { return Complex(*real_ptr, *imag_ptr); } + + + ComplexProxy &operator=(const Complex &c) { + *real_ptr = c.real(); + *imag_ptr = c.imag(); + return *this; + } +}; + +class ComplexArray { +private: + double *real; + double *imag; + size_t array_size; + +public: + ComplexArray(size_t n) : array_size(n) { + if (n == 0) { + real = nullptr; + imag = nullptr; + } else { + real = new double[n]; + std::fill(real, real + n, 0.0); + imag = new double[n]; + std::fill(imag, imag + n, 0.0); + } + } + + ~ComplexArray() { + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + } + + ComplexArray(const ComplexArray &other) : array_size(other.array_size) { + real = new double[array_size]; + imag = new double[array_size]; + std::copy(other.real, other.real + array_size, real); + std::copy(other.imag, other.imag + array_size, imag); + } + + ComplexArray(ComplexArray &&rhs) noexcept + : real(rhs.real), imag(rhs.imag), array_size(rhs.array_size) { + rhs.real = nullptr; + rhs.imag = nullptr; + rhs.array_size = 0; + } + + ComplexArray &operator=(const ComplexArray &other) { + if (this == &other) { + return *this; + } + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + + array_size = other.array_size; + real = new double[array_size]; + imag = new double[array_size]; + + for (size_t i = 0; i < array_size; i++) { + real[i] = other.real[i]; + imag[i] = other.imag[i]; + } + + return *this; + } + + ComplexArray operator*(const ComplexArray &other) const { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + + ComplexArray result(array_size); + for (size_t i = 0; i < array_size; i++) { + result.real[i] = real[i] * other.real[i] - imag[i] * other.imag[i]; + result.imag[i] = real[i] * other.imag[i] + imag[i] * other.real[i]; + } + return result; + } + + ComplexArray operator+(const ComplexArray &other) const { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + + ComplexArray result(array_size); + for (size_t i = 0; i < array_size; i++) { + result.real[i] = real[i] + other.real[i]; + result.imag[i] = imag[i] + other.imag[i]; + } + return result; + } + + Complex operator[](size_t index) const { + return Complex(real[index], imag[index]); + } + + ComplexProxy operator[](size_t index) { + return ComplexProxy(real + index, imag + index); + } + + size_t size() const { return array_size; } + + double *get_real_ptr() { return real; } + + const double *get_real_ptr() const { return real; } + + double *get_imag_ptr() { return imag; } + + const double *get_imag_ptr() const { return imag; } +}; + +#endif diff --git a/src/dct_naive.hpp b/src/dct_naive.hpp new file mode 100644 index 0000000..ffbae89 --- /dev/null +++ b/src/dct_naive.hpp @@ -0,0 +1,72 @@ +#pragma once +#include +#include +#include + +#include + + +namespace py = pybind11; +#define _USE_MATH_DEFINES + +constexpr double SQRT2_1 = 0.7071067811865475; +constexpr double ONE_SUB_SQRT2_1 = 1 - 0.7071067811865475; + +static inline double e(int n) { return 1.0f; } + +static inline double dct_naive_single(double *buf, size_t buf_size, size_t i) { + double sum = 0.0f; + for (size_t j = 0; j < buf_size; j++) { + // sum += buf[j] * cos((2 * j + 1) * i * M_PI / (2 * buf_size)); + sum += buf[j] * cos(M_PI / buf_size * i * (j + 0.5)); + } + return sum; +} + +static inline double idct_naive_single(double *buf, size_t buf_size, size_t i) { + double sum = 0.0; + for (size_t j = 0; j < buf_size; j++) { + sum += buf[j] * cos(M_PI * i * (2 * j + 1.0) / (2 * buf_size)); + } + return sum; +} + +void dct_naive_inner(double *buf, size_t buf_size, double *out) { + for (int i = 0; i < buf_size; i++) { + out[i] = 2.0 * e(i) * dct_naive_single(buf, buf_size, i); + } +} + +void idct_naive_inner(double *buf, size_t buf_size, double *out) { + for (int i = 0; i < buf_size; i++) { + out[i] = (buf[0] + 2 * idct_naive_single(buf, buf_size, i)) / buf_size; + } +} + +py::array dct_naive( + py::array_t in_arr) { + py::buffer_info buf_info = in_arr.request(); + double *ptr = static_cast(buf_info.ptr); + size_t buf_size = buf_info.size; + + py::array_t out_arr(buf_info.shape); + + double *out_buf = static_cast(out_arr.request(true).ptr); + dct_naive_inner(ptr, buf_size, out_buf); + + return out_arr; +} + +py::array idct_naive( + py::array_t in_arr) { + py::buffer_info buf_info = in_arr.request(); + double *ptr = static_cast(buf_info.ptr); + size_t buf_size = buf_info.size; + + py::array_t out_arr(buf_info.shape); + + double *out_buf = static_cast(out_arr.request(true).ptr); + idct_naive_inner(ptr, buf_size, out_buf); + + return out_arr; +} diff --git a/src/fdct.hpp b/src/fdct.hpp index e83eaa5..afe7665 100644 --- a/src/fdct.hpp +++ b/src/fdct.hpp @@ -1,43 +1,25 @@ +#pragma once + #include #include #include #include #include +#include #include #include #include #include "FftComplex.hpp" -#include "complex.hpp" +#include "complex_array.hpp" #include "fft.hpp" +#include "fdct_inner.hpp" + namespace py = pybind11; #define _USE_MATH_DEFINES -using data_t = double; - -constexpr data_t SQRT2_1 = 0.7071067811865475; -constexpr data_t ONE_SUB_SQRT2_1 = 1 - 0.7071067811865475; - -static inline data_t e(int n) { return 1.0f; } - -ComplexArray reorderInput(const ComplexArray &x) { - size_t N = x.size(); - ComplexArray u(N); - const double *x_real = x.get_real_ptr(); - const double *x_imag = x.get_imag_ptr(); - double *u_real = u.get_real_ptr(); - double *u_imag = u.get_imag_ptr(); - - for (size_t n = 0; n < N / 2; ++n) { - u_real[n] = x_real[2 * n]; - u_imag[n] = x_imag[2 * n]; - u_real[N - n - 1] = x_real[2 * n + 1]; - u_imag[N - n - 1] = x_imag[2 * n + 1]; - } - return u; -} std::vector reorderOutput(const std::vector &x) { size_t N = x.size(); @@ -50,38 +32,6 @@ std::vector reorderOutput(const std::vector &x) { return reordered; } -// Function to compute the Fast Cosine Transform -std::vector fct_inner(ComplexArray &x) { - size_t N = x.size(); - std::vector dct(N); - auto u = reorderInput(x); - ComplexArray fft_in(N); - std::vector > vc; - // Fill FFT input array - for (size_t n = 0; n < N; ++n) { - vc.push_back(std::complex(u.get_real_ptr()[n], u.get_imag_ptr()[n])); - fft_in.get_real_ptr()[n] = u.get_real_ptr()[n]; - fft_in.get_imag_ptr()[n] = u.get_imag_ptr()[n]; - } - - // Perform FFT - fft::transform(fft_in, false); - Fft::transform(vc, false); - - for (int n = 0; n < N; ++n) { - std::cout << fft_in.get_real_ptr()[n] << " " << vc[n].real() << std::endl; - } - - // Compute the DCT using the FFT output - for (size_t k = 0; k < N; ++k) { - double real = fft_in[k].real(); - double imag = fft_in[k].imag(); - dct[k] = - (real * cos(M_PI * k / (2 * N)) - imag * sin(-M_PI * k / (2 * N))) * 2; - } - return dct; -} - // Wrapper function to accept and return NumPy arrays py::array fdct(py::array_t in_arr) { @@ -104,6 +54,8 @@ fdct(py::array_t in_arr) { return py::array_t(result.size(), result.data()); } + + std::vector idct_inner(const std::vector &c) { size_t N = c.size(); std::vector x(N); @@ -154,76 +106,15 @@ ifdct(py::array_t in_arr) { // Convert input NumPy array to std::vector size_t N = buf.shape[0]; - std::vector input_data(static_cast(buf.ptr), - static_cast(buf.ptr) + N); - + // std::vector input_data(static_cast(buf.ptr), + // static_cast(buf.ptr) + N); + ComplexArray input_data(N); + std::copy(static_cast(buf.ptr), static_cast(buf.ptr) + N, + input_data.get_real_ptr()); + // Perform IDCT - std::vector result = idct_inner(input_data); - + std::vector result = ifdct_inner(input_data); + std::cout << "Result: " << result[0] << std::endl; // Convert result back to NumPy array return py::array_t(result.size(), result.data()); } - -static inline data_t dct_naive_single(data_t *buf, size_t buf_size, size_t i) { - data_t sum = 0.0f; - for (size_t j = 0; j < buf_size; j++) { - // sum += buf[j] * cos((2 * j + 1) * i * M_PI / (2 * buf_size)); - sum += buf[j] * cos(M_PI / buf_size * i * (j + 0.5)); - } - return sum; -} - -static inline data_t idct_naive_single(data_t *buf, size_t buf_size, size_t i) { - data_t sum = 0.0; - for (size_t j = 0; j < buf_size; j++) { - sum += buf[j] * cos(M_PI * i * (2 * j + 1.0) / (2 * buf_size)); - } - return sum; -} - -void dct_naive_inner(data_t *buf, size_t buf_size, data_t *out) { - for (int i = 0; i < buf_size; i++) { - out[i] = 2.0 * e(i) * dct_naive_single(buf, buf_size, i); - } -} - -void idct_naive_inner(data_t *buf, size_t buf_size, data_t *out) { - for (int i = 0; i < buf_size; i++) { - out[i] = (buf[0] + 2 * idct_naive_single(buf, buf_size, i)) / buf_size; - } -} - -double err(data_t *buf1, data_t *buf2, size_t buf_size) { - double sum = 0.0; - for (int i = 0; i < buf_size; i++) - sum += abs(buf1[i] - buf2[i]); - return sum; -} - -py::array dct_naive( - py::array_t in_arr) { - py::buffer_info buf_info = in_arr.request(); - double *ptr = static_cast(buf_info.ptr); - size_t buf_size = buf_info.size; - - py::array_t out_arr(buf_info.shape); - - double *out_buf = static_cast(out_arr.request(true).ptr); - dct_naive_inner(ptr, buf_size, out_buf); - - return out_arr; -} - -py::array idct_naive( - py::array_t in_arr) { - py::buffer_info buf_info = in_arr.request(); - double *ptr = static_cast(buf_info.ptr); - size_t buf_size = buf_info.size; - - py::array_t out_arr(buf_info.shape); - - double *out_buf = static_cast(out_arr.request(true).ptr); - idct_naive_inner(ptr, buf_size, out_buf); - - return out_arr; -} diff --git a/src/fdct_inner.cpp b/src/fdct_inner.cpp new file mode 100644 index 0000000..02d007c --- /dev/null +++ b/src/fdct_inner.cpp @@ -0,0 +1,83 @@ + +#pragma once +#include +#include +#include +#include +#include + + +#include "fdct_inner.hpp" +#include "complex_array.hpp" +#include "fft.hpp" +// Function to compute the Fast Cosine Transform + +ComplexArray reorderInput(const ComplexArray &x) { + size_t N = x.size(); + ComplexArray u(N); + const double *x_real = x.get_real_ptr(); + const double *x_imag = x.get_imag_ptr(); + double *u_real = u.get_real_ptr(); + double *u_imag = u.get_imag_ptr(); + + for (size_t n = 0; n < N / 2; ++n) { + u_real[n] = x_real[2 * n]; + u_imag[n] = x_imag[2 * n]; + u_real[N - n - 1] = x_real[2 * n + 1]; + u_imag[N - n - 1] = x_imag[2 * n + 1]; + } + return u; +} + +std::vector fct_inner(ComplexArray &x) { + size_t N = x.size(); + std::vector dct(N); + auto u = reorderInput(x); + ComplexArray fft_in(N); + std::vector > vc; + // Fill FFT input array + for (size_t n = 0; n < N; ++n) { + vc.push_back(std::complex(u.get_real_ptr()[n], u.get_imag_ptr()[n])); + fft_in.get_real_ptr()[n] = u.get_real_ptr()[n]; + fft_in.get_imag_ptr()[n] = u.get_imag_ptr()[n]; + } + + // Perform FFT + fft::transform(fft_in, false); + + // Compute the DCT using the FFT output + for (size_t k = 0; k < N; ++k) { + double real = fft_in[k].real(); + double imag = fft_in[k].imag(); + dct[k] = + (real * cos(M_PI * k / (2 * N)) - imag * sin(-M_PI * k / (2 * N))) * 2; + } + return dct; +} + +std::vector ifdct_inner(ComplexArray &c) { + size_t N = c.size(); + std::vector x(N); + auto u = reorderInput(c); + ComplexArray fft_in(N); + std::vector > vc; + // Fill FFT input array + for (size_t n = 0; n < N; ++n) { + vc.push_back(std::complex(u.get_real_ptr()[n], u.get_imag_ptr()[n])); + fft_in.get_real_ptr()[n] = u.get_real_ptr()[n]; + fft_in.get_imag_ptr()[n] = u.get_imag_ptr()[n]; + } + + // Perform FFT + fft::transform(fft_in, true); + + // Compute the IDCT using the FFT output + for (size_t k = 0; k < N; ++k) { + double real = fft_in[k].real(); + double imag = fft_in[k].imag(); + x[k] = + (real * cos(M_PI * k / (2.0 * N)) + imag * sin(M_PI * k / (2.0 * N))) / + N; + } + return x; +} \ No newline at end of file diff --git a/src/fdct_inner.hpp b/src/fdct_inner.hpp new file mode 100644 index 0000000..76e84f6 --- /dev/null +++ b/src/fdct_inner.hpp @@ -0,0 +1,17 @@ + +#pragma once +#include +#include +#include +#include +#include + +#include "complex_array.hpp" + +// Function to compute the Fast Cosine Transform + +ComplexArray reorderInput(const ComplexArray &x); + +std::vector fct_inner(ComplexArray &x); + +std::vector ifdct_inner(ComplexArray &c); diff --git a/src/fdct_module.cpp b/src/fdct_module.cpp index 9a0135a..38c5d1e 100644 --- a/src/fdct_module.cpp +++ b/src/fdct_module.cpp @@ -2,6 +2,7 @@ #include #include #include "fdct.hpp" +#include "dct_naive.hpp" namespace py = pybind11; #define _USE_MATH_DEFINES @@ -10,6 +11,6 @@ namespace py = pybind11; PYBIND11_MODULE(fdct, m) { m.def("dct_naive", &dct_naive, "Compute fast discrete cosine transformation."); m.def("idct_naive", &idct_naive, "Compute inverse fast discrete cosine transformation."); - m.def("fdct", &fdct, ""); - m.def("ifdct", &ifdct, ""); + m.def("fdct", &fdct, "Compute fast discrete cosine transformation."); + m.def("ifdct", &ifdct, "Compute inverse fast discrete cosine transformation."); } diff --git a/src/fft.cpp b/src/fft.cpp index 40f0948..93bd45d 100644 --- a/src/fft.cpp +++ b/src/fft.cpp @@ -5,8 +5,9 @@ #include #include #include +#include -#include "complex.hpp" +#include "complex_array.hpp" #include "fft.hpp" static size_t reverseBits(size_t val, int width); @@ -14,21 +15,19 @@ static size_t reverseBits(size_t val, int width); void fft::transformRadix2(ComplexArray &x, bool inverse) { int levels = 0; size_t n = x.size(); - double *real = x.get_imag_ptr(); - double *imag = x.get_real_ptr(); - + double *real = x.get_real_ptr(); + double *imag = x.get_imag_ptr(); + for (size_t temp = n; temp > 1U; temp >>= 1) levels++; if (static_cast(1U) << levels != n) throw std::domain_error("Length is not a power of 2"); - double *real_expTable = new double[n / 2]; - double *imag_expTable = new double[n / 2]; + ComplexArray expTable(n / 2); for (size_t i = 0; i < n / 2; i++) { double angle = (inverse ? 2 : -2) * M_PI * i / n; - real_expTable[i] = cos(angle); - imag_expTable[i] = sin(angle); + expTable[i] = std::polar(1.0, angle); } for (size_t i = 0; i < n; i++) { @@ -39,6 +38,9 @@ void fft::transformRadix2(ComplexArray &x, bool inverse) { } } + double *real_expTable = expTable.get_real_ptr(); + double *imag_expTable = expTable.get_imag_ptr(); + for (size_t size = 2; size <= n; size *= 2) { size_t halfsize = size / 2; size_t tablestep = n / size; @@ -61,8 +63,8 @@ void fft::transformRadix2(ComplexArray &x, bool inverse) { void fft::transformBluestein(ComplexArray &x, bool inverse) { size_t n = x.size(); - double *real = x.get_imag_ptr(); - double *imag = x.get_real_ptr(); + double *real = x.get_real_ptr(); + double *imag = x.get_imag_ptr(); size_t m = 1; while (m / 2 <= n) { if (m > SIZE_MAX / 2) @@ -71,32 +73,39 @@ void fft::transformBluestein(ComplexArray &x, bool inverse) { } // Trigonometric table - double *real_expTable = new double[n]; - double *imag_expTable = new double[n]; + ComplexArray expTable(m); + double *real_expTable = expTable.get_real_ptr(); + double *imag_expTable = expTable.get_imag_ptr(); for (size_t i = 0; i < n; i++) { uintmax_t temp = static_cast(i) * i; temp %= static_cast(n) * 2; double angle = (inverse ? M_PI : -M_PI) * temp / n; - real_expTable[i] = cos(angle); - imag_expTable[i] = sin(angle); + expTable[i] = std::polar(1.0, angle); } + ComplexArray a(m), b(m); + double *real_avec = a.get_real_ptr(); + double *imag_avec = a.get_imag_ptr(); - double *real_avec = new double[m]; - double *imag_avec = new double[m]; for (size_t i = 0; i < n; i++) { real_avec[i] = real[i] * real_expTable[i] - imag[i] * imag_expTable[i]; imag_avec[i] = real[i] * imag_expTable[i] + imag[i] * real_expTable[i]; } - double *real_bvec = new double[m]; - double *imag_bvec = new double[m]; + + // a = x * expTable; + + double *real_bvec = b.get_real_ptr(); + double *imag_bvec = b.get_imag_ptr(); real_bvec[0] = real_expTable[0]; imag_bvec[0] = imag_expTable[0]; + for (size_t i = 1; i < n; ++i) { real_bvec[i] = real_bvec[m - i] = real_expTable[i]; - imag_bvec[i] = imag_bvec[m - i] = imag_expTable[i]; + imag_bvec[i] = imag_bvec[m - i] = -imag_expTable[i]; } + + ComplexArray c = convolove(a, b); - ComplexArray c = convolove(, y); + x = c * expTable; } ComplexArray fft::convolove(ComplexArray x, ComplexArray y) { diff --git a/src/fft.hpp b/src/fft.hpp index 8813af0..c4620fb 100644 --- a/src/fft.hpp +++ b/src/fft.hpp @@ -1,16 +1,15 @@ -#ifndef FFT_H -#define FFT_H +#pragma once -#include "complex.hpp" +#include "complex_array.hpp" namespace fft { - void transformRadix2(ComplexArray &x, bool inverse); +void transformRadix2(ComplexArray &x, bool inverse); - void transformBluestein(ComplexArray &x, bool inverse); +void transformBluestein(ComplexArray &x, bool inverse); - void transform(ComplexArray &x, bool inverse); +void transform(ComplexArray &x, bool inverse); - ComplexArray convolove(const ComplexArray &x, const ComplexArray &y); -} +// ComplexArray convolove(const ComplexArray &x, const ComplexArray &y); +ComplexArray convolove(ComplexArray x, ComplexArray y); +} // namespace fft -#endif // FFT_H From e29dcb4219a892f9075439957010e10896fed273 Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Wed, 11 Dec 2024 15:13:40 +0000 Subject: [PATCH 03/16] chore: new test --- test_dct.py | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/test_dct.py b/test_dct.py index 3ed77ee..98eeb4f 100644 --- a/test_dct.py +++ b/test_dct.py @@ -1,39 +1,36 @@ +import pytest import numpy as np import fdct import scipy.fft as fft -def test_dct(): - inp = np.random.rand(1024) +# Parameters +test_size_seq = [10, 64, 127, 256, 1023, 2048, 4097] +tolerance = 1e-6 # Tolerance for individual element comparison + +@pytest.mark.parametrize("size", test_size_seq) +def test_dct_naive(size): + inp = np.random.rand(size) gt = fft.dct(inp) ans = fdct.dct_naive(inp) - s = sum((gt - ans) ** 2) / 1024 - assert s < 0.1 + np.testing.assert_allclose(ans, gt, atol=tolerance, rtol=0) -def test_idct(): - inp = np.random.rand(1024) +@pytest.mark.parametrize("size", test_size_seq) +def test_idct_naive(size): + inp = np.random.rand(size) gt = fft.idct(inp) ans = fdct.idct_naive(inp) - - s = sum((gt - ans) ** 2) / 1024 - assert s < 0.1 - -def test_fdct(): - inp = np.random.rand(10) + np.testing.assert_allclose(ans, gt, atol=tolerance, rtol=0) + +@pytest.mark.parametrize("size", test_size_seq) +def test_fdct(size): + inp = np.random.rand(size) gt = fft.dct(inp) ans = fdct.fdct(inp) - s = sum((gt - ans) ** 2) / 10 - print(gt) - print(ans) - assert s < 0.1 + np.testing.assert_allclose(ans, gt, atol=tolerance, rtol=0) -def test_ifdct(): - inp = np.random.rand(10) +@pytest.mark.parametrize("size", test_size_seq) +def test_ifdct(size): + inp = np.random.rand(size) gt = fft.idct(inp) ans = fdct.ifdct(inp) - s = sum((gt - ans) ** 2) / 10 - assert s < 0.1 - -if __name__ == "__main__": - test_dct() - test_idct() - test_fdct() + np.testing.assert_allclose(ans, gt, atol=tolerance, rtol=0) From b6cec8444471741b868d06153bb2a060f478c25e Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Wed, 11 Dec 2024 15:17:28 +0000 Subject: [PATCH 04/16] fix: missing file --- CMakeLists.txt | 12 +++++++++++- src/fdct.hpp | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 846483e..bfa4c97 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,17 @@ include_directories(${PYTHON_INCLUDE_DIRS}) # Find Pybind11 find_package(pybind11 REQUIRED) -pybind11_add_module(fdct src/fdct.cpp) +find_package(PkgConfig REQUIRED) +pkg_search_module(FFTW REQUIRED fftw3 IMPORTED_TARGET) +include_directories(PkgConfig::FFTW) +link_libraries(PkgConfig::FFTW) + +pybind11_add_module(fdct + src/fdct_module.cpp + # src/FftComplex.cpp + src/fft.cpp + src/fdct_inner.cpp +) # Link the module with Python target_link_libraries(fdct PRIVATE ${PYTHON_LIBRARIES}) diff --git a/src/fdct.hpp b/src/fdct.hpp index afe7665..04ef415 100644 --- a/src/fdct.hpp +++ b/src/fdct.hpp @@ -11,7 +11,7 @@ #include #include -#include "FftComplex.hpp" +// #include "FftComplex.hpp" #include "complex_array.hpp" #include "fft.hpp" #include "fdct_inner.hpp" From e2ac5fc0a26115cbf71eda1ab9c1da60f473ebfa Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Wed, 11 Dec 2024 15:21:31 +0000 Subject: [PATCH 05/16] fix: CI dependency --- .github/workflows/cmake-single-platform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index 5012e2a..ca27d04 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -29,7 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - sudo apt install python3-pybind11 + sudo apt install python3-pybind11 libfftw3-dev python -m pip install --upgrade pip python -m pip install pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi From 86047847ca8a3fc856207c0716cc197ea0611e10 Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Wed, 11 Dec 2024 16:09:48 +0000 Subject: [PATCH 06/16] fix: naive idct --- src/dct_naive.hpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/dct_naive.hpp b/src/dct_naive.hpp index ffbae89..7764d41 100644 --- a/src/dct_naive.hpp +++ b/src/dct_naive.hpp @@ -12,12 +12,9 @@ namespace py = pybind11; constexpr double SQRT2_1 = 0.7071067811865475; constexpr double ONE_SUB_SQRT2_1 = 1 - 0.7071067811865475; -static inline double e(int n) { return 1.0f; } - static inline double dct_naive_single(double *buf, size_t buf_size, size_t i) { double sum = 0.0f; for (size_t j = 0; j < buf_size; j++) { - // sum += buf[j] * cos((2 * j + 1) * i * M_PI / (2 * buf_size)); sum += buf[j] * cos(M_PI / buf_size * i * (j + 0.5)); } return sum; @@ -25,21 +22,21 @@ static inline double dct_naive_single(double *buf, size_t buf_size, size_t i) { static inline double idct_naive_single(double *buf, size_t buf_size, size_t i) { double sum = 0.0; - for (size_t j = 0; j < buf_size; j++) { - sum += buf[j] * cos(M_PI * i * (2 * j + 1.0) / (2 * buf_size)); + for (size_t j = 1; j < buf_size; j++) { + sum += buf[j] * cos(M_PI * j * (2 * i + 1.0) / (2.0 * buf_size)); } return sum; } void dct_naive_inner(double *buf, size_t buf_size, double *out) { - for (int i = 0; i < buf_size; i++) { - out[i] = 2.0 * e(i) * dct_naive_single(buf, buf_size, i); + for (size_t i = 0; i < buf_size; i++) { + out[i] = 2.0 * dct_naive_single(buf, buf_size, i); } } void idct_naive_inner(double *buf, size_t buf_size, double *out) { - for (int i = 0; i < buf_size; i++) { - out[i] = (buf[0] + 2 * idct_naive_single(buf, buf_size, i)) / buf_size; + for (size_t i = 0; i < buf_size; i++) { + out[i] = (buf[0] + (2 * idct_naive_single(buf, buf_size, i))) / (2.0 * buf_size); } } From 039c4140747bf5c06ddffdf3325d96a6775a5514 Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Thu, 12 Dec 2024 08:00:14 +0000 Subject: [PATCH 07/16] chore: new gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 9bf3746..374071e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,6 @@ compile_commands.json +build/ +.vscode/ +.cache/ +*.so + From 269f5cf5b2a0704c0611a29c11218b0376cb7d2c Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Thu, 12 Dec 2024 08:01:48 +0000 Subject: [PATCH 08/16] fix: correct fdct --- CMakeLists.txt | 3 +- src/fdct.hpp | 21 +--- src/fdct_inner.cpp | 223 ++++++++++++++++++++++++++----------- src/fft.cpp | 7 +- src/ref/FastDctFft.hpp | 36 ++++++ src/ref/FastDctFtt.cpp | 75 +++++++++++++ src/ref/FftComplex.cpp | 149 +++++++++++++++++++++++++ src/ref/FftComplex.hpp | 61 ++++++++++ src/ref/FftComplexTest.cpp | 177 +++++++++++++++++++++++++++++ src/ref/FftRealPair.cpp | 195 ++++++++++++++++++++++++++++++++ src/ref/FftRealPair.hpp | 75 +++++++++++++ src/ref/README.org | 5 + 12 files changed, 940 insertions(+), 87 deletions(-) create mode 100644 src/ref/FastDctFft.hpp create mode 100644 src/ref/FastDctFtt.cpp create mode 100644 src/ref/FftComplex.cpp create mode 100644 src/ref/FftComplex.hpp create mode 100644 src/ref/FftComplexTest.cpp create mode 100644 src/ref/FftRealPair.cpp create mode 100644 src/ref/FftRealPair.hpp create mode 100644 src/ref/README.org diff --git a/CMakeLists.txt b/CMakeLists.txt index bfa4c97..4afb28a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,7 @@ include_directories(PkgConfig::FFTW) link_libraries(PkgConfig::FFTW) pybind11_add_module(fdct - src/fdct_module.cpp - # src/FftComplex.cpp + src/fdct_module.cpp src/fft.cpp src/fdct_inner.cpp ) diff --git a/src/fdct.hpp b/src/fdct.hpp index 04ef415..ee08687 100644 --- a/src/fdct.hpp +++ b/src/fdct.hpp @@ -20,18 +20,6 @@ namespace py = pybind11; #define _USE_MATH_DEFINES - -std::vector reorderOutput(const std::vector &x) { - size_t N = x.size(); - std::vector reordered(N); - - for (size_t n = 0; n < N / 2; ++n) { - reordered[2 * n] = x[n]; - reordered[2 * n + 1] = x[N - n - 1]; - } - return reordered; -} - // Wrapper function to accept and return NumPy arrays py::array fdct(py::array_t in_arr) { @@ -54,13 +42,11 @@ fdct(py::array_t in_arr) { return py::array_t(result.size(), result.data()); } - - std::vector idct_inner(const std::vector &c) { size_t N = c.size(); std::vector x(N); - auto u = reorderOutput(c); - + // auto u = reorderOutput(c); + ComplexArray u(N); // Allocate FFTW input/output arrays fftw_complex *fft_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N); fftw_complex *fft_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N); @@ -71,7 +57,7 @@ std::vector idct_inner(const std::vector &c) { // Fill FFTW input array for (size_t n = 0; n < N; ++n) { - fft_in[n][0] = u[n]; // Real part + fft_in[n][0] = c[n]; // Real part fft_in[n][1] = 0.0; // Imaginary part } @@ -114,7 +100,6 @@ ifdct(py::array_t in_arr) { // Perform IDCT std::vector result = ifdct_inner(input_data); - std::cout << "Result: " << result[0] << std::endl; // Convert result back to NumPy array return py::array_t(result.size(), result.data()); } diff --git a/src/fdct_inner.cpp b/src/fdct_inner.cpp index 02d007c..bc609dd 100644 --- a/src/fdct_inner.cpp +++ b/src/fdct_inner.cpp @@ -1,83 +1,176 @@ #pragma once -#include +#include "fdct_inner.hpp" + +#include + #include #include #include +#include #include - -#include "fdct_inner.hpp" #include "complex_array.hpp" #include "fft.hpp" + // Function to compute the Fast Cosine Transform ComplexArray reorderInput(const ComplexArray &x) { - size_t N = x.size(); - ComplexArray u(N); - const double *x_real = x.get_real_ptr(); - const double *x_imag = x.get_imag_ptr(); - double *u_real = u.get_real_ptr(); - double *u_imag = u.get_imag_ptr(); - - for (size_t n = 0; n < N / 2; ++n) { - u_real[n] = x_real[2 * n]; - u_imag[n] = x_imag[2 * n]; - u_real[N - n - 1] = x_real[2 * n + 1]; - u_imag[N - n - 1] = x_imag[2 * n + 1]; - } - return u; + size_t N = x.size(); + ComplexArray u(N); + const double *x_real = x.get_real_ptr(); + const double *x_imag = x.get_imag_ptr(); + double *u_real = u.get_real_ptr(); + double *u_imag = u.get_imag_ptr(); + + for (size_t n = 0; n < N / 2; ++n) { + u_real[n] = x_real[2 * n]; + u_imag[n] = x_imag[2 * n]; + u_real[N - n - 1] = x_real[2 * n + 1]; + u_imag[N - n - 1] = x_imag[2 * n + 1]; + } + if (N % 2 != 0) { + u_real[N / 2] = x_real[N - 1]; + u_imag[N / 2] = x_imag[N - 1]; + } + return u; +} + +ComplexArray reorderOutput(const ComplexArray &v) { + size_t N = v.size(); + ComplexArray x(N); + + // Access the real and imaginary parts directly + const double *v_real = v.get_real_ptr(); + const double *v_imag = v.get_imag_ptr(); + double *x_real = x.get_real_ptr(); + double *x_imag = x.get_imag_ptr(); + + // Reordering according to the formula + for (size_t n = 0; n < N / 2; ++n) { + x_real[2 * n] = v_real[n]; + x_imag[2 * n] = v_imag[n]; + x_real[2 * n + 1] = v_real[N - n - 1]; + x_imag[2 * n + 1] = v_imag[N - n - 1]; + } + if (N % 2 != 0) { + x_real[N - 1] = v_real[N / 2]; + x_imag[N - 1] = v_imag[N / 2]; + } + + return x; +} + +ComplexArray idctPreprocess(const ComplexArray &x) { + size_t N = x.size(); + ComplexArray u(N); + ComplexArray v(N); + v = x; + v.get_real_ptr()[0] /= 2; + // for (size_t n = 0; n < N; ++n) { + // // Compute the complex term + // Complex W = std::polar(1.0, -M_PI * n / (2 * N)); + + // // Compute u[n] + // u[n] = 0.5 * (x[n] - x[N - n - 1] * Complex(0, 1)) * W; + // } + + for (size_t n = 0; n < N; ++n) { + double tmp = n * M_PI / (2 * N); + u.get_real_ptr()[n] = v[n].real() * cos(tmp); + u.get_imag_ptr()[n] = -v[n].real() * sin(tmp); + } + return u; } + std::vector fct_inner(ComplexArray &x) { - size_t N = x.size(); - std::vector dct(N); - auto u = reorderInput(x); - ComplexArray fft_in(N); - std::vector > vc; - // Fill FFT input array - for (size_t n = 0; n < N; ++n) { - vc.push_back(std::complex(u.get_real_ptr()[n], u.get_imag_ptr()[n])); - fft_in.get_real_ptr()[n] = u.get_real_ptr()[n]; - fft_in.get_imag_ptr()[n] = u.get_imag_ptr()[n]; - } - - // Perform FFT - fft::transform(fft_in, false); - - // Compute the DCT using the FFT output - for (size_t k = 0; k < N; ++k) { - double real = fft_in[k].real(); - double imag = fft_in[k].imag(); - dct[k] = - (real * cos(M_PI * k / (2 * N)) - imag * sin(-M_PI * k / (2 * N))) * 2; - } - return dct; + size_t N = x.size(); + std::vector dct(N); + auto u = reorderInput(x); + ComplexArray fft_in(N); + std::vector > vc; + // Fill FFT input array + for (size_t n = 0; n < N; ++n) { + vc.push_back( + std::complex(u.get_real_ptr()[n], u.get_imag_ptr()[n])); + fft_in.get_real_ptr()[n] = u.get_real_ptr()[n]; + fft_in.get_imag_ptr()[n] = u.get_imag_ptr()[n]; + } + + // use fftw3 + // fftw_complex *in, *out; + // fftw_plan p; + // in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); + // out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); + // for (size_t i = 0; i < N; i++) { + // in[i][0] = fft_in.get_real_ptr()[i]; + // in[i][1] = fft_in.get_imag_ptr()[i]; + // } + // p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE); + // fftw_execute(p); + // for (size_t i = 0; i < N; i++) { + // fft_in.get_real_ptr()[i] = out[i][0]; + // fft_in.get_imag_ptr()[i] = out[i][1]; + // } + // fftw_destroy_plan(p); + // fftw_free(in); + // fftw_free(out); + + // Perform FFT + fft::transform(fft_in, false); + // std::vector > fft_in_p; + // for (size_t n = 0; n < N; ++n) { + // fft_in_p.push_back(std::complex(fft_in.get_real_ptr()[n], + // fft_in.get_imag_ptr()[n])); + // } + // Fft::transform(fft_in_p, false); + // for (size_t n = 0; n < N; ++n) { + // fft_in.get_real_ptr()[n] = fft_in_p[n].real(); + // fft_in.get_imag_ptr()[n] = fft_in_p[n].imag(); + // } + + // Compute the DCT using the FFT output + for (size_t k = 0; k < N; ++k) { + double real = fft_in[k].real(); + double imag = fft_in[k].imag(); + dct[k] = + (real * cos(M_PI * k / (2 * N)) - imag * sin(-M_PI * k / (2 * N))) * + 2; + } + return dct; } std::vector ifdct_inner(ComplexArray &c) { - size_t N = c.size(); - std::vector x(N); - auto u = reorderInput(c); - ComplexArray fft_in(N); - std::vector > vc; - // Fill FFT input array - for (size_t n = 0; n < N; ++n) { - vc.push_back(std::complex(u.get_real_ptr()[n], u.get_imag_ptr()[n])); - fft_in.get_real_ptr()[n] = u.get_real_ptr()[n]; - fft_in.get_imag_ptr()[n] = u.get_imag_ptr()[n]; - } - - // Perform FFT - fft::transform(fft_in, true); - - // Compute the IDCT using the FFT output - for (size_t k = 0; k < N; ++k) { - double real = fft_in[k].real(); - double imag = fft_in[k].imag(); - x[k] = - (real * cos(M_PI * k / (2.0 * N)) + imag * sin(M_PI * k / (2.0 * N))) / - N; - } - return x; -} \ No newline at end of file + size_t N = c.size(); + std::vector x(N); + // auto u = reorderInput(c); + auto u = idctPreprocess(c); + ComplexArray fft_in(N); + std::vector > vc; + // Fill FFT input array + for (size_t n = 0; n < N; ++n) { + vc.push_back( + std::complex(u.get_real_ptr()[n], u.get_imag_ptr()[n])); + fft_in.get_real_ptr()[n] = u.get_real_ptr()[n]; + fft_in.get_imag_ptr()[n] = u.get_imag_ptr()[n]; + } + + // Perform FFT + fft::transform(fft_in, false); + + auto x_ = reorderOutput(fft_in); + for (size_t n = 0; n < N; ++n) { + x[n] = x_.get_real_ptr()[n] / N; + } + + // Compute the IDCT using the FFT output + // for (size_t k = 0; k < N; ++k) { + // double real = fft_in[k].real(); + // double imag = fft_in[k].imag(); + // x[k] = (real * cos(M_PI * k / (2.0 * N)) + + // imag * sin(M_PI * k / (2.0 * N))) / + // N; + // } + return x; +} diff --git a/src/fft.cpp b/src/fft.cpp index 93bd45d..692d3f3 100644 --- a/src/fft.cpp +++ b/src/fft.cpp @@ -73,7 +73,7 @@ void fft::transformBluestein(ComplexArray &x, bool inverse) { } // Trigonometric table - ComplexArray expTable(m); + ComplexArray expTable(n); double *real_expTable = expTable.get_real_ptr(); double *imag_expTable = expTable.get_imag_ptr(); for (size_t i = 0; i < n; i++) { @@ -105,7 +105,10 @@ void fft::transformBluestein(ComplexArray &x, bool inverse) { ComplexArray c = convolove(a, b); - x = c * expTable; + for (size_t i = 0; i < n; i++) { + real[i] = c.get_real_ptr()[i] * real_expTable[i] - c.get_imag_ptr()[i] * imag_expTable[i]; + imag[i] = c.get_real_ptr()[i] * imag_expTable[i] + c.get_imag_ptr()[i] * real_expTable[i]; + } } ComplexArray fft::convolove(ComplexArray x, ComplexArray y) { diff --git a/src/ref/FastDctFft.hpp b/src/ref/FastDctFft.hpp new file mode 100644 index 0000000..624234d --- /dev/null +++ b/src/ref/FastDctFft.hpp @@ -0,0 +1,36 @@ +/* + * Fast discrete cosine transform algorithms (C++) + * + * Copyright (c) 2017 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include + + +namespace FastDctFft { + + void transform(std::vector &vec); + + void inverseTransform(std::vector &vec); + +} + diff --git a/src/ref/FastDctFtt.cpp b/src/ref/FastDctFtt.cpp new file mode 100644 index 0000000..5acfa96 --- /dev/null +++ b/src/ref/FastDctFtt.cpp @@ -0,0 +1,75 @@ +/* + * Fast discrete cosine transform algorithms (C++) + * + * Copyright (c) 2017 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include "FastDctFft.hpp" +#include "FftRealPair.hpp" + +using std::size_t; +using std::vector; + + +// DCT type II, unscaled +void FastDctFft::transform(vector &vec) { + size_t len = vec.size(); + size_t halfLen = len / 2; + vector real(len); + for (size_t i = 0; i < halfLen; i++) { + real.at(i) = vec.at(i * 2); + real.at(len - 1 - i) = vec.at(i * 2 + 1); + } + if (len % 2 == 1) + real.at(halfLen) = vec.at(len - 1); + std::fill(vec.begin(), vec.end(), 0.0); + Fft::transform(real, vec); + for (size_t i = 0; i < len; i++) { + double temp = i * M_PI / (len * 2); + vec.at(i) = real.at(i) * std::cos(temp) + vec.at(i) * std::sin(temp); + } +} + + +// DCT type III, unscaled +void FastDctFft::inverseTransform(vector &vec) { + size_t len = vec.size(); + if (len > 0) + vec.at(0) /= 2; + vector real(len); + for (size_t i = 0; i < len; i++) { + double temp = i * M_PI / (len * 2); + real.at(i) = vec.at(i) * std::cos(temp); + vec.at(i) *= -std::sin(temp); + } + Fft::transform(real, vec); + + size_t halfLen = len / 2; + for (size_t i = 0; i < halfLen; i++) { + vec.at(i * 2 + 0) = real.at(i); + vec.at(i * 2 + 1) = real.at(len - 1 - i); + } + if (len % 2 == 1) + vec.at(len - 1) = real.at(halfLen); +} + diff --git a/src/ref/FftComplex.cpp b/src/ref/FftComplex.cpp new file mode 100644 index 0000000..4f842a0 --- /dev/null +++ b/src/ref/FftComplex.cpp @@ -0,0 +1,149 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include "FftComplex.hpp" + +using std::complex; +using std::size_t; +using std::uintmax_t; +using std::vector; + + +// Private function prototypes +static size_t reverseBits(size_t val, int width); + + +void Fft::transform(vector > &vec, bool inverse) { + size_t n = vec.size(); + if (n == 0) + return; + else if ((n & (n - 1)) == 0) // Is power of 2 + transformRadix2(vec, inverse); + else // More complicated algorithm for arbitrary sizes + transformBluestein(vec, inverse); +} + + +void Fft::transformRadix2(vector > &vec, bool inverse) { + // Length variables + size_t n = vec.size(); + int levels = 0; // Compute levels = floor(log2(n)) + for (size_t temp = n; temp > 1U; temp >>= 1) + levels++; + if (static_cast(1U) << levels != n) + throw std::domain_error("Length is not a power of 2"); + + // Trigonometric table + vector > expTable(n / 2); + for (size_t i = 0; i < n / 2; i++) + expTable[i] = std::polar(1.0, (inverse ? 2 : -2) * M_PI * i / n); + + // Bit-reversed addressing permutation + for (size_t i = 0; i < n; i++) { + size_t j = reverseBits(i, levels); + if (j > i) + std::swap(vec[i], vec[j]); + } + + // Cooley-Tukey decimation-in-time radix-2 FFT + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + complex temp = vec[j + halfsize] * expTable[k]; + vec[j + halfsize] = vec[j] - temp; + vec[j] += temp; + } + } + if (size == n) // Prevent overflow in 'size *= 2' + break; + } +} + + +void Fft::transformBluestein(vector > &vec, bool inverse) { + // Find a power-of-2 convolution length m such that m >= n * 2 + 1 + size_t n = vec.size(); + size_t m = 1; + while (m / 2 <= n) { + if (m > SIZE_MAX / 2) + throw std::length_error("Vector too large"); + m *= 2; + } + + // Trigonometric table + vector > expTable(n); + for (size_t i = 0; i < n; i++) { + uintmax_t temp = static_cast(i) * i; + temp %= static_cast(n) * 2; + double angle = (inverse ? M_PI : -M_PI) * temp / n; + expTable[i] = std::polar(1.0, angle); + } + + // Temporary vectors and preprocessing + vector > avec(m); + for (size_t i = 0; i < n; i++) + avec[i] = vec[i] * expTable[i]; + vector > bvec(m); + bvec[0] = expTable[0]; + for (size_t i = 1; i < n; i++) + bvec[i] = bvec[m - i] = std::conj(expTable[i]); + + // Convolution + vector > cvec = convolve(std::move(avec), std::move(bvec)); + + // Postprocessing + for (size_t i = 0; i < n; i++) + vec[i] = cvec[i] * expTable[i]; +} + + +vector > Fft::convolve( + vector > xvec, + vector > yvec) { + + size_t n = xvec.size(); + if (n != yvec.size()) + throw std::domain_error("Mismatched lengths"); + transform(xvec, false); + transform(yvec, false); + for (size_t i = 0; i < n; i++) + xvec[i] *= yvec[i]; + transform(xvec, true); + for (size_t i = 0; i < n; i++) // Scaling (because this FFT implementation omits it) + xvec[i] /= static_cast(n); + return xvec; +} + + +static size_t reverseBits(size_t val, int width) { + size_t result = 0; + for (int i = 0; i < width; i++, val >>= 1) + result = (result << 1) | (val & 1U); + return result; +} diff --git a/src/ref/FftComplex.hpp b/src/ref/FftComplex.hpp new file mode 100644 index 0000000..24eeabb --- /dev/null +++ b/src/ref/FftComplex.hpp @@ -0,0 +1,61 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include +#include +using Complex = std::complex; + +namespace Fft { + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. The inverse transform does not perform scaling, so it is not a true inverse. + */ + void transform(std::vector > &vec, bool inverse); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. + */ + void transformRadix2(std::vector > &vec, bool inverse); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. + * Uses Bluestein's chirp z-transform algorithm. + */ + void transformBluestein(std::vector > &vec, bool inverse); + + + /* + * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. + */ + std::vector > convolve( + std::vector > xvec, + std::vector > yvec); + +} diff --git a/src/ref/FftComplexTest.cpp b/src/ref/FftComplexTest.cpp new file mode 100644 index 0000000..dd960d1 --- /dev/null +++ b/src/ref/FftComplexTest.cpp @@ -0,0 +1,177 @@ +/* + * FFT and convolution test (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "FftComplex.hpp" + +using std::complex; +using std::cout; +using std::endl; +using std::vector; + + +// Private function prototypes +static void testFft(int n); +static void testConvolution(int n); +static vector > naiveDft(const vector > &input, bool inverse); +static vector > naiveConvolve(const vector > &xvec, const vector > &yvec); +static double log10RmsErr(const vector > &xvec, const vector > &yvec); +static vector > randomComplexes(int n); + +// Mutable global variable +static double maxLogError = -INFINITY; + +// Random number generation +std::default_random_engine randGen((std::random_device())()); + + +/*---- Main and test functions ----*/ + +int main() { + // Test power-of-2 size FFTs + for (int i = 0; i <= 12; i++) + testFft(1 << i); + + // Test small size FFTs + for (int i = 0; i < 30; i++) + testFft(i); + + // Test diverse size FFTs + for (int i = 0, prev = 0; i <= 100; i++) { + int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); + if (n > prev) { + testFft(n); + prev = n; + } + } + + // Test power-of-2 size convolutions + for (int i = 0; i <= 12; i++) + testConvolution(1 << i); + + // Test diverse size convolutions + for (int i = 0, prev = 0; i <= 100; i++) { + int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); + if (n > prev) { + testConvolution(n); + prev = n; + } + } + + cout << endl; + cout << "Max log err = " << std::setprecision(3) << maxLogError << endl; + cout << "Test " << (maxLogError < -10 ? "passed" : "failed") << endl; + return EXIT_SUCCESS; +} + + +static void testFft(int n) { + const vector > input = randomComplexes(n); + const vector > expect = naiveDft(input, false); + vector > actual = input; + Fft::transform(actual, false); + double err = log10RmsErr(expect, actual); + + for (auto it = actual.begin(); it != actual.end(); ++it) + *it /= n; + Fft::transform(actual, true); + err = std::max(log10RmsErr(input, actual), err); + cout << "fftsize=" << std::setw(4) << std::setfill(' ') << n << " " + << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) + << err << endl; +} + + +static void testConvolution(int n) { + const vector > input0 = randomComplexes(n); + const vector > input1 = randomComplexes(n); + const vector > expect = naiveConvolve(input0, input1); + const vector > actual = Fft::convolve(std::move(input0), std::move(input1)); + cout << "convsize=" << std::setw(4) << std::setfill(' ') << n << " " + << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) + << log10RmsErr(expect, actual) << endl; +} + + +/*---- Naive reference computation functions ----*/ + +static vector > naiveDft(const vector > &input, bool inverse) { + int n = static_cast(input.size()); + vector > output; + double coef = (inverse ? 2 : -2) * M_PI / n; + for (int k = 0; k < n; k++) { // For each output element + complex sum(0); + for (int t = 0; t < n; t++) { // For each input element + double angle = coef * (static_cast(t) * k % n); + sum += input[t] * std::polar(1.0, angle); + } + output.push_back(sum); + } + return output; +} + + +static vector > naiveConvolve( + const vector > &xvec, const vector > &yvec) { + int n = static_cast(xvec.size()); + vector > result(n); // All zeros + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + int k = (i + j) % n; + result[k] += xvec[i] * yvec[j]; + } + } + return result; +} + + +/*---- Utility functions ----*/ + +static double log10RmsErr(const vector > &xvec, const vector > &yvec) { + int n = static_cast(xvec.size()); + double err = std::pow(10, -99 * 2); + for (int i = 0; i < n; i++) + err += std::norm(xvec.at(i) - yvec.at(i)); + err /= n > 0 ? n : 1; + err = std::sqrt(err); // Now this is a root mean square (RMS) error + err = std::log10(err); + maxLogError = std::max(err, maxLogError); + return err; +} + + +static vector > randomComplexes(int n) { + std::uniform_real_distribution valueDist(-1.0, 1.0); + vector > result; + for (int i = 0; i < n; i++) + result.push_back(complex(valueDist(randGen), valueDist(randGen))); + return result; +} diff --git a/src/ref/FftRealPair.cpp b/src/ref/FftRealPair.cpp new file mode 100644 index 0000000..9bb414f --- /dev/null +++ b/src/ref/FftRealPair.cpp @@ -0,0 +1,195 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include "FftRealPair.hpp" + +using std::size_t; +using std::uintmax_t; +using std::vector; + + +// Private function prototypes +static size_t reverseBits(size_t val, int width); + + +void Fft::transform(vector &real, vector &imag) { + size_t n = real.size(); + if (n != imag.size()) + throw std::invalid_argument("Mismatched lengths"); + if (n == 0) + return; + else if ((n & (n - 1)) == 0) // Is power of 2 + transformRadix2(real, imag); + else // More complicated algorithm for arbitrary sizes + transformBluestein(real, imag); +} + + +void Fft::inverseTransform(vector &real, vector &imag) { + transform(imag, real); +} + + +void Fft::transformRadix2(vector &real, vector &imag) { + // Length variables + size_t n = real.size(); + if (n != imag.size()) + throw std::invalid_argument("Mismatched lengths"); + int levels = 0; // Compute levels = floor(log2(n)) + for (size_t temp = n; temp > 1U; temp >>= 1) + levels++; + if (static_cast(1U) << levels != n) + throw std::domain_error("Length is not a power of 2"); + + // Trigonometric tables + vector cosTable(n / 2); + vector sinTable(n / 2); + for (size_t i = 0; i < n / 2; i++) { + cosTable[i] = std::cos(2 * M_PI * i / n); + sinTable[i] = std::sin(2 * M_PI * i / n); + } + + // Bit-reversed addressing permutation + for (size_t i = 0; i < n; i++) { + size_t j = reverseBits(i, levels); + if (j > i) { + std::swap(real[i], real[j]); + std::swap(imag[i], imag[j]); + } + } + + // Cooley-Tukey decimation-in-time radix-2 FFT + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + size_t l = j + halfsize; + double tpre = real[l] * cosTable[k] + imag[l] * sinTable[k]; + double tpim = -real[l] * sinTable[k] + imag[l] * cosTable[k]; + real[l] = real[j] - tpre; + imag[l] = imag[j] - tpim; + real[j] += tpre; + imag[j] += tpim; + } + } + if (size == n) // Prevent overflow in 'size *= 2' + break; + } +} + + +void Fft::transformBluestein(vector &real, vector &imag) { + // Find a power-of-2 convolution length m such that m >= n * 2 + 1 + size_t n = real.size(); + if (n != imag.size()) + throw std::invalid_argument("Mismatched lengths"); + size_t m = 1; + while (m / 2 <= n) { + if (m > SIZE_MAX / 2) + throw std::length_error("Vector too large"); + m *= 2; + } + + // Trigonometric tables + vector cosTable(n), sinTable(n); + for (size_t i = 0; i < n; i++) { + uintmax_t temp = static_cast(i) * i; + temp %= static_cast(n) * 2; + double angle = M_PI * temp / n; + cosTable[i] = std::cos(angle); + sinTable[i] = std::sin(angle); + } + + // Temporary vectors and preprocessing + vector areal(m), aimag(m); + for (size_t i = 0; i < n; i++) { + areal[i] = real[i] * cosTable[i] + imag[i] * sinTable[i]; + aimag[i] = -real[i] * sinTable[i] + imag[i] * cosTable[i]; + } + vector breal(m), bimag(m); + breal[0] = cosTable[0]; + bimag[0] = sinTable[0]; + for (size_t i = 1; i < n; i++) { + breal[i] = breal[m - i] = cosTable[i]; + bimag[i] = bimag[m - i] = sinTable[i]; + } + + // Convolution + std::pair, vector > cvec = convolve(areal, aimag, breal, bimag); + vector creal = std::move(cvec.first ); + vector cimag = std::move(cvec.second); + + // Postprocessing + for (size_t i = 0; i < n; i++) { + real[i] = creal[i] * cosTable[i] + cimag[i] * sinTable[i]; + imag[i] = -creal[i] * sinTable[i] + cimag[i] * cosTable[i]; + } +} + + +vector Fft::convolve(vector xvec, vector yvec) { + size_t n = xvec.size(); + if (n != yvec.size()) + throw std::invalid_argument("Mismatched lengths"); + return convolve(std::move(xvec), vector(n), std::move(yvec), vector(n)).first; +} + + +std::pair, vector > Fft::convolve( + vector xreal, vector ximag, + vector yreal, vector yimag) { + + size_t n = xreal.size(); + if (n != ximag.size() || n != yreal.size() || n != yimag.size()) + throw std::invalid_argument("Mismatched lengths"); + + transform(xreal, ximag); + transform(yreal, yimag); + + for (size_t i = 0; i < n; i++) { + double temp = xreal[i] * yreal[i] - ximag[i] * yimag[i]; + ximag[i] = ximag[i] * yreal[i] + xreal[i] * yimag[i]; + xreal[i] = temp; + } + inverseTransform(xreal, ximag); + + for (size_t i = 0; i < n; i++) { // Scaling (because this FFT implementation omits it) + xreal[i] /= n; + ximag[i] /= n; + } + return std::pair, vector >(std::move(xreal), std::move(ximag)); +} + + +static size_t reverseBits(size_t val, int width) { + size_t result = 0; + for (int i = 0; i < width; i++, val >>= 1) + result = (result << 1) | (val & 1U); + return result; +} + diff --git a/src/ref/FftRealPair.hpp b/src/ref/FftRealPair.hpp new file mode 100644 index 0000000..95db10a --- /dev/null +++ b/src/ref/FftRealPair.hpp @@ -0,0 +1,75 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include +#include + + +namespace Fft { + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. + */ + void transform(std::vector &real, std::vector &imag); + + + /* + * Computes the inverse discrete Fourier transform (IDFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. This transform does not perform scaling, so the inverse is not a true inverse. + */ + void inverseTransform(std::vector &real, std::vector &imag); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. + */ + void transformRadix2(std::vector &real, std::vector &imag); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. + * Uses Bluestein's chirp z-transform algorithm. + */ + void transformBluestein(std::vector &real, std::vector &imag); + + + /* + * Computes the circular convolution of the given real vectors. Each vector's length must be the same. + */ + std::vector convolve(std::vector xvec, std::vector yvec); + + + /* + * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. + */ + std::pair, std::vector > convolve( + std::vector xreal, std::vector ximag, + std::vector yreal, std::vector yimag); + +} + diff --git a/src/ref/README.org b/src/ref/README.org new file mode 100644 index 0000000..74fd6b9 --- /dev/null +++ b/src/ref/README.org @@ -0,0 +1,5 @@ +* Reference Implementation + +This reference implementation is provided by Nayuki. I greatly appreciate their contribution. + +For more details, please visit Nayuki's [[Website][https://www.nayuki.io/]] From 2d71e4a036a5966c32d74b2c93b4d1423fde18b9 Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Thu, 12 Dec 2024 08:02:52 +0000 Subject: [PATCH 09/16] chore: better test --- test_dct.py | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/test_dct.py b/test_dct.py index 98eeb4f..40a233f 100644 --- a/test_dct.py +++ b/test_dct.py @@ -2,35 +2,24 @@ import numpy as np import fdct import scipy.fft as fft +import time # Parameters -test_size_seq = [10, 64, 127, 256, 1023, 2048, 4097] +test_size_seq = [10, 64, 127, 256, 1023, 2048, 4097, 8192, 8193] tolerance = 1e-6 # Tolerance for individual element comparison -@pytest.mark.parametrize("size", test_size_seq) -def test_dct_naive(size): - inp = np.random.rand(size) - gt = fft.dct(inp) - ans = fdct.dct_naive(inp) - np.testing.assert_allclose(ans, gt, atol=tolerance, rtol=0) - -@pytest.mark.parametrize("size", test_size_seq) -def test_idct_naive(size): - inp = np.random.rand(size) - gt = fft.idct(inp) - ans = fdct.idct_naive(inp) - np.testing.assert_allclose(ans, gt, atol=tolerance, rtol=0) - -@pytest.mark.parametrize("size", test_size_seq) -def test_fdct(size): - inp = np.random.rand(size) - gt = fft.dct(inp) - ans = fdct.fdct(inp) - np.testing.assert_allclose(ans, gt, atol=tolerance, rtol=0) +# List of test functions and their corresponding references +test_cases = [ + ("dct_naive", fdct.dct_naive, fft.dct), + ("idct_naive", fdct.idct_naive, fft.idct), + ("fdct", fdct.fdct, fft.dct), + ("ifdct", fdct.ifdct, fft.idct), +] @pytest.mark.parametrize("size", test_size_seq) -def test_ifdct(size): +@pytest.mark.parametrize("name, func, ref_func", test_cases) +def test_transform(name, func, ref_func, size): inp = np.random.rand(size) - gt = fft.idct(inp) - ans = fdct.ifdct(inp) + gt = ref_func(inp) + ans = func(inp) np.testing.assert_allclose(ans, gt, atol=tolerance, rtol=0) From 988c0d742f40d079baf844407a956661c4c060d8 Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Thu, 12 Dec 2024 16:03:05 +0000 Subject: [PATCH 10/16] feat: add reference impl --- ref/FastDctFft.hpp | 36 ++++++++ ref/FastDctFtt.cpp | 75 ++++++++++++++++ ref/FftComplex.cpp | 149 +++++++++++++++++++++++++++++++ ref/FftComplex.hpp | 61 +++++++++++++ ref/FftComplexTest.cpp | 177 +++++++++++++++++++++++++++++++++++++ ref/FftRealPair.cpp | 195 +++++++++++++++++++++++++++++++++++++++++ ref/FftRealPair.hpp | 75 ++++++++++++++++ ref/README.org | 5 ++ 8 files changed, 773 insertions(+) create mode 100644 ref/FastDctFft.hpp create mode 100644 ref/FastDctFtt.cpp create mode 100644 ref/FftComplex.cpp create mode 100644 ref/FftComplex.hpp create mode 100644 ref/FftComplexTest.cpp create mode 100644 ref/FftRealPair.cpp create mode 100644 ref/FftRealPair.hpp create mode 100644 ref/README.org diff --git a/ref/FastDctFft.hpp b/ref/FastDctFft.hpp new file mode 100644 index 0000000..624234d --- /dev/null +++ b/ref/FastDctFft.hpp @@ -0,0 +1,36 @@ +/* + * Fast discrete cosine transform algorithms (C++) + * + * Copyright (c) 2017 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include + + +namespace FastDctFft { + + void transform(std::vector &vec); + + void inverseTransform(std::vector &vec); + +} + diff --git a/ref/FastDctFtt.cpp b/ref/FastDctFtt.cpp new file mode 100644 index 0000000..5acfa96 --- /dev/null +++ b/ref/FastDctFtt.cpp @@ -0,0 +1,75 @@ +/* + * Fast discrete cosine transform algorithms (C++) + * + * Copyright (c) 2017 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include "FastDctFft.hpp" +#include "FftRealPair.hpp" + +using std::size_t; +using std::vector; + + +// DCT type II, unscaled +void FastDctFft::transform(vector &vec) { + size_t len = vec.size(); + size_t halfLen = len / 2; + vector real(len); + for (size_t i = 0; i < halfLen; i++) { + real.at(i) = vec.at(i * 2); + real.at(len - 1 - i) = vec.at(i * 2 + 1); + } + if (len % 2 == 1) + real.at(halfLen) = vec.at(len - 1); + std::fill(vec.begin(), vec.end(), 0.0); + Fft::transform(real, vec); + for (size_t i = 0; i < len; i++) { + double temp = i * M_PI / (len * 2); + vec.at(i) = real.at(i) * std::cos(temp) + vec.at(i) * std::sin(temp); + } +} + + +// DCT type III, unscaled +void FastDctFft::inverseTransform(vector &vec) { + size_t len = vec.size(); + if (len > 0) + vec.at(0) /= 2; + vector real(len); + for (size_t i = 0; i < len; i++) { + double temp = i * M_PI / (len * 2); + real.at(i) = vec.at(i) * std::cos(temp); + vec.at(i) *= -std::sin(temp); + } + Fft::transform(real, vec); + + size_t halfLen = len / 2; + for (size_t i = 0; i < halfLen; i++) { + vec.at(i * 2 + 0) = real.at(i); + vec.at(i * 2 + 1) = real.at(len - 1 - i); + } + if (len % 2 == 1) + vec.at(len - 1) = real.at(halfLen); +} + diff --git a/ref/FftComplex.cpp b/ref/FftComplex.cpp new file mode 100644 index 0000000..4f842a0 --- /dev/null +++ b/ref/FftComplex.cpp @@ -0,0 +1,149 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include "FftComplex.hpp" + +using std::complex; +using std::size_t; +using std::uintmax_t; +using std::vector; + + +// Private function prototypes +static size_t reverseBits(size_t val, int width); + + +void Fft::transform(vector > &vec, bool inverse) { + size_t n = vec.size(); + if (n == 0) + return; + else if ((n & (n - 1)) == 0) // Is power of 2 + transformRadix2(vec, inverse); + else // More complicated algorithm for arbitrary sizes + transformBluestein(vec, inverse); +} + + +void Fft::transformRadix2(vector > &vec, bool inverse) { + // Length variables + size_t n = vec.size(); + int levels = 0; // Compute levels = floor(log2(n)) + for (size_t temp = n; temp > 1U; temp >>= 1) + levels++; + if (static_cast(1U) << levels != n) + throw std::domain_error("Length is not a power of 2"); + + // Trigonometric table + vector > expTable(n / 2); + for (size_t i = 0; i < n / 2; i++) + expTable[i] = std::polar(1.0, (inverse ? 2 : -2) * M_PI * i / n); + + // Bit-reversed addressing permutation + for (size_t i = 0; i < n; i++) { + size_t j = reverseBits(i, levels); + if (j > i) + std::swap(vec[i], vec[j]); + } + + // Cooley-Tukey decimation-in-time radix-2 FFT + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + complex temp = vec[j + halfsize] * expTable[k]; + vec[j + halfsize] = vec[j] - temp; + vec[j] += temp; + } + } + if (size == n) // Prevent overflow in 'size *= 2' + break; + } +} + + +void Fft::transformBluestein(vector > &vec, bool inverse) { + // Find a power-of-2 convolution length m such that m >= n * 2 + 1 + size_t n = vec.size(); + size_t m = 1; + while (m / 2 <= n) { + if (m > SIZE_MAX / 2) + throw std::length_error("Vector too large"); + m *= 2; + } + + // Trigonometric table + vector > expTable(n); + for (size_t i = 0; i < n; i++) { + uintmax_t temp = static_cast(i) * i; + temp %= static_cast(n) * 2; + double angle = (inverse ? M_PI : -M_PI) * temp / n; + expTable[i] = std::polar(1.0, angle); + } + + // Temporary vectors and preprocessing + vector > avec(m); + for (size_t i = 0; i < n; i++) + avec[i] = vec[i] * expTable[i]; + vector > bvec(m); + bvec[0] = expTable[0]; + for (size_t i = 1; i < n; i++) + bvec[i] = bvec[m - i] = std::conj(expTable[i]); + + // Convolution + vector > cvec = convolve(std::move(avec), std::move(bvec)); + + // Postprocessing + for (size_t i = 0; i < n; i++) + vec[i] = cvec[i] * expTable[i]; +} + + +vector > Fft::convolve( + vector > xvec, + vector > yvec) { + + size_t n = xvec.size(); + if (n != yvec.size()) + throw std::domain_error("Mismatched lengths"); + transform(xvec, false); + transform(yvec, false); + for (size_t i = 0; i < n; i++) + xvec[i] *= yvec[i]; + transform(xvec, true); + for (size_t i = 0; i < n; i++) // Scaling (because this FFT implementation omits it) + xvec[i] /= static_cast(n); + return xvec; +} + + +static size_t reverseBits(size_t val, int width) { + size_t result = 0; + for (int i = 0; i < width; i++, val >>= 1) + result = (result << 1) | (val & 1U); + return result; +} diff --git a/ref/FftComplex.hpp b/ref/FftComplex.hpp new file mode 100644 index 0000000..24eeabb --- /dev/null +++ b/ref/FftComplex.hpp @@ -0,0 +1,61 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include +#include +using Complex = std::complex; + +namespace Fft { + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. The inverse transform does not perform scaling, so it is not a true inverse. + */ + void transform(std::vector > &vec, bool inverse); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. + */ + void transformRadix2(std::vector > &vec, bool inverse); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. + * Uses Bluestein's chirp z-transform algorithm. + */ + void transformBluestein(std::vector > &vec, bool inverse); + + + /* + * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. + */ + std::vector > convolve( + std::vector > xvec, + std::vector > yvec); + +} diff --git a/ref/FftComplexTest.cpp b/ref/FftComplexTest.cpp new file mode 100644 index 0000000..dd960d1 --- /dev/null +++ b/ref/FftComplexTest.cpp @@ -0,0 +1,177 @@ +/* + * FFT and convolution test (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "FftComplex.hpp" + +using std::complex; +using std::cout; +using std::endl; +using std::vector; + + +// Private function prototypes +static void testFft(int n); +static void testConvolution(int n); +static vector > naiveDft(const vector > &input, bool inverse); +static vector > naiveConvolve(const vector > &xvec, const vector > &yvec); +static double log10RmsErr(const vector > &xvec, const vector > &yvec); +static vector > randomComplexes(int n); + +// Mutable global variable +static double maxLogError = -INFINITY; + +// Random number generation +std::default_random_engine randGen((std::random_device())()); + + +/*---- Main and test functions ----*/ + +int main() { + // Test power-of-2 size FFTs + for (int i = 0; i <= 12; i++) + testFft(1 << i); + + // Test small size FFTs + for (int i = 0; i < 30; i++) + testFft(i); + + // Test diverse size FFTs + for (int i = 0, prev = 0; i <= 100; i++) { + int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); + if (n > prev) { + testFft(n); + prev = n; + } + } + + // Test power-of-2 size convolutions + for (int i = 0; i <= 12; i++) + testConvolution(1 << i); + + // Test diverse size convolutions + for (int i = 0, prev = 0; i <= 100; i++) { + int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); + if (n > prev) { + testConvolution(n); + prev = n; + } + } + + cout << endl; + cout << "Max log err = " << std::setprecision(3) << maxLogError << endl; + cout << "Test " << (maxLogError < -10 ? "passed" : "failed") << endl; + return EXIT_SUCCESS; +} + + +static void testFft(int n) { + const vector > input = randomComplexes(n); + const vector > expect = naiveDft(input, false); + vector > actual = input; + Fft::transform(actual, false); + double err = log10RmsErr(expect, actual); + + for (auto it = actual.begin(); it != actual.end(); ++it) + *it /= n; + Fft::transform(actual, true); + err = std::max(log10RmsErr(input, actual), err); + cout << "fftsize=" << std::setw(4) << std::setfill(' ') << n << " " + << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) + << err << endl; +} + + +static void testConvolution(int n) { + const vector > input0 = randomComplexes(n); + const vector > input1 = randomComplexes(n); + const vector > expect = naiveConvolve(input0, input1); + const vector > actual = Fft::convolve(std::move(input0), std::move(input1)); + cout << "convsize=" << std::setw(4) << std::setfill(' ') << n << " " + << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) + << log10RmsErr(expect, actual) << endl; +} + + +/*---- Naive reference computation functions ----*/ + +static vector > naiveDft(const vector > &input, bool inverse) { + int n = static_cast(input.size()); + vector > output; + double coef = (inverse ? 2 : -2) * M_PI / n; + for (int k = 0; k < n; k++) { // For each output element + complex sum(0); + for (int t = 0; t < n; t++) { // For each input element + double angle = coef * (static_cast(t) * k % n); + sum += input[t] * std::polar(1.0, angle); + } + output.push_back(sum); + } + return output; +} + + +static vector > naiveConvolve( + const vector > &xvec, const vector > &yvec) { + int n = static_cast(xvec.size()); + vector > result(n); // All zeros + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + int k = (i + j) % n; + result[k] += xvec[i] * yvec[j]; + } + } + return result; +} + + +/*---- Utility functions ----*/ + +static double log10RmsErr(const vector > &xvec, const vector > &yvec) { + int n = static_cast(xvec.size()); + double err = std::pow(10, -99 * 2); + for (int i = 0; i < n; i++) + err += std::norm(xvec.at(i) - yvec.at(i)); + err /= n > 0 ? n : 1; + err = std::sqrt(err); // Now this is a root mean square (RMS) error + err = std::log10(err); + maxLogError = std::max(err, maxLogError); + return err; +} + + +static vector > randomComplexes(int n) { + std::uniform_real_distribution valueDist(-1.0, 1.0); + vector > result; + for (int i = 0; i < n; i++) + result.push_back(complex(valueDist(randGen), valueDist(randGen))); + return result; +} diff --git a/ref/FftRealPair.cpp b/ref/FftRealPair.cpp new file mode 100644 index 0000000..9bb414f --- /dev/null +++ b/ref/FftRealPair.cpp @@ -0,0 +1,195 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include "FftRealPair.hpp" + +using std::size_t; +using std::uintmax_t; +using std::vector; + + +// Private function prototypes +static size_t reverseBits(size_t val, int width); + + +void Fft::transform(vector &real, vector &imag) { + size_t n = real.size(); + if (n != imag.size()) + throw std::invalid_argument("Mismatched lengths"); + if (n == 0) + return; + else if ((n & (n - 1)) == 0) // Is power of 2 + transformRadix2(real, imag); + else // More complicated algorithm for arbitrary sizes + transformBluestein(real, imag); +} + + +void Fft::inverseTransform(vector &real, vector &imag) { + transform(imag, real); +} + + +void Fft::transformRadix2(vector &real, vector &imag) { + // Length variables + size_t n = real.size(); + if (n != imag.size()) + throw std::invalid_argument("Mismatched lengths"); + int levels = 0; // Compute levels = floor(log2(n)) + for (size_t temp = n; temp > 1U; temp >>= 1) + levels++; + if (static_cast(1U) << levels != n) + throw std::domain_error("Length is not a power of 2"); + + // Trigonometric tables + vector cosTable(n / 2); + vector sinTable(n / 2); + for (size_t i = 0; i < n / 2; i++) { + cosTable[i] = std::cos(2 * M_PI * i / n); + sinTable[i] = std::sin(2 * M_PI * i / n); + } + + // Bit-reversed addressing permutation + for (size_t i = 0; i < n; i++) { + size_t j = reverseBits(i, levels); + if (j > i) { + std::swap(real[i], real[j]); + std::swap(imag[i], imag[j]); + } + } + + // Cooley-Tukey decimation-in-time radix-2 FFT + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + size_t l = j + halfsize; + double tpre = real[l] * cosTable[k] + imag[l] * sinTable[k]; + double tpim = -real[l] * sinTable[k] + imag[l] * cosTable[k]; + real[l] = real[j] - tpre; + imag[l] = imag[j] - tpim; + real[j] += tpre; + imag[j] += tpim; + } + } + if (size == n) // Prevent overflow in 'size *= 2' + break; + } +} + + +void Fft::transformBluestein(vector &real, vector &imag) { + // Find a power-of-2 convolution length m such that m >= n * 2 + 1 + size_t n = real.size(); + if (n != imag.size()) + throw std::invalid_argument("Mismatched lengths"); + size_t m = 1; + while (m / 2 <= n) { + if (m > SIZE_MAX / 2) + throw std::length_error("Vector too large"); + m *= 2; + } + + // Trigonometric tables + vector cosTable(n), sinTable(n); + for (size_t i = 0; i < n; i++) { + uintmax_t temp = static_cast(i) * i; + temp %= static_cast(n) * 2; + double angle = M_PI * temp / n; + cosTable[i] = std::cos(angle); + sinTable[i] = std::sin(angle); + } + + // Temporary vectors and preprocessing + vector areal(m), aimag(m); + for (size_t i = 0; i < n; i++) { + areal[i] = real[i] * cosTable[i] + imag[i] * sinTable[i]; + aimag[i] = -real[i] * sinTable[i] + imag[i] * cosTable[i]; + } + vector breal(m), bimag(m); + breal[0] = cosTable[0]; + bimag[0] = sinTable[0]; + for (size_t i = 1; i < n; i++) { + breal[i] = breal[m - i] = cosTable[i]; + bimag[i] = bimag[m - i] = sinTable[i]; + } + + // Convolution + std::pair, vector > cvec = convolve(areal, aimag, breal, bimag); + vector creal = std::move(cvec.first ); + vector cimag = std::move(cvec.second); + + // Postprocessing + for (size_t i = 0; i < n; i++) { + real[i] = creal[i] * cosTable[i] + cimag[i] * sinTable[i]; + imag[i] = -creal[i] * sinTable[i] + cimag[i] * cosTable[i]; + } +} + + +vector Fft::convolve(vector xvec, vector yvec) { + size_t n = xvec.size(); + if (n != yvec.size()) + throw std::invalid_argument("Mismatched lengths"); + return convolve(std::move(xvec), vector(n), std::move(yvec), vector(n)).first; +} + + +std::pair, vector > Fft::convolve( + vector xreal, vector ximag, + vector yreal, vector yimag) { + + size_t n = xreal.size(); + if (n != ximag.size() || n != yreal.size() || n != yimag.size()) + throw std::invalid_argument("Mismatched lengths"); + + transform(xreal, ximag); + transform(yreal, yimag); + + for (size_t i = 0; i < n; i++) { + double temp = xreal[i] * yreal[i] - ximag[i] * yimag[i]; + ximag[i] = ximag[i] * yreal[i] + xreal[i] * yimag[i]; + xreal[i] = temp; + } + inverseTransform(xreal, ximag); + + for (size_t i = 0; i < n; i++) { // Scaling (because this FFT implementation omits it) + xreal[i] /= n; + ximag[i] /= n; + } + return std::pair, vector >(std::move(xreal), std::move(ximag)); +} + + +static size_t reverseBits(size_t val, int width) { + size_t result = 0; + for (int i = 0; i < width; i++, val >>= 1) + result = (result << 1) | (val & 1U); + return result; +} + diff --git a/ref/FftRealPair.hpp b/ref/FftRealPair.hpp new file mode 100644 index 0000000..95db10a --- /dev/null +++ b/ref/FftRealPair.hpp @@ -0,0 +1,75 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include +#include + + +namespace Fft { + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. + */ + void transform(std::vector &real, std::vector &imag); + + + /* + * Computes the inverse discrete Fourier transform (IDFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. This transform does not perform scaling, so the inverse is not a true inverse. + */ + void inverseTransform(std::vector &real, std::vector &imag); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. + */ + void transformRadix2(std::vector &real, std::vector &imag); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. + * Uses Bluestein's chirp z-transform algorithm. + */ + void transformBluestein(std::vector &real, std::vector &imag); + + + /* + * Computes the circular convolution of the given real vectors. Each vector's length must be the same. + */ + std::vector convolve(std::vector xvec, std::vector yvec); + + + /* + * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. + */ + std::pair, std::vector > convolve( + std::vector xreal, std::vector ximag, + std::vector yreal, std::vector yimag); + +} + diff --git a/ref/README.org b/ref/README.org new file mode 100644 index 0000000..74fd6b9 --- /dev/null +++ b/ref/README.org @@ -0,0 +1,5 @@ +* Reference Implementation + +This reference implementation is provided by Nayuki. I greatly appreciate their contribution. + +For more details, please visit Nayuki's [[Website][https://www.nayuki.io/]] From 921202c335076fca481f8c423f1bd1ecc607c86f Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Thu, 12 Dec 2024 16:15:32 +0000 Subject: [PATCH 11/16] chore: remove ref in src --- src/ref/FastDctFft.hpp | 36 ------- src/ref/FastDctFtt.cpp | 75 -------------- src/ref/FftComplex.cpp | 149 ---------------------------- src/ref/FftComplex.hpp | 61 ------------ src/ref/FftComplexTest.cpp | 177 --------------------------------- src/ref/FftRealPair.cpp | 195 ------------------------------------- src/ref/FftRealPair.hpp | 75 -------------- src/ref/README.org | 5 - 8 files changed, 773 deletions(-) delete mode 100644 src/ref/FastDctFft.hpp delete mode 100644 src/ref/FastDctFtt.cpp delete mode 100644 src/ref/FftComplex.cpp delete mode 100644 src/ref/FftComplex.hpp delete mode 100644 src/ref/FftComplexTest.cpp delete mode 100644 src/ref/FftRealPair.cpp delete mode 100644 src/ref/FftRealPair.hpp delete mode 100644 src/ref/README.org diff --git a/src/ref/FastDctFft.hpp b/src/ref/FastDctFft.hpp deleted file mode 100644 index 624234d..0000000 --- a/src/ref/FastDctFft.hpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Fast discrete cosine transform algorithms (C++) - * - * Copyright (c) 2017 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#pragma once - -#include - - -namespace FastDctFft { - - void transform(std::vector &vec); - - void inverseTransform(std::vector &vec); - -} - diff --git a/src/ref/FastDctFtt.cpp b/src/ref/FastDctFtt.cpp deleted file mode 100644 index 5acfa96..0000000 --- a/src/ref/FastDctFtt.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Fast discrete cosine transform algorithms (C++) - * - * Copyright (c) 2017 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#include -#include -#include -#include "FastDctFft.hpp" -#include "FftRealPair.hpp" - -using std::size_t; -using std::vector; - - -// DCT type II, unscaled -void FastDctFft::transform(vector &vec) { - size_t len = vec.size(); - size_t halfLen = len / 2; - vector real(len); - for (size_t i = 0; i < halfLen; i++) { - real.at(i) = vec.at(i * 2); - real.at(len - 1 - i) = vec.at(i * 2 + 1); - } - if (len % 2 == 1) - real.at(halfLen) = vec.at(len - 1); - std::fill(vec.begin(), vec.end(), 0.0); - Fft::transform(real, vec); - for (size_t i = 0; i < len; i++) { - double temp = i * M_PI / (len * 2); - vec.at(i) = real.at(i) * std::cos(temp) + vec.at(i) * std::sin(temp); - } -} - - -// DCT type III, unscaled -void FastDctFft::inverseTransform(vector &vec) { - size_t len = vec.size(); - if (len > 0) - vec.at(0) /= 2; - vector real(len); - for (size_t i = 0; i < len; i++) { - double temp = i * M_PI / (len * 2); - real.at(i) = vec.at(i) * std::cos(temp); - vec.at(i) *= -std::sin(temp); - } - Fft::transform(real, vec); - - size_t halfLen = len / 2; - for (size_t i = 0; i < halfLen; i++) { - vec.at(i * 2 + 0) = real.at(i); - vec.at(i * 2 + 1) = real.at(len - 1 - i); - } - if (len % 2 == 1) - vec.at(len - 1) = real.at(halfLen); -} - diff --git a/src/ref/FftComplex.cpp b/src/ref/FftComplex.cpp deleted file mode 100644 index 4f842a0..0000000 --- a/src/ref/FftComplex.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Free FFT and convolution (C++) - * - * Copyright (c) 2021 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/free-small-fft-in-multiple-languages - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#include -#include -#include -#include -#include "FftComplex.hpp" - -using std::complex; -using std::size_t; -using std::uintmax_t; -using std::vector; - - -// Private function prototypes -static size_t reverseBits(size_t val, int width); - - -void Fft::transform(vector > &vec, bool inverse) { - size_t n = vec.size(); - if (n == 0) - return; - else if ((n & (n - 1)) == 0) // Is power of 2 - transformRadix2(vec, inverse); - else // More complicated algorithm for arbitrary sizes - transformBluestein(vec, inverse); -} - - -void Fft::transformRadix2(vector > &vec, bool inverse) { - // Length variables - size_t n = vec.size(); - int levels = 0; // Compute levels = floor(log2(n)) - for (size_t temp = n; temp > 1U; temp >>= 1) - levels++; - if (static_cast(1U) << levels != n) - throw std::domain_error("Length is not a power of 2"); - - // Trigonometric table - vector > expTable(n / 2); - for (size_t i = 0; i < n / 2; i++) - expTable[i] = std::polar(1.0, (inverse ? 2 : -2) * M_PI * i / n); - - // Bit-reversed addressing permutation - for (size_t i = 0; i < n; i++) { - size_t j = reverseBits(i, levels); - if (j > i) - std::swap(vec[i], vec[j]); - } - - // Cooley-Tukey decimation-in-time radix-2 FFT - for (size_t size = 2; size <= n; size *= 2) { - size_t halfsize = size / 2; - size_t tablestep = n / size; - for (size_t i = 0; i < n; i += size) { - for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { - complex temp = vec[j + halfsize] * expTable[k]; - vec[j + halfsize] = vec[j] - temp; - vec[j] += temp; - } - } - if (size == n) // Prevent overflow in 'size *= 2' - break; - } -} - - -void Fft::transformBluestein(vector > &vec, bool inverse) { - // Find a power-of-2 convolution length m such that m >= n * 2 + 1 - size_t n = vec.size(); - size_t m = 1; - while (m / 2 <= n) { - if (m > SIZE_MAX / 2) - throw std::length_error("Vector too large"); - m *= 2; - } - - // Trigonometric table - vector > expTable(n); - for (size_t i = 0; i < n; i++) { - uintmax_t temp = static_cast(i) * i; - temp %= static_cast(n) * 2; - double angle = (inverse ? M_PI : -M_PI) * temp / n; - expTable[i] = std::polar(1.0, angle); - } - - // Temporary vectors and preprocessing - vector > avec(m); - for (size_t i = 0; i < n; i++) - avec[i] = vec[i] * expTable[i]; - vector > bvec(m); - bvec[0] = expTable[0]; - for (size_t i = 1; i < n; i++) - bvec[i] = bvec[m - i] = std::conj(expTable[i]); - - // Convolution - vector > cvec = convolve(std::move(avec), std::move(bvec)); - - // Postprocessing - for (size_t i = 0; i < n; i++) - vec[i] = cvec[i] * expTable[i]; -} - - -vector > Fft::convolve( - vector > xvec, - vector > yvec) { - - size_t n = xvec.size(); - if (n != yvec.size()) - throw std::domain_error("Mismatched lengths"); - transform(xvec, false); - transform(yvec, false); - for (size_t i = 0; i < n; i++) - xvec[i] *= yvec[i]; - transform(xvec, true); - for (size_t i = 0; i < n; i++) // Scaling (because this FFT implementation omits it) - xvec[i] /= static_cast(n); - return xvec; -} - - -static size_t reverseBits(size_t val, int width) { - size_t result = 0; - for (int i = 0; i < width; i++, val >>= 1) - result = (result << 1) | (val & 1U); - return result; -} diff --git a/src/ref/FftComplex.hpp b/src/ref/FftComplex.hpp deleted file mode 100644 index 24eeabb..0000000 --- a/src/ref/FftComplex.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Free FFT and convolution (C++) - * - * Copyright (c) 2021 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/free-small-fft-in-multiple-languages - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#pragma once - -#include -#include -using Complex = std::complex; - -namespace Fft { - - /* - * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. - * The vector can have any length. This is a wrapper function. The inverse transform does not perform scaling, so it is not a true inverse. - */ - void transform(std::vector > &vec, bool inverse); - - - /* - * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. - * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. - */ - void transformRadix2(std::vector > &vec, bool inverse); - - - /* - * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. - * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. - * Uses Bluestein's chirp z-transform algorithm. - */ - void transformBluestein(std::vector > &vec, bool inverse); - - - /* - * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. - */ - std::vector > convolve( - std::vector > xvec, - std::vector > yvec); - -} diff --git a/src/ref/FftComplexTest.cpp b/src/ref/FftComplexTest.cpp deleted file mode 100644 index dd960d1..0000000 --- a/src/ref/FftComplexTest.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* - * FFT and convolution test (C++) - * - * Copyright (c) 2021 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/free-small-fft-in-multiple-languages - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "FftComplex.hpp" - -using std::complex; -using std::cout; -using std::endl; -using std::vector; - - -// Private function prototypes -static void testFft(int n); -static void testConvolution(int n); -static vector > naiveDft(const vector > &input, bool inverse); -static vector > naiveConvolve(const vector > &xvec, const vector > &yvec); -static double log10RmsErr(const vector > &xvec, const vector > &yvec); -static vector > randomComplexes(int n); - -// Mutable global variable -static double maxLogError = -INFINITY; - -// Random number generation -std::default_random_engine randGen((std::random_device())()); - - -/*---- Main and test functions ----*/ - -int main() { - // Test power-of-2 size FFTs - for (int i = 0; i <= 12; i++) - testFft(1 << i); - - // Test small size FFTs - for (int i = 0; i < 30; i++) - testFft(i); - - // Test diverse size FFTs - for (int i = 0, prev = 0; i <= 100; i++) { - int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); - if (n > prev) { - testFft(n); - prev = n; - } - } - - // Test power-of-2 size convolutions - for (int i = 0; i <= 12; i++) - testConvolution(1 << i); - - // Test diverse size convolutions - for (int i = 0, prev = 0; i <= 100; i++) { - int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); - if (n > prev) { - testConvolution(n); - prev = n; - } - } - - cout << endl; - cout << "Max log err = " << std::setprecision(3) << maxLogError << endl; - cout << "Test " << (maxLogError < -10 ? "passed" : "failed") << endl; - return EXIT_SUCCESS; -} - - -static void testFft(int n) { - const vector > input = randomComplexes(n); - const vector > expect = naiveDft(input, false); - vector > actual = input; - Fft::transform(actual, false); - double err = log10RmsErr(expect, actual); - - for (auto it = actual.begin(); it != actual.end(); ++it) - *it /= n; - Fft::transform(actual, true); - err = std::max(log10RmsErr(input, actual), err); - cout << "fftsize=" << std::setw(4) << std::setfill(' ') << n << " " - << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) - << err << endl; -} - - -static void testConvolution(int n) { - const vector > input0 = randomComplexes(n); - const vector > input1 = randomComplexes(n); - const vector > expect = naiveConvolve(input0, input1); - const vector > actual = Fft::convolve(std::move(input0), std::move(input1)); - cout << "convsize=" << std::setw(4) << std::setfill(' ') << n << " " - << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) - << log10RmsErr(expect, actual) << endl; -} - - -/*---- Naive reference computation functions ----*/ - -static vector > naiveDft(const vector > &input, bool inverse) { - int n = static_cast(input.size()); - vector > output; - double coef = (inverse ? 2 : -2) * M_PI / n; - for (int k = 0; k < n; k++) { // For each output element - complex sum(0); - for (int t = 0; t < n; t++) { // For each input element - double angle = coef * (static_cast(t) * k % n); - sum += input[t] * std::polar(1.0, angle); - } - output.push_back(sum); - } - return output; -} - - -static vector > naiveConvolve( - const vector > &xvec, const vector > &yvec) { - int n = static_cast(xvec.size()); - vector > result(n); // All zeros - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - int k = (i + j) % n; - result[k] += xvec[i] * yvec[j]; - } - } - return result; -} - - -/*---- Utility functions ----*/ - -static double log10RmsErr(const vector > &xvec, const vector > &yvec) { - int n = static_cast(xvec.size()); - double err = std::pow(10, -99 * 2); - for (int i = 0; i < n; i++) - err += std::norm(xvec.at(i) - yvec.at(i)); - err /= n > 0 ? n : 1; - err = std::sqrt(err); // Now this is a root mean square (RMS) error - err = std::log10(err); - maxLogError = std::max(err, maxLogError); - return err; -} - - -static vector > randomComplexes(int n) { - std::uniform_real_distribution valueDist(-1.0, 1.0); - vector > result; - for (int i = 0; i < n; i++) - result.push_back(complex(valueDist(randGen), valueDist(randGen))); - return result; -} diff --git a/src/ref/FftRealPair.cpp b/src/ref/FftRealPair.cpp deleted file mode 100644 index 9bb414f..0000000 --- a/src/ref/FftRealPair.cpp +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Free FFT and convolution (C++) - * - * Copyright (c) 2021 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/free-small-fft-in-multiple-languages - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#include -#include -#include -#include -#include "FftRealPair.hpp" - -using std::size_t; -using std::uintmax_t; -using std::vector; - - -// Private function prototypes -static size_t reverseBits(size_t val, int width); - - -void Fft::transform(vector &real, vector &imag) { - size_t n = real.size(); - if (n != imag.size()) - throw std::invalid_argument("Mismatched lengths"); - if (n == 0) - return; - else if ((n & (n - 1)) == 0) // Is power of 2 - transformRadix2(real, imag); - else // More complicated algorithm for arbitrary sizes - transformBluestein(real, imag); -} - - -void Fft::inverseTransform(vector &real, vector &imag) { - transform(imag, real); -} - - -void Fft::transformRadix2(vector &real, vector &imag) { - // Length variables - size_t n = real.size(); - if (n != imag.size()) - throw std::invalid_argument("Mismatched lengths"); - int levels = 0; // Compute levels = floor(log2(n)) - for (size_t temp = n; temp > 1U; temp >>= 1) - levels++; - if (static_cast(1U) << levels != n) - throw std::domain_error("Length is not a power of 2"); - - // Trigonometric tables - vector cosTable(n / 2); - vector sinTable(n / 2); - for (size_t i = 0; i < n / 2; i++) { - cosTable[i] = std::cos(2 * M_PI * i / n); - sinTable[i] = std::sin(2 * M_PI * i / n); - } - - // Bit-reversed addressing permutation - for (size_t i = 0; i < n; i++) { - size_t j = reverseBits(i, levels); - if (j > i) { - std::swap(real[i], real[j]); - std::swap(imag[i], imag[j]); - } - } - - // Cooley-Tukey decimation-in-time radix-2 FFT - for (size_t size = 2; size <= n; size *= 2) { - size_t halfsize = size / 2; - size_t tablestep = n / size; - for (size_t i = 0; i < n; i += size) { - for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { - size_t l = j + halfsize; - double tpre = real[l] * cosTable[k] + imag[l] * sinTable[k]; - double tpim = -real[l] * sinTable[k] + imag[l] * cosTable[k]; - real[l] = real[j] - tpre; - imag[l] = imag[j] - tpim; - real[j] += tpre; - imag[j] += tpim; - } - } - if (size == n) // Prevent overflow in 'size *= 2' - break; - } -} - - -void Fft::transformBluestein(vector &real, vector &imag) { - // Find a power-of-2 convolution length m such that m >= n * 2 + 1 - size_t n = real.size(); - if (n != imag.size()) - throw std::invalid_argument("Mismatched lengths"); - size_t m = 1; - while (m / 2 <= n) { - if (m > SIZE_MAX / 2) - throw std::length_error("Vector too large"); - m *= 2; - } - - // Trigonometric tables - vector cosTable(n), sinTable(n); - for (size_t i = 0; i < n; i++) { - uintmax_t temp = static_cast(i) * i; - temp %= static_cast(n) * 2; - double angle = M_PI * temp / n; - cosTable[i] = std::cos(angle); - sinTable[i] = std::sin(angle); - } - - // Temporary vectors and preprocessing - vector areal(m), aimag(m); - for (size_t i = 0; i < n; i++) { - areal[i] = real[i] * cosTable[i] + imag[i] * sinTable[i]; - aimag[i] = -real[i] * sinTable[i] + imag[i] * cosTable[i]; - } - vector breal(m), bimag(m); - breal[0] = cosTable[0]; - bimag[0] = sinTable[0]; - for (size_t i = 1; i < n; i++) { - breal[i] = breal[m - i] = cosTable[i]; - bimag[i] = bimag[m - i] = sinTable[i]; - } - - // Convolution - std::pair, vector > cvec = convolve(areal, aimag, breal, bimag); - vector creal = std::move(cvec.first ); - vector cimag = std::move(cvec.second); - - // Postprocessing - for (size_t i = 0; i < n; i++) { - real[i] = creal[i] * cosTable[i] + cimag[i] * sinTable[i]; - imag[i] = -creal[i] * sinTable[i] + cimag[i] * cosTable[i]; - } -} - - -vector Fft::convolve(vector xvec, vector yvec) { - size_t n = xvec.size(); - if (n != yvec.size()) - throw std::invalid_argument("Mismatched lengths"); - return convolve(std::move(xvec), vector(n), std::move(yvec), vector(n)).first; -} - - -std::pair, vector > Fft::convolve( - vector xreal, vector ximag, - vector yreal, vector yimag) { - - size_t n = xreal.size(); - if (n != ximag.size() || n != yreal.size() || n != yimag.size()) - throw std::invalid_argument("Mismatched lengths"); - - transform(xreal, ximag); - transform(yreal, yimag); - - for (size_t i = 0; i < n; i++) { - double temp = xreal[i] * yreal[i] - ximag[i] * yimag[i]; - ximag[i] = ximag[i] * yreal[i] + xreal[i] * yimag[i]; - xreal[i] = temp; - } - inverseTransform(xreal, ximag); - - for (size_t i = 0; i < n; i++) { // Scaling (because this FFT implementation omits it) - xreal[i] /= n; - ximag[i] /= n; - } - return std::pair, vector >(std::move(xreal), std::move(ximag)); -} - - -static size_t reverseBits(size_t val, int width) { - size_t result = 0; - for (int i = 0; i < width; i++, val >>= 1) - result = (result << 1) | (val & 1U); - return result; -} - diff --git a/src/ref/FftRealPair.hpp b/src/ref/FftRealPair.hpp deleted file mode 100644 index 95db10a..0000000 --- a/src/ref/FftRealPair.hpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Free FFT and convolution (C++) - * - * Copyright (c) 2021 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/free-small-fft-in-multiple-languages - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#pragma once - -#include -#include - - -namespace Fft { - - /* - * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. - * The vector can have any length. This is a wrapper function. - */ - void transform(std::vector &real, std::vector &imag); - - - /* - * Computes the inverse discrete Fourier transform (IDFT) of the given complex vector, storing the result back into the vector. - * The vector can have any length. This is a wrapper function. This transform does not perform scaling, so the inverse is not a true inverse. - */ - void inverseTransform(std::vector &real, std::vector &imag); - - - /* - * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. - * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. - */ - void transformRadix2(std::vector &real, std::vector &imag); - - - /* - * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. - * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. - * Uses Bluestein's chirp z-transform algorithm. - */ - void transformBluestein(std::vector &real, std::vector &imag); - - - /* - * Computes the circular convolution of the given real vectors. Each vector's length must be the same. - */ - std::vector convolve(std::vector xvec, std::vector yvec); - - - /* - * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. - */ - std::pair, std::vector > convolve( - std::vector xreal, std::vector ximag, - std::vector yreal, std::vector yimag); - -} - diff --git a/src/ref/README.org b/src/ref/README.org deleted file mode 100644 index 74fd6b9..0000000 --- a/src/ref/README.org +++ /dev/null @@ -1,5 +0,0 @@ -* Reference Implementation - -This reference implementation is provided by Nayuki. I greatly appreciate their contribution. - -For more details, please visit Nayuki's [[Website][https://www.nayuki.io/]] From 4f6d79b0f2aac96398a5bd04727f9cf0d0ae60ab Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Thu, 12 Dec 2024 17:27:41 +0000 Subject: [PATCH 12/16] fix: refactor --- src/complex_array.hpp | 251 ++++++++++++++++++++++++------------------ src/fft.cpp | 35 +++--- src/fft.hpp | 2 +- 3 files changed, 157 insertions(+), 131 deletions(-) diff --git a/src/complex_array.hpp b/src/complex_array.hpp index 8d0762c..0bdb5ac 100644 --- a/src/complex_array.hpp +++ b/src/complex_array.hpp @@ -7,142 +7,175 @@ using Complex = std::complex; class ComplexProxy { + double *real_ptr; + double *imag_ptr; - double *real_ptr; - double *imag_ptr; - -public: - - double &real() { return *real_ptr; } - double &imag() { return *imag_ptr; } - - ComplexProxy(double *real_ptr, double *imag_ptr) - : real_ptr(real_ptr), imag_ptr(imag_ptr) {} - - ComplexProxy(ComplexProxy &&rhs) noexcept - : real_ptr(rhs.real_ptr), imag_ptr(rhs.imag_ptr) { - rhs.real_ptr = nullptr; - rhs.imag_ptr = nullptr; - } - - operator Complex() const { return Complex(*real_ptr, *imag_ptr); } - - - ComplexProxy &operator=(const Complex &c) { - *real_ptr = c.real(); - *imag_ptr = c.imag(); - return *this; - } -}; + public: + double &real() { return *real_ptr; } + double &imag() { return *imag_ptr; } -class ComplexArray { -private: - double *real; - double *imag; - size_t array_size; - -public: - ComplexArray(size_t n) : array_size(n) { - if (n == 0) { - real = nullptr; - imag = nullptr; - } else { - real = new double[n]; - std::fill(real, real + n, 0.0); - imag = new double[n]; - std::fill(imag, imag + n, 0.0); - } - } + ComplexProxy(double *real_ptr, double *imag_ptr) + : real_ptr(real_ptr), imag_ptr(imag_ptr) {} - ~ComplexArray() { - if (real != nullptr) { - delete[] real; + ComplexProxy(ComplexProxy &&rhs) noexcept + : real_ptr(rhs.real_ptr), imag_ptr(rhs.imag_ptr) { + rhs.real_ptr = nullptr; + rhs.imag_ptr = nullptr; } - if (imag != nullptr) { - delete[] imag; + + operator Complex() const { return Complex(*real_ptr, *imag_ptr); } + + ComplexProxy &operator=(const Complex &c) { + *real_ptr = c.real(); + *imag_ptr = c.imag(); + return *this; } - } - - ComplexArray(const ComplexArray &other) : array_size(other.array_size) { - real = new double[array_size]; - imag = new double[array_size]; - std::copy(other.real, other.real + array_size, real); - std::copy(other.imag, other.imag + array_size, imag); - } - - ComplexArray(ComplexArray &&rhs) noexcept - : real(rhs.real), imag(rhs.imag), array_size(rhs.array_size) { - rhs.real = nullptr; - rhs.imag = nullptr; - rhs.array_size = 0; - } - - ComplexArray &operator=(const ComplexArray &other) { - if (this == &other) { - return *this; +}; + +class ComplexArray { + private: + double *real; + double *imag; + size_t array_size; + + public: + ComplexArray(size_t n) : array_size(n) { + if (n == 0) { + real = nullptr; + imag = nullptr; + } else { + real = new double[n]; + std::fill(real, real + n, 0.0); + imag = new double[n]; + std::fill(imag, imag + n, 0.0); + } } - if (real != nullptr) { - delete[] real; + + ~ComplexArray() { + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } } - if (imag != nullptr) { - delete[] imag; + + ComplexArray(const ComplexArray &other) : array_size(other.array_size) { + real = new double[array_size]; + imag = new double[array_size]; + std::copy(other.real, other.real + array_size, real); + std::copy(other.imag, other.imag + array_size, imag); } - array_size = other.array_size; - real = new double[array_size]; - imag = new double[array_size]; + ComplexArray(ComplexArray &&rhs) noexcept + : real(rhs.real), imag(rhs.imag), array_size(rhs.array_size) { + rhs.real = nullptr; + rhs.imag = nullptr; + rhs.array_size = 0; + } - for (size_t i = 0; i < array_size; i++) { - real[i] = other.real[i]; - imag[i] = other.imag[i]; + ComplexArray &operator=(const ComplexArray &other) { + if (this == &other) { + return *this; + } + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + + array_size = other.array_size; + real = new double[array_size]; + imag = new double[array_size]; + + for (size_t i = 0; i < array_size; i++) { + real[i] = other.real[i]; + imag[i] = other.imag[i]; + } + + return *this; } - return *this; - } + void multiply(const ComplexArray &other) { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + + for (size_t i = 0; i < array_size; i++) { + double temp_real = + real[i] * other.real[i] - imag[i] * other.imag[i]; + double temp_imag = + real[i] * other.imag[i] + imag[i] * other.real[i]; + real[i] = temp_real; + imag[i] = temp_imag; + } + } - ComplexArray operator*(const ComplexArray &other) const { - if (array_size != other.array_size) { - throw std::invalid_argument("Array sizes do not match"); + // void partial_multiply(const ComplexArray &other, size_t start, size_t length) { + + // size_t end = start + length; + // if (end > array_size || end > other.array_size) { + // throw std::invalid_argument("Array sizes do not match"); + // } + // for (size_t i = 0; i < length; i++) { + // double temp_real = + // real[start + i] * other.real[i] - imag[start + i] * other.imag[i]; + // double temp_imag = + // real[start + i] * other.imag[i] + imag[start + i] * other.real[i]; + // real[start + i] = temp_real; + // imag[start + i] = temp_imag; + // } + // } + + void scale(double scalar) { + for (size_t i = 0; i < array_size; i++) { + real[i] *= scalar; + imag[i] *= scalar; + } } - ComplexArray result(array_size); - for (size_t i = 0; i < array_size; i++) { - result.real[i] = real[i] * other.real[i] - imag[i] * other.imag[i]; - result.imag[i] = real[i] * other.imag[i] + imag[i] * other.real[i]; + ComplexArray operator*(const ComplexArray &other) const { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + ComplexArray result = *this; // Copy constructor + result.multiply(other); + return result; } - return result; - } - ComplexArray operator+(const ComplexArray &other) const { - if (array_size != other.array_size) { - throw std::invalid_argument("Array sizes do not match"); + ComplexArray operator+(const ComplexArray &other) const { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + + ComplexArray result(array_size); + for (size_t i = 0; i < array_size; i++) { + result.real[i] = real[i] + other.real[i]; + result.imag[i] = imag[i] + other.imag[i]; + } + return result; } - ComplexArray result(array_size); - for (size_t i = 0; i < array_size; i++) { - result.real[i] = real[i] + other.real[i]; - result.imag[i] = imag[i] + other.imag[i]; + Complex operator[](size_t index) const { + return Complex(real[index], imag[index]); } - return result; - } - Complex operator[](size_t index) const { - return Complex(real[index], imag[index]); - } + ComplexProxy operator[](size_t index) { + return ComplexProxy(real + index, imag + index); + } - ComplexProxy operator[](size_t index) { - return ComplexProxy(real + index, imag + index); - } + - size_t size() const { return array_size; } + size_t size() const { return array_size; } - double *get_real_ptr() { return real; } + double *get_real_ptr() { return real; } - const double *get_real_ptr() const { return real; } + const double *get_real_ptr() const { return real; } - double *get_imag_ptr() { return imag; } + double *get_imag_ptr() { return imag; } - const double *get_imag_ptr() const { return imag; } + const double *get_imag_ptr() const { return imag; } }; #endif diff --git a/src/fft.cpp b/src/fft.cpp index 692d3f3..08f0525 100644 --- a/src/fft.cpp +++ b/src/fft.cpp @@ -86,13 +86,14 @@ void fft::transformBluestein(ComplexArray &x, bool inverse) { double *real_avec = a.get_real_ptr(); double *imag_avec = a.get_imag_ptr(); + + // TODO: Use SIMD for (size_t i = 0; i < n; i++) { real_avec[i] = real[i] * real_expTable[i] - imag[i] * imag_expTable[i]; imag_avec[i] = real[i] * imag_expTable[i] + imag[i] * real_expTable[i]; + // a[i] = Complex(x[i]) * Complex(expTable[i]); } - // a = x * expTable; - double *real_bvec = b.get_real_ptr(); double *imag_bvec = b.get_imag_ptr(); real_bvec[0] = real_expTable[0]; @@ -104,36 +105,28 @@ void fft::transformBluestein(ComplexArray &x, bool inverse) { } ComplexArray c = convolove(a, b); - + // x = c * expTable; for (size_t i = 0; i < n; i++) { real[i] = c.get_real_ptr()[i] * real_expTable[i] - c.get_imag_ptr()[i] * imag_expTable[i]; imag[i] = c.get_real_ptr()[i] * imag_expTable[i] + c.get_imag_ptr()[i] * real_expTable[i]; } } -ComplexArray fft::convolove(ComplexArray x, ComplexArray y) { - size_t n = x.size(); - if (n != y.size()) +ComplexArray fft::convolove(const ComplexArray &_x, const ComplexArray &_y) { + size_t n = _x.size(); + if (n != _y.size()) throw std::domain_error("Mismatched lengths"); + + ComplexArray x = _x; + ComplexArray y = _y; + transform(x, false); transform(y, false); - double *xr = x.get_real_ptr(); - double *yr = y.get_real_ptr(); - double *xi = x.get_imag_ptr(); - double *yi = y.get_imag_ptr(); - - for (size_t i = 0; i < n; ++i) { - double xrn = xr[i] * yr[i] - xi[i] * yi[i]; - double xin = xr[i] * yi[i] + xi[i] * yr[i]; - xr[i] = xrn; - xi[i] = xin; - } + x.multiply(y); transform(x, true); - for (size_t i = 0; i < n; ++i) { - xr[i] /= static_cast(n); - xi[i] /= static_cast(n); - } + x.scale(1.0 / n); + return x; } diff --git a/src/fft.hpp b/src/fft.hpp index c4620fb..768a7fe 100644 --- a/src/fft.hpp +++ b/src/fft.hpp @@ -10,6 +10,6 @@ void transformBluestein(ComplexArray &x, bool inverse); void transform(ComplexArray &x, bool inverse); // ComplexArray convolove(const ComplexArray &x, const ComplexArray &y); -ComplexArray convolove(ComplexArray x, ComplexArray y); +ComplexArray convolove(const ComplexArray &x, const ComplexArray &y); } // namespace fft From 7299b11bfa78d2e868acafb45ac5c20382d434ae Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Sat, 14 Dec 2024 09:13:10 +0000 Subject: [PATCH 13/16] fix: refractor and prepare for SIMD --- src/complex_array.cpp | 181 ++++++++++++++++++++++++++++++++++++++++++ src/complex_array.hpp | 4 +- src/dct_naive.hpp | 17 ++-- src/fdct.hpp | 164 ++++++++++++++++++++------------------ src/fdct_inner.cpp | 108 +++++-------------------- src/fdct_inner.hpp | 4 +- src/fft.cpp | 52 +++++++----- src/fft.hpp | 10 +-- test_dct.py | 7 +- 9 files changed, 337 insertions(+), 210 deletions(-) create mode 100644 src/complex_array.cpp diff --git a/src/complex_array.cpp b/src/complex_array.cpp new file mode 100644 index 0000000..0bdb5ac --- /dev/null +++ b/src/complex_array.cpp @@ -0,0 +1,181 @@ +#ifndef COMPLEX_ARRAY_HPP +#define COMPLEX_ARRAY_HPP +#include +#include +#include + +using Complex = std::complex; + +class ComplexProxy { + double *real_ptr; + double *imag_ptr; + + public: + double &real() { return *real_ptr; } + double &imag() { return *imag_ptr; } + + ComplexProxy(double *real_ptr, double *imag_ptr) + : real_ptr(real_ptr), imag_ptr(imag_ptr) {} + + ComplexProxy(ComplexProxy &&rhs) noexcept + : real_ptr(rhs.real_ptr), imag_ptr(rhs.imag_ptr) { + rhs.real_ptr = nullptr; + rhs.imag_ptr = nullptr; + } + + operator Complex() const { return Complex(*real_ptr, *imag_ptr); } + + ComplexProxy &operator=(const Complex &c) { + *real_ptr = c.real(); + *imag_ptr = c.imag(); + return *this; + } +}; + +class ComplexArray { + private: + double *real; + double *imag; + size_t array_size; + + public: + ComplexArray(size_t n) : array_size(n) { + if (n == 0) { + real = nullptr; + imag = nullptr; + } else { + real = new double[n]; + std::fill(real, real + n, 0.0); + imag = new double[n]; + std::fill(imag, imag + n, 0.0); + } + } + + ~ComplexArray() { + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + } + + ComplexArray(const ComplexArray &other) : array_size(other.array_size) { + real = new double[array_size]; + imag = new double[array_size]; + std::copy(other.real, other.real + array_size, real); + std::copy(other.imag, other.imag + array_size, imag); + } + + ComplexArray(ComplexArray &&rhs) noexcept + : real(rhs.real), imag(rhs.imag), array_size(rhs.array_size) { + rhs.real = nullptr; + rhs.imag = nullptr; + rhs.array_size = 0; + } + + ComplexArray &operator=(const ComplexArray &other) { + if (this == &other) { + return *this; + } + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + + array_size = other.array_size; + real = new double[array_size]; + imag = new double[array_size]; + + for (size_t i = 0; i < array_size; i++) { + real[i] = other.real[i]; + imag[i] = other.imag[i]; + } + + return *this; + } + + void multiply(const ComplexArray &other) { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + + for (size_t i = 0; i < array_size; i++) { + double temp_real = + real[i] * other.real[i] - imag[i] * other.imag[i]; + double temp_imag = + real[i] * other.imag[i] + imag[i] * other.real[i]; + real[i] = temp_real; + imag[i] = temp_imag; + } + } + + // void partial_multiply(const ComplexArray &other, size_t start, size_t length) { + + // size_t end = start + length; + // if (end > array_size || end > other.array_size) { + // throw std::invalid_argument("Array sizes do not match"); + // } + // for (size_t i = 0; i < length; i++) { + // double temp_real = + // real[start + i] * other.real[i] - imag[start + i] * other.imag[i]; + // double temp_imag = + // real[start + i] * other.imag[i] + imag[start + i] * other.real[i]; + // real[start + i] = temp_real; + // imag[start + i] = temp_imag; + // } + // } + + void scale(double scalar) { + for (size_t i = 0; i < array_size; i++) { + real[i] *= scalar; + imag[i] *= scalar; + } + } + + ComplexArray operator*(const ComplexArray &other) const { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + ComplexArray result = *this; // Copy constructor + result.multiply(other); + return result; + } + + ComplexArray operator+(const ComplexArray &other) const { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + + ComplexArray result(array_size); + for (size_t i = 0; i < array_size; i++) { + result.real[i] = real[i] + other.real[i]; + result.imag[i] = imag[i] + other.imag[i]; + } + return result; + } + + Complex operator[](size_t index) const { + return Complex(real[index], imag[index]); + } + + ComplexProxy operator[](size_t index) { + return ComplexProxy(real + index, imag + index); + } + + + + size_t size() const { return array_size; } + + double *get_real_ptr() { return real; } + + const double *get_real_ptr() const { return real; } + + double *get_imag_ptr() { return imag; } + + const double *get_imag_ptr() const { return imag; } +}; + +#endif diff --git a/src/complex_array.hpp b/src/complex_array.hpp index 0bdb5ac..911d8fa 100644 --- a/src/complex_array.hpp +++ b/src/complex_array.hpp @@ -97,7 +97,7 @@ class ComplexArray { return *this; } - void multiply(const ComplexArray &other) { + void multiply(const ComplexArray &other, bool simd = false) { if (array_size != other.array_size) { throw std::invalid_argument("Array sizes do not match"); } @@ -128,7 +128,7 @@ class ComplexArray { // } // } - void scale(double scalar) { + void scale(double scalar, bool simd = false) { for (size_t i = 0; i < array_size; i++) { real[i] *= scalar; imag[i] *= scalar; diff --git a/src/dct_naive.hpp b/src/dct_naive.hpp index 7764d41..07e19fe 100644 --- a/src/dct_naive.hpp +++ b/src/dct_naive.hpp @@ -12,7 +12,7 @@ namespace py = pybind11; constexpr double SQRT2_1 = 0.7071067811865475; constexpr double ONE_SUB_SQRT2_1 = 1 - 0.7071067811865475; -static inline double dct_naive_single(double *buf, size_t buf_size, size_t i) { +static inline double dct_naive_single(double *buf, size_t buf_size, size_t i, bool simd) { double sum = 0.0f; for (size_t j = 0; j < buf_size; j++) { sum += buf[j] * cos(M_PI / buf_size * i * (j + 0.5)); @@ -28,20 +28,20 @@ static inline double idct_naive_single(double *buf, size_t buf_size, size_t i) { return sum; } -void dct_naive_inner(double *buf, size_t buf_size, double *out) { +void dct_naive_inner(double *buf, size_t buf_size, double *out, bool simd) { for (size_t i = 0; i < buf_size; i++) { - out[i] = 2.0 * dct_naive_single(buf, buf_size, i); + out[i] = 2.0 * dct_naive_single(buf, buf_size, i, simd); } } -void idct_naive_inner(double *buf, size_t buf_size, double *out) { +void idct_naive_inner(double *buf, size_t buf_size, double *out, bool simd) { for (size_t i = 0; i < buf_size; i++) { out[i] = (buf[0] + (2 * idct_naive_single(buf, buf_size, i))) / (2.0 * buf_size); } } py::array dct_naive( - py::array_t in_arr) { + py::array_t in_arr, bool simd) { py::buffer_info buf_info = in_arr.request(); double *ptr = static_cast(buf_info.ptr); size_t buf_size = buf_info.size; @@ -49,13 +49,13 @@ py::array dct_naive( py::array_t out_arr(buf_info.shape); double *out_buf = static_cast(out_arr.request(true).ptr); - dct_naive_inner(ptr, buf_size, out_buf); + dct_naive_inner(ptr, buf_size, out_buf, simd); return out_arr; } py::array idct_naive( - py::array_t in_arr) { + py::array_t in_arr, bool simd) { py::buffer_info buf_info = in_arr.request(); double *ptr = static_cast(buf_info.ptr); size_t buf_size = buf_info.size; @@ -63,7 +63,8 @@ py::array idct_naive( py::array_t out_arr(buf_info.shape); double *out_buf = static_cast(out_arr.request(true).ptr); - idct_naive_inner(ptr, buf_size, out_buf); + idct_naive_inner(ptr, buf_size, out_buf, simd); return out_arr; } + diff --git a/src/fdct.hpp b/src/fdct.hpp index ee08687..476d7fc 100644 --- a/src/fdct.hpp +++ b/src/fdct.hpp @@ -1,105 +1,111 @@ #pragma once -#include #include #include #include #include -#include +#include #include #include #include +#include // #include "FftComplex.hpp" #include "complex_array.hpp" -#include "fft.hpp" #include "fdct_inner.hpp" - +#include "fft.hpp" namespace py = pybind11; #define _USE_MATH_DEFINES // Wrapper function to accept and return NumPy arrays -py::array -fdct(py::array_t in_arr) { - // Request a buffer from the input array - py::buffer_info buf = in_arr.request(); - if (buf.ndim != 1) { - throw std::runtime_error("Input array must be 1-dimensional."); - } - - // Convert input NumPy array to std::vector - size_t N = buf.shape[0]; - ComplexArray input_data(N); - std::copy(static_cast(buf.ptr), static_cast(buf.ptr) + N, - input_data.get_real_ptr()); - std::fill(input_data.get_imag_ptr(), input_data.get_imag_ptr() + N, 0.0); - // Perform FCT - std::vector result = fct_inner(input_data); - - // Convert result back to NumPy array - return py::array_t(result.size(), result.data()); +py::array fdct( + py::array_t in_arr, bool simd) { + // Request a buffer from the input array + py::buffer_info buf = in_arr.request(); + // Check if the input array is 1-dimensional or 2-dimensional + + if (buf.ndim > 2) { + throw std::runtime_error( + "Input array must be 1-dimensional or 2-dimensional."); + } + + // Convert input NumPy array to std::vector + size_t N = buf.shape[0]; + ComplexArray input_data(N); + std::copy(static_cast(buf.ptr), + static_cast(buf.ptr) + N, input_data.get_real_ptr()); + std::fill(input_data.get_imag_ptr(), input_data.get_imag_ptr() + N, 0.0); + // Perform FCT + double *result = new double[N]; + fct_inner(input_data, result, simd); + + // Convert result back to NumPy array + return py::array_t(N, result); } std::vector idct_inner(const std::vector &c) { - size_t N = c.size(); - std::vector x(N); - // auto u = reorderOutput(c); - ComplexArray u(N); - // Allocate FFTW input/output arrays - fftw_complex *fft_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N); - fftw_complex *fft_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N); - - // Initialize FFTW plan - fftw_plan plan = - fftw_plan_dft_1d(N, fft_in, fft_out, FFTW_BACKWARD, FFTW_ESTIMATE); - - // Fill FFTW input array - for (size_t n = 0; n < N; ++n) { - fft_in[n][0] = c[n]; // Real part - fft_in[n][1] = 0.0; // Imaginary part - } - - // Execute IFFT - fftw_execute(plan); - - // Compute the IDCT using the IFFT output - for (size_t k = 0; k < N; ++k) { - double real = fft_out[k][0]; - double imag = fft_out[k][1]; - x[k] = - (real * cos(M_PI * k / (2.0 * N)) + imag * sin(M_PI * k / (2.0 * N))) / - N; - } - - // Free FFTW resources - fftw_destroy_plan(plan); - fftw_free(fft_in); - fftw_free(fft_out); - - return x; + size_t N = c.size(); + std::vector x(N); + // auto u = reorderOutput(c); + ComplexArray u(N); + // Allocate FFTW input/output arrays + fftw_complex *fft_in = + (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N); + fftw_complex *fft_out = + (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N); + + // Initialize FFTW plan + fftw_plan plan = + fftw_plan_dft_1d(N, fft_in, fft_out, FFTW_BACKWARD, FFTW_ESTIMATE); + + // Fill FFTW input array + for (size_t n = 0; n < N; ++n) { + fft_in[n][0] = c[n]; // Real part + fft_in[n][1] = 0.0; // Imaginary part + } + + // Execute IFFT + fftw_execute(plan); + + // Compute the IDCT using the IFFT output + for (size_t k = 0; k < N; ++k) { + double real = fft_out[k][0]; + double imag = fft_out[k][1]; + x[k] = (real * cos(M_PI * k / (2.0 * N)) + + imag * sin(M_PI * k / (2.0 * N))) / + N; + } + + // Free FFTW resources + fftw_destroy_plan(plan); + fftw_free(fft_in); + fftw_free(fft_out); + + return x; } // Wrapper function to accept and return NumPy arrays -py::array -ifdct(py::array_t in_arr) { - // Request a buffer from the input array - py::buffer_info buf = in_arr.request(); - if (buf.ndim != 1) { - throw std::runtime_error("Input array must be 1-dimensional."); - } - - // Convert input NumPy array to std::vector - size_t N = buf.shape[0]; - // std::vector input_data(static_cast(buf.ptr), - // static_cast(buf.ptr) + N); - ComplexArray input_data(N); - std::copy(static_cast(buf.ptr), static_cast(buf.ptr) + N, - input_data.get_real_ptr()); - - // Perform IDCT - std::vector result = ifdct_inner(input_data); - // Convert result back to NumPy array - return py::array_t(result.size(), result.data()); +py::array ifdct( + py::array_t in_arr, bool simd) { + // Request a buffer from the input array + py::buffer_info buf = in_arr.request(); + if (buf.ndim != 1) { + throw std::runtime_error("Input array must be 1-dimensional."); + } + + // Convert input NumPy array to std::vector + size_t N = buf.shape[0]; + // std::vector input_data(static_cast(buf.ptr), + // static_cast(buf.ptr) + N); + ComplexArray input_data(N); + std::copy(static_cast(buf.ptr), + static_cast(buf.ptr) + N, input_data.get_real_ptr()); + + // Perform IDCT + double *result = new double[N]; + ifdct_inner(input_data, result, simd); + // Convert result back to NumPy array + return py::array_t(N, result); } diff --git a/src/fdct_inner.cpp b/src/fdct_inner.cpp index bc609dd..4b717dd 100644 --- a/src/fdct_inner.cpp +++ b/src/fdct_inner.cpp @@ -1,7 +1,4 @@ -#pragma once -#include "fdct_inner.hpp" - #include #include @@ -11,6 +8,7 @@ #include #include "complex_array.hpp" +#include "fdct_inner.hpp" #include "fft.hpp" // Function to compute the Fast Cosine Transform @@ -61,116 +59,48 @@ ComplexArray reorderOutput(const ComplexArray &v) { return x; } -ComplexArray idctPreprocess(const ComplexArray &x) { +void idctPreprocess(ComplexArray &x) { size_t N = x.size(); - ComplexArray u(N); - ComplexArray v(N); - v = x; - v.get_real_ptr()[0] /= 2; - // for (size_t n = 0; n < N; ++n) { - // // Compute the complex term - // Complex W = std::polar(1.0, -M_PI * n / (2 * N)); - - // // Compute u[n] - // u[n] = 0.5 * (x[n] - x[N - n - 1] * Complex(0, 1)) * W; - // } - + auto real = x.get_real_ptr(); + auto imag = x.get_imag_ptr(); + + real[0] /= 2; for (size_t n = 0; n < N; ++n) { double tmp = n * M_PI / (2 * N); - u.get_real_ptr()[n] = v[n].real() * cos(tmp); - u.get_imag_ptr()[n] = -v[n].real() * sin(tmp); + double tmp_real = real[n]; + real[n] = tmp_real * cos(tmp); + imag[n] = -tmp_real * sin(tmp); } - return u; } -std::vector fct_inner(ComplexArray &x) { +void fct_inner(const ComplexArray &x, double *result, bool simd) { size_t N = x.size(); - std::vector dct(N); - auto u = reorderInput(x); - ComplexArray fft_in(N); - std::vector > vc; - // Fill FFT input array - for (size_t n = 0; n < N; ++n) { - vc.push_back( - std::complex(u.get_real_ptr()[n], u.get_imag_ptr()[n])); - fft_in.get_real_ptr()[n] = u.get_real_ptr()[n]; - fft_in.get_imag_ptr()[n] = u.get_imag_ptr()[n]; - } - - // use fftw3 - // fftw_complex *in, *out; - // fftw_plan p; - // in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); - // out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); - // for (size_t i = 0; i < N; i++) { - // in[i][0] = fft_in.get_real_ptr()[i]; - // in[i][1] = fft_in.get_imag_ptr()[i]; - // } - // p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE); - // fftw_execute(p); - // for (size_t i = 0; i < N; i++) { - // fft_in.get_real_ptr()[i] = out[i][0]; - // fft_in.get_imag_ptr()[i] = out[i][1]; - // } - // fftw_destroy_plan(p); - // fftw_free(in); - // fftw_free(out); + auto fft_in = reorderInput(x); // Perform FFT fft::transform(fft_in, false); - // std::vector > fft_in_p; - // for (size_t n = 0; n < N; ++n) { - // fft_in_p.push_back(std::complex(fft_in.get_real_ptr()[n], - // fft_in.get_imag_ptr()[n])); - // } - // Fft::transform(fft_in_p, false); - // for (size_t n = 0; n < N; ++n) { - // fft_in.get_real_ptr()[n] = fft_in_p[n].real(); - // fft_in.get_imag_ptr()[n] = fft_in_p[n].imag(); - // } // Compute the DCT using the FFT output + double *x_real = fft_in.get_real_ptr(); + double *x_imag = fft_in.get_imag_ptr(); for (size_t k = 0; k < N; ++k) { - double real = fft_in[k].real(); - double imag = fft_in[k].imag(); - dct[k] = - (real * cos(M_PI * k / (2 * N)) - imag * sin(-M_PI * k / (2 * N))) * + result[k] = + (x_real[k] * cos(M_PI * k / (2 * N)) - x_imag[k] * sin(-M_PI * k / (2 * N))) * 2; } - return dct; } -std::vector ifdct_inner(ComplexArray &c) { +void ifdct_inner(const ComplexArray &c, double *result, bool simd) { size_t N = c.size(); - std::vector x(N); - // auto u = reorderInput(c); - auto u = idctPreprocess(c); - ComplexArray fft_in(N); - std::vector > vc; - // Fill FFT input array - for (size_t n = 0; n < N; ++n) { - vc.push_back( - std::complex(u.get_real_ptr()[n], u.get_imag_ptr()[n])); - fft_in.get_real_ptr()[n] = u.get_real_ptr()[n]; - fft_in.get_imag_ptr()[n] = u.get_imag_ptr()[n]; - } + ComplexArray fft_in = c; + idctPreprocess(fft_in); // Perform FFT fft::transform(fft_in, false); auto x_ = reorderOutput(fft_in); for (size_t n = 0; n < N; ++n) { - x[n] = x_.get_real_ptr()[n] / N; + result[n] = x_.get_real_ptr()[n] / N; } - - // Compute the IDCT using the FFT output - // for (size_t k = 0; k < N; ++k) { - // double real = fft_in[k].real(); - // double imag = fft_in[k].imag(); - // x[k] = (real * cos(M_PI * k / (2.0 * N)) + - // imag * sin(M_PI * k / (2.0 * N))) / - // N; - // } - return x; } diff --git a/src/fdct_inner.hpp b/src/fdct_inner.hpp index 76e84f6..38dd6a3 100644 --- a/src/fdct_inner.hpp +++ b/src/fdct_inner.hpp @@ -12,6 +12,6 @@ ComplexArray reorderInput(const ComplexArray &x); -std::vector fct_inner(ComplexArray &x); +void fct_inner(const ComplexArray &x, double *result, bool simd); -std::vector ifdct_inner(ComplexArray &c); +void ifdct_inner(const ComplexArray &c, double *result, bool simd); diff --git a/src/fft.cpp b/src/fft.cpp index 08f0525..0e8bfb0 100644 --- a/src/fft.cpp +++ b/src/fft.cpp @@ -12,7 +12,19 @@ static size_t reverseBits(size_t val, int width); -void fft::transformRadix2(ComplexArray &x, bool inverse) { +static ComplexArray mkExpTable(size_t n, bool inverse) { + ComplexArray expTable(n); + double *real = expTable.get_real_ptr(); + double *imag = expTable.get_imag_ptr(); + for (size_t i = 0; i < n; i++) { + double angle = (inverse ? 2 : -2) * M_PI * i / n; + real[i] = std::cos(angle); + imag[i] = std::sin(angle); + } + return expTable; +} + +void fft::transformRadix2(ComplexArray &x, bool inverse, bool simd) { int levels = 0; size_t n = x.size(); double *real = x.get_real_ptr(); @@ -23,12 +35,8 @@ void fft::transformRadix2(ComplexArray &x, bool inverse) { if (static_cast(1U) << levels != n) throw std::domain_error("Length is not a power of 2"); - ComplexArray expTable(n / 2); + ComplexArray expTable = mkExpTable(n, inverse); - for (size_t i = 0; i < n / 2; i++) { - double angle = (inverse ? 2 : -2) * M_PI * i / n; - expTable[i] = std::polar(1.0, angle); - } for (size_t i = 0; i < n; i++) { size_t j = reverseBits(i, levels); @@ -61,7 +69,7 @@ void fft::transformRadix2(ComplexArray &x, bool inverse) { } } -void fft::transformBluestein(ComplexArray &x, bool inverse) { +void fft::transformBluestein(ComplexArray &x, bool inverse, bool simd) { size_t n = x.size(); double *real = x.get_real_ptr(); double *imag = x.get_imag_ptr(); @@ -104,40 +112,40 @@ void fft::transformBluestein(ComplexArray &x, bool inverse) { imag_bvec[i] = imag_bvec[m - i] = -imag_expTable[i]; } - ComplexArray c = convolove(a, b); + convolove(a, b, simd); // x = c * expTable; for (size_t i = 0; i < n; i++) { - real[i] = c.get_real_ptr()[i] * real_expTable[i] - c.get_imag_ptr()[i] * imag_expTable[i]; - imag[i] = c.get_real_ptr()[i] * imag_expTable[i] + c.get_imag_ptr()[i] * real_expTable[i]; + real[i] = a.get_real_ptr()[i] * real_expTable[i] - a.get_imag_ptr()[i] * imag_expTable[i]; + imag[i] = a.get_real_ptr()[i] * imag_expTable[i] + a.get_imag_ptr()[i] * real_expTable[i]; } } -ComplexArray fft::convolove(const ComplexArray &_x, const ComplexArray &_y) { + +// compute the convolution of two complex arrays and set the result to x +void fft::convolove(ComplexArray &_x, const ComplexArray &_y, bool simd) { size_t n = _x.size(); if (n != _y.size()) throw std::domain_error("Mismatched lengths"); - ComplexArray x = _x; + ComplexArray& x = _x; ComplexArray y = _y; - transform(x, false); - transform(y, false); + transform(x, false, simd); + transform(y, false, simd); - x.multiply(y); - transform(x, true); - x.scale(1.0 / n); - - return x; + x.multiply(y, simd); + transform(x, true, simd); + x.scale(1.0 / n, simd); } -void fft::transform(ComplexArray &x, bool inverse) { +void fft::transform(ComplexArray &x, bool inverse, bool simd) { size_t n = x.size(); if (n == 0) return; else if ((n & (n - 1)) == 0) // Is power of 2 - transformRadix2(x, inverse); + transformRadix2(x, inverse, simd); else // More complicated algorithm for arbitrary sizes - transformBluestein(x, inverse); + transformBluestein(x, inverse, simd); } static size_t reverseBits(size_t val, int width) { diff --git a/src/fft.hpp b/src/fft.hpp index 768a7fe..868eee7 100644 --- a/src/fft.hpp +++ b/src/fft.hpp @@ -3,13 +3,11 @@ #include "complex_array.hpp" namespace fft { -void transformRadix2(ComplexArray &x, bool inverse); +void transformRadix2(ComplexArray &x, bool inverse, bool simd = false); -void transformBluestein(ComplexArray &x, bool inverse); +void transformBluestein(ComplexArray &x, bool inverse, bool simd = false); -void transform(ComplexArray &x, bool inverse); +void transform(ComplexArray &x, bool inverse, bool simd = false); -// ComplexArray convolove(const ComplexArray &x, const ComplexArray &y); -ComplexArray convolove(const ComplexArray &x, const ComplexArray &y); +void convolove(ComplexArray &x, const ComplexArray &y, bool simd = false); } // namespace fft - diff --git a/test_dct.py b/test_dct.py index 40a233f..092c6f7 100644 --- a/test_dct.py +++ b/test_dct.py @@ -7,6 +7,7 @@ # Parameters test_size_seq = [10, 64, 127, 256, 1023, 2048, 4097, 8192, 8193] tolerance = 1e-6 # Tolerance for individual element comparison +simd = [False, True] # SIMD flag # List of test functions and their corresponding references test_cases = [ @@ -18,8 +19,10 @@ @pytest.mark.parametrize("size", test_size_seq) @pytest.mark.parametrize("name, func, ref_func", test_cases) -def test_transform(name, func, ref_func, size): +@pytest.mark.parametrize("simd", simd) +def test_transform(name, func, ref_func, size, simd): inp = np.random.rand(size) gt = ref_func(inp) - ans = func(inp) + ans = func(inp, simd) np.testing.assert_allclose(ans, gt, atol=tolerance, rtol=0) + From c80074e553034802c8a709b4a437bfd0c8aa7c94 Mon Sep 17 00:00:00 2001 From: cfmc30 Date: Sat, 14 Dec 2024 13:48:42 +0000 Subject: [PATCH 14/16] fix: Use ubuntu 24.04 --- .github/workflows/cmake-single-platform.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index ca27d04..66b348d 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -17,7 +17,7 @@ jobs: # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. # You can convert this to a matrix build if you need cross-platform coverage. # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: fail-fast: false @@ -29,7 +29,8 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - sudo apt install python3-pybind11 libfftw3-dev + sudo apt -y update + sudo apt -y install python3-pybind11 libfftw3-dev cmake python3-dev python -m pip install --upgrade pip python -m pip install pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi From 10a359e3e4445ff9fc53149efb73b6b400f0849d Mon Sep 17 00:00:00 2001 From: Colorfulmooncat Date: Sun, 15 Dec 2024 02:52:43 +0800 Subject: [PATCH 15/16] doc: Add nayuki's website --- ref/README.org | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ref/README.org b/ref/README.org index 74fd6b9..2befb38 100644 --- a/ref/README.org +++ b/ref/README.org @@ -2,4 +2,6 @@ This reference implementation is provided by Nayuki. I greatly appreciate their contribution. -For more details, please visit Nayuki's [[Website][https://www.nayuki.io/]] +For more details, please visit Nayuki's website +- https://www.nayuki.io/page/free-small-fft-in-multiple-languages +- https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms From 569d74b3e5d57361863829ff3c475c5ecb62e218 Mon Sep 17 00:00:00 2001 From: Colorfulmooncat Date: Sun, 15 Dec 2024 02:56:48 +0800 Subject: [PATCH 16/16] doc: fix typo and add reference --- README.org | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.org b/README.org index e1782c1..6ed7575 100644 --- a/README.org +++ b/README.org @@ -43,7 +43,7 @@ modern multicore processors. :PROPERTIES: :CUSTOM_ID: prospective-users :END: -FDCP will be valuable in fieds where DCT is used extensively: +FDCT will be valuable in fieds where DCT is used extensively: - Image processing and compression - Audio processing and compression @@ -136,3 +136,7 @@ multithread=True, simd=True) [[https://coehuman.uodiyala.edu.iq/uploads/Coehuman%20library%20pdf/%D9%83%D8%AA%D8%A8%20%D8%A7%D9%84%D8%B1%D9%8A%D8%A7%D8%B6%D9%8A%D8%A7%D8%AA%20Mathematics%20books/Wavelets/25%20(2).pdf]] 3. C. W. Kok, Fast algorithm for computing discrete cosine transform [[https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=558495]] +4. Nayuki, Fast discrete cosine transform algorithms + [[https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms]] +5. Nayuki, Free small FFT in multiple languages + [[https://www.nayuki.io/page/free-small-fft-in-multiple-languages]]