diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index 5012e2a..66b348d 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -17,7 +17,7 @@ jobs: # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. # You can convert this to a matrix build if you need cross-platform coverage. # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: fail-fast: false @@ -29,7 +29,8 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - sudo apt install python3-pybind11 + sudo apt -y update + sudo apt -y install python3-pybind11 libfftw3-dev cmake python3-dev python -m pip install --upgrade pip python -m pip install pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi diff --git a/.gitignore b/.gitignore index 9bf3746..374071e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,6 @@ compile_commands.json +build/ +.vscode/ +.cache/ +*.so + diff --git a/CMakeLists.txt b/CMakeLists.txt index 846483e..4afb28a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,16 @@ include_directories(${PYTHON_INCLUDE_DIRS}) # Find Pybind11 find_package(pybind11 REQUIRED) -pybind11_add_module(fdct src/fdct.cpp) +find_package(PkgConfig REQUIRED) +pkg_search_module(FFTW REQUIRED fftw3 IMPORTED_TARGET) +include_directories(PkgConfig::FFTW) +link_libraries(PkgConfig::FFTW) + +pybind11_add_module(fdct + src/fdct_module.cpp + src/fft.cpp + src/fdct_inner.cpp +) # Link the module with Python target_link_libraries(fdct PRIVATE ${PYTHON_LIBRARIES}) diff --git a/ref/FastDctFft.hpp b/ref/FastDctFft.hpp new file mode 100644 index 0000000..624234d --- /dev/null +++ b/ref/FastDctFft.hpp @@ -0,0 +1,36 @@ +/* + * Fast discrete cosine transform algorithms (C++) + * + * Copyright (c) 2017 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include + + +namespace FastDctFft { + + void transform(std::vector &vec); + + void inverseTransform(std::vector &vec); + +} + diff --git a/ref/FastDctFtt.cpp b/ref/FastDctFtt.cpp new file mode 100644 index 0000000..5acfa96 --- /dev/null +++ b/ref/FastDctFtt.cpp @@ -0,0 +1,75 @@ +/* + * Fast discrete cosine transform algorithms (C++) + * + * Copyright (c) 2017 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include "FastDctFft.hpp" +#include "FftRealPair.hpp" + +using std::size_t; +using std::vector; + + +// DCT type II, unscaled +void FastDctFft::transform(vector &vec) { + size_t len = vec.size(); + size_t halfLen = len / 2; + vector real(len); + for (size_t i = 0; i < halfLen; i++) { + real.at(i) = vec.at(i * 2); + real.at(len - 1 - i) = vec.at(i * 2 + 1); + } + if (len % 2 == 1) + real.at(halfLen) = vec.at(len - 1); + std::fill(vec.begin(), vec.end(), 0.0); + Fft::transform(real, vec); + for (size_t i = 0; i < len; i++) { + double temp = i * M_PI / (len * 2); + vec.at(i) = real.at(i) * std::cos(temp) + vec.at(i) * std::sin(temp); + } +} + + +// DCT type III, unscaled +void FastDctFft::inverseTransform(vector &vec) { + size_t len = vec.size(); + if (len > 0) + vec.at(0) /= 2; + vector real(len); + for (size_t i = 0; i < len; i++) { + double temp = i * M_PI / (len * 2); + real.at(i) = vec.at(i) * std::cos(temp); + vec.at(i) *= -std::sin(temp); + } + Fft::transform(real, vec); + + size_t halfLen = len / 2; + for (size_t i = 0; i < halfLen; i++) { + vec.at(i * 2 + 0) = real.at(i); + vec.at(i * 2 + 1) = real.at(len - 1 - i); + } + if (len % 2 == 1) + vec.at(len - 1) = real.at(halfLen); +} + diff --git a/ref/FftComplex.cpp b/ref/FftComplex.cpp new file mode 100644 index 0000000..4f842a0 --- /dev/null +++ b/ref/FftComplex.cpp @@ -0,0 +1,149 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include "FftComplex.hpp" + +using std::complex; +using std::size_t; +using std::uintmax_t; +using std::vector; + + +// Private function prototypes +static size_t reverseBits(size_t val, int width); + + +void Fft::transform(vector > &vec, bool inverse) { + size_t n = vec.size(); + if (n == 0) + return; + else if ((n & (n - 1)) == 0) // Is power of 2 + transformRadix2(vec, inverse); + else // More complicated algorithm for arbitrary sizes + transformBluestein(vec, inverse); +} + + +void Fft::transformRadix2(vector > &vec, bool inverse) { + // Length variables + size_t n = vec.size(); + int levels = 0; // Compute levels = floor(log2(n)) + for (size_t temp = n; temp > 1U; temp >>= 1) + levels++; + if (static_cast(1U) << levels != n) + throw std::domain_error("Length is not a power of 2"); + + // Trigonometric table + vector > expTable(n / 2); + for (size_t i = 0; i < n / 2; i++) + expTable[i] = std::polar(1.0, (inverse ? 2 : -2) * M_PI * i / n); + + // Bit-reversed addressing permutation + for (size_t i = 0; i < n; i++) { + size_t j = reverseBits(i, levels); + if (j > i) + std::swap(vec[i], vec[j]); + } + + // Cooley-Tukey decimation-in-time radix-2 FFT + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + complex temp = vec[j + halfsize] * expTable[k]; + vec[j + halfsize] = vec[j] - temp; + vec[j] += temp; + } + } + if (size == n) // Prevent overflow in 'size *= 2' + break; + } +} + + +void Fft::transformBluestein(vector > &vec, bool inverse) { + // Find a power-of-2 convolution length m such that m >= n * 2 + 1 + size_t n = vec.size(); + size_t m = 1; + while (m / 2 <= n) { + if (m > SIZE_MAX / 2) + throw std::length_error("Vector too large"); + m *= 2; + } + + // Trigonometric table + vector > expTable(n); + for (size_t i = 0; i < n; i++) { + uintmax_t temp = static_cast(i) * i; + temp %= static_cast(n) * 2; + double angle = (inverse ? M_PI : -M_PI) * temp / n; + expTable[i] = std::polar(1.0, angle); + } + + // Temporary vectors and preprocessing + vector > avec(m); + for (size_t i = 0; i < n; i++) + avec[i] = vec[i] * expTable[i]; + vector > bvec(m); + bvec[0] = expTable[0]; + for (size_t i = 1; i < n; i++) + bvec[i] = bvec[m - i] = std::conj(expTable[i]); + + // Convolution + vector > cvec = convolve(std::move(avec), std::move(bvec)); + + // Postprocessing + for (size_t i = 0; i < n; i++) + vec[i] = cvec[i] * expTable[i]; +} + + +vector > Fft::convolve( + vector > xvec, + vector > yvec) { + + size_t n = xvec.size(); + if (n != yvec.size()) + throw std::domain_error("Mismatched lengths"); + transform(xvec, false); + transform(yvec, false); + for (size_t i = 0; i < n; i++) + xvec[i] *= yvec[i]; + transform(xvec, true); + for (size_t i = 0; i < n; i++) // Scaling (because this FFT implementation omits it) + xvec[i] /= static_cast(n); + return xvec; +} + + +static size_t reverseBits(size_t val, int width) { + size_t result = 0; + for (int i = 0; i < width; i++, val >>= 1) + result = (result << 1) | (val & 1U); + return result; +} diff --git a/ref/FftComplex.hpp b/ref/FftComplex.hpp new file mode 100644 index 0000000..24eeabb --- /dev/null +++ b/ref/FftComplex.hpp @@ -0,0 +1,61 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include +#include +using Complex = std::complex; + +namespace Fft { + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. The inverse transform does not perform scaling, so it is not a true inverse. + */ + void transform(std::vector > &vec, bool inverse); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. + */ + void transformRadix2(std::vector > &vec, bool inverse); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. + * Uses Bluestein's chirp z-transform algorithm. + */ + void transformBluestein(std::vector > &vec, bool inverse); + + + /* + * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. + */ + std::vector > convolve( + std::vector > xvec, + std::vector > yvec); + +} diff --git a/ref/FftComplexTest.cpp b/ref/FftComplexTest.cpp new file mode 100644 index 0000000..dd960d1 --- /dev/null +++ b/ref/FftComplexTest.cpp @@ -0,0 +1,177 @@ +/* + * FFT and convolution test (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "FftComplex.hpp" + +using std::complex; +using std::cout; +using std::endl; +using std::vector; + + +// Private function prototypes +static void testFft(int n); +static void testConvolution(int n); +static vector > naiveDft(const vector > &input, bool inverse); +static vector > naiveConvolve(const vector > &xvec, const vector > &yvec); +static double log10RmsErr(const vector > &xvec, const vector > &yvec); +static vector > randomComplexes(int n); + +// Mutable global variable +static double maxLogError = -INFINITY; + +// Random number generation +std::default_random_engine randGen((std::random_device())()); + + +/*---- Main and test functions ----*/ + +int main() { + // Test power-of-2 size FFTs + for (int i = 0; i <= 12; i++) + testFft(1 << i); + + // Test small size FFTs + for (int i = 0; i < 30; i++) + testFft(i); + + // Test diverse size FFTs + for (int i = 0, prev = 0; i <= 100; i++) { + int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); + if (n > prev) { + testFft(n); + prev = n; + } + } + + // Test power-of-2 size convolutions + for (int i = 0; i <= 12; i++) + testConvolution(1 << i); + + // Test diverse size convolutions + for (int i = 0, prev = 0; i <= 100; i++) { + int n = static_cast(std::lround(std::pow(1500.0, i / 100.0))); + if (n > prev) { + testConvolution(n); + prev = n; + } + } + + cout << endl; + cout << "Max log err = " << std::setprecision(3) << maxLogError << endl; + cout << "Test " << (maxLogError < -10 ? "passed" : "failed") << endl; + return EXIT_SUCCESS; +} + + +static void testFft(int n) { + const vector > input = randomComplexes(n); + const vector > expect = naiveDft(input, false); + vector > actual = input; + Fft::transform(actual, false); + double err = log10RmsErr(expect, actual); + + for (auto it = actual.begin(); it != actual.end(); ++it) + *it /= n; + Fft::transform(actual, true); + err = std::max(log10RmsErr(input, actual), err); + cout << "fftsize=" << std::setw(4) << std::setfill(' ') << n << " " + << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) + << err << endl; +} + + +static void testConvolution(int n) { + const vector > input0 = randomComplexes(n); + const vector > input1 = randomComplexes(n); + const vector > expect = naiveConvolve(input0, input1); + const vector > actual = Fft::convolve(std::move(input0), std::move(input1)); + cout << "convsize=" << std::setw(4) << std::setfill(' ') << n << " " + << "logerr=" << std::setw(5) << std::setprecision(3) << std::setiosflags(std::ios::showpoint) + << log10RmsErr(expect, actual) << endl; +} + + +/*---- Naive reference computation functions ----*/ + +static vector > naiveDft(const vector > &input, bool inverse) { + int n = static_cast(input.size()); + vector > output; + double coef = (inverse ? 2 : -2) * M_PI / n; + for (int k = 0; k < n; k++) { // For each output element + complex sum(0); + for (int t = 0; t < n; t++) { // For each input element + double angle = coef * (static_cast(t) * k % n); + sum += input[t] * std::polar(1.0, angle); + } + output.push_back(sum); + } + return output; +} + + +static vector > naiveConvolve( + const vector > &xvec, const vector > &yvec) { + int n = static_cast(xvec.size()); + vector > result(n); // All zeros + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + int k = (i + j) % n; + result[k] += xvec[i] * yvec[j]; + } + } + return result; +} + + +/*---- Utility functions ----*/ + +static double log10RmsErr(const vector > &xvec, const vector > &yvec) { + int n = static_cast(xvec.size()); + double err = std::pow(10, -99 * 2); + for (int i = 0; i < n; i++) + err += std::norm(xvec.at(i) - yvec.at(i)); + err /= n > 0 ? n : 1; + err = std::sqrt(err); // Now this is a root mean square (RMS) error + err = std::log10(err); + maxLogError = std::max(err, maxLogError); + return err; +} + + +static vector > randomComplexes(int n) { + std::uniform_real_distribution valueDist(-1.0, 1.0); + vector > result; + for (int i = 0; i < n; i++) + result.push_back(complex(valueDist(randGen), valueDist(randGen))); + return result; +} diff --git a/ref/FftRealPair.cpp b/ref/FftRealPair.cpp new file mode 100644 index 0000000..9bb414f --- /dev/null +++ b/ref/FftRealPair.cpp @@ -0,0 +1,195 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include "FftRealPair.hpp" + +using std::size_t; +using std::uintmax_t; +using std::vector; + + +// Private function prototypes +static size_t reverseBits(size_t val, int width); + + +void Fft::transform(vector &real, vector &imag) { + size_t n = real.size(); + if (n != imag.size()) + throw std::invalid_argument("Mismatched lengths"); + if (n == 0) + return; + else if ((n & (n - 1)) == 0) // Is power of 2 + transformRadix2(real, imag); + else // More complicated algorithm for arbitrary sizes + transformBluestein(real, imag); +} + + +void Fft::inverseTransform(vector &real, vector &imag) { + transform(imag, real); +} + + +void Fft::transformRadix2(vector &real, vector &imag) { + // Length variables + size_t n = real.size(); + if (n != imag.size()) + throw std::invalid_argument("Mismatched lengths"); + int levels = 0; // Compute levels = floor(log2(n)) + for (size_t temp = n; temp > 1U; temp >>= 1) + levels++; + if (static_cast(1U) << levels != n) + throw std::domain_error("Length is not a power of 2"); + + // Trigonometric tables + vector cosTable(n / 2); + vector sinTable(n / 2); + for (size_t i = 0; i < n / 2; i++) { + cosTable[i] = std::cos(2 * M_PI * i / n); + sinTable[i] = std::sin(2 * M_PI * i / n); + } + + // Bit-reversed addressing permutation + for (size_t i = 0; i < n; i++) { + size_t j = reverseBits(i, levels); + if (j > i) { + std::swap(real[i], real[j]); + std::swap(imag[i], imag[j]); + } + } + + // Cooley-Tukey decimation-in-time radix-2 FFT + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + size_t l = j + halfsize; + double tpre = real[l] * cosTable[k] + imag[l] * sinTable[k]; + double tpim = -real[l] * sinTable[k] + imag[l] * cosTable[k]; + real[l] = real[j] - tpre; + imag[l] = imag[j] - tpim; + real[j] += tpre; + imag[j] += tpim; + } + } + if (size == n) // Prevent overflow in 'size *= 2' + break; + } +} + + +void Fft::transformBluestein(vector &real, vector &imag) { + // Find a power-of-2 convolution length m such that m >= n * 2 + 1 + size_t n = real.size(); + if (n != imag.size()) + throw std::invalid_argument("Mismatched lengths"); + size_t m = 1; + while (m / 2 <= n) { + if (m > SIZE_MAX / 2) + throw std::length_error("Vector too large"); + m *= 2; + } + + // Trigonometric tables + vector cosTable(n), sinTable(n); + for (size_t i = 0; i < n; i++) { + uintmax_t temp = static_cast(i) * i; + temp %= static_cast(n) * 2; + double angle = M_PI * temp / n; + cosTable[i] = std::cos(angle); + sinTable[i] = std::sin(angle); + } + + // Temporary vectors and preprocessing + vector areal(m), aimag(m); + for (size_t i = 0; i < n; i++) { + areal[i] = real[i] * cosTable[i] + imag[i] * sinTable[i]; + aimag[i] = -real[i] * sinTable[i] + imag[i] * cosTable[i]; + } + vector breal(m), bimag(m); + breal[0] = cosTable[0]; + bimag[0] = sinTable[0]; + for (size_t i = 1; i < n; i++) { + breal[i] = breal[m - i] = cosTable[i]; + bimag[i] = bimag[m - i] = sinTable[i]; + } + + // Convolution + std::pair, vector > cvec = convolve(areal, aimag, breal, bimag); + vector creal = std::move(cvec.first ); + vector cimag = std::move(cvec.second); + + // Postprocessing + for (size_t i = 0; i < n; i++) { + real[i] = creal[i] * cosTable[i] + cimag[i] * sinTable[i]; + imag[i] = -creal[i] * sinTable[i] + cimag[i] * cosTable[i]; + } +} + + +vector Fft::convolve(vector xvec, vector yvec) { + size_t n = xvec.size(); + if (n != yvec.size()) + throw std::invalid_argument("Mismatched lengths"); + return convolve(std::move(xvec), vector(n), std::move(yvec), vector(n)).first; +} + + +std::pair, vector > Fft::convolve( + vector xreal, vector ximag, + vector yreal, vector yimag) { + + size_t n = xreal.size(); + if (n != ximag.size() || n != yreal.size() || n != yimag.size()) + throw std::invalid_argument("Mismatched lengths"); + + transform(xreal, ximag); + transform(yreal, yimag); + + for (size_t i = 0; i < n; i++) { + double temp = xreal[i] * yreal[i] - ximag[i] * yimag[i]; + ximag[i] = ximag[i] * yreal[i] + xreal[i] * yimag[i]; + xreal[i] = temp; + } + inverseTransform(xreal, ximag); + + for (size_t i = 0; i < n; i++) { // Scaling (because this FFT implementation omits it) + xreal[i] /= n; + ximag[i] /= n; + } + return std::pair, vector >(std::move(xreal), std::move(ximag)); +} + + +static size_t reverseBits(size_t val, int width) { + size_t result = 0; + for (int i = 0; i < width; i++, val >>= 1) + result = (result << 1) | (val & 1U); + return result; +} + diff --git a/ref/FftRealPair.hpp b/ref/FftRealPair.hpp new file mode 100644 index 0000000..95db10a --- /dev/null +++ b/ref/FftRealPair.hpp @@ -0,0 +1,75 @@ +/* + * Free FFT and convolution (C++) + * + * Copyright (c) 2021 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include +#include + + +namespace Fft { + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. + */ + void transform(std::vector &real, std::vector &imag); + + + /* + * Computes the inverse discrete Fourier transform (IDFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. This transform does not perform scaling, so the inverse is not a true inverse. + */ + void inverseTransform(std::vector &real, std::vector &imag); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. + */ + void transformRadix2(std::vector &real, std::vector &imag); + + + /* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. + * Uses Bluestein's chirp z-transform algorithm. + */ + void transformBluestein(std::vector &real, std::vector &imag); + + + /* + * Computes the circular convolution of the given real vectors. Each vector's length must be the same. + */ + std::vector convolve(std::vector xvec, std::vector yvec); + + + /* + * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. + */ + std::pair, std::vector > convolve( + std::vector xreal, std::vector ximag, + std::vector yreal, std::vector yimag); + +} + diff --git a/ref/README.org b/ref/README.org new file mode 100644 index 0000000..74fd6b9 --- /dev/null +++ b/ref/README.org @@ -0,0 +1,5 @@ +* Reference Implementation + +This reference implementation is provided by Nayuki. I greatly appreciate their contribution. + +For more details, please visit Nayuki's [[Website][https://www.nayuki.io/]] diff --git a/src/complex_array.cpp b/src/complex_array.cpp new file mode 100644 index 0000000..0bdb5ac --- /dev/null +++ b/src/complex_array.cpp @@ -0,0 +1,181 @@ +#ifndef COMPLEX_ARRAY_HPP +#define COMPLEX_ARRAY_HPP +#include +#include +#include + +using Complex = std::complex; + +class ComplexProxy { + double *real_ptr; + double *imag_ptr; + + public: + double &real() { return *real_ptr; } + double &imag() { return *imag_ptr; } + + ComplexProxy(double *real_ptr, double *imag_ptr) + : real_ptr(real_ptr), imag_ptr(imag_ptr) {} + + ComplexProxy(ComplexProxy &&rhs) noexcept + : real_ptr(rhs.real_ptr), imag_ptr(rhs.imag_ptr) { + rhs.real_ptr = nullptr; + rhs.imag_ptr = nullptr; + } + + operator Complex() const { return Complex(*real_ptr, *imag_ptr); } + + ComplexProxy &operator=(const Complex &c) { + *real_ptr = c.real(); + *imag_ptr = c.imag(); + return *this; + } +}; + +class ComplexArray { + private: + double *real; + double *imag; + size_t array_size; + + public: + ComplexArray(size_t n) : array_size(n) { + if (n == 0) { + real = nullptr; + imag = nullptr; + } else { + real = new double[n]; + std::fill(real, real + n, 0.0); + imag = new double[n]; + std::fill(imag, imag + n, 0.0); + } + } + + ~ComplexArray() { + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + } + + ComplexArray(const ComplexArray &other) : array_size(other.array_size) { + real = new double[array_size]; + imag = new double[array_size]; + std::copy(other.real, other.real + array_size, real); + std::copy(other.imag, other.imag + array_size, imag); + } + + ComplexArray(ComplexArray &&rhs) noexcept + : real(rhs.real), imag(rhs.imag), array_size(rhs.array_size) { + rhs.real = nullptr; + rhs.imag = nullptr; + rhs.array_size = 0; + } + + ComplexArray &operator=(const ComplexArray &other) { + if (this == &other) { + return *this; + } + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + + array_size = other.array_size; + real = new double[array_size]; + imag = new double[array_size]; + + for (size_t i = 0; i < array_size; i++) { + real[i] = other.real[i]; + imag[i] = other.imag[i]; + } + + return *this; + } + + void multiply(const ComplexArray &other) { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + + for (size_t i = 0; i < array_size; i++) { + double temp_real = + real[i] * other.real[i] - imag[i] * other.imag[i]; + double temp_imag = + real[i] * other.imag[i] + imag[i] * other.real[i]; + real[i] = temp_real; + imag[i] = temp_imag; + } + } + + // void partial_multiply(const ComplexArray &other, size_t start, size_t length) { + + // size_t end = start + length; + // if (end > array_size || end > other.array_size) { + // throw std::invalid_argument("Array sizes do not match"); + // } + // for (size_t i = 0; i < length; i++) { + // double temp_real = + // real[start + i] * other.real[i] - imag[start + i] * other.imag[i]; + // double temp_imag = + // real[start + i] * other.imag[i] + imag[start + i] * other.real[i]; + // real[start + i] = temp_real; + // imag[start + i] = temp_imag; + // } + // } + + void scale(double scalar) { + for (size_t i = 0; i < array_size; i++) { + real[i] *= scalar; + imag[i] *= scalar; + } + } + + ComplexArray operator*(const ComplexArray &other) const { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + ComplexArray result = *this; // Copy constructor + result.multiply(other); + return result; + } + + ComplexArray operator+(const ComplexArray &other) const { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + + ComplexArray result(array_size); + for (size_t i = 0; i < array_size; i++) { + result.real[i] = real[i] + other.real[i]; + result.imag[i] = imag[i] + other.imag[i]; + } + return result; + } + + Complex operator[](size_t index) const { + return Complex(real[index], imag[index]); + } + + ComplexProxy operator[](size_t index) { + return ComplexProxy(real + index, imag + index); + } + + + + size_t size() const { return array_size; } + + double *get_real_ptr() { return real; } + + const double *get_real_ptr() const { return real; } + + double *get_imag_ptr() { return imag; } + + const double *get_imag_ptr() const { return imag; } +}; + +#endif diff --git a/src/complex_array.hpp b/src/complex_array.hpp new file mode 100644 index 0000000..911d8fa --- /dev/null +++ b/src/complex_array.hpp @@ -0,0 +1,181 @@ +#ifndef COMPLEX_ARRAY_HPP +#define COMPLEX_ARRAY_HPP +#include +#include +#include + +using Complex = std::complex; + +class ComplexProxy { + double *real_ptr; + double *imag_ptr; + + public: + double &real() { return *real_ptr; } + double &imag() { return *imag_ptr; } + + ComplexProxy(double *real_ptr, double *imag_ptr) + : real_ptr(real_ptr), imag_ptr(imag_ptr) {} + + ComplexProxy(ComplexProxy &&rhs) noexcept + : real_ptr(rhs.real_ptr), imag_ptr(rhs.imag_ptr) { + rhs.real_ptr = nullptr; + rhs.imag_ptr = nullptr; + } + + operator Complex() const { return Complex(*real_ptr, *imag_ptr); } + + ComplexProxy &operator=(const Complex &c) { + *real_ptr = c.real(); + *imag_ptr = c.imag(); + return *this; + } +}; + +class ComplexArray { + private: + double *real; + double *imag; + size_t array_size; + + public: + ComplexArray(size_t n) : array_size(n) { + if (n == 0) { + real = nullptr; + imag = nullptr; + } else { + real = new double[n]; + std::fill(real, real + n, 0.0); + imag = new double[n]; + std::fill(imag, imag + n, 0.0); + } + } + + ~ComplexArray() { + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + } + + ComplexArray(const ComplexArray &other) : array_size(other.array_size) { + real = new double[array_size]; + imag = new double[array_size]; + std::copy(other.real, other.real + array_size, real); + std::copy(other.imag, other.imag + array_size, imag); + } + + ComplexArray(ComplexArray &&rhs) noexcept + : real(rhs.real), imag(rhs.imag), array_size(rhs.array_size) { + rhs.real = nullptr; + rhs.imag = nullptr; + rhs.array_size = 0; + } + + ComplexArray &operator=(const ComplexArray &other) { + if (this == &other) { + return *this; + } + if (real != nullptr) { + delete[] real; + } + if (imag != nullptr) { + delete[] imag; + } + + array_size = other.array_size; + real = new double[array_size]; + imag = new double[array_size]; + + for (size_t i = 0; i < array_size; i++) { + real[i] = other.real[i]; + imag[i] = other.imag[i]; + } + + return *this; + } + + void multiply(const ComplexArray &other, bool simd = false) { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + + for (size_t i = 0; i < array_size; i++) { + double temp_real = + real[i] * other.real[i] - imag[i] * other.imag[i]; + double temp_imag = + real[i] * other.imag[i] + imag[i] * other.real[i]; + real[i] = temp_real; + imag[i] = temp_imag; + } + } + + // void partial_multiply(const ComplexArray &other, size_t start, size_t length) { + + // size_t end = start + length; + // if (end > array_size || end > other.array_size) { + // throw std::invalid_argument("Array sizes do not match"); + // } + // for (size_t i = 0; i < length; i++) { + // double temp_real = + // real[start + i] * other.real[i] - imag[start + i] * other.imag[i]; + // double temp_imag = + // real[start + i] * other.imag[i] + imag[start + i] * other.real[i]; + // real[start + i] = temp_real; + // imag[start + i] = temp_imag; + // } + // } + + void scale(double scalar, bool simd = false) { + for (size_t i = 0; i < array_size; i++) { + real[i] *= scalar; + imag[i] *= scalar; + } + } + + ComplexArray operator*(const ComplexArray &other) const { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + ComplexArray result = *this; // Copy constructor + result.multiply(other); + return result; + } + + ComplexArray operator+(const ComplexArray &other) const { + if (array_size != other.array_size) { + throw std::invalid_argument("Array sizes do not match"); + } + + ComplexArray result(array_size); + for (size_t i = 0; i < array_size; i++) { + result.real[i] = real[i] + other.real[i]; + result.imag[i] = imag[i] + other.imag[i]; + } + return result; + } + + Complex operator[](size_t index) const { + return Complex(real[index], imag[index]); + } + + ComplexProxy operator[](size_t index) { + return ComplexProxy(real + index, imag + index); + } + + + + size_t size() const { return array_size; } + + double *get_real_ptr() { return real; } + + const double *get_real_ptr() const { return real; } + + double *get_imag_ptr() { return imag; } + + const double *get_imag_ptr() const { return imag; } +}; + +#endif diff --git a/src/dct_naive.hpp b/src/dct_naive.hpp new file mode 100644 index 0000000..07e19fe --- /dev/null +++ b/src/dct_naive.hpp @@ -0,0 +1,70 @@ +#pragma once +#include +#include +#include + +#include + + +namespace py = pybind11; +#define _USE_MATH_DEFINES + +constexpr double SQRT2_1 = 0.7071067811865475; +constexpr double ONE_SUB_SQRT2_1 = 1 - 0.7071067811865475; + +static inline double dct_naive_single(double *buf, size_t buf_size, size_t i, bool simd) { + double sum = 0.0f; + for (size_t j = 0; j < buf_size; j++) { + sum += buf[j] * cos(M_PI / buf_size * i * (j + 0.5)); + } + return sum; +} + +static inline double idct_naive_single(double *buf, size_t buf_size, size_t i) { + double sum = 0.0; + for (size_t j = 1; j < buf_size; j++) { + sum += buf[j] * cos(M_PI * j * (2 * i + 1.0) / (2.0 * buf_size)); + } + return sum; +} + +void dct_naive_inner(double *buf, size_t buf_size, double *out, bool simd) { + for (size_t i = 0; i < buf_size; i++) { + out[i] = 2.0 * dct_naive_single(buf, buf_size, i, simd); + } +} + +void idct_naive_inner(double *buf, size_t buf_size, double *out, bool simd) { + for (size_t i = 0; i < buf_size; i++) { + out[i] = (buf[0] + (2 * idct_naive_single(buf, buf_size, i))) / (2.0 * buf_size); + } +} + +py::array dct_naive( + py::array_t in_arr, bool simd) { + py::buffer_info buf_info = in_arr.request(); + double *ptr = static_cast(buf_info.ptr); + size_t buf_size = buf_info.size; + + py::array_t out_arr(buf_info.shape); + + double *out_buf = static_cast(out_arr.request(true).ptr); + dct_naive_inner(ptr, buf_size, out_buf, simd); + + return out_arr; +} + +py::array idct_naive( + py::array_t in_arr, bool simd) { + py::buffer_info buf_info = in_arr.request(); + double *ptr = static_cast(buf_info.ptr); + size_t buf_size = buf_info.size; + + py::array_t out_arr(buf_info.shape); + + double *out_buf = static_cast(out_arr.request(true).ptr); + idct_naive_inner(ptr, buf_size, out_buf, simd); + + return out_arr; +} + diff --git a/src/fdct.cpp b/src/fdct.cpp deleted file mode 100644 index 5163348..0000000 --- a/src/fdct.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include -#include -#include -#include -#include - -namespace py = pybind11; -#define _USE_MATH_DEFINES -#include -#include - -using data_t = double; - -constexpr data_t SQRT2_1 = 0.7071067811865475; -constexpr data_t ONE_SUB_SQRT2_1 = 1 - 0.7071067811865475; - -static inline data_t e(int n) { - // if (n == 0) - // return SQRT2_1; - return 1.0f; -} - - -static inline data_t dct_naive_single(data_t *buf, size_t buf_size, size_t i) { - data_t sum = 0.0f; - for (size_t j = 0; j < buf_size; j++) { - // sum += buf[j] * cos((2 * j + 1) * i * M_PI / (2 * buf_size)); - sum += buf[j] * cos(M_PI / buf_size * i * (j + 0.5)); - } - return sum; -} - -static inline data_t idct_naive_single(data_t *buf, size_t buf_size, size_t i) { - data_t sum = 0.0; - for (size_t j = 0; j < buf_size; j++) { - sum += buf[j] * cos(M_PI * i * (2 * j + 1.0) / (2 * buf_size)); - } - return sum; -} - -void dct_naive(data_t *buf, size_t buf_size, data_t *out) { - - for (int i = 0; i < buf_size; i++) { - out[i] = 2.0 * e(i) * dct_naive_single(buf, buf_size, i); - } -} - -void idct_naive(data_t *buf, size_t buf_size, data_t *out) { - - for (int i = 0; i < buf_size; i++) { - out[i] = (buf[0] + 2 * idct_naive_single(buf, buf_size, i)) / buf_size; - } -} - -double err(data_t* buf1, data_t* buf2, size_t buf_size) { - double sum = 0.0; - for(int i = 0; i < buf_size; i++) - sum += abs(buf1[i] - buf2[i]); - return sum; -} - -py::array fct(py::array_t in_arr) { - py::buffer_info buf_info = in_arr.request(); - double *ptr = static_cast(buf_info.ptr); - size_t buf_size = buf_info.size; - - std::cout << "fct: Get array with " << buf_info.size << std::endl; - std::cout << "buf: " << ptr[0] << " " << ptr[1] << std::endl; - - py::array_t out_arr(buf_info.shape); - - double *out_buf = static_cast(out_arr.request(true).ptr); - dct_naive(ptr, buf_size, out_buf); - - return out_arr; -} - -py::array ifct(py::array_t in_arr) { - py::buffer_info buf_info = in_arr.request(); - double *ptr = static_cast(buf_info.ptr); - size_t buf_size = buf_info.size; - - std::cout << "ifct: Get array with " << buf_info.size << std::endl; - std::cout << "buf: " << ptr[0] << " " << ptr[1] << std::endl; - - py::array_t out_arr(buf_info.shape); - - double *out_buf = static_cast(out_arr.request(true).ptr); - idct_naive(ptr, buf_size, out_buf); - - return out_arr; - -} - -PYBIND11_MODULE(fdct, m) { - m.def("fct", &fct, "Compute fast discrete cosine transformation."); - m.def("ifct", &ifct, "Compute inverse fast discrete cosine transformation."); -} diff --git a/src/fdct.hpp b/src/fdct.hpp new file mode 100644 index 0000000..476d7fc --- /dev/null +++ b/src/fdct.hpp @@ -0,0 +1,111 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// #include "FftComplex.hpp" +#include "complex_array.hpp" +#include "fdct_inner.hpp" +#include "fft.hpp" + +namespace py = pybind11; +#define _USE_MATH_DEFINES + +// Wrapper function to accept and return NumPy arrays +py::array fdct( + py::array_t in_arr, bool simd) { + // Request a buffer from the input array + py::buffer_info buf = in_arr.request(); + // Check if the input array is 1-dimensional or 2-dimensional + + if (buf.ndim > 2) { + throw std::runtime_error( + "Input array must be 1-dimensional or 2-dimensional."); + } + + // Convert input NumPy array to std::vector + size_t N = buf.shape[0]; + ComplexArray input_data(N); + std::copy(static_cast(buf.ptr), + static_cast(buf.ptr) + N, input_data.get_real_ptr()); + std::fill(input_data.get_imag_ptr(), input_data.get_imag_ptr() + N, 0.0); + // Perform FCT + double *result = new double[N]; + fct_inner(input_data, result, simd); + + // Convert result back to NumPy array + return py::array_t(N, result); +} + +std::vector idct_inner(const std::vector &c) { + size_t N = c.size(); + std::vector x(N); + // auto u = reorderOutput(c); + ComplexArray u(N); + // Allocate FFTW input/output arrays + fftw_complex *fft_in = + (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N); + fftw_complex *fft_out = + (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N); + + // Initialize FFTW plan + fftw_plan plan = + fftw_plan_dft_1d(N, fft_in, fft_out, FFTW_BACKWARD, FFTW_ESTIMATE); + + // Fill FFTW input array + for (size_t n = 0; n < N; ++n) { + fft_in[n][0] = c[n]; // Real part + fft_in[n][1] = 0.0; // Imaginary part + } + + // Execute IFFT + fftw_execute(plan); + + // Compute the IDCT using the IFFT output + for (size_t k = 0; k < N; ++k) { + double real = fft_out[k][0]; + double imag = fft_out[k][1]; + x[k] = (real * cos(M_PI * k / (2.0 * N)) + + imag * sin(M_PI * k / (2.0 * N))) / + N; + } + + // Free FFTW resources + fftw_destroy_plan(plan); + fftw_free(fft_in); + fftw_free(fft_out); + + return x; +} + +// Wrapper function to accept and return NumPy arrays +py::array ifdct( + py::array_t in_arr, bool simd) { + // Request a buffer from the input array + py::buffer_info buf = in_arr.request(); + if (buf.ndim != 1) { + throw std::runtime_error("Input array must be 1-dimensional."); + } + + // Convert input NumPy array to std::vector + size_t N = buf.shape[0]; + // std::vector input_data(static_cast(buf.ptr), + // static_cast(buf.ptr) + N); + ComplexArray input_data(N); + std::copy(static_cast(buf.ptr), + static_cast(buf.ptr) + N, input_data.get_real_ptr()); + + // Perform IDCT + double *result = new double[N]; + ifdct_inner(input_data, result, simd); + // Convert result back to NumPy array + return py::array_t(N, result); +} diff --git a/src/fdct_inner.cpp b/src/fdct_inner.cpp new file mode 100644 index 0000000..4b717dd --- /dev/null +++ b/src/fdct_inner.cpp @@ -0,0 +1,106 @@ + +#include + +#include +#include +#include +#include +#include + +#include "complex_array.hpp" +#include "fdct_inner.hpp" +#include "fft.hpp" + +// Function to compute the Fast Cosine Transform + +ComplexArray reorderInput(const ComplexArray &x) { + size_t N = x.size(); + ComplexArray u(N); + const double *x_real = x.get_real_ptr(); + const double *x_imag = x.get_imag_ptr(); + double *u_real = u.get_real_ptr(); + double *u_imag = u.get_imag_ptr(); + + for (size_t n = 0; n < N / 2; ++n) { + u_real[n] = x_real[2 * n]; + u_imag[n] = x_imag[2 * n]; + u_real[N - n - 1] = x_real[2 * n + 1]; + u_imag[N - n - 1] = x_imag[2 * n + 1]; + } + if (N % 2 != 0) { + u_real[N / 2] = x_real[N - 1]; + u_imag[N / 2] = x_imag[N - 1]; + } + return u; +} + +ComplexArray reorderOutput(const ComplexArray &v) { + size_t N = v.size(); + ComplexArray x(N); + + // Access the real and imaginary parts directly + const double *v_real = v.get_real_ptr(); + const double *v_imag = v.get_imag_ptr(); + double *x_real = x.get_real_ptr(); + double *x_imag = x.get_imag_ptr(); + + // Reordering according to the formula + for (size_t n = 0; n < N / 2; ++n) { + x_real[2 * n] = v_real[n]; + x_imag[2 * n] = v_imag[n]; + x_real[2 * n + 1] = v_real[N - n - 1]; + x_imag[2 * n + 1] = v_imag[N - n - 1]; + } + if (N % 2 != 0) { + x_real[N - 1] = v_real[N / 2]; + x_imag[N - 1] = v_imag[N / 2]; + } + + return x; +} + +void idctPreprocess(ComplexArray &x) { + size_t N = x.size(); + auto real = x.get_real_ptr(); + auto imag = x.get_imag_ptr(); + + real[0] /= 2; + for (size_t n = 0; n < N; ++n) { + double tmp = n * M_PI / (2 * N); + double tmp_real = real[n]; + real[n] = tmp_real * cos(tmp); + imag[n] = -tmp_real * sin(tmp); + } +} + + +void fct_inner(const ComplexArray &x, double *result, bool simd) { + size_t N = x.size(); + auto fft_in = reorderInput(x); + + // Perform FFT + fft::transform(fft_in, false); + + // Compute the DCT using the FFT output + double *x_real = fft_in.get_real_ptr(); + double *x_imag = fft_in.get_imag_ptr(); + for (size_t k = 0; k < N; ++k) { + result[k] = + (x_real[k] * cos(M_PI * k / (2 * N)) - x_imag[k] * sin(-M_PI * k / (2 * N))) * + 2; + } +} + +void ifdct_inner(const ComplexArray &c, double *result, bool simd) { + size_t N = c.size(); + ComplexArray fft_in = c; + idctPreprocess(fft_in); + + // Perform FFT + fft::transform(fft_in, false); + + auto x_ = reorderOutput(fft_in); + for (size_t n = 0; n < N; ++n) { + result[n] = x_.get_real_ptr()[n] / N; + } +} diff --git a/src/fdct_inner.hpp b/src/fdct_inner.hpp new file mode 100644 index 0000000..38dd6a3 --- /dev/null +++ b/src/fdct_inner.hpp @@ -0,0 +1,17 @@ + +#pragma once +#include +#include +#include +#include +#include + +#include "complex_array.hpp" + +// Function to compute the Fast Cosine Transform + +ComplexArray reorderInput(const ComplexArray &x); + +void fct_inner(const ComplexArray &x, double *result, bool simd); + +void ifdct_inner(const ComplexArray &c, double *result, bool simd); diff --git a/src/fdct_module.cpp b/src/fdct_module.cpp new file mode 100644 index 0000000..38c5d1e --- /dev/null +++ b/src/fdct_module.cpp @@ -0,0 +1,16 @@ +#include +#include +#include +#include "fdct.hpp" +#include "dct_naive.hpp" + +namespace py = pybind11; +#define _USE_MATH_DEFINES +#include + +PYBIND11_MODULE(fdct, m) { + m.def("dct_naive", &dct_naive, "Compute fast discrete cosine transformation."); + m.def("idct_naive", &idct_naive, "Compute inverse fast discrete cosine transformation."); + m.def("fdct", &fdct, "Compute fast discrete cosine transformation."); + m.def("ifdct", &ifdct, "Compute inverse fast discrete cosine transformation."); +} diff --git a/src/fft.cpp b/src/fft.cpp new file mode 100644 index 0000000..0e8bfb0 --- /dev/null +++ b/src/fft.cpp @@ -0,0 +1,156 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "complex_array.hpp" +#include "fft.hpp" + +static size_t reverseBits(size_t val, int width); + +static ComplexArray mkExpTable(size_t n, bool inverse) { + ComplexArray expTable(n); + double *real = expTable.get_real_ptr(); + double *imag = expTable.get_imag_ptr(); + for (size_t i = 0; i < n; i++) { + double angle = (inverse ? 2 : -2) * M_PI * i / n; + real[i] = std::cos(angle); + imag[i] = std::sin(angle); + } + return expTable; +} + +void fft::transformRadix2(ComplexArray &x, bool inverse, bool simd) { + int levels = 0; + size_t n = x.size(); + double *real = x.get_real_ptr(); + double *imag = x.get_imag_ptr(); + + for (size_t temp = n; temp > 1U; temp >>= 1) + levels++; + if (static_cast(1U) << levels != n) + throw std::domain_error("Length is not a power of 2"); + + ComplexArray expTable = mkExpTable(n, inverse); + + + for (size_t i = 0; i < n; i++) { + size_t j = reverseBits(i, levels); + if (j > i) { + std::swap(real[i], real[j]); + std::swap(imag[i], imag[j]); + } + } + + double *real_expTable = expTable.get_real_ptr(); + double *imag_expTable = expTable.get_imag_ptr(); + + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + double temp_real = real[j + halfsize] * real_expTable[k] - + imag[j + halfsize] * imag_expTable[k]; + double temp_imag = real[j + halfsize] * imag_expTable[k] + + imag[j + halfsize] * real_expTable[k]; + real[j + halfsize] = real[j] - temp_real; + imag[j + halfsize] = imag[j] - temp_imag; + real[j] += temp_real; + imag[j] += temp_imag; + } + } + if (size == n) + break; + } +} + +void fft::transformBluestein(ComplexArray &x, bool inverse, bool simd) { + size_t n = x.size(); + double *real = x.get_real_ptr(); + double *imag = x.get_imag_ptr(); + size_t m = 1; + while (m / 2 <= n) { + if (m > SIZE_MAX / 2) + throw std::length_error("Vector too large"); + m *= 2; + } + + // Trigonometric table + ComplexArray expTable(n); + double *real_expTable = expTable.get_real_ptr(); + double *imag_expTable = expTable.get_imag_ptr(); + for (size_t i = 0; i < n; i++) { + uintmax_t temp = static_cast(i) * i; + temp %= static_cast(n) * 2; + double angle = (inverse ? M_PI : -M_PI) * temp / n; + expTable[i] = std::polar(1.0, angle); + } + ComplexArray a(m), b(m); + double *real_avec = a.get_real_ptr(); + double *imag_avec = a.get_imag_ptr(); + + + // TODO: Use SIMD + for (size_t i = 0; i < n; i++) { + real_avec[i] = real[i] * real_expTable[i] - imag[i] * imag_expTable[i]; + imag_avec[i] = real[i] * imag_expTable[i] + imag[i] * real_expTable[i]; + // a[i] = Complex(x[i]) * Complex(expTable[i]); + } + + double *real_bvec = b.get_real_ptr(); + double *imag_bvec = b.get_imag_ptr(); + real_bvec[0] = real_expTable[0]; + imag_bvec[0] = imag_expTable[0]; + + for (size_t i = 1; i < n; ++i) { + real_bvec[i] = real_bvec[m - i] = real_expTable[i]; + imag_bvec[i] = imag_bvec[m - i] = -imag_expTable[i]; + } + + convolove(a, b, simd); + // x = c * expTable; + for (size_t i = 0; i < n; i++) { + real[i] = a.get_real_ptr()[i] * real_expTable[i] - a.get_imag_ptr()[i] * imag_expTable[i]; + imag[i] = a.get_real_ptr()[i] * imag_expTable[i] + a.get_imag_ptr()[i] * real_expTable[i]; + } +} + + +// compute the convolution of two complex arrays and set the result to x +void fft::convolove(ComplexArray &_x, const ComplexArray &_y, bool simd) { + size_t n = _x.size(); + if (n != _y.size()) + throw std::domain_error("Mismatched lengths"); + + ComplexArray& x = _x; + ComplexArray y = _y; + + transform(x, false, simd); + transform(y, false, simd); + + x.multiply(y, simd); + transform(x, true, simd); + x.scale(1.0 / n, simd); +} + +void fft::transform(ComplexArray &x, bool inverse, bool simd) { + size_t n = x.size(); + if (n == 0) + return; + else if ((n & (n - 1)) == 0) // Is power of 2 + transformRadix2(x, inverse, simd); + else // More complicated algorithm for arbitrary sizes + transformBluestein(x, inverse, simd); +} + +static size_t reverseBits(size_t val, int width) { + size_t result = 0; + for (int i = 0; i < width; i++, val >>= 1) + result = (result << 1) | (val & 1U); + return result; +} diff --git a/src/fft.hpp b/src/fft.hpp new file mode 100644 index 0000000..868eee7 --- /dev/null +++ b/src/fft.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include "complex_array.hpp" + +namespace fft { +void transformRadix2(ComplexArray &x, bool inverse, bool simd = false); + +void transformBluestein(ComplexArray &x, bool inverse, bool simd = false); + +void transform(ComplexArray &x, bool inverse, bool simd = false); + +void convolove(ComplexArray &x, const ComplexArray &y, bool simd = false); +} // namespace fft diff --git a/test_dct.py b/test_dct.py index a8707cc..092c6f7 100644 --- a/test_dct.py +++ b/test_dct.py @@ -1,25 +1,28 @@ +import pytest import numpy as np import fdct -import scipy +import scipy.fft as fft +import time -def test_dct(): - inp = np.random.rand(1024) - gt = scipy.fft.dct(inp) - ans = fdct.fct(inp) - s = sum((gt - ans) ** 2) / 1024 - assert s < 0.1 +# Parameters +test_size_seq = [10, 64, 127, 256, 1023, 2048, 4097, 8192, 8193] +tolerance = 1e-6 # Tolerance for individual element comparison +simd = [False, True] # SIMD flag -def test_idct(): - inp = np.random.rand(1024) - gt = scipy.fft.idct(inp) - ans = fdct.ifct(inp) - print(gt) - print(ans) - s = sum((gt - ans) ** 2) / 1024 - assert s < 0.1 - +# List of test functions and their corresponding references +test_cases = [ + ("dct_naive", fdct.dct_naive, fft.dct), + ("idct_naive", fdct.idct_naive, fft.idct), + ("fdct", fdct.fdct, fft.dct), + ("ifdct", fdct.ifdct, fft.idct), +] + +@pytest.mark.parametrize("size", test_size_seq) +@pytest.mark.parametrize("name, func, ref_func", test_cases) +@pytest.mark.parametrize("simd", simd) +def test_transform(name, func, ref_func, size, simd): + inp = np.random.rand(size) + gt = ref_func(inp) + ans = func(inp, simd) + np.testing.assert_allclose(ans, gt, atol=tolerance, rtol=0) - -if __name__ == "__main__": - test_dct() - test_idct()