From 0e215d4a1361caa7589cef4d179b38ba50105afb Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 28 Jan 2026 09:55:45 -0700 Subject: [PATCH] Implement LWG4302; fix #320 * Remove vector_sum_of_squares from the public interface. This implements the proposed fix for LWG4302, that is now in the Working Draft for C++26. * Fix Issue #320. If the "init" argument of vector_two_norm is nonzero, return the square root of the ((square of init) and the (sum of squares of the absolute values of the elements of x)). --- examples/kokkos-based/CMakeLists.txt | 1 - .../vector_sum_of_squares_kokkos.cpp | 52 -------- .../__p1673_bits/blas1_vector_norm2.hpp | 31 +++-- .../blas1_vector_sum_of_squares.hpp | 23 ++-- tests/kokkos-based/CMakeLists.txt | 3 - .../vector_sum_of_squares_kokkos.cpp | 111 ------------------ tests/native/norm2.cpp | 14 +-- .../blas1_vector_sum_of_squares_kk.hpp | 2 +- 8 files changed, 42 insertions(+), 195 deletions(-) delete mode 100644 examples/kokkos-based/vector_sum_of_squares_kokkos.cpp delete mode 100644 tests/kokkos-based/vector_sum_of_squares_kokkos.cpp diff --git a/examples/kokkos-based/CMakeLists.txt b/examples/kokkos-based/CMakeLists.txt index d88f634b..fc46076f 100644 --- a/examples/kokkos-based/CMakeLists.txt +++ b/examples/kokkos-based/CMakeLists.txt @@ -5,6 +5,5 @@ linalg_add_example(dotc_kokkos) linalg_add_example(idx_abs_max_kokkos) linalg_add_example(vector_norm2_kokkos) linalg_add_example(vector_abs_sum_kokkos) -linalg_add_example(vector_sum_of_squares_kokkos) linalg_add_example(scale_kokkos) linalg_add_example(matrix_vector_product_kokkos) diff --git a/examples/kokkos-based/vector_sum_of_squares_kokkos.cpp b/examples/kokkos-based/vector_sum_of_squares_kokkos.cpp deleted file mode 100644 index 1820b89a..00000000 --- a/examples/kokkos-based/vector_sum_of_squares_kokkos.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// ************************************************************************ -//@HEADER - -#include -#include - -int main(int argc, char* argv[]) -{ - std::cout << "vector_sum_of_squares example: calling kokkos-kernels" << std::endl; - - std::size_t N = 20; - Kokkos::initialize(argc,argv); - { - using value_type = double; - Kokkos::View x_view("x",N); - value_type* x_ptr = x_view.data(); - - using dyn_1d_ext_type = std::experimental::extents; - using mdspan_type = std::experimental::mdspan; - mdspan_type x(x_ptr,N); - for(std::size_t i=0; i init_value{1., 1.}; - - const auto res = stdla::vector_sum_of_squares(x, init_value); - std::cout << "Default result: " << res.scaling_factor << " " << res.scaled_sum_of_squares << '\n'; - - // FRIZZI: Oct 27: kk currently not impl yet, just placeholder to ensure hook forwards correctly - const auto res_kk = stdla::vector_sum_of_squares(KokkosKernelsSTD::kokkos_exec<>(), x, init_value); - (void)res_kk; - //std::cout << "Kokkos result: " << res_kk.scaling_factor << " " << res_kk.scaled_sum_of_squares << '\n'; - - } - Kokkos::finalize(); -} diff --git a/include/experimental/__p1673_bits/blas1_vector_norm2.hpp b/include/experimental/__p1673_bits/blas1_vector_norm2.hpp index 61d657ac..147f523e 100644 --- a/include/experimental/__p1673_bits/blas1_vector_norm2.hpp +++ b/include/experimental/__p1673_bits/blas1_vector_norm2.hpp @@ -18,6 +18,7 @@ #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_VECTOR_NORM2_HPP_ #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_VECTOR_NORM2_HPP_ +#include "abs_if_needed.hpp" #include "blas1_vector_sum_of_squares.hpp" #include #include @@ -60,18 +61,26 @@ Scalar vector_two_norm( mdspan, Layout, Accessor> x, Scalar init) { - // Initialize the sum of squares result - sum_of_squares_result ssq_init; - ssq_init.scaling_factor = Scalar{}; - // FIXME (Hoemmen 2021/05/27) We'll need separate versions of this - // for types whose "one" we don't know how to construct. - ssq_init.scaled_sum_of_squares = 1.0; - - // Compute the sum of squares using an algorithm that avoids - // underflow and overflow by scaling. - auto ssq_res = vector_sum_of_squares(exec, x, ssq_init); using std::sqrt; - return init + ssq_res.scaling_factor * sqrt(ssq_res.scaled_sum_of_squares); + + if constexpr (std::is_floating_point_v && std::is_floating_point_v) { + // Initialize the sum of squares result + detail::sum_of_squares_result ssq_init; + ssq_init.scaling_factor = init == Scalar{} ? Scalar{} : impl::abs_if_needed(init); + ssq_init.scaled_sum_of_squares = init == Scalar{} ? Scalar{} : Scalar{1.0}; + + // Compute the sum of squares using an algorithm that avoids + // underflow and overflow by scaling. + auto ssq_res = detail::vector_sum_of_squares(exec, x, ssq_init); + return ssq_res.scaling_factor * sqrt(ssq_res.scaled_sum_of_squares); + } + else { + Scalar result = impl::abs_if_needed(init) * impl::abs_if_needed(init); + for (SizeType i = 0; i < x.extent(0); ++i) { + result += impl::abs_if_needed(x(i)) * impl::abs_if_needed(x(i)); + } + return sqrt(result); + } } template struct sum_of_squares_result { @@ -53,19 +55,23 @@ struct is_custom_vector_sum_of_squares_avail< && ! impl::is_inline_exec_v > > - : std::true_type{}; + : std::bool_constant< + std::is_floating_point_v && + std::is_floating_point_v + > + {}; } // end anonymous namespace template sum_of_squares_result vector_sum_of_squares( impl::inline_exec_t&& /* exec */, - mdspan, Layout, Accessor> x, + mdspan, Layout, Accessor> x, sum_of_squares_result init) { using std::abs; @@ -79,7 +85,7 @@ sum_of_squares_result vector_sum_of_squares( Scalar scale = init.scaling_factor; Scalar ssq = init.scaled_sum_of_squares; - for (SizeType i = 0; i < x.extent(0); ++i) { + for (IndexType i = 0; i < x.extent(0); ++i) { if (abs(x(i)) != 0.0) { const auto absxi = abs(x(i)); if (scale < absxi) { @@ -102,14 +108,14 @@ sum_of_squares_result vector_sum_of_squares( template sum_of_squares_result vector_sum_of_squares( ExecutionPolicy&& exec, - mdspan, Layout, Accessor> v, + mdspan, Layout, Accessor> v, sum_of_squares_result init) { constexpr bool use_custom = is_custom_vector_sum_of_squares_avail< @@ -125,18 +131,19 @@ sum_of_squares_result vector_sum_of_squares( } template sum_of_squares_result vector_sum_of_squares( - mdspan, Layout, Accessor> v, + mdspan, Layout, Accessor> v, sum_of_squares_result init) { return vector_sum_of_squares(impl::default_exec_t{}, v, init); } +} // end namespace detail } // end namespace linalg } // end inline namespace __p1673_version_0 diff --git a/tests/kokkos-based/CMakeLists.txt b/tests/kokkos-based/CMakeLists.txt index aa247c04..28cfcd84 100644 --- a/tests/kokkos-based/CMakeLists.txt +++ b/tests/kokkos-based/CMakeLists.txt @@ -80,9 +80,6 @@ linalg_add_test_kokkos( linalg_add_test_kokkos( vector_norm2_kokkos "vector_norm2: kokkos impl") -linalg_add_test_kokkos( - vector_sum_of_squares_kokkos - "vector_sum_of_squares: kokkos impl") linalg_add_test_kokkos( vector_abs_sum_kokkos diff --git a/tests/kokkos-based/vector_sum_of_squares_kokkos.cpp b/tests/kokkos-based/vector_sum_of_squares_kokkos.cpp deleted file mode 100644 index 15614147..00000000 --- a/tests/kokkos-based/vector_sum_of_squares_kokkos.cpp +++ /dev/null @@ -1,111 +0,0 @@ - -#include "gtest_fixtures.hpp" -#include "helpers.hpp" -#include - -namespace -{ - -template -std::experimental::linalg::sum_of_squares_result -vector_sum_of_squares_gold_solution(x_t x, - std::experimental::linalg::sum_of_squares_result init) -{ - using std::abs; - - T scale = init.scaling_factor; - for (std::size_t i = 0; i < x.extent(0); ++i) { - scale = std::max(scale, abs(x(i))); - } - - T ssq = (init.scaling_factor*init.scaling_factor*init.scaled_sum_of_squares)/(scale*scale); - T s=0.; - for (std::size_t i = 0; i < x.extent(0); ++i) { - const auto absxi = abs(x(i)); - const auto quotient = absxi/scale; - ssq = ssq + quotient * quotient; - s += absxi*absxi; - } - - std::experimental::linalg::sum_of_squares_result result; - result.scaled_sum_of_squares = ssq; - result.scaling_factor = scale; - - // verify that things are consistent according to definition - // scaled_sum_of_squares: is a value such that - // scaling_factor^2 * scaled_sum_of_squares equals the - // sum of squares of abs(x[i]) plus init.scaling_factor^2 * init.scaled_sum_of_squares. - // - const auto lhs = scale*scale*ssq; - const auto rhs = s+init.scaling_factor*init.scaling_factor*init.scaled_sum_of_squares; - std::cout << "Gold check : " << lhs << " " << rhs << std::endl; - if constexpr(std::is_same_v){ - EXPECT_NEAR(lhs, rhs, 1e-2); - } - if constexpr(std::is_same_v){ - EXPECT_NEAR(lhs, rhs, 1e-9); - } - - return result; -} - -template -void kokkos_blas1_vector_sum_of_squares_test_impl(x_t x, - std::experimental::linalg::sum_of_squares_result initValue) -{ - namespace stdla = std::experimental::linalg; - - using value_type = typename x_t::value_type; - const std::size_t extent = x.extent(0); - - // copy x to verify they are not changed after kernel - auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); - - const auto gold = vector_sum_of_squares_gold_solution(x, initValue); - auto result = stdla::vector_sum_of_squares(KokkosKernelsSTD::kokkos_exec<>(), - x, initValue); - - if constexpr(std::is_same_v) - { - EXPECT_NEAR(result.scaled_sum_of_squares, gold.scaled_sum_of_squares, 1e-3); - EXPECT_NEAR(result.scaling_factor, gold.scaling_factor, 1e-3); - } - - if constexpr(std::is_same_v) - { - EXPECT_NEAR(result.scaled_sum_of_squares, gold.scaled_sum_of_squares, 1e-9); - EXPECT_NEAR(result.scaling_factor, gold.scaling_factor, 1e-9); - } - - // x should not change after kernel - for (std::size_t i=0; i init_value{2.5f, 1.2f}; - kokkos_blas1_vector_sum_of_squares_test_impl(x, init_value); -} - -TEST_F(blas1_signed_double_fixture, kokkos_vector_sum_of_squares) -{ - namespace stdla = std::experimental::linalg; - stdla::sum_of_squares_result init_value{3.0, 1.2}; - kokkos_blas1_vector_sum_of_squares_test_impl(x, init_value); -} - -TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_sum_of_squares) -{ - namespace stdla = std::experimental::linalg; - using kc_t = Kokkos::complex; - using stdc_t = value_type; - if constexpr (alignof(value_type) == alignof(kc_t)){ - stdla::sum_of_squares_result init_value{2.5, 1.2}; - kokkos_blas1_vector_sum_of_squares_test_impl(x, init_value); - } -} diff --git a/tests/native/norm2.cpp b/tests/native/norm2.cpp index 5c3877bb..f489a28b 100644 --- a/tests/native/norm2.cpp +++ b/tests/native/norm2.cpp @@ -31,15 +31,14 @@ namespace { std::vector storage(vectorSize); vector_t x(storage.data(), vectorSize); - // Testing for absolute equality const auto normResult = vector_two_norm(x, mag_t{}); static_assert( std::is_same_v, mag_t> ); const mag_t expectedNormResult{}; EXPECT_EQ( expectedNormResult, normResult ); - // Make sure that init always gets added to the result. - const mag_t normResultPlusOne = vector_two_norm(x, mag_t(1.0)); - EXPECT_EQ( expectedNormResult + mag_t(1.0), normResultPlusOne ); + const mag_t normResultPlusOne = vector_two_norm(x, mag_t(3.0)); + const mag_t expectedNormResultPlusOne = mag_t(3.0); + EXPECT_EQ( expectedNormResultPlusOne, normResultPlusOne ); // Test 'auto' overload. const auto normResultAuto = vector_two_norm(x); @@ -63,15 +62,14 @@ namespace { x[0] = -3; - // Testing for absolute equality const auto normResult = vector_two_norm(x, mag_t{}); static_assert( std::is_same_v, mag_t> ); const mag_t expectedNormResult = abs( x[0] ); EXPECT_EQ( expectedNormResult, normResult ); - // Make sure that init always gets added to the result. - const mag_t normResultPlusOne = vector_two_norm(x, mag_t(1.0)); - EXPECT_EQ( expectedNormResult + mag_t(1.0), normResultPlusOne ); + const mag_t normResultPlusOne = vector_two_norm(x, mag_t(4.0)); + const mag_t expectedNormResultPlusOne = mag_t(5.0); + EXPECT_EQ( expectedNormResultPlusOne, normResultPlusOne ); // Test 'auto' overload. const auto normResultAuto = vector_two_norm(x); diff --git a/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_vector_sum_of_squares_kk.hpp b/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_vector_sum_of_squares_kk.hpp index 413fda4c..2e8e8475 100644 --- a/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_vector_sum_of_squares_kk.hpp +++ b/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_vector_sum_of_squares_kk.hpp @@ -4,7 +4,7 @@ #include "signal_kokkos_impl_called.hpp" -namespace KokkosKernelsSTD { +namespace KokkosKernelsSTD::detail { template