Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion examples/kokkos-based/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@ linalg_add_example(dotc_kokkos)
linalg_add_example(idx_abs_max_kokkos)
linalg_add_example(vector_norm2_kokkos)
linalg_add_example(vector_abs_sum_kokkos)
linalg_add_example(vector_sum_of_squares_kokkos)
linalg_add_example(scale_kokkos)
linalg_add_example(matrix_vector_product_kokkos)
52 changes: 0 additions & 52 deletions examples/kokkos-based/vector_sum_of_squares_kokkos.cpp

This file was deleted.

31 changes: 20 additions & 11 deletions include/experimental/__p1673_bits/blas1_vector_norm2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_VECTOR_NORM2_HPP_
#define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_VECTOR_NORM2_HPP_

#include "abs_if_needed.hpp"
#include "blas1_vector_sum_of_squares.hpp"
#include <cmath>
#include <cstdlib>
Expand Down Expand Up @@ -60,18 +61,26 @@ Scalar vector_two_norm(
mdspan<ElementType, extents<SizeType, ext0>, Layout, Accessor> x,
Scalar init)
{
// Initialize the sum of squares result
sum_of_squares_result<Scalar> ssq_init;
ssq_init.scaling_factor = Scalar{};
// FIXME (Hoemmen 2021/05/27) We'll need separate versions of this
// for types whose "one" we don't know how to construct.
ssq_init.scaled_sum_of_squares = 1.0;

// Compute the sum of squares using an algorithm that avoids
// underflow and overflow by scaling.
auto ssq_res = vector_sum_of_squares(exec, x, ssq_init);
using std::sqrt;
return init + ssq_res.scaling_factor * sqrt(ssq_res.scaled_sum_of_squares);

if constexpr (std::is_floating_point_v<Scalar> && std::is_floating_point_v<typename decltype(x)::value_type>) {
// Initialize the sum of squares result
detail::sum_of_squares_result<Scalar> ssq_init;
ssq_init.scaling_factor = init == Scalar{} ? Scalar{} : impl::abs_if_needed(init);
ssq_init.scaled_sum_of_squares = init == Scalar{} ? Scalar{} : Scalar{1.0};

// Compute the sum of squares using an algorithm that avoids
// underflow and overflow by scaling.
auto ssq_res = detail::vector_sum_of_squares(exec, x, ssq_init);
return ssq_res.scaling_factor * sqrt(ssq_res.scaled_sum_of_squares);
}
else {
Scalar result = impl::abs_if_needed(init) * impl::abs_if_needed(init);
for (SizeType i = 0; i < x.extent(0); ++i) {
result += impl::abs_if_needed(x(i)) * impl::abs_if_needed(x(i));
}
return sqrt(result);
}
}

template<class ExecutionPolicy,
Expand Down
23 changes: 15 additions & 8 deletions include/experimental/__p1673_bits/blas1_vector_sum_of_squares.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
inline namespace __p1673_version_0 {
namespace linalg {

namespace detail {

// Scaled sum of squares of a vector's elements
template<class Scalar>
struct sum_of_squares_result {
Expand Down Expand Up @@ -53,19 +55,23 @@ struct is_custom_vector_sum_of_squares_avail<
&& ! impl::is_inline_exec_v<Exec>
>
>
: std::true_type{};
: std::bool_constant<
std::is_floating_point_v<typename x_t::value_type> &&
std::is_floating_point_v<Scalar>
>
{};

} // end anonymous namespace

template<class ElementType,
class SizeType,
class IndexType,
::std::size_t ext0,
class Layout,
class Accessor,
class Scalar>
sum_of_squares_result<Scalar> vector_sum_of_squares(
impl::inline_exec_t&& /* exec */,
mdspan<ElementType, extents<SizeType, ext0>, Layout, Accessor> x,
mdspan<ElementType, extents<IndexType, ext0>, Layout, Accessor> x,
sum_of_squares_result<Scalar> init)
{
using std::abs;
Expand All @@ -79,7 +85,7 @@ sum_of_squares_result<Scalar> vector_sum_of_squares(

Scalar scale = init.scaling_factor;
Scalar ssq = init.scaled_sum_of_squares;
for (SizeType i = 0; i < x.extent(0); ++i) {
for (IndexType i = 0; i < x.extent(0); ++i) {
if (abs(x(i)) != 0.0) {
const auto absxi = abs(x(i));
if (scale < absxi) {
Expand All @@ -102,14 +108,14 @@ sum_of_squares_result<Scalar> vector_sum_of_squares(

template<class ExecutionPolicy,
class ElementType,
class SizeType,
class IndexType,
::std::size_t ext0,
class Layout,
class Accessor,
class Scalar>
sum_of_squares_result<Scalar> vector_sum_of_squares(
ExecutionPolicy&& exec,
mdspan<ElementType, extents<SizeType, ext0>, Layout, Accessor> v,
mdspan<ElementType, extents<IndexType, ext0>, Layout, Accessor> v,
sum_of_squares_result<Scalar> init)
{
constexpr bool use_custom = is_custom_vector_sum_of_squares_avail<
Expand All @@ -125,18 +131,19 @@ sum_of_squares_result<Scalar> vector_sum_of_squares(
}

template<class ElementType,
class SizeType,
class IndexType,
::std::size_t ext0,
class Layout,
class Accessor,
class Scalar>
sum_of_squares_result<Scalar> vector_sum_of_squares(
mdspan<ElementType, extents<SizeType, ext0>, Layout, Accessor> v,
mdspan<ElementType, extents<IndexType, ext0>, Layout, Accessor> v,
sum_of_squares_result<Scalar> init)
{
return vector_sum_of_squares(impl::default_exec_t{}, v, init);
}

} // end namespace detail

} // end namespace linalg
} // end inline namespace __p1673_version_0
Expand Down
3 changes: 0 additions & 3 deletions tests/kokkos-based/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,6 @@ linalg_add_test_kokkos(
linalg_add_test_kokkos(
vector_norm2_kokkos
"vector_norm2: kokkos impl")
linalg_add_test_kokkos(
vector_sum_of_squares_kokkos
"vector_sum_of_squares: kokkos impl")

linalg_add_test_kokkos(
vector_abs_sum_kokkos
Expand Down
111 changes: 0 additions & 111 deletions tests/kokkos-based/vector_sum_of_squares_kokkos.cpp

This file was deleted.

14 changes: 6 additions & 8 deletions tests/native/norm2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,14 @@ namespace {
std::vector<scalar_t> storage(vectorSize);
vector_t x(storage.data(), vectorSize);

// Testing for absolute equality
const auto normResult = vector_two_norm(x, mag_t{});
static_assert( std::is_same_v<std::remove_const_t<decltype(normResult)>, mag_t> );
const mag_t expectedNormResult{};
EXPECT_EQ( expectedNormResult, normResult );

// Make sure that init always gets added to the result.
const mag_t normResultPlusOne = vector_two_norm(x, mag_t(1.0));
EXPECT_EQ( expectedNormResult + mag_t(1.0), normResultPlusOne );
const mag_t normResultPlusOne = vector_two_norm(x, mag_t(3.0));
const mag_t expectedNormResultPlusOne = mag_t(3.0);
EXPECT_EQ( expectedNormResultPlusOne, normResultPlusOne );

// Test 'auto' overload.
const auto normResultAuto = vector_two_norm(x);
Expand All @@ -63,15 +62,14 @@ namespace {

x[0] = -3;

// Testing for absolute equality
const auto normResult = vector_two_norm(x, mag_t{});
static_assert( std::is_same_v<std::remove_const_t<decltype(normResult)>, mag_t> );
const mag_t expectedNormResult = abs( x[0] );
EXPECT_EQ( expectedNormResult, normResult );

// Make sure that init always gets added to the result.
const mag_t normResultPlusOne = vector_two_norm(x, mag_t(1.0));
EXPECT_EQ( expectedNormResult + mag_t(1.0), normResultPlusOne );
const mag_t normResultPlusOne = vector_two_norm(x, mag_t(4.0));
const mag_t expectedNormResultPlusOne = mag_t(5.0);
EXPECT_EQ( expectedNormResultPlusOne, normResultPlusOne );

// Test 'auto' overload.
const auto normResultAuto = vector_two_norm(x);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include "signal_kokkos_impl_called.hpp"

namespace KokkosKernelsSTD {
namespace KokkosKernelsSTD::detail {

template<class ExecSpace,
class ElementType,
Expand Down