From 344878e9dc2ea3ee101d8f1dc2216c44c97ca6ed Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 31 Jan 2022 08:56:48 -0500 Subject: [PATCH 01/41] Allow users to control iteration via the concept of iteration spaces. Changes in the work include: - [x] Internally use linear_space for iterating - [x] Simplify type and value iteration in `state_iterator::build_axis_configs` - [x] Store the iteration space in `axes_metadata` - [x] Expose `tie` and `user` spaces to user - [x] Add tests for `linear`, `tie`, and `user` - [x] Add examples for `tie` and `user` --- docs/benchmarks.md | 37 +++- examples/CMakeLists.txt | 1 + examples/custom_iteration_spaces.cu | 247 +++++++++++++++++++++ nvbench/CMakeLists.txt | 1 + nvbench/axes_metadata.cuh | 29 ++- nvbench/axes_metadata.cxx | 192 ++++++++++++++++ nvbench/axis_iteration_space.cuh | 93 ++++++++ nvbench/axis_iteration_space.cxx | 174 +++++++++++++++ nvbench/benchmark_base.cuh | 15 ++ nvbench/benchmark_base.cxx | 30 +-- nvbench/detail/axes_iterator.cuh | 113 ++++++++++ nvbench/detail/state_generator.cuh | 23 +- nvbench/detail/state_generator.cxx | 227 ++++++++----------- testing/CMakeLists.txt | 1 + testing/axes_iteration_space.cu | 326 ++++++++++++++++++++++++++++ testing/benchmark.cu | 3 +- testing/option_parser.cu | 3 + testing/state_generator.cu | 37 +++- 18 files changed, 1373 insertions(+), 179 deletions(-) create mode 100644 examples/custom_iteration_spaces.cu create mode 100644 nvbench/axis_iteration_space.cuh create mode 100644 nvbench/axis_iteration_space.cxx create mode 100644 nvbench/detail/axes_iterator.cuh create mode 100644 testing/axes_iteration_space.cu diff --git a/docs/benchmarks.md b/docs/benchmarks.md index ef9fb48f..71a9dca8 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -237,9 +237,34 @@ NVBENCH_BENCH_TYPES(benchmark, NVBENCH_TYPE_AXES(input_types, output_types)) ``` This would generate a total of 36 configurations and instantiate the benchmark 6 -times. Keep the rapid growth of these combinations in mind when choosing the -number of values in an axis. See the section about combinatorial explosion for -more examples and information. +times. + +Keep the rapid growth of combinations due to multiple parameter axes in mind when +choosing the number of values in an axis. See the section about combinatorial +explosion for more examples and information. + +## Zipped/Tied Iteration of Value Axes + +At times multiple value axes need to be iterated like they are actually a tuple +or zipped together. To enable this behavior you can request axes to be 'tied' +together. + +```cpp +// InputTypes: {char, int, unsigned int} +// OutputTypes: {float, double} +// NumInputs: {2^10, 2^20, 2^30} +// Quality: {0.5, 1.0} + +using input_types = nvbench::type_list; +using output_types = nvbench::type_list; +NVBENCH_BENCH_TYPES(benchmark, NVBENCH_TYPE_AXES(input_types, output_types)) + .set_type_axes_names({"InputType", "OutputType"}) + .add_int64_axis("NumInputs", {1000, 10000, 100000, 200000, 200000, 200000}) + .add_float64_axis("Quality", {0.05, 0.1, 0.25, 0.5, 0.75, 1.}); +``` + +This tieing reduces the total combinations from 24 to 6, reducing the +combinatorial explosion. # Throughput Measurements @@ -426,9 +451,9 @@ NVBENCH_BENCH_TYPES(my_benchmark, ``` For large configuration spaces like this, pruning some of the less useful -combinations (e.g. 
`sizeof(init_type) < sizeof(output)`) using the techniques -described in the "Skip Uninteresting / Invalid Benchmarks" section can help -immensely with keeping compile / run times manageable. +combinations using the techniques described in the "Zipped/Tied Iteration of Value Axes" +or "Skip Uninteresting / Invalid Benchmarks" section can help immensely with +keeping compile / run times manageable. Splitting a single large configuration space into multiple, more focused benchmarks with reduced dimensionality will likely be worth the effort as well. diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 219fc898..67e91327 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -7,6 +7,7 @@ set(example_srcs stream.cu throughput.cu auto_throughput.cu + custom_iteration_spaces.cu ) # Metatarget for all examples: diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu new file mode 100644 index 00000000..92323fd9 --- /dev/null +++ b/examples/custom_iteration_spaces.cu @@ -0,0 +1,247 @@ +/* + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +// Grab some testing kernels from NVBench: +#include + +// Thrust vectors simplify memory management: +#include + +#include + +//============================================================================== +// Multiple parameters: +// Varies block_size and num_blocks while invoking a naive copy of 256 MiB worth +// of int32_t. +void copy_sweep_grid_shape(nvbench::state &state) +{ + // Get current parameters: + const int block_size = static_cast(state.get_int64("BlockSize")); + const int num_blocks = static_cast(state.get_int64("NumBlocks")); + + // Number of int32s in 256 MiB: + const std::size_t num_values = 256 * 1024 * 1024 / sizeof(nvbench::int32_t); + + // Report throughput stats: + state.add_element_count(num_values); + state.add_global_memory_reads(num_values); + state.add_global_memory_writes(num_values); + + // Allocate device memory: + thrust::device_vector in(num_values, 0); + thrust::device_vector out(num_values, 0); + + state.exec( + [block_size, + num_blocks, + num_values, + in_ptr = thrust::raw_pointer_cast(in.data()), + out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { + nvbench::copy_kernel<<>>( + in_ptr, + out_ptr, + num_values); + }); +} + +//============================================================================== +// Tied iteration space allows you to iterate two or more axes at the same +// time allowing for sparse exploration of the search space. 
Can also be used +// to test the diagonal of a square matrix +// +void tied_copy_sweep_grid_shape(nvbench::state &state) +{ + copy_sweep_grid_shape(state); +} +NVBENCH_BENCH(tied_copy_sweep_grid_shape) + // Every power of two from 64->1024: + .add_int64_axis("BlockSize", {32,64,128,256}) + .add_int64_axis("NumBlocks", {1024,512,256,128}) + .tie_axes({"BlockSize", "NumBlocks"}); + +//============================================================================== +// under_diag: +// Custom iterator that only searches the `X` locations of two axi +// [- - - - X] +// [- - - X X] +// [- - X X X] +// [- X X X X] +// [X X X X X] +// +struct under_diag final : nvbench::user_axis_space +{ + under_diag(std::vector input_indices, + std::vector output_indices) + : nvbench::user_axis_space(std::move(input_indices), std::move(output_indices)) + {} + + mutable std::size_t x_pos = 0; + mutable std::size_t y_pos = 0; + mutable std::size_t x_start = 0; + + nvbench::detail::axis_space_iterator do_iter(axes_info info) const + { + // generate our increment function + auto adv_func = [&, info](std::size_t &inc_index, + std::size_t /*len*/) -> bool { + inc_index++; + x_pos++; + if (x_pos == info[0].size) + { + x_pos = ++x_start; + y_pos = x_start; + return true; + } + return false; + }; + + // our update function + std::vector locs = m_output_indices; + auto diag_under = + [&, locs, info](std::size_t, + std::vector &indices) { + nvbench::detail::axis_index temp = info[0]; + temp.index = x_pos; + indices[locs[0]] = temp; + + temp = info[1]; + temp.index = y_pos; + indices[locs[1]] = temp; + }; + + const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); + return nvbench::detail::make_space_iterator(2, + iteration_length, + adv_func, + diag_under); + } + + std::size_t do_size(const axes_info &info) const + { + return ((info[0].size * (info[1].size + 1)) / 2); + } + + std::size_t do_valid_count(const axes_info &info) const + { + return ((info[0].size * (info[1].size + 1)) / 2); + } + + std::unique_ptr do_clone() const + { + return std::make_unique(*this); + } +}; + +void user_copy_sweep_grid_shape(nvbench::state &state) +{ + copy_sweep_grid_shape(state); +} +NVBENCH_BENCH(user_copy_sweep_grid_shape) + // Every power of two from 64->1024: + .add_int64_power_of_two_axis("BlockSize", nvbench::range(6, 10)) + .add_int64_power_of_two_axis("NumBlocks", nvbench::range(6, 10)) + .user_iteration_axes({"NumBlocks", "BlockSize"}, + [](auto... 
args) + -> std::unique_ptr { + return std::make_unique(args...); + }); + + +//============================================================================== +// gauss: +// Custom iteration space that uses a gauss distribution to +// sample the points near the middle of the index space +// +struct gauss final : nvbench::user_axis_space +{ + + gauss(std::vector input_indices, + std::vector output_indices) + : nvbench::user_axis_space(std::move(input_indices), std::move(output_indices)) + {} + + nvbench::detail::axis_space_iterator do_iter(axes_info info) const + { + const double mid_point = static_cast((info[0].size / 2)); + + std::random_device rd{}; + std::mt19937 gen{rd()}; + std::normal_distribution<> d{mid_point, 2}; + + const size_t iteration_length = info[0].size; + std::vector gauss_indices(iteration_length); + for (auto &g : gauss_indices) + { + auto v = std::min(static_cast(info[0].size), d(gen)); + v = std::max(0.0, v); + g = static_cast(v); + } + + // our update function + std::vector locs = m_output_indices; + auto gauss_func = [=](std::size_t index, + std::vector &indices) { + nvbench::detail::axis_index temp = info[0]; + temp.index = gauss_indices[index]; + indices[locs[0]] = temp; + }; + + return nvbench::detail::make_space_iterator(1, + iteration_length, + gauss_func); + } + + std::size_t do_size(const axes_info &info) const { return info[0].size; } + + std::size_t do_valid_count(const axes_info &info) const + { + return info[0].size; + } + + std::unique_ptr do_clone() const + { + return std::make_unique(*this); + } +}; +//============================================================================== +// Dual parameter sweep: +void dual_float64_axis(nvbench::state &state) +{ + const auto duration_A = state.get_float64("Duration_A"); + const auto duration_B = state.get_float64("Duration_B"); + + state.exec([duration_A, duration_B](nvbench::launch &launch) { + nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(duration_A + + duration_B); + }); +} +NVBENCH_BENCH(dual_float64_axis) + .add_float64_axis("Duration_A", nvbench::range(0., 1e-4, 1e-5)) + .add_float64_axis("Duration_B", nvbench::range(0., 1e-4, 1e-5)) + .user_iteration_axes({"Duration_A"}, + [](auto... args) + -> std::unique_ptr { + return std::make_unique(args...); + }) + .user_iteration_axes({"Duration_B"}, + [](auto... 
args) + -> std::unique_ptr { + return std::make_unique(args...); + }); diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index f86bd415..45438b9d 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -1,6 +1,7 @@ set(srcs axes_metadata.cxx axis_base.cxx + axis_iteration_space.cxx benchmark_base.cxx benchmark_manager.cxx blocking_kernel.cu diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index 353855a8..cb14e97e 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -18,12 +18,14 @@ #pragma once +#include #include #include #include #include #include +#include #include #include #include @@ -37,6 +39,8 @@ namespace nvbench struct axes_metadata { using axes_type = std::vector>; + using axes_iteration_space = + std::vector>; template explicit axes_metadata(nvbench::type_list); @@ -58,6 +62,21 @@ struct axes_metadata void add_string_axis(std::string name, std::vector data); + void tie_axes(std::vector names); + + void + user_iteration_axes(std::vector names, + std::function make); + + [[nodiscard]] const axes_iteration_space &get_type_iteration_space() const + { + return m_type_space; + } + [[nodiscard]] const axes_iteration_space &get_value_iteration_space() const + { + return m_value_space; + } + [[nodiscard]] const nvbench::int64_axis & get_int64_axis(std::string_view name) const; [[nodiscard]] nvbench::int64_axis &get_int64_axis(std::string_view name); @@ -93,6 +112,9 @@ struct axes_metadata private: axes_type m_axes; + std::size_t m_type_axe_count = 0; + axes_iteration_space m_type_space; + axes_iteration_space m_value_space; }; template @@ -105,11 +127,15 @@ axes_metadata::axes_metadata(nvbench::type_list) auto names_iter = names.begin(); // contents will be moved from nvbench::tl::foreach( - [&axes = m_axes, &names_iter]([[maybe_unused]] auto wrapped_type) { + [&axes = m_axes, &spaces = m_type_space, &names_iter]( + [[maybe_unused]] auto wrapped_type) { // This is always called before other axes are added, so the length of the // axes vector will be the type axis index: const std::size_t type_axis_index = axes.size(); + spaces.push_back( + std::make_unique(type_axis_index, type_axis_index)); + // Note: // The word "type" appears 6 times in the next line. // Every. Single. Token. 
@@ -119,6 +145,7 @@ axes_metadata::axes_metadata(nvbench::type_list) axis->template set_inputs(); axes.push_back(std::move(axis)); }); + m_type_axe_count = m_axes.size(); } } // namespace nvbench diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index 044bc91f..9dd679d4 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -25,6 +25,9 @@ #include #include +#include + +#include namespace nvbench { @@ -36,6 +39,19 @@ axes_metadata::axes_metadata(const axes_metadata &other) { m_axes.push_back(axis->clone()); } + + m_type_axe_count = other.m_type_axe_count; + m_type_space.reserve(other.m_type_space.size()); + for (const auto &iter : other.m_type_space) + { + m_type_space.push_back(iter->clone()); + } + + m_value_space.reserve(other.m_value_space.size()); + for (const auto &iter : other.m_value_space) + { + m_value_space.push_back(iter->clone()); + } } axes_metadata &axes_metadata::operator=(const axes_metadata &other) @@ -46,6 +62,23 @@ axes_metadata &axes_metadata::operator=(const axes_metadata &other) { m_axes.push_back(axis->clone()); } + + m_type_axe_count = other.m_type_axe_count; + + m_type_space.clear(); + m_type_space.reserve(other.m_type_space.size()); + for (const auto &iter : other.m_type_space) + { + m_type_space.push_back(iter->clone()); + } + + m_value_space.clear(); + m_value_space.reserve(other.m_value_space.size()); + for (const auto &iter : other.m_value_space) + { + m_value_space.push_back(iter->clone()); + } + return *this; } @@ -84,6 +117,10 @@ catch (std::exception &e) void axes_metadata::add_float64_axis(std::string name, std::vector data) { + m_value_space.push_back( + std::make_unique(m_axes.size(), + m_axes.size() - m_type_axe_count)); + auto axis = std::make_unique(std::move(name)); axis->set_inputs(std::move(data)); m_axes.push_back(std::move(axis)); @@ -93,6 +130,10 @@ void axes_metadata::add_int64_axis(std::string name, std::vector data, nvbench::int64_axis_flags flags) { + m_value_space.push_back( + std::make_unique(m_axes.size(), + m_axes.size() - m_type_axe_count)); + auto axis = std::make_unique(std::move(name)); axis->set_inputs(std::move(data), flags); m_axes.push_back(std::move(axis)); @@ -101,11 +142,162 @@ void axes_metadata::add_int64_axis(std::string name, void axes_metadata::add_string_axis(std::string name, std::vector data) { + m_value_space.push_back( + std::make_unique(m_axes.size(), + m_axes.size() - m_type_axe_count)); + auto axis = std::make_unique(std::move(name)); axis->set_inputs(std::move(data)); m_axes.push_back(std::move(axis)); } +namespace +{ +std::tuple, std::vector> +get_axes_indices(std::size_t type_axe_count, + const nvbench::axes_metadata::axes_type &axes, + const std::vector &names) +{ + std::vector input_indices; + input_indices.reserve(names.size()); + for (auto &n : names) + { + auto iter = + std::find_if(axes.cbegin(), axes.cend(), [&n](const auto &axis) { + return axis->get_name() == n; + }); + + // iter distance is input_indices + if (iter == axes.cend()) + { + NVBENCH_THROW(std::runtime_error, + "Unable to find the axes named ({}).", + n); + } + auto index = std::distance(axes.cbegin(), iter); + input_indices.push_back(index); + } + + std::vector output_indices = input_indices; + for (auto &out : output_indices) + { + out -= type_axe_count; + } + return std::tie(input_indices, output_indices); +} + +void reset_iteration_space( + nvbench::axes_metadata::axes_iteration_space &all_spaces, + const std::vector &indices_to_remove) +{ + // 1. 
Find all spaces indices that + nvbench::axes_metadata::axes_iteration_space reset_space; + nvbench::axes_metadata::axes_iteration_space to_filter; + for (auto &space : all_spaces) + { + bool added = false; + for (auto &i : indices_to_remove) + { + if (space->contains(i)) + { + // add each item back as linear_axis_space + auto as_linear = space->clone_as_linear(); + to_filter.insert(to_filter.end(), + std::make_move_iterator(as_linear.begin()), + std::make_move_iterator(as_linear.end())); + added = true; + break; + } + } + if (!added) + { + // this space doesn't need to be removed + reset_space.push_back(std::move(space)); + } + } + + for (auto &iter : to_filter) + { + bool to_add = true; + for (auto &i : indices_to_remove) + { + if (iter->contains(i)) + { + to_add = false; + break; + } + } + if (to_add) + { + reset_space.push_back(std::move(iter)); + break; + } + } + + all_spaces = std::move(reset_space); +} +} // namespace + +void axes_metadata::tie_axes(std::vector names) +{ + NVBENCH_THROW_IF((names.size() < 2), + std::runtime_error, + "At least two axi names ( {} provided ) need to be provided " + "when using tie_axes.", + names.size()); + + // compute the numeric indice for each name we have + auto [input_indices, + output_indices] = get_axes_indices(m_type_axe_count, m_axes, names); + + const auto expected_size = m_axes[input_indices[0]]->get_size(); + for (auto i : input_indices) + { + NVBENCH_THROW_IF((m_axes[i]->get_type() == nvbench::axis_type::type), + std::runtime_error, + "Currently no support for tieing type axis ( {} ).", + m_axes[i]->get_name()); + + NVBENCH_THROW_IF((m_axes[i]->get_size() < expected_size), + std::runtime_error, + "All axes that are tied together must be atleast as long " + "the first axi provided ( {} ).", + expected_size); + } + + // remove any iteration spaces that have axes we need + reset_iteration_space(m_value_space, input_indices); + + // add the new tied iteration space + auto tied = std::make_unique(std::move(input_indices), + std::move(output_indices)); + m_value_space.push_back(std::move(tied)); +} + +void axes_metadata::user_iteration_axes( + std::vector names, + std::function make) +{ + // compute the numeric indice for each name we have + auto [input_indices, + output_indices] = get_axes_indices(m_type_axe_count, m_axes, names); + + for (auto i : input_indices) + { + NVBENCH_THROW_IF((m_axes[i]->get_type() == nvbench::axis_type::type), + std::runtime_error, + "Currently no support for using type axis with " + "user_iteration_axes ( {} ).", + m_axes[i]->get_name()); + } + + // remove any iteration spaces that have axes we need + reset_iteration_space(m_value_space, input_indices); + + auto user_func = make(std::move(input_indices), std::move(output_indices)); + m_value_space.push_back(std::move(user_func)); +} + const int64_axis &axes_metadata::get_int64_axis(std::string_view name) const { const auto &axis = this->get_axis(name, nvbench::axis_type::int64); diff --git a/nvbench/axis_iteration_space.cuh b/nvbench/axis_iteration_space.cuh new file mode 100644 index 00000000..7c045be0 --- /dev/null +++ b/nvbench/axis_iteration_space.cuh @@ -0,0 +1,93 @@ +/* + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. 
+ * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace nvbench +{ + +struct axis_space_base +{ + using axes_type = std::vector>; + using axes_info = std::vector; + + using AdvanceSignature = + nvbench::detail::axis_space_iterator::AdvanceSignature; + using UpdateSignature = nvbench::detail::axis_space_iterator::UpdateSignature; + + axis_space_base(std::vector input_indices, + std::vector output_indices); + virtual ~axis_space_base(); + + [[nodiscard]] std::unique_ptr clone() const; + [[nodiscard]] std::vector> + clone_as_linear() const; + + [[nodiscard]] detail::axis_space_iterator iter(const axes_type &axes) const; + [[nodiscard]] std::size_t size(const axes_type &axes) const; + [[nodiscard]] std::size_t valid_count(const axes_type &axes) const; + + [[nodiscard]] bool contains(std::size_t input_index) const; + +protected: + std::vector m_input_indices; + std::vector m_output_indices; + + virtual std::unique_ptr do_clone() const = 0; + virtual detail::axis_space_iterator do_iter(axes_info info) const = 0; + virtual std::size_t do_size(const axes_info &info) const = 0; + virtual std::size_t do_valid_count(const axes_info &info) const = 0; +}; + +struct linear_axis_space final : axis_space_base +{ + linear_axis_space(std::size_t in, std::size_t out); + ~linear_axis_space(); + + std::unique_ptr do_clone() const override; + detail::axis_space_iterator do_iter(axes_info info) const override; + std::size_t do_size(const axes_info &info) const override; + std::size_t do_valid_count(const axes_info &info) const override; +}; + +struct tie_axis_space final : axis_space_base +{ + tie_axis_space(std::vector input_indices, + std::vector output_indices); + ~tie_axis_space(); + + std::unique_ptr do_clone() const override; + detail::axis_space_iterator do_iter(axes_info info) const override; + std::size_t do_size(const axes_info &info) const override; + std::size_t do_valid_count(const axes_info &info) const override; +}; + +struct user_axis_space : axis_space_base +{ + user_axis_space(std::vector input_indices, + std::vector output_indices); + ~user_axis_space(); +}; + +using make_user_space_signature = + std::unique_ptr(std::vector input_indices, + std::vector output_indices); + +} // namespace nvbench diff --git a/nvbench/axis_iteration_space.cxx b/nvbench/axis_iteration_space.cxx new file mode 100644 index 00000000..2f931398 --- /dev/null +++ b/nvbench/axis_iteration_space.cxx @@ -0,0 +1,174 @@ +/* + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "axis_iteration_space.cuh" + +#include + +namespace nvbench +{ + +axis_space_base::axis_space_base(std::vector input_indices, + std::vector output_indices) + : m_input_indices(std::move(input_indices)) + , m_output_indices(std::move(output_indices)) +{} + +axis_space_base::~axis_space_base() = default; + +std::unique_ptr axis_space_base::clone() const +{ + auto clone = this->do_clone(); + return clone; +} + +std::vector> +axis_space_base::clone_as_linear() const +{ + std::vector> clones; + clones.reserve(m_input_indices.size()); + + for (std::size_t i = 0; i < m_input_indices.size(); ++i) + { + clones.push_back( + std::make_unique(m_input_indices[i], + m_output_indices[i])); + } + + return clones; +} + +namespace +{ +nvbench::axis_space_base::axes_info +get_axes_info(const nvbench::axis_space_base::axes_type &axes, + const std::vector &indices) +{ + nvbench::axis_space_base::axes_info info; + info.reserve(indices.size()); + for (auto &n : indices) + { + info.emplace_back(axes[n].get()); + } + return info; +} +} // namespace + +detail::axis_space_iterator axis_space_base::iter(const axes_type &axes) const +{ + + return this->do_iter(get_axes_info(axes, m_input_indices)); +} + +std::size_t axis_space_base::size(const axes_type &axes) const +{ + return this->do_size(get_axes_info(axes, m_input_indices)); +} +std::size_t axis_space_base::valid_count(const axes_type &axes) const +{ + return this->do_valid_count(get_axes_info(axes, m_input_indices)); +} + +bool axis_space_base::contains(std::size_t in_index) const +{ + auto iter = + std::find_if(m_input_indices.cbegin(), + m_input_indices.cend(), + [&in_index](const auto &i) { return i == in_index; }); + return iter != m_input_indices.end(); +} + +linear_axis_space::linear_axis_space(std::size_t in_index, + std::size_t out_index) + : axis_space_base({std::move(in_index)}, {out_index}) +{} + +linear_axis_space::~linear_axis_space() = default; + +detail::axis_space_iterator linear_axis_space::do_iter(axes_info info) const +{ + std::size_t loc(m_output_indices[0]); + auto update_func = [=](std::size_t inc_index, + std::vector &indices) { + indices[loc] = info[0]; + indices[loc].index = inc_index; + }; + + return detail::make_space_iterator(1, info[0].size, update_func); +} + +std::size_t linear_axis_space::do_size(const axes_info &info) const +{ + return info[0].size; +} + +std::size_t linear_axis_space::do_valid_count(const axes_info &info) const +{ + return info[0].active_size; +} + +std::unique_ptr linear_axis_space::do_clone() const +{ + return std::make_unique(*this); +} + +tie_axis_space::tie_axis_space(std::vector input_indices, + std::vector output_indices) + : axis_space_base(std::move(input_indices), std::move(output_indices)) +{} + +tie_axis_space::~tie_axis_space() = default; + +detail::axis_space_iterator tie_axis_space::do_iter(axes_info info) const +{ + std::vector locs = m_output_indices; + auto update_func = [=](std::size_t inc_index, + std::vector &indices) { + for (std::size_t i = 0; i < info.size(); ++i) + { + detail::axis_index temp = info[i]; + temp.index = inc_index; + indices[locs[i]] = temp; + } + }; + + return detail::make_space_iterator(locs.size(), info[0].size, update_func); +} + +std::size_t tie_axis_space::do_size(const axes_info &info) const +{ + return info[0].size; +} + +std::size_t tie_axis_space::do_valid_count(const axes_info &info) const +{ + return info[0].active_size; +} + +std::unique_ptr tie_axis_space::do_clone() const +{ + return std::make_unique(*this); +} + 
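A minimal sketch of how one of these spaces is consumed. The helper below is hypothetical (not part of the patch): it walks a single space the same way `state_iterator::get_current_indices()` / `next()` do, and assumes the space's output indices start at zero so the index vector can be sized from `m_number_of_axes`.

    // Hypothetical helper, for illustration only.
    void walk_space(const nvbench::axis_space_base &space,
                    const nvbench::axis_space_base::axes_type &axes)
    {
      auto it = space.iter(axes);
      std::vector<nvbench::detail::axis_index> indices(it.m_number_of_axes);
      do
      {
        it.update_indices(indices); // fill in this step's axis name/index/size
        // ... consume indices[k].name and indices[k].index here ...
      } while (!it.inc()); // inc() returns true once the space rolls over
    }
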
+user_axis_space::user_axis_space(std::vector input_indices, + std::vector output_indices) + : axis_space_base(std::move(input_indices), std::move(output_indices)) +{} +user_axis_space::~user_axis_space() = default; + +} // namespace nvbench diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index 588445df..94908d12 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -111,6 +111,21 @@ struct benchmark_base return *this; } + benchmark_base &tie_axes(std::vector names) + { + m_axes.tie_axes(std::move(names)); + return *this; + } + + benchmark_base & + user_iteration_axes(std::vector names, + std::function make) + { + m_axes.user_iteration_axes(std::move(names), std::move(make)); + return *this; + } + + benchmark_base &set_devices(std::vector device_ids); benchmark_base &set_devices(std::vector devices) diff --git a/nvbench/benchmark_base.cxx b/nvbench/benchmark_base.cxx index 2d08fdbd..296cf6c5 100644 --- a/nvbench/benchmark_base.cxx +++ b/nvbench/benchmark_base.cxx @@ -20,6 +20,8 @@ #include +#include + namespace nvbench { @@ -62,22 +64,22 @@ benchmark_base &benchmark_base::add_device(int device_id) std::size_t benchmark_base::get_config_count() const { - const std::size_t per_device_count = nvbench::detail::transform_reduce( - m_axes.get_axes().cbegin(), - m_axes.get_axes().cend(), + const auto& axes = m_axes.get_axes(); + const std::size_t value_count = nvbench::detail::transform_reduce( + m_axes.get_value_iteration_space().cbegin(), + m_axes.get_value_iteration_space().cend(), std::size_t{1}, std::multiplies<>{}, - [](const auto &axis_ptr) { - if (const auto *type_axis_ptr = - dynamic_cast(axis_ptr.get()); - type_axis_ptr != nullptr) - { - return type_axis_ptr->get_active_count(); - } - return axis_ptr->get_size(); - }); - - return per_device_count * m_devices.size(); + [&axes](const auto &space) { return space->size(axes); }); + + const std::size_t type_count = nvbench::detail::transform_reduce( + m_axes.get_type_iteration_space().cbegin(), + m_axes.get_type_iteration_space().cend(), + std::size_t{1}, + std::multiplies<>{}, + [&axes](const auto &space) { return space->valid_count(axes); }); + + return (value_count * type_count) * std::max(1UL, m_devices.size()); } } // namespace nvbench diff --git a/nvbench/detail/axes_iterator.cuh b/nvbench/detail/axes_iterator.cuh new file mode 100644 index 00000000..ea13b19d --- /dev/null +++ b/nvbench/detail/axes_iterator.cuh @@ -0,0 +1,113 @@ +/* + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include +#include + +namespace nvbench +{ +namespace detail +{ + +struct axis_index +{ + axis_index() = default; + + explicit axis_index(const axis_base *axi) + : index(0) + , name(axi->get_name()) + , type(axi->get_type()) + , size(axi->get_size()) + , active_size(axi->get_size()) + { + if (type == nvbench::axis_type::type) + { + active_size = + static_cast(axi)->get_active_count(); + } + } + std::size_t index; + std::string name; + nvbench::axis_type type; + std::size_t size; + std::size_t active_size; +}; + +struct axis_space_iterator +{ + using AdvanceSignature = bool(std::size_t ¤t_index, std::size_t length); + using UpdateSignature = void(std::size_t index, + std::vector &indices); + + [[nodiscard]] bool inc() + { + return this->m_advance(m_current_index, m_iteration_size); + } + + void update_indices(std::vector &indices) const + { + this->m_update(m_current_index, indices); + } + + std::size_t m_number_of_axes = 1; + std::size_t m_iteration_size = 1; + std::function m_advance = [](std::size_t ¤t_index, + std::size_t length) { + (current_index + 1 == length) ? current_index = 0 : current_index++; + return (current_index == 0); // we rolled over + }; + std::function m_update = nullptr; + +private: + std::size_t m_current_index = 0; +}; + +inline axis_space_iterator make_space_iterator( + std::size_t axes_count, + std::size_t iter_count, + std::function &&advance, + std::function &&update) +{ + axis_space_iterator iter; + iter.m_number_of_axes = axes_count; + iter.m_iteration_size = iter_count; + iter.m_advance = std::move(advance); + iter.m_update = std::move(update); + return iter; +} + +inline axis_space_iterator make_space_iterator( + std::size_t axes_count, + std::size_t iter_count, + std::function &&update) +{ + axis_space_iterator iter; + iter.m_number_of_axes = axes_count; + iter.m_iteration_size = iter_count; + iter.m_update = std::move(update); + return iter; +} + +} // namespace detail +} // namespace nvbench diff --git a/nvbench/detail/state_generator.cuh b/nvbench/detail/state_generator.cuh index 035f61fc..e1e14117 100644 --- a/nvbench/detail/state_generator.cuh +++ b/nvbench/detail/state_generator.cuh @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -59,7 +60,7 @@ private: // Usage: // ``` // state_iterator sg; -// sg.add_axis(...); +// sg.add_iteration_space(...); // for (sg.init(); sg.iter_valid(); sg.next()) // { // for (const auto& index : sg.get_current_indices()) @@ -73,25 +74,19 @@ private: // ``` struct state_iterator { - struct axis_index - { - std::string axis; - nvbench::axis_type type; - std::size_t index; - std::size_t size; - }; + void add_iteration_space(const nvbench::detail::axis_space_iterator &iter); - void add_axis(const nvbench::axis_base &axis); - void add_axis(std::string axis, nvbench::axis_type type, std::size_t size); [[nodiscard]] std::size_t get_number_of_states() const; void init(); - [[nodiscard]] const std::vector &get_current_indices() const; + [[nodiscard]] std::vector get_current_indices() const; [[nodiscard]] bool iter_valid() const; void next(); - std::vector m_indices; - std::size_t m_current{}; - std::size_t m_total{}; + std::vector m_space; + std::size_t m_axes_count = 0; + std::size_t m_current_space = 0; + std::size_t m_current_iteration = 0; + std::size_t m_max_iteration = 1; }; } // namespace detail diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index 8c153bff..68803ba2 100644 --- a/nvbench/detail/state_generator.cxx 
+++ b/nvbench/detail/state_generator.cxx @@ -32,66 +32,55 @@ namespace nvbench::detail { - // state_iterator ============================================================== -void state_iterator::add_axis(const nvbench::axis_base &axis) +void state_iterator::add_iteration_space(const nvbench::detail::axis_space_iterator &iter) { - this->add_axis(axis.get_name(), axis.get_type(), axis.get_size()); -} + m_axes_count += iter.m_number_of_axes; + m_max_iteration *= iter.m_iteration_size; -void state_iterator::add_axis(std::string axis, - nvbench::axis_type type, - std::size_t size) -{ - m_indices.push_back({std::move(axis), type, std::size_t{0}, size}); + m_space.push_back(std::move(iter)); } [[nodiscard]] std::size_t state_iterator::get_number_of_states() const { - return nvbench::detail::transform_reduce(m_indices.cbegin(), - m_indices.cend(), - std::size_t{1}, - std::multiplies<>{}, - [](const axis_index &size_info) { - return size_info.size; - }); + return this->m_max_iteration; } void state_iterator::init() { - m_current = 0; - m_total = this->get_number_of_states(); - for (axis_index &entry : m_indices) - { - entry.index = 0; - } + m_current_space = 0; + m_current_iteration = 0; } -[[nodiscard]] const std::vector & -state_iterator::get_current_indices() const +[[nodiscard]] std::vector state_iterator::get_current_indices() const { - return m_indices; + std::vector indices(m_axes_count); + for (auto &m : m_space) + { + m.update_indices(indices); + } + return indices; } [[nodiscard]] bool state_iterator::iter_valid() const { - return m_current < m_total; + return m_current_iteration < m_max_iteration; } void state_iterator::next() { - for (axis_index &axis_info : m_indices) + m_current_iteration++; + + for (auto &&space : this->m_space) { - axis_info.index += 1; - if (axis_info.index >= axis_info.size) + auto rolled_over = space.inc(); + if (rolled_over) { - axis_info.index = 0; - continue; // carry the addition to the next entry in m_indices + continue; } - break; // done + break; } - m_current += 1; } // state_generator ============================================================= @@ -103,122 +92,91 @@ state_generator::state_generator(const benchmark_base &bench) void state_generator::build_axis_configs() { const axes_metadata &axes = m_benchmark.get_axes(); - const std::vector> &axes_vec = axes.get_axes(); + const auto &type_space = axes.get_type_iteration_space(); + const auto &value_space = axes.get_value_iteration_space(); - // Construct two state_generators: - // - Only type_axis objects. - // - Only non-type axes. - state_iterator type_si; - state_iterator non_type_si; + state_iterator ti; + state_iterator vi; - // state_iterator initialization: + // Reverse add type axes by index. This way the state_generator's cartesian + // product of the type axes values will be enumerated in the same order as + // nvbench::tl::cartesian_product. This is necessary to ensure + // that the correct states are passed to the corresponding benchmark + // instantiations. 
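  // (Illustration with hypothetical type axes <A, B> and <X, Y>, assuming
  //  nvbench::tl::cartesian_product enumerates the last type list fastest:
  //  the instantiations come out as (A,X), (A,Y), (B,X), (B,Y). Since
  //  state_iterator varies the first space it receives fastest, the spaces
  //  are added in reverse below to keep the two orderings in sync.)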
{ - // stage the type axes in a vector to allow sorting: - std::vector> type_axes; - type_axes.reserve(axes_vec.size()); - - // Filter all axes by into type and non-type: - std::for_each(axes_vec.cbegin(), - axes_vec.cend(), - [&non_type_si, &type_axes](const auto &axis) { - if (axis->get_type() == nvbench::axis_type::type) - { - type_axes.push_back( - std::cref(static_cast(*axis))); - } - else - { - non_type_si.add_axis(*axis); - } + const auto &axes_vec = axes.get_axes(); + std::for_each(type_space.crbegin(), + type_space.crend(), + [&ti, &axes_vec](const auto &space) { + ti.add_iteration_space(space->iter(axes_vec)); + }); + std::for_each(value_space.begin(), + value_space.end(), + [&vi, &axes_vec](const auto &space) { + vi.add_iteration_space(space->iter(axes_vec)); }); - - // Reverse sort type axes by index. This way the state_generator's cartesian - // product of the type axes values will be enumerated in the same order as - // nvbench::tl::cartesian_product. This is necessary to ensure - // that the correct states are passed to the corresponding benchmark - // instantiations. - std::sort(type_axes.begin(), - type_axes.end(), - [](const auto &axis_1, const auto &axis_2) { - return axis_1.get().get_axis_index() > - axis_2.get().get_axis_index(); - }); - - std::for_each(type_axes.cbegin(), - type_axes.cend(), - [&type_si](const auto &axis) { type_si.add_axis(axis); }); } - // type_axis_configs generation: + m_type_axis_configs.clear(); + m_type_axis_configs.reserve(ti.get_number_of_states()); + + m_non_type_axis_configs.clear(); + m_non_type_axis_configs.reserve(vi.get_number_of_states()); + + for (ti.init(); ti.iter_valid(); ti.next()) { - m_type_axis_configs.clear(); - m_type_axis_configs.reserve(type_si.get_number_of_states()); + auto &[config, active_mask] = m_type_axis_configs.emplace_back( + std::make_pair(nvbench::named_values{}, true)); - // Build type_axis_configs - for (type_si.init(); type_si.iter_valid(); type_si.next()) + for (const auto &axis_info : ti.get_current_indices()) { - auto &[config, active_mask] = m_type_axis_configs.emplace_back( - std::make_pair(nvbench::named_values{}, true)); + const auto &axis = axes.get_type_axis(axis_info.name); - // Reverse the indices so they're once again in the same order as - // specified: - auto indices = type_si.get_current_indices(); - std::reverse(indices.begin(), indices.end()); + active_mask &= axis.get_is_active(axis_info.index); - for (const auto &axis_info : indices) - { - const auto &axis = axes.get_type_axis(axis_info.axis); - if (!axis.get_is_active(axis_info.index)) - { - active_mask = false; - } - - config.set_string(axis_info.axis, - axis.get_input_string(axis_info.index)); - } - } // type_si - } // type_axis_config generation - - // non_type_axis_config generation + config.set_string(axis.get_name(), + axis.get_input_string(axis_info.index)); + } + } + for (vi.init(); vi.iter_valid(); vi.next()) { - m_non_type_axis_configs.clear(); - m_non_type_axis_configs.reserve(type_si.get_number_of_states()); + auto &config = m_non_type_axis_configs.emplace_back(); - for (non_type_si.init(); non_type_si.iter_valid(); non_type_si.next()) + // Add non-type parameters to state: + for (const auto &axis_info : vi.get_current_indices()) { - auto &config = m_non_type_axis_configs.emplace_back(); - - // Add non-type parameters to state: - for (const auto &axis_info : non_type_si.get_current_indices()) + switch (axis_info.type) { - switch (axis_info.type) - { - default: - case axis_type::type: - assert("unreachable." 
&& false); - break; - - case axis_type::int64: - config.set_int64( - axis_info.axis, - axes.get_int64_axis(axis_info.axis).get_value(axis_info.index)); - break; - - case axis_type::float64: - config.set_float64( - axis_info.axis, - axes.get_float64_axis(axis_info.axis).get_value(axis_info.index)); - break; - - case axis_type::string: - config.set_string( - axis_info.axis, - axes.get_string_axis(axis_info.axis).get_value(axis_info.index)); - break; - } // switch (type) - } // for (axis_info : current_indices) - } // for non_type_sg configs - } // non_type_axis_config generation + default: + case axis_type::type: + assert("unreachable." && false); + break; + case axis_type::int64: + config.set_int64( + axis_info.name, + axes.get_int64_axis(axis_info.name).get_value(axis_info.index)); + break; + + case axis_type::float64: + config.set_float64( + axis_info.name, + axes.get_float64_axis(axis_info.name).get_value(axis_info.index)); + break; + + case axis_type::string: + config.set_string( + axis_info.name, + axes.get_string_axis(axis_info.name).get_value(axis_info.index)); + break; + } // switch (type) + } // for (axis_info : current_indices) + } + + if (m_type_axis_configs.empty()) + { + m_type_axis_configs.emplace_back( + std::make_pair(nvbench::named_values{}, true)); + } } void state_generator::build_states() @@ -248,7 +206,6 @@ void state_generator::add_states_for_device( { const auto &[type_config, axis_mask] = m_type_axis_configs[type_config_index]; - if (!axis_mask) { // Don't generate inner vector if the type config is masked out. continue; diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 4928ebc9..034fcef8 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -1,5 +1,6 @@ set(test_srcs axes_metadata.cu + axes_iteration_space.cu benchmark.cu create.cu cuda_timer.cu diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu new file mode 100644 index 00000000..eb3862e9 --- /dev/null +++ b/testing/axes_iteration_space.cu @@ -0,0 +1,326 @@ +/* + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "test_asserts.cuh" + +#include + +#include +#include +#include +#include + +template +std::vector sort(std::vector &&vec) +{ + std::sort(vec.begin(), vec.end()); + return std::move(vec); +} + +void no_op_generator(nvbench::state &state) +{ + fmt::memory_buffer params; + fmt::format_to(params, "Params:"); + const auto &axis_values = state.get_axis_values(); + for (const auto &name : sort(axis_values.get_names())) + { + std::visit( + [¶ms, &name](const auto &value) { + fmt::format_to(params, " {}: {}", name, value); + }, + axis_values.get_value(name)); + } + + // Marking as skipped to signal that this state is run: + state.skip(fmt::to_string(std::move(params))); +} +NVBENCH_DEFINE_CALLABLE(no_op_generator, no_op_callable); + +template +void template_no_op_generator(nvbench::state &state, + nvbench::type_list) +{ + ASSERT(nvbench::type_strings::input_string() == + state.get_string("Integer")); + ASSERT(nvbench::type_strings::input_string() == + state.get_string("Float")); + ASSERT(nvbench::type_strings::input_string() == + state.get_string("Other")); + + // Enum params using non-templated version: + no_op_generator(state); +} +NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, + template_no_op_callable); + +void test_tie_axes() +{ + using benchmark_type = nvbench::benchmark; + benchmark_type bench; + bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); + bench.add_int64_axis("I64 Axis", {1, 3, 2, 4, 5}); + bench.tie_axes({"F64 Axis", "I64 Axis"}); + + ASSERT_MSG(bench.get_config_count() == 5 * bench.get_devices().size(), + "Got {}", + bench.get_config_count()); +} + +void test_tie_invalid_names() +{ + using benchmark_type = nvbench::benchmark; + benchmark_type bench; + bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); + bench.add_int64_axis("I64 Axis", {1, 3, 2}); + + ASSERT_THROWS_ANY(bench.tie_axes({"F32 Axis", "I64 Axis"})); + ASSERT_THROWS_ANY(bench.tie_axes({"F32 Axis"})); + ASSERT_THROWS_ANY(bench.tie_axes({""})); + ASSERT_THROWS_ANY(bench.tie_axes(std::vector())); +} + +void test_tie_unequal_length() +{ + using benchmark_type = nvbench::benchmark; + benchmark_type bench; + bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); + bench.add_int64_axis("I64 Axis", {1, 3, 2}); + + bench.tie_axes({"I64 Axis", "F64 Axis"}); + ASSERT_THROWS_ANY(bench.tie_axes({"F64 Axis", "I64 Axis"})); +} + +void test_tie_type_axi() +{ + using benchmark_type = + nvbench::benchmark, + nvbench::type_list, + nvbench::type_list>>; + benchmark_type bench; + bench.set_type_axes_names({"Integer", "Float", "Other"}); + bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); + bench.add_int64_axis("I64 Axis", {1, 3, 2}); + + ASSERT_THROWS_ANY(bench.tie_axes({"F64 Axis", "Float"})); +} + +void test_retie_axes() +{ + using benchmark_type = nvbench::benchmark; + benchmark_type bench; + bench.add_int64_axis("IAxis_A", {1, 3, 2, 4, 5}); + bench.add_int64_axis("IAxis_B", {1, 3, 2, 4, 5}); + bench.add_float64_axis("FAxis_5", {0., .1, .25, .5, 1.}); + bench.add_float64_axis("FAxis_2", + { + 0., + .1, + }); + + bench.tie_axes({"FAxis_5", "IAxis_A"}); + bench.tie_axes({"IAxis_B", "FAxis_5", "IAxis_A"}); // re-tie + + ASSERT_MSG(bench.get_config_count() == 10 * bench.get_devices().size(), + "Got {}", + bench.get_config_count()); + + bench.tie_axes({"FAxis_5", "IAxis_A"}); + ASSERT_MSG(bench.get_config_count() == 50 * bench.get_devices().size(), + "Got {}", + bench.get_config_count()); +} + +void 
test_retie_axes2() +{ + using benchmark_type = nvbench::benchmark; + benchmark_type bench; + bench.add_int64_axis("IAxis_A", {1, 3, 2, 4, 5}); + bench.add_int64_axis("IAxis_B", {1, 3, 2, 4, 5}); + bench.add_int64_axis("IAxis_C", {1, 3, 2, 4, 5}); + bench.add_float64_axis("FAxis_1", {0., .1, .25, .5, 1.}); + bench.add_float64_axis("FAxis_2", {0., .1, .25, .5, 1.}); + bench.add_float64_axis("FAxis_3", + { + 0., + .1, + }); + + bench.tie_axes({"IAxis_A", "IAxis_B", "IAxis_C"}); + bench.tie_axes({"FAxis_1", "FAxis_2"}); + bench.tie_axes( + {"IAxis_A", "IAxis_B", "IAxis_C", "FAxis_1", "FAxis_2"}); // re-tie + + ASSERT_MSG(bench.get_config_count() == 10 * bench.get_devices().size(), + "Got {}", + bench.get_config_count()); + + bench.tie_axes({"IAxis_A", "IAxis_B", "IAxis_C"}); + bench.tie_axes({"FAxis_1", "FAxis_2"}); + ASSERT_MSG(bench.get_config_count() == 50 * bench.get_devices().size(), + "Got {}", + bench.get_config_count()); +} + +void test_tie_clone() +{ + using benchmark_type = nvbench::benchmark; + benchmark_type bench; + bench.set_devices(std::vector{}); + bench.add_string_axis("Strings", {"string a", "string b", "string c"}); + bench.add_int64_power_of_two_axis("I64 POT Axis", {10, 20}); + bench.add_int64_axis("I64 Axis", {10, 20}); + bench.add_float64_axis("F64 Axis", {0., .1, .25}); + bench.tie_axes({"F64 Axis", "Strings"}); + + const auto expected_count = bench.get_config_count(); + + std::unique_ptr clone_base = bench.clone(); + ASSERT(clone_base.get() != nullptr); + + ASSERT_MSG(expected_count == clone_base->get_config_count(), + "Got {}", + clone_base->get_config_count()); + + auto *clone = dynamic_cast(clone_base.get()); + ASSERT(clone != nullptr); + + ASSERT(bench.get_name() == clone->get_name()); + + const auto &ref_axes = bench.get_axes().get_axes(); + const auto &clone_axes = clone->get_axes().get_axes(); + ASSERT(ref_axes.size() == clone_axes.size()); + for (std::size_t i = 0; i < ref_axes.size(); ++i) + { + const nvbench::axis_base *ref_axis = ref_axes[i].get(); + const nvbench::axis_base *clone_axis = clone_axes[i].get(); + ASSERT(ref_axis != nullptr); + ASSERT(clone_axis != nullptr); + ASSERT(ref_axis->get_name() == clone_axis->get_name()); + ASSERT(ref_axis->get_type() == clone_axis->get_type()); + ASSERT(ref_axis->get_size() == clone_axis->get_size()); + for (std::size_t j = 0; j < ref_axis->get_size(); ++j) + { + ASSERT(ref_axis->get_input_string(j) == clone_axis->get_input_string(j)); + ASSERT(ref_axis->get_description(j) == clone_axis->get_description(j)); + } + } + + ASSERT(clone->get_states().empty()); +} + +struct under_diag final : nvbench::user_axis_space +{ + under_diag(std::vector input_indices, + std::vector output_indices) + : nvbench::user_axis_space(std::move(input_indices), std::move(output_indices)) + {} + + mutable std::size_t x_pos = 0; + mutable std::size_t y_pos = 0; + mutable std::size_t x_start = 0; + + nvbench::detail::axis_space_iterator do_iter(axes_info info) const + { + // generate our increment function + auto adv_func = [&, info](std::size_t &inc_index, + std::size_t /*len*/) -> bool { + inc_index++; + x_pos++; + if (x_pos == info[0].size) + { + x_pos = ++x_start; + y_pos = x_start; + return true; + } + return false; + }; + + // our update function + std::vector locs = m_output_indices; + auto diag_under = + [&, locs, info](std::size_t, + std::vector &indices) { + nvbench::detail::axis_index temp = info[0]; + temp.index = x_pos; + indices[locs[0]] = temp; + + temp = info[1]; + temp.index = y_pos; + indices[locs[1]] = temp; + }; + + const 
size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); + return nvbench::detail::make_space_iterator(2, + iteration_length, + adv_func, + diag_under); + } + + std::size_t do_size(const axes_info &info) const + { + return ((info[0].size * (info[1].size + 1)) / 2); + } + + std::size_t do_valid_count(const axes_info &info) const + { + return ((info[0].size * (info[1].size + 1)) / 2); + } + + std::unique_ptr do_clone() const + { + return std::make_unique(*this); + } +}; + +void test_user_axes() +{ + using benchmark_type = nvbench::benchmark; + benchmark_type bench; + bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); + bench.add_int64_axis("I64 Axis", {1, 3, 2, 4, 5}); + bench.user_iteration_axes( + {"F64 Axis", "I64 Axis"}, + [](auto... args) -> std::unique_ptr { + return std::make_unique(args...); + }); + + ASSERT_MSG(bench.get_config_count() == 15 * bench.get_devices().size(), + "Got {}", + bench.get_config_count()); +} + +int main() +{ + test_tie_axes(); + test_tie_invalid_names(); + test_tie_unequal_length(); + test_tie_type_axi(); + test_retie_axes(); + test_retie_axes2(); + test_tie_clone(); +} diff --git a/testing/benchmark.cu b/testing/benchmark.cu index 71ffe033..7bc759bc 100644 --- a/testing/benchmark.cu +++ b/testing/benchmark.cu @@ -296,10 +296,9 @@ void test_get_config_count() bench.add_float64_axis("foo", {0.4, 2.3, 4.3}); // 3, 12 bench.add_int64_axis("bar", {4, 6, 15}); // 3, 36 bench.add_string_axis("baz", {"str", "ing"}); // 2, 72 - bench.add_string_axis("baz", {"single"}); // 1, 72 + bench.add_string_axis("fez", {"single"}); // 1, 72 auto const num_devices = bench.get_devices().size(); - ASSERT_MSG(bench.get_config_count() == 72 * num_devices, "Got {}", bench.get_config_count()); diff --git a/testing/option_parser.cu b/testing/option_parser.cu index 9d7e6a9c..26dcc378 100644 --- a/testing/option_parser.cu +++ b/testing/option_parser.cu @@ -25,6 +25,8 @@ #include +#include + //============================================================================== // Declare a couple benchmarks for testing: void DummyBench(nvbench::state &state) { state.skip("Skipping for testing."); } @@ -96,6 +98,7 @@ states_to_string(const std::vector &states) ASSERT(bench != nullptr); bench->run(); + std::cout << bench->get_config_count() << std::endl; return bench->get_states(); } diff --git a/testing/state_generator.cu b/testing/state_generator.cu index cb584be5..16e442fe 100644 --- a/testing/state_generator.cu +++ b/testing/state_generator.cu @@ -56,12 +56,18 @@ void test_single_state() { // one single-value axis = one state nvbench::detail::state_iterator sg; - sg.add_axis("OnlyAxis", nvbench::axis_type::string, 1); + nvbench::string_axis si("OnlyAxis"); + si.set_inputs({""}); + + std::vector> axes; + axes.push_back(std::make_unique(si)); + + sg.add_iteration_space(nvbench::linear_axis_space{0, 0}.iter(axes)); ASSERT(sg.get_number_of_states() == 1); sg.init(); ASSERT(sg.iter_valid()); ASSERT(sg.get_current_indices().size() == 1); - ASSERT(sg.get_current_indices()[0].axis == "OnlyAxis"); + ASSERT(sg.get_current_indices()[0].name == "OnlyAxis"); ASSERT(sg.get_current_indices()[0].index == 0); ASSERT(sg.get_current_indices()[0].size == 1); ASSERT(sg.get_current_indices()[0].type == nvbench::axis_type::string); @@ -73,10 +79,27 @@ void test_single_state() void test_basic() { nvbench::detail::state_iterator sg; - sg.add_axis("Axis1", nvbench::axis_type::string, 2); - sg.add_axis("Axis2", nvbench::axis_type::string, 3); - sg.add_axis("Axis3", nvbench::axis_type::string, 3); - 
sg.add_axis("Axis4", nvbench::axis_type::string, 2); + + nvbench::string_axis si1("Axis1"); + nvbench::string_axis si2("Axis2"); + nvbench::string_axis si3("Axis3"); + nvbench::string_axis si4("Axis4"); + + si1.set_inputs({"", ""}); + si2.set_inputs({"", "", ""}); + si3.set_inputs({"", "", ""}); + si4.set_inputs({"", ""}); + + std::vector> axes; + axes.emplace_back(std::make_unique(si1)); + axes.emplace_back(std::make_unique(si2)); + axes.emplace_back(std::make_unique(si3)); + axes.emplace_back(std::make_unique(si4)); + + sg.add_iteration_space(nvbench::linear_axis_space{0, 0}.iter(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{1, 1}.iter(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{2, 2}.iter(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{3, 3}.iter(axes)); ASSERT_MSG(sg.get_number_of_states() == (2 * 3 * 3 * 2), "Actual: {} Expected: {}", @@ -95,7 +118,7 @@ void test_basic() ASSERT(axis_index.type == nvbench::axis_type::string); fmt::format_to(line, " | {}: {}/{}", - axis_index.axis, + axis_index.name, axis_index.index, axis_index.size); } From a25f57889190d8f918b2b46a33cc4b70b1c4f0b9 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 28 Feb 2022 12:26:59 -0500 Subject: [PATCH 02/41] Rename tie_axes to zip_axes --- docs/benchmarks.md | 3 +- examples/custom_iteration_spaces.cu | 2 +- nvbench/axes_metadata.cuh | 2 +- nvbench/axes_metadata.cxx | 6 ++-- nvbench/axis_iteration_space.cuh | 6 ++-- nvbench/axis_iteration_space.cxx | 14 ++++----- nvbench/benchmark_base.cuh | 4 +-- testing/axes_iteration_space.cu | 46 ++++++++++++++--------------- 8 files changed, 42 insertions(+), 41 deletions(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 71a9dca8..a8b4ac88 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -260,7 +260,8 @@ using output_types = nvbench::type_list; NVBENCH_BENCH_TYPES(benchmark, NVBENCH_TYPE_AXES(input_types, output_types)) .set_type_axes_names({"InputType", "OutputType"}) .add_int64_axis("NumInputs", {1000, 10000, 100000, 200000, 200000, 200000}) - .add_float64_axis("Quality", {0.05, 0.1, 0.25, 0.5, 0.75, 1.}); + .add_float64_axis("Quality", {0.05, 0.1, 0.25, 0.5, 0.75, 1.}) + .zip_axes({"NumInputs", "Quality"}); ``` This tieing reduces the total combinations from 24 to 6, reducing the diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index 92323fd9..c733890b 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -74,7 +74,7 @@ NVBENCH_BENCH(tied_copy_sweep_grid_shape) // Every power of two from 64->1024: .add_int64_axis("BlockSize", {32,64,128,256}) .add_int64_axis("NumBlocks", {1024,512,256,128}) - .tie_axes({"BlockSize", "NumBlocks"}); + .zip_axes({"BlockSize", "NumBlocks"}); //============================================================================== // under_diag: diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index cb14e97e..053ebe6b 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -62,7 +62,7 @@ struct axes_metadata void add_string_axis(std::string name, std::vector data); - void tie_axes(std::vector names); + void zip_axes(std::vector names); void user_iteration_axes(std::vector names, diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index 9dd679d4..c39fed92 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -238,12 +238,12 @@ void reset_iteration_space( } } // namespace -void axes_metadata::tie_axes(std::vector names) +void 
axes_metadata::zip_axes(std::vector names) { NVBENCH_THROW_IF((names.size() < 2), std::runtime_error, "At least two axi names ( {} provided ) need to be provided " - "when using tie_axes.", + "when using zip_axes.", names.size()); // compute the numeric indice for each name we have @@ -269,7 +269,7 @@ void axes_metadata::tie_axes(std::vector names) reset_iteration_space(m_value_space, input_indices); // add the new tied iteration space - auto tied = std::make_unique(std::move(input_indices), + auto tied = std::make_unique(std::move(input_indices), std::move(output_indices)); m_value_space.push_back(std::move(tied)); } diff --git a/nvbench/axis_iteration_space.cuh b/nvbench/axis_iteration_space.cuh index 7c045be0..a17634e5 100644 --- a/nvbench/axis_iteration_space.cuh +++ b/nvbench/axis_iteration_space.cuh @@ -67,11 +67,11 @@ struct linear_axis_space final : axis_space_base std::size_t do_valid_count(const axes_info &info) const override; }; -struct tie_axis_space final : axis_space_base +struct zip_axis_space final : axis_space_base { - tie_axis_space(std::vector input_indices, + zip_axis_space(std::vector input_indices, std::vector output_indices); - ~tie_axis_space(); + ~zip_axis_space(); std::unique_ptr do_clone() const override; detail::axis_space_iterator do_iter(axes_info info) const override; diff --git a/nvbench/axis_iteration_space.cxx b/nvbench/axis_iteration_space.cxx index 2f931398..885a1ea2 100644 --- a/nvbench/axis_iteration_space.cxx +++ b/nvbench/axis_iteration_space.cxx @@ -127,14 +127,14 @@ std::unique_ptr linear_axis_space::do_clone() const return std::make_unique(*this); } -tie_axis_space::tie_axis_space(std::vector input_indices, +zip_axis_space::zip_axis_space(std::vector input_indices, std::vector output_indices) : axis_space_base(std::move(input_indices), std::move(output_indices)) {} -tie_axis_space::~tie_axis_space() = default; +zip_axis_space::~zip_axis_space() = default; -detail::axis_space_iterator tie_axis_space::do_iter(axes_info info) const +detail::axis_space_iterator zip_axis_space::do_iter(axes_info info) const { std::vector locs = m_output_indices; auto update_func = [=](std::size_t inc_index, @@ -150,19 +150,19 @@ detail::axis_space_iterator tie_axis_space::do_iter(axes_info info) const return detail::make_space_iterator(locs.size(), info[0].size, update_func); } -std::size_t tie_axis_space::do_size(const axes_info &info) const +std::size_t zip_axis_space::do_size(const axes_info &info) const { return info[0].size; } -std::size_t tie_axis_space::do_valid_count(const axes_info &info) const +std::size_t zip_axis_space::do_valid_count(const axes_info &info) const { return info[0].active_size; } -std::unique_ptr tie_axis_space::do_clone() const +std::unique_ptr zip_axis_space::do_clone() const { - return std::make_unique(*this); + return std::make_unique(*this); } user_axis_space::user_axis_space(std::vector input_indices, diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index 94908d12..61269e18 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -111,9 +111,9 @@ struct benchmark_base return *this; } - benchmark_base &tie_axes(std::vector names) + benchmark_base &zip_axes(std::vector names) { - m_axes.tie_axes(std::move(names)); + m_axes.zip_axes(std::move(names)); return *this; } diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index eb3862e9..fca5757f 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -77,13 +77,13 @@ void 
template_no_op_generator(nvbench::state &state, NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, template_no_op_callable); -void test_tie_axes() +void test_zip_axes() { using benchmark_type = nvbench::benchmark; benchmark_type bench; bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); bench.add_int64_axis("I64 Axis", {1, 3, 2, 4, 5}); - bench.tie_axes({"F64 Axis", "I64 Axis"}); + bench.zip_axes({"F64 Axis", "I64 Axis"}); ASSERT_MSG(bench.get_config_count() == 5 * bench.get_devices().size(), "Got {}", @@ -97,10 +97,10 @@ void test_tie_invalid_names() bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); bench.add_int64_axis("I64 Axis", {1, 3, 2}); - ASSERT_THROWS_ANY(bench.tie_axes({"F32 Axis", "I64 Axis"})); - ASSERT_THROWS_ANY(bench.tie_axes({"F32 Axis"})); - ASSERT_THROWS_ANY(bench.tie_axes({""})); - ASSERT_THROWS_ANY(bench.tie_axes(std::vector())); + ASSERT_THROWS_ANY(bench.zip_axes({"F32 Axis", "I64 Axis"})); + ASSERT_THROWS_ANY(bench.zip_axes({"F32 Axis"})); + ASSERT_THROWS_ANY(bench.zip_axes({""})); + ASSERT_THROWS_ANY(bench.zip_axes(std::vector())); } void test_tie_unequal_length() @@ -110,8 +110,8 @@ void test_tie_unequal_length() bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); bench.add_int64_axis("I64 Axis", {1, 3, 2}); - bench.tie_axes({"I64 Axis", "F64 Axis"}); - ASSERT_THROWS_ANY(bench.tie_axes({"F64 Axis", "I64 Axis"})); + bench.zip_axes({"I64 Axis", "F64 Axis"}); + ASSERT_THROWS_ANY(bench.zip_axes({"F64 Axis", "I64 Axis"})); } void test_tie_type_axi() @@ -126,10 +126,10 @@ void test_tie_type_axi() bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); bench.add_int64_axis("I64 Axis", {1, 3, 2}); - ASSERT_THROWS_ANY(bench.tie_axes({"F64 Axis", "Float"})); + ASSERT_THROWS_ANY(bench.zip_axes({"F64 Axis", "Float"})); } -void test_retie_axes() +void test_rezip_axes() { using benchmark_type = nvbench::benchmark; benchmark_type bench; @@ -142,20 +142,20 @@ void test_retie_axes() .1, }); - bench.tie_axes({"FAxis_5", "IAxis_A"}); - bench.tie_axes({"IAxis_B", "FAxis_5", "IAxis_A"}); // re-tie + bench.zip_axes({"FAxis_5", "IAxis_A"}); + bench.zip_axes({"IAxis_B", "FAxis_5", "IAxis_A"}); // re-tie ASSERT_MSG(bench.get_config_count() == 10 * bench.get_devices().size(), "Got {}", bench.get_config_count()); - bench.tie_axes({"FAxis_5", "IAxis_A"}); + bench.zip_axes({"FAxis_5", "IAxis_A"}); ASSERT_MSG(bench.get_config_count() == 50 * bench.get_devices().size(), "Got {}", bench.get_config_count()); } -void test_retie_axes2() +void test_rezip_axes2() { using benchmark_type = nvbench::benchmark; benchmark_type bench; @@ -170,17 +170,17 @@ void test_retie_axes2() .1, }); - bench.tie_axes({"IAxis_A", "IAxis_B", "IAxis_C"}); - bench.tie_axes({"FAxis_1", "FAxis_2"}); - bench.tie_axes( + bench.zip_axes({"IAxis_A", "IAxis_B", "IAxis_C"}); + bench.zip_axes({"FAxis_1", "FAxis_2"}); + bench.zip_axes( {"IAxis_A", "IAxis_B", "IAxis_C", "FAxis_1", "FAxis_2"}); // re-tie ASSERT_MSG(bench.get_config_count() == 10 * bench.get_devices().size(), "Got {}", bench.get_config_count()); - bench.tie_axes({"IAxis_A", "IAxis_B", "IAxis_C"}); - bench.tie_axes({"FAxis_1", "FAxis_2"}); + bench.zip_axes({"IAxis_A", "IAxis_B", "IAxis_C"}); + bench.zip_axes({"FAxis_1", "FAxis_2"}); ASSERT_MSG(bench.get_config_count() == 50 * bench.get_devices().size(), "Got {}", bench.get_config_count()); @@ -195,7 +195,7 @@ void test_tie_clone() bench.add_int64_power_of_two_axis("I64 POT Axis", {10, 20}); bench.add_int64_axis("I64 Axis", {10, 20}); bench.add_float64_axis("F64 Axis", {0., .1, .25}); - 
bench.tie_axes({"F64 Axis", "Strings"}); + bench.zip_axes({"F64 Axis", "Strings"}); const auto expected_count = bench.get_config_count(); @@ -316,11 +316,11 @@ void test_user_axes() int main() { - test_tie_axes(); + test_zip_axes(); test_tie_invalid_names(); test_tie_unequal_length(); test_tie_type_axi(); - test_retie_axes(); - test_retie_axes2(); + test_rezip_axes(); + test_rezip_axes2(); test_tie_clone(); } From c3c86e1f2684f6560e48cda52ddfbe7de1fa54b0 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 28 Feb 2022 16:39:38 -0500 Subject: [PATCH 03/41] implement easier API to add axis and zip/user iteration at the same time --- examples/custom_iteration_spaces.cu | 48 +++++++++++++---------------- nvbench/axes_metadata.cuh | 18 +++++++++++ nvbench/axes_metadata.cxx | 26 +++++----------- nvbench/benchmark_base.cuh | 14 +++++++++ nvbench/float64_axis.cuh | 5 +++ nvbench/int64_axis.cuh | 4 +++ nvbench/int64_axis.cxx | 47 +++++++++++++++++++++------- nvbench/string_axis.cuh | 5 +++ testing/axes_iteration_space.cu | 21 ++++++------- 9 files changed, 122 insertions(+), 66 deletions(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index c733890b..ced264d1 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -72,9 +72,8 @@ void tied_copy_sweep_grid_shape(nvbench::state &state) } NVBENCH_BENCH(tied_copy_sweep_grid_shape) // Every power of two from 64->1024: - .add_int64_axis("BlockSize", {32,64,128,256}) - .add_int64_axis("NumBlocks", {1024,512,256,128}) - .zip_axes({"BlockSize", "NumBlocks"}); + .add_zip_axes(nvbench::int64_axis{"BlockSize", {32, 64, 128, 256}}, + nvbench::int64_axis{"NumBlocks", {1024, 512, 256, 128}}); //============================================================================== // under_diag: @@ -89,7 +88,8 @@ struct under_diag final : nvbench::user_axis_space { under_diag(std::vector input_indices, std::vector output_indices) - : nvbench::user_axis_space(std::move(input_indices), std::move(output_indices)) + : nvbench::user_axis_space(std::move(input_indices), + std::move(output_indices)) {} mutable std::size_t x_pos = 0; @@ -154,15 +154,12 @@ void user_copy_sweep_grid_shape(nvbench::state &state) copy_sweep_grid_shape(state); } NVBENCH_BENCH(user_copy_sweep_grid_shape) - // Every power of two from 64->1024: - .add_int64_power_of_two_axis("BlockSize", nvbench::range(6, 10)) - .add_int64_power_of_two_axis("NumBlocks", nvbench::range(6, 10)) - .user_iteration_axes({"NumBlocks", "BlockSize"}, - [](auto... args) - -> std::unique_ptr { - return std::make_unique(args...); - }); - + .add_user_iteration_axes( + [](auto... 
args) -> std::unique_ptr { + return std::make_unique(args...); + }, + nvbench::int64_axis("BlockSize", {64, 128, 256, 512, 1024}), + nvbench::int64_axis("NumBlocks", {1024, 521, 256, 128, 64})); //============================================================================== // gauss: @@ -174,7 +171,8 @@ struct gauss final : nvbench::user_axis_space gauss(std::vector input_indices, std::vector output_indices) - : nvbench::user_axis_space(std::move(input_indices), std::move(output_indices)) + : nvbench::user_axis_space(std::move(input_indices), + std::move(output_indices)) {} nvbench::detail::axis_space_iterator do_iter(axes_info info) const @@ -233,15 +231,13 @@ void dual_float64_axis(nvbench::state &state) }); } NVBENCH_BENCH(dual_float64_axis) - .add_float64_axis("Duration_A", nvbench::range(0., 1e-4, 1e-5)) - .add_float64_axis("Duration_B", nvbench::range(0., 1e-4, 1e-5)) - .user_iteration_axes({"Duration_A"}, - [](auto... args) - -> std::unique_ptr { - return std::make_unique(args...); - }) - .user_iteration_axes({"Duration_B"}, - [](auto... args) - -> std::unique_ptr { - return std::make_unique(args...); - }); + .add_user_iteration_axes( + [](auto... args) -> std::unique_ptr { + return std::make_unique(args...); + }, + nvbench::float64_axis("Duration_A", nvbench::range(0., 1e-4, 1e-5))) + .add_user_iteration_axes( + [](auto... args) -> std::unique_ptr { + return std::make_unique(args...); + }, + nvbench::float64_axis("Duration_B", nvbench::range(0., 1e-4, 1e-5))); diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index 053ebe6b..ff5adaca 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -62,6 +62,24 @@ struct axes_metadata void add_string_axis(std::string name, std::vector data); + void add_axis(const axis_base& axis); + + template + void add_zip_axes(Args &&...args) + { + (this->add_axis(std::forward(args)), ...); + this->zip_axes({args.get_name()...}); + } + + template + void add_user_iteration_axes( + std::function make, + Args &&...args) + { + (this->add_axis(std::forward(args)), ...); + this->user_iteration_axes({args.get_name()...}, std::move(make)); + } + void zip_axes(std::vector names); void diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index c39fed92..7e758bd5 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -117,38 +117,28 @@ catch (std::exception &e) void axes_metadata::add_float64_axis(std::string name, std::vector data) { - m_value_space.push_back( - std::make_unique(m_axes.size(), - m_axes.size() - m_type_axe_count)); - - auto axis = std::make_unique(std::move(name)); - axis->set_inputs(std::move(data)); - m_axes.push_back(std::move(axis)); + this->add_axis(nvbench::float64_axis{name,data}); } void axes_metadata::add_int64_axis(std::string name, std::vector data, nvbench::int64_axis_flags flags) { - m_value_space.push_back( - std::make_unique(m_axes.size(), - m_axes.size() - m_type_axe_count)); - - auto axis = std::make_unique(std::move(name)); - axis->set_inputs(std::move(data), flags); - m_axes.push_back(std::move(axis)); + this->add_axis(nvbench::int64_axis{name,data,flags}); } void axes_metadata::add_string_axis(std::string name, std::vector data) +{ + this->add_axis(nvbench::string_axis{name,data}); +} + +void axes_metadata::add_axis(const axis_base& axis) { m_value_space.push_back( std::make_unique(m_axes.size(), m_axes.size() - m_type_axe_count)); - - auto axis = std::make_unique(std::move(name)); - axis->set_inputs(std::move(data)); - m_axes.push_back(std::move(axis)); + 
m_axes.push_back(axis.clone()); } namespace diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index 61269e18..d1c7a3fa 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -111,12 +111,26 @@ struct benchmark_base return *this; } + template + benchmark_base &add_zip_axes(Args&&... args) + { + m_axes.add_zip_axes(std::forward(args)...); + return *this; + } + benchmark_base &zip_axes(std::vector names) { m_axes.zip_axes(std::move(names)); return *this; } + template + benchmark_base &add_user_iteration_axes(Args&&... args) + { + m_axes.add_user_iteration_axes(std::forward(args)...); + return *this; + } + benchmark_base & user_iteration_axes(std::vector names, std::function make) diff --git a/nvbench/float64_axis.cuh b/nvbench/float64_axis.cuh index 0d606512..b9bcdc8d 100644 --- a/nvbench/float64_axis.cuh +++ b/nvbench/float64_axis.cuh @@ -34,6 +34,11 @@ struct float64_axis final : public axis_base , m_values{} {} + explicit float64_axis(std::string name, std::vector inputs) + : axis_base{std::move(name), axis_type::float64} + , m_values{std::move(inputs)} + {} + ~float64_axis() final; void set_inputs(std::vector inputs) diff --git a/nvbench/int64_axis.cuh b/nvbench/int64_axis.cuh index a6cec2e3..08d66867 100644 --- a/nvbench/int64_axis.cuh +++ b/nvbench/int64_axis.cuh @@ -51,6 +51,10 @@ struct int64_axis final : public axis_base , m_flags{int64_axis_flags::none} {} + explicit int64_axis(std::string name, + std::vector inputs, + int64_axis_flags flags = int64_axis_flags::none); + ~int64_axis() final; [[nodiscard]] bool is_power_of_two() const diff --git a/nvbench/int64_axis.cxx b/nvbench/int64_axis.cxx index 24ff913d..271f93ce 100644 --- a/nvbench/int64_axis.cxx +++ b/nvbench/int64_axis.cxx @@ -26,23 +26,24 @@ #include #include -namespace nvbench +namespace { -int64_axis::~int64_axis() = default; - -void int64_axis::set_inputs(std::vector inputs, int64_axis_flags flags) +std::vector +construct_values(nvbench::int64_axis_flags flags, + const std::vector &inputs) { - m_inputs = std::move(inputs); - m_flags = flags; - if (!this->is_power_of_two()) + std::vector values; + const bool is_power_of_two = + static_cast(flags & nvbench::int64_axis_flags::power_of_two); + if (!is_power_of_two) { - m_values = m_inputs; + values = inputs; } else { - m_values.resize(m_inputs.size()); + values.resize(inputs.size()); auto conv = [](int64_t in) -> int64_t { if (in < 0 || in >= 64) @@ -52,11 +53,35 @@ void int64_axis::set_inputs(std::vector inputs, int64_axis_flags flags) "Input={} ValidRange=[0, 63]", in); } - return int64_axis::compute_pow2(in); + return nvbench::int64_axis::compute_pow2(in); }; - std::transform(m_inputs.cbegin(), m_inputs.cend(), m_values.begin(), conv); + std::transform(inputs.cbegin(), inputs.cend(), values.begin(), conv); } + + return values; +} +} // namespace + +namespace nvbench +{ + +int64_axis::int64_axis(std::string name, + std::vector inputs, + int64_axis_flags flags) + : axis_base{std::move(name), axis_type::int64} + , m_inputs{std::move(inputs)} + , m_values{construct_values(flags, m_inputs)} + , m_flags{flags} +{} + +int64_axis::~int64_axis() = default; + +void int64_axis::set_inputs(std::vector inputs, int64_axis_flags flags) +{ + m_inputs = std::move(inputs); + m_flags = flags; + m_values = construct_values(flags, m_inputs); } std::string int64_axis::do_get_input_string(std::size_t i) const diff --git a/nvbench/string_axis.cuh b/nvbench/string_axis.cuh index 2f526e7a..d2a3bde1 100644 --- a/nvbench/string_axis.cuh +++ 
b/nvbench/string_axis.cuh @@ -34,6 +34,11 @@ struct string_axis final : public axis_base , m_values{} {} + explicit string_axis(std::string name, std::vector inputs) + : axis_base{std::move(name), axis_type::string} + , m_values{std::move(inputs)} + {} + ~string_axis() final; void set_inputs(std::vector inputs) diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index fca5757f..4e9ec93f 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -81,9 +81,8 @@ void test_zip_axes() { using benchmark_type = nvbench::benchmark; benchmark_type bench; - bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); - bench.add_int64_axis("I64 Axis", {1, 3, 2, 4, 5}); - bench.zip_axes({"F64 Axis", "I64 Axis"}); + bench.add_zip_axes(nvbench::float64_axis("F64 Axis", {0., .1, .25, .5, 1.}), + nvbench::int64_axis("I64 Axis", {1, 3, 2, 4, 5})); ASSERT_MSG(bench.get_config_count() == 5 * bench.get_devices().size(), "Got {}", @@ -107,11 +106,10 @@ void test_tie_unequal_length() { using benchmark_type = nvbench::benchmark; benchmark_type bench; - bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); - bench.add_int64_axis("I64 Axis", {1, 3, 2}); - bench.zip_axes({"I64 Axis", "F64 Axis"}); - ASSERT_THROWS_ANY(bench.zip_axes({"F64 Axis", "I64 Axis"})); + ASSERT_THROWS_ANY( + bench.add_zip_axes(nvbench::float64_axis("F64 Axis", {0., .1, .25, .5, 1.}), + nvbench::int64_axis("I64 Axis", {1, 3, 2}))); } void test_tie_type_axi() @@ -191,11 +189,11 @@ void test_tie_clone() using benchmark_type = nvbench::benchmark; benchmark_type bench; bench.set_devices(std::vector{}); - bench.add_string_axis("Strings", {"string a", "string b", "string c"}); bench.add_int64_power_of_two_axis("I64 POT Axis", {10, 20}); bench.add_int64_axis("I64 Axis", {10, 20}); - bench.add_float64_axis("F64 Axis", {0., .1, .25}); - bench.zip_axes({"F64 Axis", "Strings"}); + bench.add_zip_axes(nvbench::string_axis("Strings", + {"string a", "string b", "string c"}), + nvbench::float64_axis("F64 Axis", {0., .1, .25})); const auto expected_count = bench.get_config_count(); @@ -237,7 +235,8 @@ struct under_diag final : nvbench::user_axis_space { under_diag(std::vector input_indices, std::vector output_indices) - : nvbench::user_axis_space(std::move(input_indices), std::move(output_indices)) + : nvbench::user_axis_space(std::move(input_indices), + std::move(output_indices)) {} mutable std::size_t x_pos = 0; From 91c8f43d152e9a12ded97b117b71a61662545bea Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 1 Mar 2022 09:11:40 -0500 Subject: [PATCH 04/41] Show zip versus linear iteration --- examples/custom_iteration_spaces.cu | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index ced264d1..9968476a 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -60,16 +60,27 @@ void copy_sweep_grid_shape(nvbench::state &state) num_values); }); } - -//============================================================================== -// Tied iteration space allows you to iterate two or more axes at the same -// time allowing for sparse exploration of the search space. 
Can also be used -// to test the diagonal of a square matrix -// +void naive_copy_sweep_grid_shape(nvbench::state &state) +{ + copy_sweep_grid_shape(state); +} void tied_copy_sweep_grid_shape(nvbench::state &state) { copy_sweep_grid_shape(state); } + +//============================================================================== +// Naive iteration of both the BlockSize and NumBlocks axis. +// Will generate the full cross product of the two axis for a total of +// 16 invocations of copy_sweep_grid_shape. +NVBENCH_BENCH(naive_copy_sweep_grid_shape) + // Full combinatorial of Every power of two from 64->1024: + .add_int64_axis("BlockSize", {32, 64, 128, 256}) + .add_int64_axis("NumBlocks", {1024, 512, 256, 128}); + +//============================================================================== +// Zipped iteration of BlockSize and Numblocks axes. +// Will generate only 4 invocations of copy_sweep_grid_shape NVBENCH_BENCH(tied_copy_sweep_grid_shape) // Every power of two from 64->1024: .add_zip_axes(nvbench::int64_axis{"BlockSize", {32, 64, 128, 256}}, From f4570d43cffa2bf118fee10b4a6790dc42190a71 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 09:46:55 -0400 Subject: [PATCH 05/41] Update docs/benchmarks.md Co-authored-by: Allison Vacanti --- docs/benchmarks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index a8b4ac88..3519bf4f 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -246,7 +246,7 @@ explosion for more examples and information. ## Zipped/Tied Iteration of Value Axes At times multiple value axes need to be iterated like they are actually a tuple -or zipped together. To enable this behavior you can request axes to be 'tied' +or zipped together. To enable this behavior you can request axes to be 'zipped' together. ```cpp From f791475941a653c1071048e4830f761536af0775 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 09:47:05 -0400 Subject: [PATCH 06/41] Update docs/benchmarks.md Co-authored-by: Allison Vacanti --- docs/benchmarks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 3519bf4f..c23597f0 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -264,7 +264,7 @@ NVBENCH_BENCH_TYPES(benchmark, NVBENCH_TYPE_AXES(input_types, output_types)) .zip_axes({"NumInputs", "Quality"}); ``` -This tieing reduces the total combinations from 24 to 6, reducing the +Zipping these two axes reduces the total combinations from 216 to 36, reducing the combinatorial explosion. 
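+
+The same zip can also be declared while adding the axes. A minimal sketch,
+assuming the `add_zip_axes` helper, which adds the axes and zips them in a
+single call:
+
+```cpp
+NVBENCH_BENCH_TYPES(benchmark, NVBENCH_TYPE_AXES(input_types, output_types))
+  .set_type_axes_names({"InputType", "OutputType"})
+  // Both axes are added and zipped together in one step:
+  .add_zip_axes(
+    nvbench::int64_axis("NumInputs",
+                        {1000, 10000, 100000, 200000, 200000, 200000}),
+    nvbench::float64_axis("Quality", {0.05, 0.1, 0.25, 0.5, 0.75, 1.}));
+```
+
+Either form yields the same 36 configurations: 6 zipped value pairs times the
+6 type combinations.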
# Throughput Measurements From f50a6ddefb39654737d2bf3f3e94a95c44d6f4e2 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 09:47:17 -0400 Subject: [PATCH 07/41] Update nvbench/axis_iteration_space.cxx Co-authored-by: Allison Vacanti --- nvbench/axis_iteration_space.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nvbench/axis_iteration_space.cxx b/nvbench/axis_iteration_space.cxx index 885a1ea2..d15b0f8e 100644 --- a/nvbench/axis_iteration_space.cxx +++ b/nvbench/axis_iteration_space.cxx @@ -1,5 +1,5 @@ /* - * Copyright 2021 NVIDIA Corporation + * Copyright 2022 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 with the LLVM exception * (the "License"); you may not use this file except in compliance with From 796f7f7b990db80f433ebea0d8c80ce476dbec1d Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 09:47:22 -0400 Subject: [PATCH 08/41] Update nvbench/axis_iteration_space.cuh Co-authored-by: Allison Vacanti --- nvbench/axis_iteration_space.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nvbench/axis_iteration_space.cuh b/nvbench/axis_iteration_space.cuh index a17634e5..9fd9a99a 100644 --- a/nvbench/axis_iteration_space.cuh +++ b/nvbench/axis_iteration_space.cuh @@ -1,5 +1,5 @@ /* - * Copyright 2021 NVIDIA Corporation + * Copyright 2022 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 with the LLVM exception * (the "License"); you may not use this file except in compliance with From edba47795c38ddd779d02dddb41c9e25e2942f59 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 09:47:30 -0400 Subject: [PATCH 09/41] Update nvbench/axis_iteration_space.cxx Co-authored-by: Allison Vacanti --- nvbench/axis_iteration_space.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nvbench/axis_iteration_space.cxx b/nvbench/axis_iteration_space.cxx index d15b0f8e..b7aee8ef 100644 --- a/nvbench/axis_iteration_space.cxx +++ b/nvbench/axis_iteration_space.cxx @@ -95,7 +95,7 @@ bool axis_space_base::contains(std::size_t in_index) const linear_axis_space::linear_axis_space(std::size_t in_index, std::size_t out_index) - : axis_space_base({std::move(in_index)}, {out_index}) + : axis_space_base({in_index}, {out_index}) {} linear_axis_space::~linear_axis_space() = default; From 9337ba9af1494f871d0fbd5d32420f86c5e57d4d Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 09:47:48 -0400 Subject: [PATCH 10/41] Update examples/custom_iteration_spaces.cu Co-authored-by: Allison Vacanti --- examples/custom_iteration_spaces.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index 9968476a..47a9dd93 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -70,7 +70,7 @@ void tied_copy_sweep_grid_shape(nvbench::state &state) } //============================================================================== -// Naive iteration of both the BlockSize and NumBlocks axis. +// Naive iteration of both the BlockSize and NumBlocks axes. // Will generate the full cross product of the two axis for a total of // 16 invocations of copy_sweep_grid_shape. 
NVBENCH_BENCH(naive_copy_sweep_grid_shape) From a02d6485cbf38568a361141fd619183cfe91831b Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 09:48:10 -0400 Subject: [PATCH 11/41] Update examples/custom_iteration_spaces.cu Co-authored-by: Allison Vacanti --- examples/custom_iteration_spaces.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index 47a9dd93..e10c716c 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -71,7 +71,7 @@ void tied_copy_sweep_grid_shape(nvbench::state &state) //============================================================================== // Naive iteration of both the BlockSize and NumBlocks axes. -// Will generate the full cross product of the two axis for a total of +// Will generate the full cartesian product of the two axes for a total of // 16 invocations of copy_sweep_grid_shape. NVBENCH_BENCH(naive_copy_sweep_grid_shape) // Full combinatorial of Every power of two from 64->1024: From e80392e104eade6e105857d55aef6979aa05063f Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 09:48:28 -0400 Subject: [PATCH 12/41] Update examples/custom_iteration_spaces.cu Co-authored-by: Allison Vacanti --- examples/custom_iteration_spaces.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index e10c716c..704d3c5d 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -79,7 +79,7 @@ NVBENCH_BENCH(naive_copy_sweep_grid_shape) .add_int64_axis("NumBlocks", {1024, 512, 256, 128}); //============================================================================== -// Zipped iteration of BlockSize and Numblocks axes. +// Zipped iteration of BlockSize and NumBlocks axes. 
// Will generate only 4 invocations of copy_sweep_grid_shape NVBENCH_BENCH(tied_copy_sweep_grid_shape) // Every power of two from 64->1024: From 4c964d2923e51aca044ae2ace54f66a5fef05fc7 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 09:48:44 -0400 Subject: [PATCH 13/41] Update examples/custom_iteration_spaces.cu Co-authored-by: Allison Vacanti --- examples/custom_iteration_spaces.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index 704d3c5d..b632b2b5 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -88,7 +88,7 @@ NVBENCH_BENCH(tied_copy_sweep_grid_shape) //============================================================================== // under_diag: -// Custom iterator that only searches the `X` locations of two axi +// Custom iterator that only searches the `X` locations of two axes: // [- - - - X] // [- - - X X] // [- - X X X] From 26467f385504c84bbe8b10131fec5b9a0de812f3 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 10:54:36 -0400 Subject: [PATCH 14/41] More cleanup --- nvbench/axes_metadata.cuh | 6 ++-- nvbench/axes_metadata.cxx | 4 +-- nvbench/benchmark_base.cuh | 47 +++++++++++++++++--------- testing/axes_iteration_space.cu | 58 ++++++++++++++++++++++++++------- 4 files changed, 84 insertions(+), 31 deletions(-) diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index ff5adaca..36dc5943 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -77,14 +77,14 @@ struct axes_metadata Args &&...args) { (this->add_axis(std::forward(args)), ...); - this->user_iteration_axes({args.get_name()...}, std::move(make)); + this->user_iteration_axes(std::move(make), {args.get_name()...}); } void zip_axes(std::vector names); void - user_iteration_axes(std::vector names, - std::function make); + user_iteration_axes(std::function make, + std::vector names); [[nodiscard]] const axes_iteration_space &get_type_iteration_space() const { diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index 7e758bd5..5a803aaf 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -265,8 +265,8 @@ void axes_metadata::zip_axes(std::vector names) } void axes_metadata::user_iteration_axes( - std::vector names, - std::function make) + std::function make, + std::vector names) { // compute the numeric indice for each name we have auto [input_indices, diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index d1c7a3fa..dc72ad82 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -118,12 +118,6 @@ struct benchmark_base return *this; } - benchmark_base &zip_axes(std::vector names) - { - m_axes.zip_axes(std::move(names)); - return *this; - } - template benchmark_base &add_user_iteration_axes(Args&&... args) { @@ -131,15 +125,6 @@ struct benchmark_base return *this; } - benchmark_base & - user_iteration_axes(std::vector names, - std::function make) - { - m_axes.user_iteration_axes(std::move(names), std::move(make)); - return *this; - } - - benchmark_base &set_devices(std::vector device_ids); benchmark_base &set_devices(std::vector devices) @@ -272,6 +257,38 @@ struct benchmark_base /// @} protected: + + /// Move existing Axis to being part of zip axis iteration space. 
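+  /// The named axes are then iterated in lockstep: two zipped axes of
+  /// length 5 produce 5 configurations instead of the full 25.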
+ /// This will remove any existing iteration spaces that the named axis + /// are part of, while restoring all other axis in those spaces to + /// the default linear space + /// + /// This is meant to be used only by the option_parser + /// @{ + benchmark_base &zip_axes(std::vector names) + { + m_axes.zip_axes(std::move(names)); + return *this; + } + /// @} + + + /// Move existing Axis to being part of user axis iteration space. + /// This will remove any existing iteration spaces that the named axis + /// are part of, while restoring all other axis in those spaces to + /// the default linear space + /// + /// This is meant to be used only by the option_parser + /// @{ + benchmark_base & + user_iteration_axes(std::function make, + std::vector names) + { + m_axes.user_iteration_axes(std::move(make), std::move(names)); + return *this; + } + /// @} + friend struct nvbench::runner_base; template diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index 4e9ec93f..85681248 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -60,6 +60,42 @@ void no_op_generator(nvbench::state &state) } NVBENCH_DEFINE_CALLABLE(no_op_generator, no_op_callable); +template > +struct rezippable_benchmark final : public nvbench::benchmark_base +{ + using kernel_generator = KernelGenerator; + using type_axes = TypeAxes; + using type_configs = nvbench::tl::cartesian_product; + + static constexpr std::size_t num_type_configs = + nvbench::tl::size{}; + + rezippable_benchmark() + : benchmark_base(type_axes{}) + {} + + using nvbench::benchmark_base::zip_axes; + using nvbench::benchmark_base::user_iteration_axes; + +private: + std::unique_ptr do_clone() const final + { + return std::make_unique(); + } + + void do_set_type_axes_names(std::vector names) final + { + m_axes.set_type_axes_names(std::move(names)); + } + + void do_run() final + { + nvbench::runner runner{*this}; + runner.generate_states(); + runner.run(); + } +}; + template void template_no_op_generator(nvbench::state &state, nvbench::type_list) @@ -91,7 +127,7 @@ void test_zip_axes() void test_tie_invalid_names() { - using benchmark_type = nvbench::benchmark; + using benchmark_type = rezippable_benchmark; benchmark_type bench; bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); bench.add_int64_axis("I64 Axis", {1, 3, 2}); @@ -114,11 +150,11 @@ void test_tie_unequal_length() void test_tie_type_axi() { - using benchmark_type = - nvbench::benchmark, - nvbench::type_list, - nvbench::type_list>>; + using benchmark_type = rezippable_benchmark< + template_no_op_callable, + nvbench::type_list, + nvbench::type_list, + nvbench::type_list>>; benchmark_type bench; bench.set_type_axes_names({"Integer", "Float", "Other"}); bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); @@ -129,7 +165,7 @@ void test_tie_type_axi() void test_rezip_axes() { - using benchmark_type = nvbench::benchmark; + using benchmark_type = rezippable_benchmark; benchmark_type bench; bench.add_int64_axis("IAxis_A", {1, 3, 2, 4, 5}); bench.add_int64_axis("IAxis_B", {1, 3, 2, 4, 5}); @@ -155,7 +191,7 @@ void test_rezip_axes() void test_rezip_axes2() { - using benchmark_type = nvbench::benchmark; + using benchmark_type = rezippable_benchmark; benchmark_type bench; bench.add_int64_axis("IAxis_A", {1, 3, 2, 4, 5}); bench.add_int64_axis("IAxis_B", {1, 3, 2, 4, 5}); @@ -298,15 +334,15 @@ struct under_diag final : nvbench::user_axis_space void test_user_axes() { - using benchmark_type = nvbench::benchmark; + using benchmark_type = 
rezippable_benchmark; benchmark_type bench; bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); bench.add_int64_axis("I64 Axis", {1, 3, 2, 4, 5}); bench.user_iteration_axes( - {"F64 Axis", "I64 Axis"}, [](auto... args) -> std::unique_ptr { return std::make_unique(args...); - }); + }, + {"F64 Axis", "I64 Axis"}); ASSERT_MSG(bench.get_config_count() == 15 * bench.get_devices().size(), "Got {}", From 5b000e8988a407f6ff1af8cf2cac73f6799f1808 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 10:54:36 -0400 Subject: [PATCH 15/41] More cleanup --- docs/benchmarks.md | 8 +++++--- examples/custom_iteration_spaces.cu | 23 ++++++----------------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index c23597f0..19b98eb7 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -243,7 +243,7 @@ Keep the rapid growth of combinations due to multiple parameter axes in mind whe choosing the number of values in an axis. See the section about combinatorial explosion for more examples and information. -## Zipped/Tied Iteration of Value Axes +## Zipped Iteration of Value Axes At times multiple value axes need to be iterated like they are actually a tuple or zipped together. To enable this behavior you can request axes to be 'zipped' @@ -252,8 +252,8 @@ together. ```cpp // InputTypes: {char, int, unsigned int} // OutputTypes: {float, double} -// NumInputs: {2^10, 2^20, 2^30} -// Quality: {0.5, 1.0} +// NumInputs: {1000, 10000, 100000, 200000, 200000, 200000} +// Quality: {0.05, 0.1, 0.25, 0.5, 0.75, 1.} using input_types = nvbench::type_list; using output_types = nvbench::type_list; @@ -267,6 +267,8 @@ NVBENCH_BENCH_TYPES(benchmark, NVBENCH_TYPE_AXES(input_types, output_types)) Zipping these two axes reduces the total combinations from 216 to 36, reducing the combinatorial explosion. +Note: Only value axes may be zipped together. + # Throughput Measurements In additional to raw timing information, NVBench can track a kernel's diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index b632b2b5..89632609 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -60,29 +60,21 @@ void copy_sweep_grid_shape(nvbench::state &state) num_values); }); } -void naive_copy_sweep_grid_shape(nvbench::state &state) -{ - copy_sweep_grid_shape(state); -} -void tied_copy_sweep_grid_shape(nvbench::state &state) -{ - copy_sweep_grid_shape(state); -} //============================================================================== // Naive iteration of both the BlockSize and NumBlocks axes. // Will generate the full cartesian product of the two axes for a total of // 16 invocations of copy_sweep_grid_shape. -NVBENCH_BENCH(naive_copy_sweep_grid_shape) - // Full combinatorial of Every power of two from 64->1024: +NVBENCH_BENCH(copy_sweep_grid_shape) + .set_name("naive_copy_sweep_grid_shape") .add_int64_axis("BlockSize", {32, 64, 128, 256}) .add_int64_axis("NumBlocks", {1024, 512, 256, 128}); //============================================================================== // Zipped iteration of BlockSize and NumBlocks axes. 
// Will generate only 4 invocations of copy_sweep_grid_shape -NVBENCH_BENCH(tied_copy_sweep_grid_shape) - // Every power of two from 64->1024: +NVBENCH_BENCH(copy_sweep_grid_shape) + .set_name("tied_copy_sweep_grid_shape") .add_zip_axes(nvbench::int64_axis{"BlockSize", {32, 64, 128, 256}}, nvbench::int64_axis{"NumBlocks", {1024, 512, 256, 128}}); @@ -160,11 +152,8 @@ struct under_diag final : nvbench::user_axis_space } }; -void user_copy_sweep_grid_shape(nvbench::state &state) -{ - copy_sweep_grid_shape(state); -} -NVBENCH_BENCH(user_copy_sweep_grid_shape) +NVBENCH_BENCH(copy_sweep_grid_shape) + .set_name("user_copy_sweep_grid_shape") .add_user_iteration_axes( [](auto... args) -> std::unique_ptr { return std::make_unique(args...); From ba8356f8111bcbe748a4bfa34084f9d1dc513c2d Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 11:36:44 -0400 Subject: [PATCH 16/41] Refactor names --- examples/custom_iteration_spaces.cu | 22 ++++++------ nvbench/axes_metadata.cuh | 12 +++---- nvbench/axes_metadata.cxx | 6 ++-- nvbench/axis_iteration_space.cuh | 48 ++++++++++++------------- nvbench/axis_iteration_space.cxx | 54 ++++++++++++++--------------- nvbench/benchmark_base.cxx | 4 +-- nvbench/detail/state_generator.cxx | 4 +-- testing/axes_iteration_space.cu | 10 +++--- testing/state_generator.cu | 10 +++--- 9 files changed, 85 insertions(+), 85 deletions(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index 89632609..26be7610 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -99,7 +99,7 @@ struct under_diag final : nvbench::user_axis_space mutable std::size_t y_pos = 0; mutable std::size_t x_start = 0; - nvbench::detail::axis_space_iterator do_iter(axes_info info) const + nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const { // generate our increment function auto adv_func = [&, info](std::size_t &inc_index, @@ -136,17 +136,17 @@ struct under_diag final : nvbench::user_axis_space diag_under); } - std::size_t do_size(const axes_info &info) const + std::size_t do_get_size(const axes_info &info) const { return ((info[0].size * (info[1].size + 1)) / 2); } - std::size_t do_valid_count(const axes_info &info) const + std::size_t do_get_active_count(const axes_info &info) const { return ((info[0].size * (info[1].size + 1)) / 2); } - std::unique_ptr do_clone() const + std::unique_ptr do_clone() const { return std::make_unique(*this); } @@ -155,7 +155,7 @@ struct under_diag final : nvbench::user_axis_space NVBENCH_BENCH(copy_sweep_grid_shape) .set_name("user_copy_sweep_grid_shape") .add_user_iteration_axes( - [](auto... args) -> std::unique_ptr { + [](auto... 
args) -> std::unique_ptr { return std::make_unique(args...); }, nvbench::int64_axis("BlockSize", {64, 128, 256, 512, 1024}), @@ -175,7 +175,7 @@ struct gauss final : nvbench::user_axis_space std::move(output_indices)) {} - nvbench::detail::axis_space_iterator do_iter(axes_info info) const + nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const { const double mid_point = static_cast((info[0].size / 2)); @@ -206,14 +206,14 @@ struct gauss final : nvbench::user_axis_space gauss_func); } - std::size_t do_size(const axes_info &info) const { return info[0].size; } + std::size_t do_get_size(const axes_info &info) const { return info[0].size; } - std::size_t do_valid_count(const axes_info &info) const + std::size_t do_get_active_count(const axes_info &info) const { return info[0].size; } - std::unique_ptr do_clone() const + std::unique_ptr do_clone() const { return std::make_unique(*this); } @@ -232,12 +232,12 @@ void dual_float64_axis(nvbench::state &state) } NVBENCH_BENCH(dual_float64_axis) .add_user_iteration_axes( - [](auto... args) -> std::unique_ptr { + [](auto... args) -> std::unique_ptr { return std::make_unique(args...); }, nvbench::float64_axis("Duration_A", nvbench::range(0., 1e-4, 1e-5))) .add_user_iteration_axes( - [](auto... args) -> std::unique_ptr { + [](auto... args) -> std::unique_ptr { return std::make_unique(args...); }, nvbench::float64_axis("Duration_B", nvbench::range(0., 1e-4, 1e-5))); diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index 36dc5943..9454b52b 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -39,8 +39,8 @@ namespace nvbench struct axes_metadata { using axes_type = std::vector>; - using axes_iteration_space = - std::vector>; + using iteration_space_type = + std::vector>; template explicit axes_metadata(nvbench::type_list); @@ -86,11 +86,11 @@ struct axes_metadata user_iteration_axes(std::function make, std::vector names); - [[nodiscard]] const axes_iteration_space &get_type_iteration_space() const + [[nodiscard]] const iteration_space_type &get_type_iteration_space() const { return m_type_space; } - [[nodiscard]] const axes_iteration_space &get_value_iteration_space() const + [[nodiscard]] const iteration_space_type &get_value_iteration_space() const { return m_value_space; } @@ -131,8 +131,8 @@ struct axes_metadata private: axes_type m_axes; std::size_t m_type_axe_count = 0; - axes_iteration_space m_type_space; - axes_iteration_space m_value_space; + iteration_space_type m_type_space; + iteration_space_type m_value_space; }; template diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index 5a803aaf..f78c4789 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -177,12 +177,12 @@ get_axes_indices(std::size_t type_axe_count, } void reset_iteration_space( - nvbench::axes_metadata::axes_iteration_space &all_spaces, + nvbench::axes_metadata::iteration_space_type &all_spaces, const std::vector &indices_to_remove) { // 1. 
Find all spaces indices that - nvbench::axes_metadata::axes_iteration_space reset_space; - nvbench::axes_metadata::axes_iteration_space to_filter; + nvbench::axes_metadata::iteration_space_type reset_space; + nvbench::axes_metadata::iteration_space_type to_filter; for (auto &space : all_spaces) { bool added = false; diff --git a/nvbench/axis_iteration_space.cuh b/nvbench/axis_iteration_space.cuh index 9fd9a99a..913e5df4 100644 --- a/nvbench/axis_iteration_space.cuh +++ b/nvbench/axis_iteration_space.cuh @@ -23,7 +23,7 @@ namespace nvbench { -struct axis_space_base +struct iteration_space_base { using axes_type = std::vector>; using axes_info = std::vector; @@ -32,17 +32,17 @@ struct axis_space_base nvbench::detail::axis_space_iterator::AdvanceSignature; using UpdateSignature = nvbench::detail::axis_space_iterator::UpdateSignature; - axis_space_base(std::vector input_indices, + iteration_space_base(std::vector input_indices, std::vector output_indices); - virtual ~axis_space_base(); + virtual ~iteration_space_base(); - [[nodiscard]] std::unique_ptr clone() const; - [[nodiscard]] std::vector> + [[nodiscard]] std::unique_ptr clone() const; + [[nodiscard]] std::vector> clone_as_linear() const; - [[nodiscard]] detail::axis_space_iterator iter(const axes_type &axes) const; - [[nodiscard]] std::size_t size(const axes_type &axes) const; - [[nodiscard]] std::size_t valid_count(const axes_type &axes) const; + [[nodiscard]] detail::axis_space_iterator get_iterator(const axes_type &axes) const; + [[nodiscard]] std::size_t get_size(const axes_type &axes) const; + [[nodiscard]] std::size_t get_active_count(const axes_type &axes) const; [[nodiscard]] bool contains(std::size_t input_index) const; @@ -50,36 +50,36 @@ protected: std::vector m_input_indices; std::vector m_output_indices; - virtual std::unique_ptr do_clone() const = 0; - virtual detail::axis_space_iterator do_iter(axes_info info) const = 0; - virtual std::size_t do_size(const axes_info &info) const = 0; - virtual std::size_t do_valid_count(const axes_info &info) const = 0; + virtual std::unique_ptr do_clone() const = 0; + virtual detail::axis_space_iterator do_get_iterator(axes_info info) const = 0; + virtual std::size_t do_get_size(const axes_info &info) const = 0; + virtual std::size_t do_get_active_count(const axes_info &info) const = 0; }; -struct linear_axis_space final : axis_space_base +struct linear_axis_space final : iteration_space_base { linear_axis_space(std::size_t in, std::size_t out); ~linear_axis_space(); - std::unique_ptr do_clone() const override; - detail::axis_space_iterator do_iter(axes_info info) const override; - std::size_t do_size(const axes_info &info) const override; - std::size_t do_valid_count(const axes_info &info) const override; + std::unique_ptr do_clone() const override; + detail::axis_space_iterator do_get_iterator(axes_info info) const override; + std::size_t do_get_size(const axes_info &info) const override; + std::size_t do_get_active_count(const axes_info &info) const override; }; -struct zip_axis_space final : axis_space_base +struct zip_axis_space final : iteration_space_base { zip_axis_space(std::vector input_indices, std::vector output_indices); ~zip_axis_space(); - std::unique_ptr do_clone() const override; - detail::axis_space_iterator do_iter(axes_info info) const override; - std::size_t do_size(const axes_info &info) const override; - std::size_t do_valid_count(const axes_info &info) const override; + std::unique_ptr do_clone() const override; + detail::axis_space_iterator 
do_get_iterator(axes_info info) const override; + std::size_t do_get_size(const axes_info &info) const override; + std::size_t do_get_active_count(const axes_info &info) const override; }; -struct user_axis_space : axis_space_base +struct user_axis_space : iteration_space_base { user_axis_space(std::vector input_indices, std::vector output_indices); @@ -87,7 +87,7 @@ struct user_axis_space : axis_space_base }; using make_user_space_signature = - std::unique_ptr(std::vector input_indices, + std::unique_ptr(std::vector input_indices, std::vector output_indices); } // namespace nvbench diff --git a/nvbench/axis_iteration_space.cxx b/nvbench/axis_iteration_space.cxx index b7aee8ef..c8a158a7 100644 --- a/nvbench/axis_iteration_space.cxx +++ b/nvbench/axis_iteration_space.cxx @@ -23,24 +23,24 @@ namespace nvbench { -axis_space_base::axis_space_base(std::vector input_indices, +iteration_space_base::iteration_space_base(std::vector input_indices, std::vector output_indices) : m_input_indices(std::move(input_indices)) , m_output_indices(std::move(output_indices)) {} -axis_space_base::~axis_space_base() = default; +iteration_space_base::~iteration_space_base() = default; -std::unique_ptr axis_space_base::clone() const +std::unique_ptr iteration_space_base::clone() const { auto clone = this->do_clone(); return clone; } -std::vector> -axis_space_base::clone_as_linear() const +std::vector> +iteration_space_base::clone_as_linear() const { - std::vector> clones; + std::vector> clones; clones.reserve(m_input_indices.size()); for (std::size_t i = 0; i < m_input_indices.size(); ++i) @@ -55,11 +55,11 @@ axis_space_base::clone_as_linear() const namespace { -nvbench::axis_space_base::axes_info -get_axes_info(const nvbench::axis_space_base::axes_type &axes, +nvbench::iteration_space_base::axes_info +get_axes_info(const nvbench::iteration_space_base::axes_type &axes, const std::vector &indices) { - nvbench::axis_space_base::axes_info info; + nvbench::iteration_space_base::axes_info info; info.reserve(indices.size()); for (auto &n : indices) { @@ -69,22 +69,22 @@ get_axes_info(const nvbench::axis_space_base::axes_type &axes, } } // namespace -detail::axis_space_iterator axis_space_base::iter(const axes_type &axes) const +detail::axis_space_iterator iteration_space_base::get_iterator(const axes_type &axes) const { - return this->do_iter(get_axes_info(axes, m_input_indices)); + return this->do_get_iterator(get_axes_info(axes, m_input_indices)); } -std::size_t axis_space_base::size(const axes_type &axes) const +std::size_t iteration_space_base::get_size(const axes_type &axes) const { - return this->do_size(get_axes_info(axes, m_input_indices)); + return this->do_get_size(get_axes_info(axes, m_input_indices)); } -std::size_t axis_space_base::valid_count(const axes_type &axes) const +std::size_t iteration_space_base::get_active_count(const axes_type &axes) const { - return this->do_valid_count(get_axes_info(axes, m_input_indices)); + return this->do_get_active_count(get_axes_info(axes, m_input_indices)); } -bool axis_space_base::contains(std::size_t in_index) const +bool iteration_space_base::contains(std::size_t in_index) const { auto iter = std::find_if(m_input_indices.cbegin(), @@ -95,12 +95,12 @@ bool axis_space_base::contains(std::size_t in_index) const linear_axis_space::linear_axis_space(std::size_t in_index, std::size_t out_index) - : axis_space_base({in_index}, {out_index}) + : iteration_space_base({in_index}, {out_index}) {} linear_axis_space::~linear_axis_space() = default; -detail::axis_space_iterator 
linear_axis_space::do_iter(axes_info info) const +detail::axis_space_iterator linear_axis_space::do_get_iterator(axes_info info) const { std::size_t loc(m_output_indices[0]); auto update_func = [=](std::size_t inc_index, @@ -112,29 +112,29 @@ detail::axis_space_iterator linear_axis_space::do_iter(axes_info info) const return detail::make_space_iterator(1, info[0].size, update_func); } -std::size_t linear_axis_space::do_size(const axes_info &info) const +std::size_t linear_axis_space::do_get_size(const axes_info &info) const { return info[0].size; } -std::size_t linear_axis_space::do_valid_count(const axes_info &info) const +std::size_t linear_axis_space::do_get_active_count(const axes_info &info) const { return info[0].active_size; } -std::unique_ptr linear_axis_space::do_clone() const +std::unique_ptr linear_axis_space::do_clone() const { return std::make_unique(*this); } zip_axis_space::zip_axis_space(std::vector input_indices, std::vector output_indices) - : axis_space_base(std::move(input_indices), std::move(output_indices)) + : iteration_space_base(std::move(input_indices), std::move(output_indices)) {} zip_axis_space::~zip_axis_space() = default; -detail::axis_space_iterator zip_axis_space::do_iter(axes_info info) const +detail::axis_space_iterator zip_axis_space::do_get_iterator(axes_info info) const { std::vector locs = m_output_indices; auto update_func = [=](std::size_t inc_index, @@ -150,24 +150,24 @@ detail::axis_space_iterator zip_axis_space::do_iter(axes_info info) const return detail::make_space_iterator(locs.size(), info[0].size, update_func); } -std::size_t zip_axis_space::do_size(const axes_info &info) const +std::size_t zip_axis_space::do_get_size(const axes_info &info) const { return info[0].size; } -std::size_t zip_axis_space::do_valid_count(const axes_info &info) const +std::size_t zip_axis_space::do_get_active_count(const axes_info &info) const { return info[0].active_size; } -std::unique_ptr zip_axis_space::do_clone() const +std::unique_ptr zip_axis_space::do_clone() const { return std::make_unique(*this); } user_axis_space::user_axis_space(std::vector input_indices, std::vector output_indices) - : axis_space_base(std::move(input_indices), std::move(output_indices)) + : iteration_space_base(std::move(input_indices), std::move(output_indices)) {} user_axis_space::~user_axis_space() = default; diff --git a/nvbench/benchmark_base.cxx b/nvbench/benchmark_base.cxx index 296cf6c5..0a736a9d 100644 --- a/nvbench/benchmark_base.cxx +++ b/nvbench/benchmark_base.cxx @@ -70,14 +70,14 @@ std::size_t benchmark_base::get_config_count() const m_axes.get_value_iteration_space().cend(), std::size_t{1}, std::multiplies<>{}, - [&axes](const auto &space) { return space->size(axes); }); + [&axes](const auto &space) { return space->get_size(axes); }); const std::size_t type_count = nvbench::detail::transform_reduce( m_axes.get_type_iteration_space().cbegin(), m_axes.get_type_iteration_space().cend(), std::size_t{1}, std::multiplies<>{}, - [&axes](const auto &space) { return space->valid_count(axes); }); + [&axes](const auto &space) { return space->get_active_count(axes); }); return (value_count * type_count) * std::max(1UL, m_devices.size()); } diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index 68803ba2..404a891d 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -108,12 +108,12 @@ void state_generator::build_axis_configs() std::for_each(type_space.crbegin(), type_space.crend(), [&ti, &axes_vec](const auto 
&space) { - ti.add_iteration_space(space->iter(axes_vec)); + ti.add_iteration_space(space->get_iterator(axes_vec)); }); std::for_each(value_space.begin(), value_space.end(), [&vi, &axes_vec](const auto &space) { - vi.add_iteration_space(space->iter(axes_vec)); + vi.add_iteration_space(space->get_iterator(axes_vec)); }); } diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index 85681248..eb50af77 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -279,7 +279,7 @@ struct under_diag final : nvbench::user_axis_space mutable std::size_t y_pos = 0; mutable std::size_t x_start = 0; - nvbench::detail::axis_space_iterator do_iter(axes_info info) const + nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const { // generate our increment function auto adv_func = [&, info](std::size_t &inc_index, @@ -316,17 +316,17 @@ struct under_diag final : nvbench::user_axis_space diag_under); } - std::size_t do_size(const axes_info &info) const + std::size_t do_get_size(const axes_info &info) const { return ((info[0].size * (info[1].size + 1)) / 2); } - std::size_t do_valid_count(const axes_info &info) const + std::size_t do_get_active_count(const axes_info &info) const { return ((info[0].size * (info[1].size + 1)) / 2); } - std::unique_ptr do_clone() const + std::unique_ptr do_clone() const { return std::make_unique(*this); } @@ -339,7 +339,7 @@ void test_user_axes() bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); bench.add_int64_axis("I64 Axis", {1, 3, 2, 4, 5}); bench.user_iteration_axes( - [](auto... args) -> std::unique_ptr { + [](auto... args) -> std::unique_ptr { return std::make_unique(args...); }, {"F64 Axis", "I64 Axis"}); diff --git a/testing/state_generator.cu b/testing/state_generator.cu index 16e442fe..26dc0e38 100644 --- a/testing/state_generator.cu +++ b/testing/state_generator.cu @@ -62,7 +62,7 @@ void test_single_state() std::vector> axes; axes.push_back(std::make_unique(si)); - sg.add_iteration_space(nvbench::linear_axis_space{0, 0}.iter(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{0, 0}.get_iterator(axes)); ASSERT(sg.get_number_of_states() == 1); sg.init(); ASSERT(sg.iter_valid()); @@ -96,10 +96,10 @@ void test_basic() axes.emplace_back(std::make_unique(si3)); axes.emplace_back(std::make_unique(si4)); - sg.add_iteration_space(nvbench::linear_axis_space{0, 0}.iter(axes)); - sg.add_iteration_space(nvbench::linear_axis_space{1, 1}.iter(axes)); - sg.add_iteration_space(nvbench::linear_axis_space{2, 2}.iter(axes)); - sg.add_iteration_space(nvbench::linear_axis_space{3, 3}.iter(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{0, 0}.get_iterator(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{1, 1}.get_iterator(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{2, 2}.get_iterator(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{3, 3}.get_iterator(axes)); ASSERT_MSG(sg.get_number_of_states() == (2 * 3 * 3 * 2), "Actual: {} Expected: {}", From 40a6711272a36ee5eaa3d5748e7972039acc3f06 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 13:23:48 -0400 Subject: [PATCH 17/41] Document benchmark iteration space methods --- nvbench/benchmark_base.cuh | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index dc72ad82..a8c03840 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -111,19 +111,42 @@ struct 
benchmark_base return *this; } - template - benchmark_base &add_zip_axes(Args&&... args) + /// Construct a zip iteration space from the provided value axes. + /// + /// When axes are zipped together they are iterated like a tuple + /// of values instead of separate parameters. For example two + /// value axes of 5 entries will generate 25 combinations, but + /// when zipped will generate 5 combinations. + /// + /// @param[axes] a set of axis_base to be added to the benchmark + /// and zipped together + /// + template + benchmark_base &add_zip_axes(Axes&&... axes) { - m_axes.add_zip_axes(std::forward(args)...); + m_axes.add_zip_axes(std::forward(axes)...); return *this; } + /// @} - template - benchmark_base &add_user_iteration_axes(Args&&... args) + /// Construct a user iteration space from the provided value axes. + /// + /// Instead of using the standard iteration over each axes, they + /// are iterated using the custom user iterator that was provided. + /// This allows for fancy iteration such as using every other + /// value, random sampling, etc. + /// + /// @param[args] First argument is a `std::function` + /// which constructs the user iteration space, and the reseet are axis_base to be + /// added to the benchmark and iterated using the user iteration space + /// + template + benchmark_base &add_user_iteration_axes(ConstructorAndAxes&&... args) { - m_axes.add_user_iteration_axes(std::forward(args)...); + m_axes.add_user_iteration_axes(std::forward(args)...); return *this; } + /// @} benchmark_base &set_devices(std::vector device_ids); From e7b4800b63a2ca35e635fb1baed9fda621349f12 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 12 Apr 2022 13:39:34 -0400 Subject: [PATCH 18/41] Refactor axis spaces into separate TUs --- nvbench/CMakeLists.txt | 3 ++ nvbench/axes_metadata.cuh | 3 ++ nvbench/axis_iteration_space.cuh | 84 +++++++++++++++++--------------- nvbench/axis_iteration_space.cxx | 79 +----------------------------- nvbench/linear_axis_space.cuh | 37 ++++++++++++++ nvbench/linear_axis_space.cxx | 60 +++++++++++++++++++++++ nvbench/user_axis_space.cuh | 37 ++++++++++++++ nvbench/user_axis_space.cxx | 32 ++++++++++++ nvbench/zip_axis_space.cuh | 38 +++++++++++++++ nvbench/zip_axis_space.cxx | 64 ++++++++++++++++++++++++ 10 files changed, 320 insertions(+), 117 deletions(-) create mode 100644 nvbench/linear_axis_space.cuh create mode 100644 nvbench/linear_axis_space.cxx create mode 100644 nvbench/user_axis_space.cuh create mode 100644 nvbench/user_axis_space.cxx create mode 100644 nvbench/zip_axis_space.cuh create mode 100644 nvbench/zip_axis_space.cxx diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index 45438b9d..d29b1451 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -11,6 +11,7 @@ set(srcs device_manager.cu float64_axis.cxx int64_axis.cxx + linear_axis_space.cxx markdown_printer.cu named_values.cxx option_parser.cu @@ -21,6 +22,8 @@ set(srcs string_axis.cxx type_axis.cxx type_strings.cxx + user_axis_space.cxx + zip_axis_space.cxx detail/measure_cold.cu detail/measure_hot.cu diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index 9454b52b..4f04bb04 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -21,9 +21,12 @@ #include #include #include +#include #include #include #include +#include +#include #include #include diff --git a/nvbench/axis_iteration_space.cuh b/nvbench/axis_iteration_space.cuh index 913e5df4..a32b0540 100644 --- a/nvbench/axis_iteration_space.cuh +++ 
b/nvbench/axis_iteration_space.cuh @@ -23,6 +23,21 @@ namespace nvbench { +/*! + * Base class for all axi and axes iteration spaces. + * + * If we consider an axi to be a container of values, iteration_spaces + * would be the different types of iterators supported by that container. + * + * With that in mind we get the following mapping: + * * linear_axis_space is equivalant to a forward iterator. + * + * * zip_axis_space is equivalant to a zip iterator. + * + * * user_axis_space is equivalant to a transform iterator. + * + * + */ struct iteration_space_base { using axes_type = std::vector>; @@ -32,62 +47,53 @@ struct iteration_space_base nvbench::detail::axis_space_iterator::AdvanceSignature; using UpdateSignature = nvbench::detail::axis_space_iterator::UpdateSignature; + /*! + * Construct a new iteration_space_base + * + * @param[input_indices] + * @param[output_indices] + */ iteration_space_base(std::vector input_indices, - std::vector output_indices); + std::vector output_indices); virtual ~iteration_space_base(); [[nodiscard]] std::unique_ptr clone() const; [[nodiscard]] std::vector> clone_as_linear() const; - [[nodiscard]] detail::axis_space_iterator get_iterator(const axes_type &axes) const; + /*! + * Construct a new iteration_space_base + * + */ + [[nodiscard]] detail::axis_space_iterator + get_iterator(const axes_type &axes) const; + + /*! + * Construct a new iteration_space_base + * + */ [[nodiscard]] std::size_t get_size(const axes_type &axes) const; + + /*! + * Construct a new iteration_space_base + * + */ [[nodiscard]] std::size_t get_active_count(const axes_type &axes) const; + /*! + * Construct a new iteration_space_base + * + */ [[nodiscard]] bool contains(std::size_t input_index) const; protected: std::vector m_input_indices; std::vector m_output_indices; - virtual std::unique_ptr do_clone() const = 0; + virtual std::unique_ptr do_clone() const = 0; virtual detail::axis_space_iterator do_get_iterator(axes_info info) const = 0; - virtual std::size_t do_get_size(const axes_info &info) const = 0; - virtual std::size_t do_get_active_count(const axes_info &info) const = 0; -}; - -struct linear_axis_space final : iteration_space_base -{ - linear_axis_space(std::size_t in, std::size_t out); - ~linear_axis_space(); - - std::unique_ptr do_clone() const override; - detail::axis_space_iterator do_get_iterator(axes_info info) const override; - std::size_t do_get_size(const axes_info &info) const override; - std::size_t do_get_active_count(const axes_info &info) const override; -}; - -struct zip_axis_space final : iteration_space_base -{ - zip_axis_space(std::vector input_indices, - std::vector output_indices); - ~zip_axis_space(); - - std::unique_ptr do_clone() const override; - detail::axis_space_iterator do_get_iterator(axes_info info) const override; - std::size_t do_get_size(const axes_info &info) const override; - std::size_t do_get_active_count(const axes_info &info) const override; + virtual std::size_t do_get_size(const axes_info &info) const = 0; + virtual std::size_t do_get_active_count(const axes_info &info) const = 0; }; -struct user_axis_space : iteration_space_base -{ - user_axis_space(std::vector input_indices, - std::vector output_indices); - ~user_axis_space(); -}; - -using make_user_space_signature = - std::unique_ptr(std::vector input_indices, - std::vector output_indices); - } // namespace nvbench diff --git a/nvbench/axis_iteration_space.cxx b/nvbench/axis_iteration_space.cxx index c8a158a7..12010ca3 100644 --- a/nvbench/axis_iteration_space.cxx +++ 
b/nvbench/axis_iteration_space.cxx @@ -19,6 +19,7 @@ #include "axis_iteration_space.cuh" #include +#include namespace nvbench { @@ -93,82 +94,4 @@ bool iteration_space_base::contains(std::size_t in_index) const return iter != m_input_indices.end(); } -linear_axis_space::linear_axis_space(std::size_t in_index, - std::size_t out_index) - : iteration_space_base({in_index}, {out_index}) -{} - -linear_axis_space::~linear_axis_space() = default; - -detail::axis_space_iterator linear_axis_space::do_get_iterator(axes_info info) const -{ - std::size_t loc(m_output_indices[0]); - auto update_func = [=](std::size_t inc_index, - std::vector &indices) { - indices[loc] = info[0]; - indices[loc].index = inc_index; - }; - - return detail::make_space_iterator(1, info[0].size, update_func); -} - -std::size_t linear_axis_space::do_get_size(const axes_info &info) const -{ - return info[0].size; -} - -std::size_t linear_axis_space::do_get_active_count(const axes_info &info) const -{ - return info[0].active_size; -} - -std::unique_ptr linear_axis_space::do_clone() const -{ - return std::make_unique(*this); -} - -zip_axis_space::zip_axis_space(std::vector input_indices, - std::vector output_indices) - : iteration_space_base(std::move(input_indices), std::move(output_indices)) -{} - -zip_axis_space::~zip_axis_space() = default; - -detail::axis_space_iterator zip_axis_space::do_get_iterator(axes_info info) const -{ - std::vector locs = m_output_indices; - auto update_func = [=](std::size_t inc_index, - std::vector &indices) { - for (std::size_t i = 0; i < info.size(); ++i) - { - detail::axis_index temp = info[i]; - temp.index = inc_index; - indices[locs[i]] = temp; - } - }; - - return detail::make_space_iterator(locs.size(), info[0].size, update_func); -} - -std::size_t zip_axis_space::do_get_size(const axes_info &info) const -{ - return info[0].size; -} - -std::size_t zip_axis_space::do_get_active_count(const axes_info &info) const -{ - return info[0].active_size; -} - -std::unique_ptr zip_axis_space::do_clone() const -{ - return std::make_unique(*this); -} - -user_axis_space::user_axis_space(std::vector input_indices, - std::vector output_indices) - : iteration_space_base(std::move(input_indices), std::move(output_indices)) -{} -user_axis_space::~user_axis_space() = default; - } // namespace nvbench diff --git a/nvbench/linear_axis_space.cuh b/nvbench/linear_axis_space.cuh new file mode 100644 index 00000000..603937bc --- /dev/null +++ b/nvbench/linear_axis_space.cuh @@ -0,0 +1,37 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace nvbench +{ + +struct linear_axis_space final : iteration_space_base +{ + linear_axis_space(std::size_t in, std::size_t out); + ~linear_axis_space(); + + std::unique_ptr do_clone() const override; + detail::axis_space_iterator do_get_iterator(axes_info info) const override; + std::size_t do_get_size(const axes_info &info) const override; + std::size_t do_get_active_count(const axes_info &info) const override; +}; + +} // namespace nvbench diff --git a/nvbench/linear_axis_space.cxx b/nvbench/linear_axis_space.cxx new file mode 100644 index 00000000..7e08065b --- /dev/null +++ b/nvbench/linear_axis_space.cxx @@ -0,0 +1,60 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linear_axis_space.cuh" + +#include + +namespace nvbench +{ + +linear_axis_space::linear_axis_space(std::size_t in_index, + std::size_t out_index) + : iteration_space_base({in_index}, {out_index}) +{} + +linear_axis_space::~linear_axis_space() = default; + +detail::axis_space_iterator linear_axis_space::do_get_iterator(axes_info info) const +{ + std::size_t loc(m_output_indices[0]); + auto update_func = [=](std::size_t inc_index, + std::vector &indices) { + indices[loc] = info[0]; + indices[loc].index = inc_index; + }; + + return detail::make_space_iterator(1, info[0].size, update_func); +} + +std::size_t linear_axis_space::do_get_size(const axes_info &info) const +{ + return info[0].size; +} + +std::size_t linear_axis_space::do_get_active_count(const axes_info &info) const +{ + return info[0].active_size; +} + +std::unique_ptr linear_axis_space::do_clone() const +{ + return std::make_unique(*this); +} + +} // namespace nvbench diff --git a/nvbench/user_axis_space.cuh b/nvbench/user_axis_space.cuh new file mode 100644 index 00000000..a4df3b08 --- /dev/null +++ b/nvbench/user_axis_space.cuh @@ -0,0 +1,37 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace nvbench +{ + +struct user_axis_space : iteration_space_base +{ + user_axis_space(std::vector input_indices, + std::vector output_indices); + ~user_axis_space(); +}; + +using make_user_space_signature = + std::unique_ptr(std::vector input_indices, + std::vector output_indices); + +} // namespace nvbench diff --git a/nvbench/user_axis_space.cxx b/nvbench/user_axis_space.cxx new file mode 100644 index 00000000..31769070 --- /dev/null +++ b/nvbench/user_axis_space.cxx @@ -0,0 +1,32 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "user_axis_space.cuh" + +#include + +namespace nvbench +{ + +user_axis_space::user_axis_space(std::vector input_indices, + std::vector output_indices) + : iteration_space_base(std::move(input_indices), std::move(output_indices)) +{} +user_axis_space::~user_axis_space() = default; + +} // namespace nvbench diff --git a/nvbench/zip_axis_space.cuh b/nvbench/zip_axis_space.cuh new file mode 100644 index 00000000..94aa1af4 --- /dev/null +++ b/nvbench/zip_axis_space.cuh @@ -0,0 +1,38 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace nvbench +{ + +struct zip_axis_space final : iteration_space_base +{ + zip_axis_space(std::vector input_indices, + std::vector output_indices); + ~zip_axis_space(); + + std::unique_ptr do_clone() const override; + detail::axis_space_iterator do_get_iterator(axes_info info) const override; + std::size_t do_get_size(const axes_info &info) const override; + std::size_t do_get_active_count(const axes_info &info) const override; +}; + +} // namespace nvbench diff --git a/nvbench/zip_axis_space.cxx b/nvbench/zip_axis_space.cxx new file mode 100644 index 00000000..6f2edbd7 --- /dev/null +++ b/nvbench/zip_axis_space.cxx @@ -0,0 +1,64 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "zip_axis_space.cuh" + +#include + +namespace nvbench +{ + +zip_axis_space::zip_axis_space(std::vector input_indices, + std::vector output_indices) + : iteration_space_base(std::move(input_indices), std::move(output_indices)) +{} + +zip_axis_space::~zip_axis_space() = default; + +detail::axis_space_iterator zip_axis_space::do_get_iterator(axes_info info) const +{ + std::vector locs = m_output_indices; + auto update_func = [=](std::size_t inc_index, + std::vector &indices) { + for (std::size_t i = 0; i < info.size(); ++i) + { + detail::axis_index temp = info[i]; + temp.index = inc_index; + indices[locs[i]] = temp; + } + }; + + return detail::make_space_iterator(locs.size(), info[0].size, update_func); +} + +std::size_t zip_axis_space::do_get_size(const axes_info &info) const +{ + return info[0].size; +} + +std::size_t zip_axis_space::do_get_active_count(const axes_info &info) const +{ + return info[0].active_size; +} + +std::unique_ptr zip_axis_space::do_clone() const +{ + return std::make_unique(*this); +} + +} // namespace nvbench From 9aa2feb204019b15695fdba25f148a516eee4a1c Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 13 Apr 2022 17:17:27 -0400 Subject: [PATCH 19/41] Add iteration_space_base docs --- nvbench/axis_iteration_space.cuh | 35 ++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/nvbench/axis_iteration_space.cuh b/nvbench/axis_iteration_space.cuh index a32b0540..2cc41732 100644 --- a/nvbench/axis_iteration_space.cuh +++ b/nvbench/axis_iteration_space.cuh @@ -27,7 +27,10 @@ namespace nvbench * Base class for all axi and axes iteration spaces. * * If we consider an axi to be a container of values, iteration_spaces - * would be the different types of iterators supported by that container. + * would be how we can create iterators over that container. + * + * Construction of the iterators needs to be deferred, to execution + * as the axes can change, therefore this class......... * * With that in mind we get the following mapping: * * linear_axis_space is equivalant to a forward iterator. @@ -48,7 +51,7 @@ struct iteration_space_base using UpdateSignature = nvbench::detail::axis_space_iterator::UpdateSignature; /*! - * Construct a new iteration_space_base + * Construct a new derived iteration_space * * @param[input_indices] * @param[output_indices] @@ -62,26 +65,46 @@ struct iteration_space_base clone_as_linear() const; /*! - * Construct a new iteration_space_base + * Returns the iterator over the @a axis provided + * + * @param[axes] * */ [[nodiscard]] detail::axis_space_iterator get_iterator(const axes_type &axes) const; /*! - * Construct a new iteration_space_base + * Returns the number of active and inactive elements the iterator will have + * when executed over @a axes * + * Note: + * Type Axis support inactive elements */ [[nodiscard]] std::size_t get_size(const axes_type &axes) const; /*! - * Construct a new iteration_space_base + * Returns the number of active elements the iterator will over when + * executed over @a axes * + * Note: + * Type Axis support inactive elements */ [[nodiscard]] std::size_t get_active_count(const axes_type &axes) const; /*! - * Construct a new iteration_space_base + * Returns if this space was constructed with the input index specified + * by @a input_index. + * + * The `nvbench::axes_metadata` stores all axes in a std::vector. 
To represent + * which axes each space is 'over' we store those indices. We don't store + * the pointers or names for the following reasons: + * + * * The names of an axis can change after being added. The `nvbench::axes_metadata` + * is not aware of the name change, and can't inform this class of it. + * + * * The `nvbench::axes_metadata` can be deep copied, which would invalidate + * any pointers held by this class. By holding onto the index we remove the need + * to do any form of fixup on deep copies of `nvbench::axes_metadata`. * */ [[nodiscard]] bool contains(std::size_t input_index) const; From 06a4c8fd32b5f128b2ef21ee2fb8e6a23e16653e Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 13 Apr 2022 17:18:30 -0400 Subject: [PATCH 20/41] rename files holding iteration_space_base to match name of class --- nvbench/CMakeLists.txt | 2 +- nvbench/axes_metadata.cuh | 2 +- nvbench/{axis_iteration_space.cuh => iteration_space_base.cuh} | 0 nvbench/{axis_iteration_space.cxx => iteration_space_base.cxx} | 2 +- nvbench/linear_axis_space.cuh | 2 +- nvbench/user_axis_space.cuh | 2 +- nvbench/zip_axis_space.cuh | 2 +- 7 files changed, 6 insertions(+), 6 deletions(-) rename nvbench/{axis_iteration_space.cuh => iteration_space_base.cuh} (100%) rename nvbench/{axis_iteration_space.cxx => iteration_space_base.cxx} (98%) diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index d29b1451..3fa320c4 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -1,7 +1,7 @@ set(srcs axes_metadata.cxx axis_base.cxx - axis_iteration_space.cxx + iteration_space_base.cxx benchmark_base.cxx benchmark_manager.cxx blocking_kernel.cu diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index 4f04bb04..68932ab4 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -18,7 +18,7 @@ #pragma once -#include +#include #include #include #include diff --git a/nvbench/axis_iteration_space.cuh b/nvbench/iteration_space_base.cuh similarity index 100% rename from nvbench/axis_iteration_space.cuh rename to nvbench/iteration_space_base.cuh diff --git a/nvbench/axis_iteration_space.cxx b/nvbench/iteration_space_base.cxx similarity index 98% rename from nvbench/axis_iteration_space.cxx rename to nvbench/iteration_space_base.cxx index 12010ca3..ad6b2154 100644 --- a/nvbench/axis_iteration_space.cxx +++ b/nvbench/iteration_space_base.cxx @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -#include "axis_iteration_space.cuh" +#include "iteration_space_base.cuh" #include #include diff --git a/nvbench/linear_axis_space.cuh b/nvbench/linear_axis_space.cuh index 603937bc..31b6e8c5 100644 --- a/nvbench/linear_axis_space.cuh +++ b/nvbench/linear_axis_space.cuh @@ -18,7 +18,7 @@ #pragma once -#include +#include namespace nvbench { diff --git a/nvbench/user_axis_space.cuh b/nvbench/user_axis_space.cuh index a4df3b08..15e77476 100644 --- a/nvbench/user_axis_space.cuh +++ b/nvbench/user_axis_space.cuh @@ -18,7 +18,7 @@ #pragma once -#include +#include namespace nvbench { diff --git a/nvbench/zip_axis_space.cuh b/nvbench/zip_axis_space.cuh index 94aa1af4..0b7800f8 100644 --- a/nvbench/zip_axis_space.cuh +++ b/nvbench/zip_axis_space.cuh @@ -18,7 +18,7 @@ #pragma once -#include +#include namespace nvbench { From 454d1bf6977feb1ed48322fb81c2673bcb549841 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 2 May 2022 11:28:27 -0400 Subject: [PATCH 21/41] Add more docs --- nvbench/iteration_space_base.cuh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/nvbench/iteration_space_base.cuh b/nvbench/iteration_space_base.cuh index 2cc41732..d6ac1b83 100644 --- a/nvbench/iteration_space_base.cuh +++ b/nvbench/iteration_space_base.cuh @@ -29,9 +29,6 @@ namespace nvbench * If we consider an axi to be a container of values, iteration_spaces * would be how we can create iterators over that container. * - * Construction of the iterators needs to be deferred, to execution - * as the axes can change, therefore this class......... - * * With that in mind we get the following mapping: * * linear_axis_space is equivalant to a forward iterator. * @@ -39,6 +36,9 @@ namespace nvbench * * * user_axis_space is equivalant to a transform iterator. * + * We don't immediately construct the iterators as the active elements, + * name, etc can be changed before execution. This class allows for + * the deferred iterator creation while keeping the meta data insyc. * */ struct iteration_space_base @@ -61,6 +61,13 @@ struct iteration_space_base virtual ~iteration_space_base(); [[nodiscard]] std::unique_ptr clone() const; + + /*! + * Returns a vector of linear spaces one for each axi held. + * This is required when a iteration_space is removed as we need + * to restore all the assoicated axes to default. + * + */ [[nodiscard]] std::vector> clone_as_linear() const; From 8af9453dbe0d092debe777023aae6ab85533b8de Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 2 May 2022 13:00:09 -0400 Subject: [PATCH 22/41] Add more docs --- nvbench/linear_axis_space.cuh | 6 +++++ nvbench/user_axis_space.cuh | 44 +++++++++++++++++++++++++++++++++++ nvbench/zip_axis_space.cuh | 12 ++++++++++ 3 files changed, 62 insertions(+) diff --git a/nvbench/linear_axis_space.cuh b/nvbench/linear_axis_space.cuh index 31b6e8c5..d3593d53 100644 --- a/nvbench/linear_axis_space.cuh +++ b/nvbench/linear_axis_space.cuh @@ -23,6 +23,12 @@ namespace nvbench { +/*! + * Provides linear forward iteration over a single axi + * + * The default for all axi added to a benchmark + * + */ struct linear_axis_space final : iteration_space_base { linear_axis_space(std::size_t in, std::size_t out); diff --git a/nvbench/user_axis_space.cuh b/nvbench/user_axis_space.cuh index 15e77476..236966c2 100644 --- a/nvbench/user_axis_space.cuh +++ b/nvbench/user_axis_space.cuh @@ -23,6 +23,50 @@ namespace nvbench { +/*! 
+ * Provides user defined iteration over multiple axes + * + * Consider two axi with the following values: + * { 0, 1, 2, 3, 4, 5 } + * { 0, 1, 2, 3, 4, 5 } + * + * If we wanted to provide an axis space that skipped every third value + * We would implement it like this: + * + * struct every_third final : nvbench::user_axis_space + * { + * every_third(std::vector input_indices, + * std::vector output_indices) + * : nvbench::user_axis_space(std::move(input_indices), + * std::move(output_indices)) + * {} + * + * nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const + * { + * // our increment function + * auto adv_func = [&, info](std::size_t &inc_index, std::size_t len) -> bool { + * inc_index += 3; + * return inc_index >= len; + * }; + * + * // our update function + * std::vector locs = m_output_indices; + * auto update_func = [=](std::size_t inc_index, + * std::vector &indices) { + * for (std::size_t i = 0; i < info.size(); ++i) + * { + * detail::axis_index temp = info[i]; + * temp.index = inc_index; + * indices[locs[i]] = temp; + * } + * }; + * return detail::make_space_iterator(locs.size(), (info[0].size/3), adv_func, update_func); + * } + * + * std::size_t do_get_size(const axes_info &info) const { return (info[0].size/3); } + * ... + * + */ struct user_axis_space : iteration_space_base { user_axis_space(std::vector input_indices, diff --git a/nvbench/zip_axis_space.cuh b/nvbench/zip_axis_space.cuh index 0b7800f8..9f041428 100644 --- a/nvbench/zip_axis_space.cuh +++ b/nvbench/zip_axis_space.cuh @@ -23,6 +23,18 @@ namespace nvbench { +/*! + * Provides linear forward iteration over multiple axes in lockstep + * + * Consider two axi with the following values: + * { 0, 1, 2, 3, 4, 5 } + * { 0, 1, 2, 3, 4, 5 } + * + * Using a zip_axis_space over these two axi will generate 6 values + * ( {0,0}, {1,1}, {2,2}, ... ) instead of the default 36 values + * ( {0,0}, {0,1}, {0,2}, ...). 
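+ *
+ * A minimal usage sketch (illustrative only; the benchmark name is made up,
+ * and the zipped axes are assumed to have the same number of values), using
+ * the benchmark's `add_zip_axes` helper:
+ *
+ *   NVBENCH_BENCH(my_benchmark)
+ *     .add_zip_axes(nvbench::int64_axis{"NumInputs", {1000, 10000, 100000}},
+ *                   nvbench::float64_axis{"Quality", {0.5, 0.75, 1.}});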
+ * + */ struct zip_axis_space final : iteration_space_base { zip_axis_space(std::vector input_indices, From 6fd0883c8a310ba1eff5f01c3b1182fe79d84d88 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 23 Aug 2022 14:34:50 -0400 Subject: [PATCH 23/41] drop usage of std::tie in nvbench/axes_metadata.cxx Co-authored-by: Jake Hemstad --- nvbench/axes_metadata.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index f78c4789..69d1b148 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -173,7 +173,7 @@ get_axes_indices(std::size_t type_axe_count, { out -= type_axe_count; } - return std::tie(input_indices, output_indices); + return {std::move(input_indices), std::move(output_indices)}; } void reset_iteration_space( From 99395df1364f9ac32613cdd62a27a6a52484b184 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 23 Aug 2022 14:46:03 -0400 Subject: [PATCH 24/41] Update to cross reference docs --- docs/benchmarks.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 19b98eb7..1db6aa8d 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -454,8 +454,8 @@ NVBENCH_BENCH_TYPES(my_benchmark, ``` For large configuration spaces like this, pruning some of the less useful -combinations using the techniques described in the "Zipped/Tied Iteration of Value Axes" -or "Skip Uninteresting / Invalid Benchmarks" section can help immensely with +combinations using the techniques described in the [Zipped/Tied Iteration of Value Axes](#zipped-iteration-of-value-axes) +or [Skip Uninteresting / Invalid Benchmarks](#skip-uninteresting--invalid-benchmarks) section can help immensely with keeping compile / run times manageable. Splitting a single large configuration space into multiple, more focused From 5ebe7fe8dfb968e53ad2db09ccfde476a053d74b Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 23 Aug 2022 15:08:42 -0400 Subject: [PATCH 25/41] Update docs around iteration_space_base constructor --- nvbench/iteration_space_base.cuh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nvbench/iteration_space_base.cuh b/nvbench/iteration_space_base.cuh index d6ac1b83..c74d2218 100644 --- a/nvbench/iteration_space_base.cuh +++ b/nvbench/iteration_space_base.cuh @@ -53,8 +53,11 @@ struct iteration_space_base /*! * Construct a new derived iteration_space * - * @param[input_indices] - * @param[output_indices] + * The input_indices and output_indices combine together to allow the iteration space to know + * what axi they should query from axes_metadata and where each of those map to in the output + * iteration space. 
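+ *
+ * For example (an illustrative sketch, assuming a benchmark with one type
+ * axis): a space built over the value axes stored at positions 1 and 2 of
+ * the axes_metadata vector would be constructed with input_indices {1, 2}
+ * and output_indices {0, 1}, since the output indices are the input
+ * indices shifted down by the number of type axes.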
+ * @param[input_indices] recorded indices of each axi from the axes metadata value space + * @param[output_indices] requested indices of each axi for output when iterating the type+value space */ iteration_space_base(std::vector input_indices, std::vector output_indices); From dc7e2b789d575be6638c5b2188d1904c538c97b9 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 29 Aug 2022 09:42:38 -0400 Subject: [PATCH 26/41] Drop ability to zip axii after construction --- docs/benchmarks.md | 5 +- nvbench/axes_metadata.cuh | 29 ++++--- nvbench/axes_metadata.cxx | 131 ++++++------------------------- nvbench/benchmark_base.cuh | 32 -------- nvbench/iteration_space_base.cuh | 41 +++------- nvbench/iteration_space_base.cxx | 26 ------ testing/axes_iteration_space.cu | 109 +++---------------------- 7 files changed, 63 insertions(+), 310 deletions(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 1db6aa8d..0c2f0c42 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -259,9 +259,8 @@ using input_types = nvbench::type_list; using output_types = nvbench::type_list; NVBENCH_BENCH_TYPES(benchmark, NVBENCH_TYPE_AXES(input_types, output_types)) .set_type_axes_names({"InputType", "OutputType"}) - .add_int64_axis("NumInputs", {1000, 10000, 100000, 200000, 200000, 200000}) - .add_float64_axis("Quality", {0.05, 0.1, 0.25, 0.5, 0.75, 1.}) - .zip_axes({"NumInputs", "Quality"}); + .add_zip_axes(nvbench::int64_axis{"NumInputs", {1000, 10000, 100000, 200000, 200000, 200000}}, + nvbench::float64_axis{"Quality", {0.05, 0.1, 0.25, 0.5, 0.75, 1.}}); ``` Zipping these two axes reduces the total combinations from 216 to 36, reducing the diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index 68932ab4..103ab9a6 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -18,9 +18,9 @@ #pragma once -#include #include #include +#include #include #include #include @@ -65,13 +65,16 @@ struct axes_metadata void add_string_axis(std::string name, std::vector data); - void add_axis(const axis_base& axis); + void add_axis(const axis_base &axis); template void add_zip_axes(Args &&...args) { - (this->add_axis(std::forward(args)), ...); - this->zip_axes({args.get_name()...}); + const std::size_t start = this->m_axes.size(); + const std::size_t count = sizeof...(Args); + // (this->add_axis(std::forward(args)), ...); + (m_axes.push_back(args.clone()), ...); + this->add_zip_space(start, count); } template @@ -79,16 +82,12 @@ struct axes_metadata std::function make, Args &&...args) { - (this->add_axis(std::forward(args)), ...); - this->user_iteration_axes(std::move(make), {args.get_name()...}); + const std::size_t start = this->m_axes.size(); + const std::size_t count = sizeof...(Args); + (m_axes.push_back(args.clone()), ...); + this->add_user_iteration_space(std::move(make), start, count); } - void zip_axes(std::vector names); - - void - user_iteration_axes(std::function make, - std::vector names); - [[nodiscard]] const iteration_space_type &get_type_iteration_space() const { return m_type_space; @@ -136,6 +135,12 @@ private: std::size_t m_type_axe_count = 0; iteration_space_type m_type_space; iteration_space_type m_value_space; + + void add_zip_space(std::size_t first_index, std::size_t count); + void add_user_iteration_space( + std::function make, + std::size_t first_index, + std::size_t count); }; template diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index 69d1b148..e0a0312e 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -24,6 +24,7 
@@ #include #include +#include #include #include @@ -117,23 +118,23 @@ catch (std::exception &e) void axes_metadata::add_float64_axis(std::string name, std::vector data) { - this->add_axis(nvbench::float64_axis{name,data}); + this->add_axis(nvbench::float64_axis{name, data}); } void axes_metadata::add_int64_axis(std::string name, std::vector data, nvbench::int64_axis_flags flags) { - this->add_axis(nvbench::int64_axis{name,data,flags}); + this->add_axis(nvbench::int64_axis{name, data, flags}); } void axes_metadata::add_string_axis(std::string name, std::vector data) { - this->add_axis(nvbench::string_axis{name,data}); + this->add_axis(nvbench::string_axis{name, data}); } -void axes_metadata::add_axis(const axis_base& axis) +void axes_metadata::add_axis(const axis_base &axis) { m_value_space.push_back( std::make_unique(m_axes.size(), @@ -141,104 +142,21 @@ void axes_metadata::add_axis(const axis_base& axis) m_axes.push_back(axis.clone()); } -namespace +void axes_metadata::add_zip_space(std::size_t first_index, std::size_t count) { -std::tuple, std::vector> -get_axes_indices(std::size_t type_axe_count, - const nvbench::axes_metadata::axes_type &axes, - const std::vector &names) -{ - std::vector input_indices; - input_indices.reserve(names.size()); - for (auto &n : names) - { - auto iter = - std::find_if(axes.cbegin(), axes.cend(), [&n](const auto &axis) { - return axis->get_name() == n; - }); - - // iter distance is input_indices - if (iter == axes.cend()) - { - NVBENCH_THROW(std::runtime_error, - "Unable to find the axes named ({}).", - n); - } - auto index = std::distance(axes.cbegin(), iter); - input_indices.push_back(index); - } - - std::vector output_indices = input_indices; - for (auto &out : output_indices) - { - out -= type_axe_count; - } - return {std::move(input_indices), std::move(output_indices)}; -} - -void reset_iteration_space( - nvbench::axes_metadata::iteration_space_type &all_spaces, - const std::vector &indices_to_remove) -{ - // 1. 
Find all spaces indices that - nvbench::axes_metadata::iteration_space_type reset_space; - nvbench::axes_metadata::iteration_space_type to_filter; - for (auto &space : all_spaces) - { - bool added = false; - for (auto &i : indices_to_remove) - { - if (space->contains(i)) - { - // add each item back as linear_axis_space - auto as_linear = space->clone_as_linear(); - to_filter.insert(to_filter.end(), - std::make_move_iterator(as_linear.begin()), - std::make_move_iterator(as_linear.end())); - added = true; - break; - } - } - if (!added) - { - // this space doesn't need to be removed - reset_space.push_back(std::move(space)); - } - } - - for (auto &iter : to_filter) - { - bool to_add = true; - for (auto &i : indices_to_remove) - { - if (iter->contains(i)) - { - to_add = false; - break; - } - } - if (to_add) - { - reset_space.push_back(std::move(iter)); - break; - } - } - - all_spaces = std::move(reset_space); -} -} // namespace - -void axes_metadata::zip_axes(std::vector names) -{ - NVBENCH_THROW_IF((names.size() < 2), + NVBENCH_THROW_IF((count < 2), std::runtime_error, - "At least two axi names ( {} provided ) need to be provided " + "At least two axi ( {} provided ) need to be provided " "when using zip_axes.", - names.size()); + count); // compute the numeric indice for each name we have - auto [input_indices, - output_indices] = get_axes_indices(m_type_axe_count, m_axes, names); + std::vector input_indices(count); + std::vector output_indices(count); + std::iota(input_indices.begin(), input_indices.end(), first_index); + std::iota(input_indices.begin(), + input_indices.end(), + first_index - m_type_axe_count); const auto expected_size = m_axes[input_indices[0]]->get_size(); for (auto i : input_indices) @@ -255,22 +173,24 @@ void axes_metadata::zip_axes(std::vector names) expected_size); } - // remove any iteration spaces that have axes we need - reset_iteration_space(m_value_space, input_indices); - // add the new tied iteration space auto tied = std::make_unique(std::move(input_indices), std::move(output_indices)); m_value_space.push_back(std::move(tied)); } -void axes_metadata::user_iteration_axes( +void axes_metadata::add_user_iteration_space( std::function make, - std::vector names) + std::size_t first_index, + std::size_t count) { // compute the numeric indice for each name we have - auto [input_indices, - output_indices] = get_axes_indices(m_type_axe_count, m_axes, names); + std::vector input_indices(count); + std::vector output_indices(count); + std::iota(input_indices.begin(), input_indices.end(), first_index); + std::iota(input_indices.begin(), + input_indices.end(), + first_index - m_type_axe_count); for (auto i : input_indices) { @@ -281,9 +201,6 @@ void axes_metadata::user_iteration_axes( m_axes[i]->get_name()); } - // remove any iteration spaces that have axes we need - reset_iteration_space(m_value_space, input_indices); - auto user_func = make(std::move(input_indices), std::move(output_indices)); m_value_space.push_back(std::move(user_func)); } diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index a8c03840..d35e1fa7 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -280,38 +280,6 @@ struct benchmark_base /// @} protected: - - /// Move existing Axis to being part of zip axis iteration space. 
- /// This will remove any existing iteration spaces that the named axis - /// are part of, while restoring all other axis in those spaces to - /// the default linear space - /// - /// This is meant to be used only by the option_parser - /// @{ - benchmark_base &zip_axes(std::vector names) - { - m_axes.zip_axes(std::move(names)); - return *this; - } - /// @} - - - /// Move existing Axis to being part of user axis iteration space. - /// This will remove any existing iteration spaces that the named axis - /// are part of, while restoring all other axis in those spaces to - /// the default linear space - /// - /// This is meant to be used only by the option_parser - /// @{ - benchmark_base & - user_iteration_axes(std::function make, - std::vector names) - { - m_axes.user_iteration_axes(std::move(make), std::move(names)); - return *this; - } - /// @} - friend struct nvbench::runner_base; template diff --git a/nvbench/iteration_space_base.cuh b/nvbench/iteration_space_base.cuh index c74d2218..607b98be 100644 --- a/nvbench/iteration_space_base.cuh +++ b/nvbench/iteration_space_base.cuh @@ -36,9 +36,17 @@ namespace nvbench * * * user_axis_space is equivalant to a transform iterator. * - * We don't immediately construct the iterators as the active elements, - * name, etc can be changed before execution. This class allows for - * the deferred iterator creation while keeping the meta data insyc. + * The `nvbench::axes_metadata` stores all axes in a std::vector. To represent + * which axes each space is 'over' we store those indices. We don't store + * the pointers or names for the following reasons: + * + * * The names of an axis can change after being added. The `nvbench::axes_metadata` + * is not aware of the name change, and can't inform this class of it. + * + * * The `nvbench::axes_metadata` can be deep copied, which would invalidate + * any pointers held by this class. By holding onto the index we remove the need + * to do any form of fixup on deep copies of `nvbench::axes_metadata`. + * * */ struct iteration_space_base @@ -65,15 +73,6 @@ struct iteration_space_base [[nodiscard]] std::unique_ptr clone() const; - /*! - * Returns a vector of linear spaces one for each axi held. - * This is required when a iteration_space is removed as we need - * to restore all the assoicated axes to default. - * - */ - [[nodiscard]] std::vector> - clone_as_linear() const; - /*! * Returns the iterator over the @a axis provided * @@ -101,24 +100,6 @@ struct iteration_space_base */ [[nodiscard]] std::size_t get_active_count(const axes_type &axes) const; - /*! - * Returns if this space was constructed with the input index specified - * by @a input_index. - * - * The `nvbench::axes_metadata` stores all axes in a std::vector. To represent - * which axes each space is 'over' we store those indices. We don't store - * the pointers or names for the following reasons: - * - * * The names of an axis can change after being added. The `nvbench::axes_metadata` - * is not aware of the name change, and can't inform this class of it. - * - * * The `nvbench::axes_metadata` can be deep copied, which would invalidate - * any pointers held by this class. By holding onto the index we remove the need - * to do any form of fixup on deep copies of `nvbench::axes_metadata`. 
- * - */ - [[nodiscard]] bool contains(std::size_t input_index) const; - protected: std::vector m_input_indices; std::vector m_output_indices; diff --git a/nvbench/iteration_space_base.cxx b/nvbench/iteration_space_base.cxx index ad6b2154..0ecd6e3d 100644 --- a/nvbench/iteration_space_base.cxx +++ b/nvbench/iteration_space_base.cxx @@ -19,7 +19,6 @@ #include "iteration_space_base.cuh" #include -#include namespace nvbench { @@ -38,22 +37,6 @@ std::unique_ptr iteration_space_base::clone() const return clone; } -std::vector> -iteration_space_base::clone_as_linear() const -{ - std::vector> clones; - clones.reserve(m_input_indices.size()); - - for (std::size_t i = 0; i < m_input_indices.size(); ++i) - { - clones.push_back( - std::make_unique(m_input_indices[i], - m_output_indices[i])); - } - - return clones; -} - namespace { nvbench::iteration_space_base::axes_info @@ -85,13 +68,4 @@ std::size_t iteration_space_base::get_active_count(const axes_type &axes) const return this->do_get_active_count(get_axes_info(axes, m_input_indices)); } -bool iteration_space_base::contains(std::size_t in_index) const -{ - auto iter = - std::find_if(m_input_indices.cbegin(), - m_input_indices.cend(), - [&in_index](const auto &i) { return i == in_index; }); - return iter != m_input_indices.end(); -} - } // namespace nvbench diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index eb50af77..4ac7a95e 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -74,9 +74,6 @@ struct rezippable_benchmark final : public nvbench::benchmark_base : benchmark_base(type_axes{}) {} - using nvbench::benchmark_base::zip_axes; - using nvbench::benchmark_base::user_iteration_axes; - private: std::unique_ptr do_clone() const final { @@ -125,20 +122,7 @@ void test_zip_axes() bench.get_config_count()); } -void test_tie_invalid_names() -{ - using benchmark_type = rezippable_benchmark; - benchmark_type bench; - bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); - bench.add_int64_axis("I64 Axis", {1, 3, 2}); - - ASSERT_THROWS_ANY(bench.zip_axes({"F32 Axis", "I64 Axis"})); - ASSERT_THROWS_ANY(bench.zip_axes({"F32 Axis"})); - ASSERT_THROWS_ANY(bench.zip_axes({""})); - ASSERT_THROWS_ANY(bench.zip_axes(std::vector())); -} - -void test_tie_unequal_length() +void test_zip_unequal_length() { using benchmark_type = nvbench::benchmark; benchmark_type bench; @@ -148,79 +132,7 @@ void test_tie_unequal_length() nvbench::int64_axis("I64 Axis", {1, 3, 2}))); } -void test_tie_type_axi() -{ - using benchmark_type = rezippable_benchmark< - template_no_op_callable, - nvbench::type_list, - nvbench::type_list, - nvbench::type_list>>; - benchmark_type bench; - bench.set_type_axes_names({"Integer", "Float", "Other"}); - bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); - bench.add_int64_axis("I64 Axis", {1, 3, 2}); - - ASSERT_THROWS_ANY(bench.zip_axes({"F64 Axis", "Float"})); -} - -void test_rezip_axes() -{ - using benchmark_type = rezippable_benchmark; - benchmark_type bench; - bench.add_int64_axis("IAxis_A", {1, 3, 2, 4, 5}); - bench.add_int64_axis("IAxis_B", {1, 3, 2, 4, 5}); - bench.add_float64_axis("FAxis_5", {0., .1, .25, .5, 1.}); - bench.add_float64_axis("FAxis_2", - { - 0., - .1, - }); - - bench.zip_axes({"FAxis_5", "IAxis_A"}); - bench.zip_axes({"IAxis_B", "FAxis_5", "IAxis_A"}); // re-tie - - ASSERT_MSG(bench.get_config_count() == 10 * bench.get_devices().size(), - "Got {}", - bench.get_config_count()); - - bench.zip_axes({"FAxis_5", "IAxis_A"}); - 
ASSERT_MSG(bench.get_config_count() == 50 * bench.get_devices().size(), - "Got {}", - bench.get_config_count()); -} - -void test_rezip_axes2() -{ - using benchmark_type = rezippable_benchmark; - benchmark_type bench; - bench.add_int64_axis("IAxis_A", {1, 3, 2, 4, 5}); - bench.add_int64_axis("IAxis_B", {1, 3, 2, 4, 5}); - bench.add_int64_axis("IAxis_C", {1, 3, 2, 4, 5}); - bench.add_float64_axis("FAxis_1", {0., .1, .25, .5, 1.}); - bench.add_float64_axis("FAxis_2", {0., .1, .25, .5, 1.}); - bench.add_float64_axis("FAxis_3", - { - 0., - .1, - }); - - bench.zip_axes({"IAxis_A", "IAxis_B", "IAxis_C"}); - bench.zip_axes({"FAxis_1", "FAxis_2"}); - bench.zip_axes( - {"IAxis_A", "IAxis_B", "IAxis_C", "FAxis_1", "FAxis_2"}); // re-tie - - ASSERT_MSG(bench.get_config_count() == 10 * bench.get_devices().size(), - "Got {}", - bench.get_config_count()); - - bench.zip_axes({"IAxis_A", "IAxis_B", "IAxis_C"}); - bench.zip_axes({"FAxis_1", "FAxis_2"}); - ASSERT_MSG(bench.get_config_count() == 50 * bench.get_devices().size(), - "Got {}", - bench.get_config_count()); -} - -void test_tie_clone() +void test_zip_clone() { using benchmark_type = nvbench::benchmark; benchmark_type bench; @@ -336,13 +248,12 @@ void test_user_axes() { using benchmark_type = rezippable_benchmark; benchmark_type bench; - bench.add_float64_axis("F64 Axis", {0., .1, .25, .5, 1.}); - bench.add_int64_axis("I64 Axis", {1, 3, 2, 4, 5}); - bench.user_iteration_axes( + bench.add_user_iteration_axes( [](auto... args) -> std::unique_ptr { return std::make_unique(args...); }, - {"F64 Axis", "I64 Axis"}); + nvbench::float64_axis("F64 Axis", {0., .1, .25, .5, 1.}), + nvbench::int64_axis("I64 Axis", {1, 3, 2, 4, 5})); ASSERT_MSG(bench.get_config_count() == 15 * bench.get_devices().size(), "Got {}", @@ -352,10 +263,8 @@ void test_user_axes() int main() { test_zip_axes(); - test_tie_invalid_names(); - test_tie_unequal_length(); - test_tie_type_axi(); - test_rezip_axes(); - test_rezip_axes2(); - test_tie_clone(); + test_zip_unequal_length(); + test_zip_clone(); + + test_user_axes(); } From 5708e6cd927e2de97447e83e1c3ceda25711368a Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 31 Aug 2022 11:06:40 -0400 Subject: [PATCH 27/41] remove need for make_space_iterator --- examples/custom_iteration_spaces.cu | 4 +-- nvbench/detail/axes_iterator.cuh | 46 +++++++++++++---------------- nvbench/linear_axis_space.cxx | 4 +-- nvbench/user_axis_space.cuh | 2 +- nvbench/zip_axis_space.cxx | 2 +- testing/axes_iteration_space.cu | 2 +- 6 files changed, 27 insertions(+), 33 deletions(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index 26be7610..828e9c49 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -130,7 +130,7 @@ struct under_diag final : nvbench::user_axis_space }; const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); - return nvbench::detail::make_space_iterator(2, + return nvbench::detail::axis_space_iterator(2, iteration_length, adv_func, diag_under); @@ -201,7 +201,7 @@ struct gauss final : nvbench::user_axis_space indices[locs[0]] = temp; }; - return nvbench::detail::make_space_iterator(1, + return nvbench::detail::axis_space_iterator(1, iteration_length, gauss_func); } diff --git a/nvbench/detail/axes_iterator.cuh b/nvbench/detail/axes_iterator.cuh index ea13b19d..67ac1d50 100644 --- a/nvbench/detail/axes_iterator.cuh +++ b/nvbench/detail/axes_iterator.cuh @@ -60,6 +60,26 @@ struct axis_space_iterator using UpdateSignature = 
void(std::size_t index, std::vector &indices); + axis_space_iterator( + std::size_t axes_count, + std::size_t iter_count, + std::function &&advance, + std::function &&update) + : m_number_of_axes(axes_count) + , m_iteration_size(iter_count) + , m_advance(std::move(advance)) + , m_update(std::move(update)) + {} + + axis_space_iterator( + std::size_t axes_count, + std::size_t iter_count, + std::function &&update) + : m_number_of_axes(axes_count) + , m_iteration_size(iter_count) + , m_update(std::move(update)) + {} + [[nodiscard]] bool inc() { return this->m_advance(m_current_index, m_iteration_size); @@ -83,31 +103,5 @@ private: std::size_t m_current_index = 0; }; -inline axis_space_iterator make_space_iterator( - std::size_t axes_count, - std::size_t iter_count, - std::function &&advance, - std::function &&update) -{ - axis_space_iterator iter; - iter.m_number_of_axes = axes_count; - iter.m_iteration_size = iter_count; - iter.m_advance = std::move(advance); - iter.m_update = std::move(update); - return iter; -} - -inline axis_space_iterator make_space_iterator( - std::size_t axes_count, - std::size_t iter_count, - std::function &&update) -{ - axis_space_iterator iter; - iter.m_number_of_axes = axes_count; - iter.m_iteration_size = iter_count; - iter.m_update = std::move(update); - return iter; -} - } // namespace detail } // namespace nvbench diff --git a/nvbench/linear_axis_space.cxx b/nvbench/linear_axis_space.cxx index 7e08065b..38218c97 100644 --- a/nvbench/linear_axis_space.cxx +++ b/nvbench/linear_axis_space.cxx @@ -32,14 +32,14 @@ linear_axis_space::~linear_axis_space() = default; detail::axis_space_iterator linear_axis_space::do_get_iterator(axes_info info) const { - std::size_t loc(m_output_indices[0]); + std::size_t loc{m_output_indices[0]}; auto update_func = [=](std::size_t inc_index, std::vector &indices) { indices[loc] = info[0]; indices[loc].index = inc_index; }; - return detail::make_space_iterator(1, info[0].size, update_func); + return detail::axis_space_iterator(1, info[0].size, update_func); } std::size_t linear_axis_space::do_get_size(const axes_info &info) const diff --git a/nvbench/user_axis_space.cuh b/nvbench/user_axis_space.cuh index 236966c2..76662464 100644 --- a/nvbench/user_axis_space.cuh +++ b/nvbench/user_axis_space.cuh @@ -60,7 +60,7 @@ namespace nvbench * indices[locs[i]] = temp; * } * }; - * return detail::make_space_iterator(locs.size(), (info[0].size/3), adv_func, update_func); + * return detail::axis_space_iterator(locs.size(), (info[0].size/3), adv_func, update_func); * } * * std::size_t do_get_size(const axes_info &info) const { return (info[0].size/3); } diff --git a/nvbench/zip_axis_space.cxx b/nvbench/zip_axis_space.cxx index 6f2edbd7..4fe64cde 100644 --- a/nvbench/zip_axis_space.cxx +++ b/nvbench/zip_axis_space.cxx @@ -43,7 +43,7 @@ detail::axis_space_iterator zip_axis_space::do_get_iterator(axes_info info) cons } }; - return detail::make_space_iterator(locs.size(), info[0].size, update_func); + return detail::axis_space_iterator(locs.size(), info[0].size, update_func); } std::size_t zip_axis_space::do_get_size(const axes_info &info) const diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index 4ac7a95e..73e1c23f 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -222,7 +222,7 @@ struct under_diag final : nvbench::user_axis_space }; const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); - return nvbench::detail::make_space_iterator(2, + return 
nvbench::detail::axis_space_iterator(2, iteration_length, adv_func, diag_under); From 3ad3d657916e2d573a7b49504c0ccf0d225efda7 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 31 Aug 2022 11:13:45 -0400 Subject: [PATCH 28/41] update axis_space_iterator to use same method names as state_iterator --- nvbench/detail/axes_iterator.cuh | 2 +- nvbench/detail/state_generator.cxx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nvbench/detail/axes_iterator.cuh b/nvbench/detail/axes_iterator.cuh index 67ac1d50..5259bfbd 100644 --- a/nvbench/detail/axes_iterator.cuh +++ b/nvbench/detail/axes_iterator.cuh @@ -80,7 +80,7 @@ struct axis_space_iterator , m_update(std::move(update)) {} - [[nodiscard]] bool inc() + [[nodiscard]] bool next() { return this->m_advance(m_current_index, m_iteration_size); } diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index 404a891d..fab3fe43 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -74,7 +74,7 @@ void state_iterator::next() for (auto &&space : this->m_space) { - auto rolled_over = space.inc(); + auto rolled_over = space.next(); if (rolled_over) { continue; From c2bfc99f12f42d9e445446f3c9a7d7b91158083e Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 1 Sep 2022 09:13:34 -0400 Subject: [PATCH 29/41] remove need for output_indices --- examples/custom_iteration_spaces.cu | 32 ++++++++++++----------------- nvbench/axes_metadata.cuh | 2 +- nvbench/axes_metadata.cxx | 16 +++------------ nvbench/detail/state_generator.cuh | 2 +- nvbench/detail/state_generator.cxx | 8 ++++++-- nvbench/iteration_space_base.cuh | 4 +--- nvbench/iteration_space_base.cxx | 4 +--- nvbench/linear_axis_space.cuh | 2 +- nvbench/linear_axis_space.cxx | 10 ++++----- nvbench/user_axis_space.cuh | 28 +++++++++++-------------- nvbench/user_axis_space.cxx | 5 ++--- nvbench/zip_axis_space.cuh | 3 +-- nvbench/zip_axis_space.cxx | 12 +++++------ testing/axes_iteration_space.cu | 19 ++++++++--------- testing/state_generator.cu | 10 ++++----- 15 files changed, 64 insertions(+), 93 deletions(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index 828e9c49..d89ee9a8 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -74,7 +74,7 @@ NVBENCH_BENCH(copy_sweep_grid_shape) // Zipped iteration of BlockSize and NumBlocks axes. 
// Will generate only 4 invocations of copy_sweep_grid_shape NVBENCH_BENCH(copy_sweep_grid_shape) - .set_name("tied_copy_sweep_grid_shape") + .set_name("zipped_copy_sweep_grid_shape") .add_zip_axes(nvbench::int64_axis{"BlockSize", {32, 64, 128, 256}}, nvbench::int64_axis{"NumBlocks", {1024, 512, 256, 128}}); @@ -89,10 +89,8 @@ NVBENCH_BENCH(copy_sweep_grid_shape) // struct under_diag final : nvbench::user_axis_space { - under_diag(std::vector input_indices, - std::vector output_indices) - : nvbench::user_axis_space(std::move(input_indices), - std::move(output_indices)) + under_diag(std::vector input_indices) + : nvbench::user_axis_space(std::move(input_indices)) {} mutable std::size_t x_pos = 0; @@ -116,17 +114,16 @@ struct under_diag final : nvbench::user_axis_space }; // our update function - std::vector locs = m_output_indices; auto diag_under = - [&, locs, info](std::size_t, - std::vector &indices) { + [&, info](std::size_t, + std::vector &indices) { nvbench::detail::axis_index temp = info[0]; temp.index = x_pos; - indices[locs[0]] = temp; + indices.push_back(std::move(temp)); - temp = info[1]; - temp.index = y_pos; - indices[locs[1]] = temp; + temp = info[1]; + temp.index = y_pos; + indices.push_back(std::move(temp)); }; const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); @@ -169,10 +166,8 @@ NVBENCH_BENCH(copy_sweep_grid_shape) struct gauss final : nvbench::user_axis_space { - gauss(std::vector input_indices, - std::vector output_indices) - : nvbench::user_axis_space(std::move(input_indices), - std::move(output_indices)) + gauss(std::vector input_indices) + : nvbench::user_axis_space(std::move(input_indices)) {} nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const @@ -193,12 +188,11 @@ struct gauss final : nvbench::user_axis_space } // our update function - std::vector locs = m_output_indices; - auto gauss_func = [=](std::size_t index, + auto gauss_func = [=](std::size_t index, std::vector &indices) { nvbench::detail::axis_index temp = info[0]; temp.index = gauss_indices[index]; - indices[locs[0]] = temp; + indices.push_back(std::move(temp)); }; return nvbench::detail::axis_space_iterator(1, diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index 103ab9a6..83ae4cd2 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -160,7 +160,7 @@ axes_metadata::axes_metadata(nvbench::type_list) const std::size_t type_axis_index = axes.size(); spaces.push_back( - std::make_unique(type_axis_index, type_axis_index)); + std::make_unique(type_axis_index)); // Note: // The word "type" appears 6 times in the next line. 
diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index e0a0312e..a6dea060 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -137,8 +137,7 @@ void axes_metadata::add_string_axis(std::string name, void axes_metadata::add_axis(const axis_base &axis) { m_value_space.push_back( - std::make_unique(m_axes.size(), - m_axes.size() - m_type_axe_count)); + std::make_unique(m_axes.size())); m_axes.push_back(axis.clone()); } @@ -152,11 +151,7 @@ void axes_metadata::add_zip_space(std::size_t first_index, std::size_t count) // compute the numeric indice for each name we have std::vector input_indices(count); - std::vector output_indices(count); std::iota(input_indices.begin(), input_indices.end(), first_index); - std::iota(input_indices.begin(), - input_indices.end(), - first_index - m_type_axe_count); const auto expected_size = m_axes[input_indices[0]]->get_size(); for (auto i : input_indices) @@ -174,8 +169,7 @@ void axes_metadata::add_zip_space(std::size_t first_index, std::size_t count) } // add the new tied iteration space - auto tied = std::make_unique(std::move(input_indices), - std::move(output_indices)); + auto tied = std::make_unique(std::move(input_indices)); m_value_space.push_back(std::move(tied)); } @@ -186,11 +180,7 @@ void axes_metadata::add_user_iteration_space( { // compute the numeric indice for each name we have std::vector input_indices(count); - std::vector output_indices(count); std::iota(input_indices.begin(), input_indices.end(), first_index); - std::iota(input_indices.begin(), - input_indices.end(), - first_index - m_type_axe_count); for (auto i : input_indices) { @@ -201,7 +191,7 @@ void axes_metadata::add_user_iteration_space( m_axes[i]->get_name()); } - auto user_func = make(std::move(input_indices), std::move(output_indices)); + auto user_func = make(std::move(input_indices)); m_value_space.push_back(std::move(user_func)); } diff --git a/nvbench/detail/state_generator.cuh b/nvbench/detail/state_generator.cuh index e1e14117..94996765 100644 --- a/nvbench/detail/state_generator.cuh +++ b/nvbench/detail/state_generator.cuh @@ -83,7 +83,7 @@ struct state_iterator void next(); std::vector m_space; - std::size_t m_axes_count = 0; + std::size_t m_axes_count = 0; std::size_t m_current_space = 0; std::size_t m_current_iteration = 0; std::size_t m_max_iteration = 1; diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index fab3fe43..8a43b4e0 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -34,7 +34,8 @@ namespace nvbench::detail { // state_iterator ============================================================== -void state_iterator::add_iteration_space(const nvbench::detail::axis_space_iterator &iter) +void state_iterator::add_iteration_space( + const nvbench::detail::axis_space_iterator &iter) { m_axes_count += iter.m_number_of_axes; m_max_iteration *= iter.m_iteration_size; @@ -55,11 +56,13 @@ void state_iterator::init() [[nodiscard]] std::vector state_iterator::get_current_indices() const { - std::vector indices(m_axes_count); + std::vector indices; + indices.reserve(m_axes_count); for (auto &m : m_space) { m.update_indices(indices); } + // verify length return indices; } @@ -138,6 +141,7 @@ void state_generator::build_axis_configs() axis.get_input_string(axis_info.index)); } } + for (vi.init(); vi.iter_valid(); vi.next()) { auto &config = m_non_type_axis_configs.emplace_back(); diff --git a/nvbench/iteration_space_base.cuh b/nvbench/iteration_space_base.cuh index 
607b98be..130ae301 100644 --- a/nvbench/iteration_space_base.cuh +++ b/nvbench/iteration_space_base.cuh @@ -67,8 +67,7 @@ struct iteration_space_base * @param[input_indices] recorded indices of each axi from the axes metadata value space * @param[output_indices] requested indices of each axi for output when iterating the type+value space */ - iteration_space_base(std::vector input_indices, - std::vector output_indices); + iteration_space_base(std::vector input_indices); virtual ~iteration_space_base(); [[nodiscard]] std::unique_ptr clone() const; @@ -102,7 +101,6 @@ struct iteration_space_base protected: std::vector m_input_indices; - std::vector m_output_indices; virtual std::unique_ptr do_clone() const = 0; virtual detail::axis_space_iterator do_get_iterator(axes_info info) const = 0; diff --git a/nvbench/iteration_space_base.cxx b/nvbench/iteration_space_base.cxx index 0ecd6e3d..262cdc6b 100644 --- a/nvbench/iteration_space_base.cxx +++ b/nvbench/iteration_space_base.cxx @@ -23,10 +23,8 @@ namespace nvbench { -iteration_space_base::iteration_space_base(std::vector input_indices, - std::vector output_indices) +iteration_space_base::iteration_space_base(std::vector input_indices) : m_input_indices(std::move(input_indices)) - , m_output_indices(std::move(output_indices)) {} iteration_space_base::~iteration_space_base() = default; diff --git a/nvbench/linear_axis_space.cuh b/nvbench/linear_axis_space.cuh index d3593d53..78755936 100644 --- a/nvbench/linear_axis_space.cuh +++ b/nvbench/linear_axis_space.cuh @@ -31,7 +31,7 @@ namespace nvbench */ struct linear_axis_space final : iteration_space_base { - linear_axis_space(std::size_t in, std::size_t out); + linear_axis_space(std::size_t in); ~linear_axis_space(); std::unique_ptr do_clone() const override; diff --git a/nvbench/linear_axis_space.cxx b/nvbench/linear_axis_space.cxx index 38218c97..0a6c17e0 100644 --- a/nvbench/linear_axis_space.cxx +++ b/nvbench/linear_axis_space.cxx @@ -23,20 +23,18 @@ namespace nvbench { -linear_axis_space::linear_axis_space(std::size_t in_index, - std::size_t out_index) - : iteration_space_base({in_index}, {out_index}) +linear_axis_space::linear_axis_space(std::size_t in_index) + : iteration_space_base({in_index}) {} linear_axis_space::~linear_axis_space() = default; detail::axis_space_iterator linear_axis_space::do_get_iterator(axes_info info) const { - std::size_t loc{m_output_indices[0]}; auto update_func = [=](std::size_t inc_index, std::vector &indices) { - indices[loc] = info[0]; - indices[loc].index = inc_index; + indices.push_back(info[0]); + indices.back().index = inc_index; }; return detail::axis_space_iterator(1, info[0].size, update_func); diff --git a/nvbench/user_axis_space.cuh b/nvbench/user_axis_space.cuh index 76662464..cc64d0cb 100644 --- a/nvbench/user_axis_space.cuh +++ b/nvbench/user_axis_space.cuh @@ -35,47 +35,43 @@ namespace nvbench * * struct every_third final : nvbench::user_axis_space * { - * every_third(std::vector input_indices, - * std::vector output_indices) - * : nvbench::user_axis_space(std::move(input_indices), - * std::move(output_indices)) + * every_third(std::vector input_indices) + * : nvbench::user_axis_space(std::move(input_indices)) * {} * * nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const * { * // our increment function - * auto adv_func = [&, info](std::size_t &inc_index, std::size_t len) -> bool { - * inc_index += 3; - * return inc_index >= len; + * auto adv_func = [&, info](std::size_t &inc_index, std::size_t len) -> + * bool { inc_index += 
3; return inc_index >= len; * }; * * // our update function - * std::vector locs = m_output_indices; * auto update_func = [=](std::size_t inc_index, - * std::vector &indices) { + * std::vector &indices) { * for (std::size_t i = 0; i < info.size(); ++i) * { * detail::axis_index temp = info[i]; * temp.index = inc_index; - * indices[locs[i]] = temp; + * indices.push_back(std::move(temp)); * } * }; - * return detail::axis_space_iterator(locs.size(), (info[0].size/3), adv_func, update_func); + * return detail::axis_space_iterator(locs.size(), (info[0].size/3), + * adv_func, update_func); * } * - * std::size_t do_get_size(const axes_info &info) const { return (info[0].size/3); } + * std::size_t do_get_size(const axes_info &info) const { return + * (info[0].size/3); } * ... * */ struct user_axis_space : iteration_space_base { - user_axis_space(std::vector input_indices, - std::vector output_indices); + user_axis_space(std::vector input_indices); ~user_axis_space(); }; using make_user_space_signature = - std::unique_ptr(std::vector input_indices, - std::vector output_indices); + std::unique_ptr(std::vector input_indices); } // namespace nvbench diff --git a/nvbench/user_axis_space.cxx b/nvbench/user_axis_space.cxx index 31769070..c191ac45 100644 --- a/nvbench/user_axis_space.cxx +++ b/nvbench/user_axis_space.cxx @@ -23,9 +23,8 @@ namespace nvbench { -user_axis_space::user_axis_space(std::vector input_indices, - std::vector output_indices) - : iteration_space_base(std::move(input_indices), std::move(output_indices)) +user_axis_space::user_axis_space(std::vector input_indices) + : iteration_space_base(std::move(input_indices)) {} user_axis_space::~user_axis_space() = default; diff --git a/nvbench/zip_axis_space.cuh b/nvbench/zip_axis_space.cuh index 9f041428..21f5681a 100644 --- a/nvbench/zip_axis_space.cuh +++ b/nvbench/zip_axis_space.cuh @@ -37,8 +37,7 @@ namespace nvbench */ struct zip_axis_space final : iteration_space_base { - zip_axis_space(std::vector input_indices, - std::vector output_indices); + zip_axis_space(std::vector input_indices); ~zip_axis_space(); std::unique_ptr do_clone() const override; diff --git a/nvbench/zip_axis_space.cxx b/nvbench/zip_axis_space.cxx index 4fe64cde..8e717509 100644 --- a/nvbench/zip_axis_space.cxx +++ b/nvbench/zip_axis_space.cxx @@ -23,27 +23,25 @@ namespace nvbench { -zip_axis_space::zip_axis_space(std::vector input_indices, - std::vector output_indices) - : iteration_space_base(std::move(input_indices), std::move(output_indices)) +zip_axis_space::zip_axis_space(std::vector input_indices) + : iteration_space_base(std::move(input_indices)) {} zip_axis_space::~zip_axis_space() = default; detail::axis_space_iterator zip_axis_space::do_get_iterator(axes_info info) const { - std::vector locs = m_output_indices; - auto update_func = [=](std::size_t inc_index, + auto update_func = [=](std::size_t inc_index, std::vector &indices) { for (std::size_t i = 0; i < info.size(); ++i) { detail::axis_index temp = info[i]; temp.index = inc_index; - indices[locs[i]] = temp; + indices.push_back(std::move(temp)); } }; - return detail::axis_space_iterator(locs.size(), info[0].size, update_func); + return detail::axis_space_iterator(info.size(), info[0].size, update_func); } std::size_t zip_axis_space::do_get_size(const axes_info &info) const diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index 73e1c23f..57f3115e 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -181,10 +181,8 @@ void test_zip_clone() struct 
under_diag final : nvbench::user_axis_space { - under_diag(std::vector input_indices, - std::vector output_indices) - : nvbench::user_axis_space(std::move(input_indices), - std::move(output_indices)) + under_diag(std::vector input_indices) + : nvbench::user_axis_space(std::move(input_indices)) {} mutable std::size_t x_pos = 0; @@ -208,17 +206,16 @@ struct under_diag final : nvbench::user_axis_space }; // our update function - std::vector locs = m_output_indices; auto diag_under = - [&, locs, info](std::size_t, - std::vector &indices) { + [&, info](std::size_t, + std::vector &indices) { nvbench::detail::axis_index temp = info[0]; temp.index = x_pos; - indices[locs[0]] = temp; + indices.push_back(std::move(temp)); - temp = info[1]; - temp.index = y_pos; - indices[locs[1]] = temp; + temp = info[1]; + temp.index = y_pos; + indices.push_back(std::move(temp)); }; const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); diff --git a/testing/state_generator.cu b/testing/state_generator.cu index 26dc0e38..8ba6c5ae 100644 --- a/testing/state_generator.cu +++ b/testing/state_generator.cu @@ -62,7 +62,7 @@ void test_single_state() std::vector> axes; axes.push_back(std::make_unique(si)); - sg.add_iteration_space(nvbench::linear_axis_space{0, 0}.get_iterator(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{0}.get_iterator(axes)); ASSERT(sg.get_number_of_states() == 1); sg.init(); ASSERT(sg.iter_valid()); @@ -96,10 +96,10 @@ void test_basic() axes.emplace_back(std::make_unique(si3)); axes.emplace_back(std::make_unique(si4)); - sg.add_iteration_space(nvbench::linear_axis_space{0, 0}.get_iterator(axes)); - sg.add_iteration_space(nvbench::linear_axis_space{1, 1}.get_iterator(axes)); - sg.add_iteration_space(nvbench::linear_axis_space{2, 2}.get_iterator(axes)); - sg.add_iteration_space(nvbench::linear_axis_space{3, 3}.get_iterator(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{0}.get_iterator(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{1}.get_iterator(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{2}.get_iterator(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{3}.get_iterator(axes)); ASSERT_MSG(sg.get_number_of_states() == (2 * 3 * 3 * 2), "Actual: {} Expected: {}", From 910b5cc7598f13f1900df952b977e5adbaab7a75 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 1 Sep 2022 16:18:08 -0400 Subject: [PATCH 30/41] Simplified user iterators --- examples/custom_iteration_spaces.cu | 28 ++++++++++++---------------- nvbench/detail/axes_iterator.cuh | 19 ++++++++++++------- nvbench/detail/state_generator.cxx | 2 +- nvbench/linear_axis_space.cxx | 8 ++++---- nvbench/user_axis_space.cuh | 23 +++++++++++------------ nvbench/zip_axis_space.cxx | 11 +++++------ testing/axes_iteration_space.cu | 14 +++++--------- 7 files changed, 50 insertions(+), 55 deletions(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index d89ee9a8..1b0ae9e4 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -116,18 +116,14 @@ struct under_diag final : nvbench::user_axis_space // our update function auto diag_under = [&, info](std::size_t, - std::vector &indices) { - nvbench::detail::axis_index temp = info[0]; - temp.index = x_pos; - indices.push_back(std::move(temp)); - - temp = info[1]; - temp.index = y_pos; - indices.push_back(std::move(temp)); + std::vector::iterator start, + std::vector::iterator end) { + start->index = x_pos; + end->index = y_pos; }; const size_t 
iteration_length = ((info[0].size * (info[1].size + 1)) / 2); - return nvbench::detail::axis_space_iterator(2, + return nvbench::detail::axis_space_iterator(info, iteration_length, adv_func, diag_under); @@ -188,14 +184,14 @@ struct gauss final : nvbench::user_axis_space } // our update function - auto gauss_func = [=](std::size_t index, - std::vector &indices) { - nvbench::detail::axis_index temp = info[0]; - temp.index = gauss_indices[index]; - indices.push_back(std::move(temp)); - }; + auto gauss_func = + [=](std::size_t index, + std::vector::iterator start, + std::vector::iterator) { + start->index = gauss_indices[index]; + }; - return nvbench::detail::axis_space_iterator(1, + return nvbench::detail::axis_space_iterator(info, iteration_length, gauss_func); } diff --git a/nvbench/detail/axes_iterator.cuh b/nvbench/detail/axes_iterator.cuh index 5259bfbd..2275daa3 100644 --- a/nvbench/detail/axes_iterator.cuh +++ b/nvbench/detail/axes_iterator.cuh @@ -56,26 +56,28 @@ struct axis_index struct axis_space_iterator { + using axes_info = std::vector; using AdvanceSignature = bool(std::size_t ¤t_index, std::size_t length); using UpdateSignature = void(std::size_t index, - std::vector &indices); + axes_info::iterator start, + axes_info::iterator end); axis_space_iterator( - std::size_t axes_count, + std::vector info, std::size_t iter_count, std::function &&advance, std::function &&update) - : m_number_of_axes(axes_count) + : m_info(info) , m_iteration_size(iter_count) , m_advance(std::move(advance)) , m_update(std::move(update)) {} axis_space_iterator( - std::size_t axes_count, + std::vector info, std::size_t iter_count, std::function &&update) - : m_number_of_axes(axes_count) + : m_info(info) , m_iteration_size(iter_count) , m_update(std::move(update)) {} @@ -87,10 +89,13 @@ struct axis_space_iterator void update_indices(std::vector &indices) const { - this->m_update(m_current_index, indices); + indices.insert(indices.end(), m_info.begin(), m_info.end()); + axes_info::iterator end = indices.end(); + axes_info::iterator start = end - m_info.size(); + this->m_update(m_current_index, start, end); } - std::size_t m_number_of_axes = 1; + axes_info m_info; std::size_t m_iteration_size = 1; std::function m_advance = [](std::size_t ¤t_index, std::size_t length) { diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index 8a43b4e0..d1d4bdae 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -37,7 +37,7 @@ namespace nvbench::detail void state_iterator::add_iteration_space( const nvbench::detail::axis_space_iterator &iter) { - m_axes_count += iter.m_number_of_axes; + m_axes_count += iter.m_info.size(); m_max_iteration *= iter.m_iteration_size; m_space.push_back(std::move(iter)); diff --git a/nvbench/linear_axis_space.cxx b/nvbench/linear_axis_space.cxx index 0a6c17e0..23ccc86e 100644 --- a/nvbench/linear_axis_space.cxx +++ b/nvbench/linear_axis_space.cxx @@ -32,12 +32,12 @@ linear_axis_space::~linear_axis_space() = default; detail::axis_space_iterator linear_axis_space::do_get_iterator(axes_info info) const { auto update_func = [=](std::size_t inc_index, - std::vector &indices) { - indices.push_back(info[0]); - indices.back().index = inc_index; + axes_info::iterator start, + axes_info::iterator) { + start->index = inc_index; }; - return detail::axis_space_iterator(1, info[0].size, update_func); + return detail::axis_space_iterator(info, info[0].size, update_func); } std::size_t linear_axis_space::do_get_size(const axes_info &info) 
const diff --git a/nvbench/user_axis_space.cuh b/nvbench/user_axis_space.cuh index cc64d0cb..2737cefe 100644 --- a/nvbench/user_axis_space.cuh +++ b/nvbench/user_axis_space.cuh @@ -42,22 +42,21 @@ namespace nvbench * nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const * { * // our increment function - * auto adv_func = [&, info](std::size_t &inc_index, std::size_t len) -> - * bool { inc_index += 3; return inc_index >= len; + * auto adv_func = [&, info](std::size_t &inc_index, + * std::size_t len) -> bool { + * inc_index += 3; return inc_index >= len; * }; * * // our update function - * auto update_func = [=](std::size_t inc_index, - * std::vector &indices) { - * for (std::size_t i = 0; i < info.size(); ++i) - * { - * detail::axis_index temp = info[i]; - * temp.index = inc_index; - * indices.push_back(std::move(temp)); - * } + * auto update_func = [=](std::size_t inc_index, + * axes_info::iterator start, + * axes_info::iterator end) { + * for (; start != end; ++start) { + * start->index = inc_index; + * } * }; - * return detail::axis_space_iterator(locs.size(), (info[0].size/3), - * adv_func, update_func); + * return detail::axis_space_iterator(info, (info[0].size/3), + * adv_func, update_func); * } * * std::size_t do_get_size(const axes_info &info) const { return diff --git a/nvbench/zip_axis_space.cxx b/nvbench/zip_axis_space.cxx index 8e717509..3e687f7c 100644 --- a/nvbench/zip_axis_space.cxx +++ b/nvbench/zip_axis_space.cxx @@ -32,16 +32,15 @@ zip_axis_space::~zip_axis_space() = default; detail::axis_space_iterator zip_axis_space::do_get_iterator(axes_info info) const { auto update_func = [=](std::size_t inc_index, - std::vector &indices) { - for (std::size_t i = 0; i < info.size(); ++i) + axes_info::iterator start, + axes_info::iterator end) { + for (; start != end; ++start) { - detail::axis_index temp = info[i]; - temp.index = inc_index; - indices.push_back(std::move(temp)); + start->index = inc_index; } }; - return detail::axis_space_iterator(info.size(), info[0].size, update_func); + return detail::axis_space_iterator(info, info[0].size, update_func); } std::size_t zip_axis_space::do_get_size(const axes_info &info) const diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index 57f3115e..76c9946d 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -208,18 +208,14 @@ struct under_diag final : nvbench::user_axis_space // our update function auto diag_under = [&, info](std::size_t, - std::vector &indices) { - nvbench::detail::axis_index temp = info[0]; - temp.index = x_pos; - indices.push_back(std::move(temp)); - - temp = info[1]; - temp.index = y_pos; - indices.push_back(std::move(temp)); + std::vector::iterator start, + std::vector::iterator end) { + start->index = x_pos; + end->index = y_pos; }; const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); - return nvbench::detail::axis_space_iterator(2, + return nvbench::detail::axis_space_iterator(info, iteration_length, adv_func, diag_under); From 62cbdc5ff931c5f238cc281da23f83c5a771f084 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Thu, 1 May 2025 16:37:53 +0000 Subject: [PATCH 31/41] Reduce runtime of gaussian iteration example. 
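
The gaussian example adds a separate user iteration space for each Duration axis, so its
state count is the product of the two axis sizes (per target device). Coarsening the stride
of the range keeps the example intact while shrinking that product. A rough sketch of the
effect, assuming nvbench::range(start, end, stride) includes both endpoints:

    nvbench::range(0., 1e-4, 1e-5)  // 11 values per axis -> ~121 combinations
    nvbench::range(0., 1e-4, 5e-5)  //  3 values per axis ->    9 combinations
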
--- examples/custom_iteration_spaces.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index 3aef6830..0a1e15ea 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -225,9 +225,9 @@ NVBENCH_BENCH(dual_float64_axis) [](auto... args) -> std::unique_ptr { return std::make_unique(args...); }, - nvbench::float64_axis("Duration_A", nvbench::range(0., 1e-4, 1e-5))) + nvbench::float64_axis("Duration_A", nvbench::range(0., 1e-4, 5e-5))) .add_user_iteration_axes( [](auto... args) -> std::unique_ptr { return std::make_unique(args...); }, - nvbench::float64_axis("Duration_B", nvbench::range(0., 1e-4, 1e-5))); + nvbench::float64_axis("Duration_B", nvbench::range(0., 1e-4, 5e-5))); From edefcd0f6a71b2df0d5111cb1e127d3ee11af45d Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Thu, 1 May 2025 16:38:31 +0000 Subject: [PATCH 32/41] Update fmt usage for new version. --- testing/axes_iteration_space.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index 76c9946d..7c13bc46 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -44,13 +44,13 @@ std::vector sort(std::vector &&vec) void no_op_generator(nvbench::state &state) { fmt::memory_buffer params; - fmt::format_to(params, "Params:"); + fmt::format_to(std::back_inserter(params), "Params:"); const auto &axis_values = state.get_axis_values(); for (const auto &name : sort(axis_values.get_names())) { std::visit( [¶ms, &name](const auto &value) { - fmt::format_to(params, " {}: {}", name, value); + fmt::format_to(std::back_inserter(params), " {}: {}", name, value); }, axis_values.get_value(name)); } From 250d755bd6ca60ea59af473fbac6b0e20030d855 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Thu, 1 May 2025 16:40:09 +0000 Subject: [PATCH 33/41] Update new test to support device-init changes. 
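
With the device-init changes the benchmark no longer picks up the device list implicitly at
construction time (my reading of the change), so the new tests set it explicitly and guard
the expected config count against a zero-device environment. The pattern, taken from the
updated test:

    bench.set_devices(nvbench::device_manager::get().get_devices());
    const auto num_devices = std::max(std::size_t(1), bench.get_devices().size());
    // zipped {F64, I64} axes: expect 5 * num_devices configs

Clearing the device list afterwards drops the expectation back to the bare axis-space size
(5 for the zip test, 15 for the user-space test).
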
--- testing/axes_iteration_space.cu | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index 7c13bc46..710434ac 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -17,23 +17,24 @@ */ #include - #include +#include #include #include #include #include #include -#include "test_asserts.cuh" - #include #include +#include #include #include #include +#include "test_asserts.cuh" + template std::vector sort(std::vector &&vec) { @@ -114,12 +115,18 @@ void test_zip_axes() { using benchmark_type = nvbench::benchmark; benchmark_type bench; + bench.set_devices(nvbench::device_manager::get().get_devices()); bench.add_zip_axes(nvbench::float64_axis("F64 Axis", {0., .1, .25, .5, 1.}), nvbench::int64_axis("I64 Axis", {1, 3, 2, 4, 5})); - ASSERT_MSG(bench.get_config_count() == 5 * bench.get_devices().size(), - "Got {}", - bench.get_config_count()); + const auto num_devices = std::max(std::size_t(1), bench.get_devices().size()); + ASSERT_MSG(bench.get_config_count() == 5 * num_devices, + "Got {}, expected {}", + bench.get_config_count(), + 5 * bench.get_devices().size()); + + bench.set_devices(std::vector{}); + ASSERT_MSG(bench.get_config_count() == 5, "Got {}, expected {}", bench.get_config_count(), 5); } void test_zip_unequal_length() @@ -241,6 +248,7 @@ void test_user_axes() { using benchmark_type = rezippable_benchmark; benchmark_type bench; + bench.set_devices(nvbench::device_manager::get().get_devices()); bench.add_user_iteration_axes( [](auto... args) -> std::unique_ptr { return std::make_unique(args...); @@ -248,9 +256,11 @@ void test_user_axes() nvbench::float64_axis("F64 Axis", {0., .1, .25, .5, 1.}), nvbench::int64_axis("I64 Axis", {1, 3, 2, 4, 5})); - ASSERT_MSG(bench.get_config_count() == 15 * bench.get_devices().size(), - "Got {}", - bench.get_config_count()); + const auto num_devices = std::max(std::size_t(1), bench.get_devices().size()); + ASSERT_MSG(bench.get_config_count() == 15 * num_devices, "Got {}", bench.get_config_count()); + + bench.set_devices(std::vector{}); + ASSERT_MSG(bench.get_config_count() == 15, "Got {}", bench.get_config_count()); } int main() From 4defa02f5157110d98e5e6897ffc71ce13322c01 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Thu, 1 May 2025 16:53:52 +0000 Subject: [PATCH 34/41] Precommit reformatting. 
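
Mechanical clang-format / pre-commit pass over the files touched by this series. The only
wording changes ride along in comments and user-facing exception messages (for example
"axi" -> "axis"/"axes" and "tieing" -> "zipping" in axes_metadata.cxx); no behavioral
change intended.
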
--- examples/custom_iteration_spaces.cu | 71 +++++++++++------------------ nvbench/axes_metadata.cuh | 23 ++++------ nvbench/axes_metadata.cxx | 21 ++++----- nvbench/benchmark_base.cuh | 8 ++-- nvbench/benchmark_base.cxx | 14 +++--- nvbench/detail/axes_iterator.cuh | 37 ++++++--------- nvbench/detail/state_generator.cuh | 2 +- nvbench/detail/state_generator.cxx | 49 ++++++++------------ nvbench/int64_axis.cxx | 12 ++--- nvbench/iteration_space_base.cuh | 25 +++++----- nvbench/linear_axis_space.cuh | 4 +- nvbench/linear_axis_space.cxx | 9 +--- nvbench/user_axis_space.cuh | 2 +- nvbench/zip_axis_space.cuh | 4 +- nvbench/zip_axis_space.cxx | 20 ++++---- testing/axes_iteration_space.cu | 47 +++++++------------ testing/benchmark.cu | 10 ++-- testing/option_parser.cu | 1 + 18 files changed, 145 insertions(+), 214 deletions(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index 0a1e15ea..a1d1b099 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -48,17 +48,15 @@ void copy_sweep_grid_shape(nvbench::state &state) thrust::device_vector in(num_values, 0); thrust::device_vector out(num_values, 0); - state.exec( - [block_size, - num_blocks, - num_values, - in_ptr = thrust::raw_pointer_cast(in.data()), - out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { - nvbench::copy_kernel<<>>( - in_ptr, - out_ptr, - num_values); - }); + state.exec([block_size, + num_blocks, + num_values, + in_ptr = thrust::raw_pointer_cast(in.data()), + out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { + nvbench::copy_kernel<<>>(in_ptr, + out_ptr, + num_values); + }); } //============================================================================== @@ -100,8 +98,7 @@ struct under_diag final : nvbench::user_axis_space nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const { // generate our increment function - auto adv_func = [&, info](std::size_t &inc_index, - std::size_t /*len*/) -> bool { + auto adv_func = [&, info](std::size_t &inc_index, std::size_t /*len*/) -> bool { inc_index++; x_pos++; if (x_pos == info[0].size) @@ -114,19 +111,15 @@ struct under_diag final : nvbench::user_axis_space }; // our update function - auto diag_under = - [&, info](std::size_t, - std::vector::iterator start, - std::vector::iterator end) { - start->index = x_pos; - end->index = y_pos; - }; + auto diag_under = [&, info](std::size_t, + std::vector::iterator start, + std::vector::iterator end) { + start->index = x_pos; + end->index = y_pos; + }; const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); - return nvbench::detail::axis_space_iterator(info, - iteration_length, - adv_func, - diag_under); + return nvbench::detail::axis_space_iterator(info, iteration_length, adv_func, diag_under); } std::size_t do_get_size(const axes_info &info) const @@ -184,29 +177,20 @@ struct gauss final : nvbench::user_axis_space } // our update function - auto gauss_func = - [=](std::size_t index, - std::vector::iterator start, - std::vector::iterator) { - start->index = gauss_indices[index]; - }; - - return nvbench::detail::axis_space_iterator(info, - iteration_length, - gauss_func); + auto gauss_func = [=](std::size_t index, + std::vector::iterator start, + std::vector::iterator) { + start->index = gauss_indices[index]; + }; + + return nvbench::detail::axis_space_iterator(info, iteration_length, gauss_func); } std::size_t do_get_size(const axes_info &info) const { return info[0].size; } - 
std::size_t do_get_active_count(const axes_info &info) const - { - return info[0].size; - } + std::size_t do_get_active_count(const axes_info &info) const { return info[0].size; } - std::unique_ptr do_clone() const - { - return std::make_unique(*this); - } + std::unique_ptr do_clone() const { return std::make_unique(*this); } }; //============================================================================== // Dual parameter sweep: @@ -216,8 +200,7 @@ void dual_float64_axis(nvbench::state &state) const auto duration_B = state.get_float64("Duration_B"); state.exec([duration_A, duration_B](nvbench::launch &launch) { - nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(duration_A + - duration_B); + nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(duration_A + duration_B); }); } NVBENCH_BENCH(dual_float64_axis) diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index f5c0d163..fe38eb7c 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -41,9 +41,8 @@ namespace nvbench // Holds dynamic axes information. struct axes_metadata { - using axes_type = std::vector>; - using iteration_space_type = - std::vector>; + using axes_type = std::vector>; + using iteration_space_type = std::vector>; template explicit axes_metadata(nvbench::type_list); @@ -78,9 +77,8 @@ struct axes_metadata } template - void add_user_iteration_axes( - std::function make, - Args &&...args) + void add_user_iteration_axes(std::function make, + Args &&...args) { const std::size_t start = this->m_axes.size(); const std::size_t count = sizeof...(Args); @@ -132,10 +130,9 @@ private: iteration_space_type m_value_space; void add_zip_space(std::size_t first_index, std::size_t count); - void add_user_iteration_space( - std::function make, - std::size_t first_index, - std::size_t count); + void add_user_iteration_space(std::function make, + std::size_t first_index, + std::size_t count); }; template @@ -148,14 +145,12 @@ axes_metadata::axes_metadata(nvbench::type_list) auto names_iter = names.begin(); // contents will be moved from nvbench::tl::foreach( - [&axes = m_axes, &spaces = m_type_space, &names_iter]( - [[maybe_unused]] auto wrapped_type) { + [&axes = m_axes, &spaces = m_type_space, &names_iter]([[maybe_unused]] auto wrapped_type) { // This is always called before other axes are added, so the length of the // axes vector will be the type axis index: const std::size_t type_axis_index = axes.size(); - spaces.push_back( - std::make_unique(type_axis_index)); + spaces.push_back(std::make_unique(type_axis_index)); // Note: // The word "type" appears 6 times in the next line. 
diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index d6fc98b3..0742f6b1 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -24,12 +24,11 @@ #include #include +#include #include #include #include -#include - namespace nvbench { @@ -129,8 +128,7 @@ void axes_metadata::add_string_axis(std::string name, std::vector d void axes_metadata::add_axis(const axis_base &axis) { - m_value_space.push_back( - std::make_unique(m_axes.size())); + m_value_space.push_back(std::make_unique(m_axes.size())); m_axes.push_back(axis.clone()); } @@ -138,7 +136,7 @@ void axes_metadata::add_zip_space(std::size_t first_index, std::size_t count) { NVBENCH_THROW_IF((count < 2), std::runtime_error, - "At least two axi ( {} provided ) need to be provided " + "At least two axes ( {} provided ) need to be provided " "when using zip_axes.", count); @@ -151,13 +149,13 @@ void axes_metadata::add_zip_space(std::size_t first_index, std::size_t count) { NVBENCH_THROW_IF((m_axes[i]->get_type() == nvbench::axis_type::type), std::runtime_error, - "Currently no support for tieing type axis ( {} ).", + "Currently no support for zipping type axis ( {} ).", m_axes[i]->get_name()); NVBENCH_THROW_IF((m_axes[i]->get_size() < expected_size), std::runtime_error, - "All axes that are tied together must be atleast as long " - "the first axi provided ( {} ).", + "All axes that are zipped together must be at least as long " + "as the first axis provided ( {} ).", expected_size); } @@ -166,10 +164,9 @@ void axes_metadata::add_zip_space(std::size_t first_index, std::size_t count) m_value_space.push_back(std::move(tied)); } -void axes_metadata::add_user_iteration_space( - std::function make, - std::size_t first_index, - std::size_t count) +void axes_metadata::add_user_iteration_space(std::function make, + std::size_t first_index, + std::size_t count) { // compute the numeric indice for each name we have std::vector input_indices(count); diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index b00ff7ba..dd83715a 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -116,8 +116,8 @@ struct benchmark_base /// @param[axes] a set of axis_base to be added to the benchmark /// and zipped together /// - template - benchmark_base &add_zip_axes(Axes&&... axes) + template + benchmark_base &add_zip_axes(Axes &&...axes) { m_axes.add_zip_axes(std::forward(axes)...); return *this; @@ -135,8 +135,8 @@ struct benchmark_base /// which constructs the user iteration space, and the reseet are axis_base to be /// added to the benchmark and iterated using the user iteration space /// - template - benchmark_base &add_user_iteration_axes(ConstructorAndAxes&&... 
args) + template + benchmark_base &add_user_iteration_axes(ConstructorAndAxes &&...args) { m_axes.add_user_iteration_axes(std::forward(args)...); return *this; diff --git a/nvbench/benchmark_base.cxx b/nvbench/benchmark_base.cxx index 2b1d3a05..ba507fd7 100644 --- a/nvbench/benchmark_base.cxx +++ b/nvbench/benchmark_base.cxx @@ -75,13 +75,13 @@ benchmark_base &benchmark_base::add_device(int device_id) std::size_t benchmark_base::get_config_count() const { - const auto& axes = m_axes.get_axes(); - const std::size_t value_count = nvbench::detail::transform_reduce( - m_axes.get_value_iteration_space().cbegin(), - m_axes.get_value_iteration_space().cend(), - std::size_t{1}, - std::multiplies<>{}, - [&axes](const auto &space) { return space->get_size(axes); }); + const auto &axes = m_axes.get_axes(); + const std::size_t value_count = + nvbench::detail::transform_reduce(m_axes.get_value_iteration_space().cbegin(), + m_axes.get_value_iteration_space().cend(), + std::size_t{1}, + std::multiplies<>{}, + [&axes](const auto &space) { return space->get_size(axes); }); const std::size_t type_count = nvbench::detail::transform_reduce( m_axes.get_type_iteration_space().cbegin(), diff --git a/nvbench/detail/axes_iterator.cuh b/nvbench/detail/axes_iterator.cuh index 2275daa3..c4444cb1 100644 --- a/nvbench/detail/axes_iterator.cuh +++ b/nvbench/detail/axes_iterator.cuh @@ -34,17 +34,16 @@ struct axis_index { axis_index() = default; - explicit axis_index(const axis_base *axi) + explicit axis_index(const axis_base *axis) : index(0) - , name(axi->get_name()) - , type(axi->get_type()) - , size(axi->get_size()) - , active_size(axi->get_size()) + , name(axis->get_name()) + , type(axis->get_type()) + , size(axis->get_size()) + , active_size(axis->get_size()) { if (type == nvbench::axis_type::type) { - active_size = - static_cast(axi)->get_active_count(); + active_size = static_cast(axis)->get_active_count(); } } std::size_t index; @@ -62,30 +61,25 @@ struct axis_space_iterator axes_info::iterator start, axes_info::iterator end); - axis_space_iterator( - std::vector info, - std::size_t iter_count, - std::function &&advance, - std::function &&update) + axis_space_iterator(std::vector info, + std::size_t iter_count, + std::function &&advance, + std::function &&update) : m_info(info) , m_iteration_size(iter_count) , m_advance(std::move(advance)) , m_update(std::move(update)) {} - axis_space_iterator( - std::vector info, - std::size_t iter_count, - std::function &&update) + axis_space_iterator(std::vector info, + std::size_t iter_count, + std::function &&update) : m_info(info) , m_iteration_size(iter_count) , m_update(std::move(update)) {} - [[nodiscard]] bool next() - { - return this->m_advance(m_current_index, m_iteration_size); - } + [[nodiscard]] bool next() { return this->m_advance(m_current_index, m_iteration_size); } void update_indices(std::vector &indices) const { @@ -97,8 +91,7 @@ struct axis_space_iterator axes_info m_info; std::size_t m_iteration_size = 1; - std::function m_advance = [](std::size_t ¤t_index, - std::size_t length) { + std::function m_advance = [](std::size_t ¤t_index, std::size_t length) { (current_index + 1 == length) ? 
current_index = 0 : current_index++; return (current_index == 0); // we rolled over }; diff --git a/nvbench/detail/state_generator.cuh b/nvbench/detail/state_generator.cuh index 94996765..e1e14117 100644 --- a/nvbench/detail/state_generator.cuh +++ b/nvbench/detail/state_generator.cuh @@ -83,7 +83,7 @@ struct state_iterator void next(); std::vector m_space; - std::size_t m_axes_count = 0; + std::size_t m_axes_count = 0; std::size_t m_current_space = 0; std::size_t m_current_iteration = 0; std::size_t m_max_iteration = 1; diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index 8acaa358..c1355782 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -32,8 +32,7 @@ namespace nvbench::detail { // state_iterator ============================================================== -void state_iterator::add_iteration_space( - const nvbench::detail::axis_space_iterator &iter) +void state_iterator::add_iteration_space(const nvbench::detail::axis_space_iterator &iter) { m_axes_count += iter.m_info.size(); m_max_iteration *= iter.m_iteration_size; @@ -106,16 +105,12 @@ void state_generator::build_axis_configs() // instantiations. { const auto &axes_vec = axes.get_axes(); - std::for_each(type_space.crbegin(), - type_space.crend(), - [&ti, &axes_vec](const auto &space) { - ti.add_iteration_space(space->get_iterator(axes_vec)); - }); - std::for_each(value_space.begin(), - value_space.end(), - [&vi, &axes_vec](const auto &space) { - vi.add_iteration_space(space->get_iterator(axes_vec)); - }); + std::for_each(type_space.crbegin(), type_space.crend(), [&ti, &axes_vec](const auto &space) { + ti.add_iteration_space(space->get_iterator(axes_vec)); + }); + std::for_each(value_space.begin(), value_space.end(), [&vi, &axes_vec](const auto &space) { + vi.add_iteration_space(space->get_iterator(axes_vec)); + }); } m_type_axis_configs.clear(); @@ -126,8 +121,8 @@ void state_generator::build_axis_configs() for (ti.init(); ti.iter_valid(); ti.next()) { - auto &[config, active_mask] = m_type_axis_configs.emplace_back( - std::make_pair(nvbench::named_values{}, true)); + auto &[config, active_mask] = + m_type_axis_configs.emplace_back(std::make_pair(nvbench::named_values{}, true)); for (const auto &axis_info : ti.get_current_indices()) { @@ -135,8 +130,7 @@ void state_generator::build_axis_configs() active_mask &= axis.get_is_active(axis_info.index); - config.set_string(axis.get_name(), - axis.get_input_string(axis_info.index)); + config.set_string(axis.get_name(), axis.get_input_string(axis_info.index)); } } @@ -154,30 +148,26 @@ void state_generator::build_axis_configs() assert("unreachable." 
&& false); break; case axis_type::int64: - config.set_int64( - axis_info.name, - axes.get_int64_axis(axis_info.name).get_value(axis_info.index)); + config.set_int64(axis_info.name, + axes.get_int64_axis(axis_info.name).get_value(axis_info.index)); break; case axis_type::float64: - config.set_float64( - axis_info.name, - axes.get_float64_axis(axis_info.name).get_value(axis_info.index)); + config.set_float64(axis_info.name, + axes.get_float64_axis(axis_info.name).get_value(axis_info.index)); break; case axis_type::string: - config.set_string( - axis_info.name, - axes.get_string_axis(axis_info.name).get_value(axis_info.index)); + config.set_string(axis_info.name, + axes.get_string_axis(axis_info.name).get_value(axis_info.index)); break; } // switch (type) - } // for (axis_info : current_indices) + } // for (axis_info : current_indices) } if (m_type_axis_configs.empty()) { - m_type_axis_configs.emplace_back( - std::make_pair(nvbench::named_values{}, true)); + m_type_axis_configs.emplace_back(std::make_pair(nvbench::named_values{}, true)); } } @@ -204,8 +194,7 @@ void state_generator::add_states_for_device(const std::optional &de const auto num_type_configs = m_type_axis_configs.size(); for (std::size_t type_config_index = 0; type_config_index < num_type_configs; ++type_config_index) { - const auto &[type_config, - axis_mask] = m_type_axis_configs[type_config_index]; + const auto &[type_config, axis_mask] = m_type_axis_configs[type_config_index]; if (!axis_mask) { // Don't generate inner vector if the type config is masked out. continue; diff --git a/nvbench/int64_axis.cxx b/nvbench/int64_axis.cxx index 090282ba..95cabf1c 100644 --- a/nvbench/int64_axis.cxx +++ b/nvbench/int64_axis.cxx @@ -28,14 +28,12 @@ namespace { -std::vector -construct_values(nvbench::int64_axis_flags flags, - const std::vector &inputs) +std::vector construct_values(nvbench::int64_axis_flags flags, + const std::vector &inputs) { std::vector values; - const bool is_power_of_two = - static_cast(flags & nvbench::int64_axis_flags::power_of_two); + const bool is_power_of_two = static_cast(flags & nvbench::int64_axis_flags::power_of_two); if (!is_power_of_two) { values = inputs; @@ -65,9 +63,7 @@ construct_values(nvbench::int64_axis_flags flags, namespace nvbench { -int64_axis::int64_axis(std::string name, - std::vector inputs, - int64_axis_flags flags) +int64_axis::int64_axis(std::string name, std::vector inputs, int64_axis_flags flags) : axis_base{std::move(name), axis_type::int64} , m_inputs{std::move(inputs)} , m_values{construct_values(flags, m_inputs)} diff --git a/nvbench/iteration_space_base.cuh b/nvbench/iteration_space_base.cuh index 130ae301..15420e85 100644 --- a/nvbench/iteration_space_base.cuh +++ b/nvbench/iteration_space_base.cuh @@ -24,17 +24,17 @@ namespace nvbench { /*! - * Base class for all axi and axes iteration spaces. + * Base class for all axis iteration spaces. * - * If we consider an axi to be a container of values, iteration_spaces + * If we consider an axis to be a container of values, iteration_spaces * would be how we can create iterators over that container. * * With that in mind we get the following mapping: - * * linear_axis_space is equivalant to a forward iterator. + * * linear_axis_space is equivalent to a forward iterator. * - * * zip_axis_space is equivalant to a zip iterator. + * * zip_axis_space is equivalent to a zip iterator. * - * * user_axis_space is equivalant to a transform iterator. + * * user_axis_space is equivalent to a transform iterator. 
* * The `nvbench::axes_metadata` stores all axes in a std::vector. To represent * which axes each space is 'over' we store those indices. We don't store @@ -54,18 +54,18 @@ struct iteration_space_base using axes_type = std::vector>; using axes_info = std::vector; - using AdvanceSignature = - nvbench::detail::axis_space_iterator::AdvanceSignature; - using UpdateSignature = nvbench::detail::axis_space_iterator::UpdateSignature; + using AdvanceSignature = nvbench::detail::axis_space_iterator::AdvanceSignature; + using UpdateSignature = nvbench::detail::axis_space_iterator::UpdateSignature; /*! * Construct a new derived iteration_space * * The input_indices and output_indices combine together to allow the iteration space to know - * what axi they should query from axes_metadata and where each of those map to in the output + * what axes they should query from axes_metadata and where each of those map to in the output * iteration space. - * @param[input_indices] recorded indices of each axi from the axes metadata value space - * @param[output_indices] requested indices of each axi for output when iterating the type+value space + * @param[input_indices] recorded indices of each axis from the axes metadata value space + * @param[output_indices] requested indices of each axis for output when iterating the type+value + * space */ iteration_space_base(std::vector input_indices); virtual ~iteration_space_base(); @@ -78,8 +78,7 @@ struct iteration_space_base * @param[axes] * */ - [[nodiscard]] detail::axis_space_iterator - get_iterator(const axes_type &axes) const; + [[nodiscard]] detail::axis_space_iterator get_iterator(const axes_type &axes) const; /*! * Returns the number of active and inactive elements the iterator will have diff --git a/nvbench/linear_axis_space.cuh b/nvbench/linear_axis_space.cuh index 78755936..4adf385d 100644 --- a/nvbench/linear_axis_space.cuh +++ b/nvbench/linear_axis_space.cuh @@ -24,9 +24,9 @@ namespace nvbench { /*! - * Provides linear forward iteration over a single axi + * Provides linear forward iteration over a single axis. * - * The default for all axi added to a benchmark + * The default for all axes added to a benchmark * */ struct linear_axis_space final : iteration_space_base diff --git a/nvbench/linear_axis_space.cxx b/nvbench/linear_axis_space.cxx index 23ccc86e..90e4ba98 100644 --- a/nvbench/linear_axis_space.cxx +++ b/nvbench/linear_axis_space.cxx @@ -31,19 +31,14 @@ linear_axis_space::~linear_axis_space() = default; detail::axis_space_iterator linear_axis_space::do_get_iterator(axes_info info) const { - auto update_func = [=](std::size_t inc_index, - axes_info::iterator start, - axes_info::iterator) { + auto update_func = [=](std::size_t inc_index, axes_info::iterator start, axes_info::iterator) { start->index = inc_index; }; return detail::axis_space_iterator(info, info[0].size, update_func); } -std::size_t linear_axis_space::do_get_size(const axes_info &info) const -{ - return info[0].size; -} +std::size_t linear_axis_space::do_get_size(const axes_info &info) const { return info[0].size; } std::size_t linear_axis_space::do_get_active_count(const axes_info &info) const { diff --git a/nvbench/user_axis_space.cuh b/nvbench/user_axis_space.cuh index 2737cefe..2152e5bc 100644 --- a/nvbench/user_axis_space.cuh +++ b/nvbench/user_axis_space.cuh @@ -26,7 +26,7 @@ namespace nvbench /*! 
* Provides user defined iteration over multiple axes * - * Consider two axi with the following values: + * Consider two axes with the following values: * { 0, 1, 2, 3, 4, 5 } * { 0, 1, 2, 3, 4, 5 } * diff --git a/nvbench/zip_axis_space.cuh b/nvbench/zip_axis_space.cuh index 21f5681a..9017d345 100644 --- a/nvbench/zip_axis_space.cuh +++ b/nvbench/zip_axis_space.cuh @@ -26,11 +26,11 @@ namespace nvbench /*! * Provides linear forward iteration over multiple axes in lockstep * - * Consider two axi with the following values: + * Consider two axes with the following values: * { 0, 1, 2, 3, 4, 5 } * { 0, 1, 2, 3, 4, 5 } * - * Using a zip_axis_space over these two axi will generate 6 values + * Using a zip_axis_space over these two axes will generate 6 values * ( {0,0}, {1,1}, {2,2}, ... ) instead of the default 36 values * ( {0,0}, {0,1}, {0,2}, ...). * diff --git a/nvbench/zip_axis_space.cxx b/nvbench/zip_axis_space.cxx index 3e687f7c..5b06390b 100644 --- a/nvbench/zip_axis_space.cxx +++ b/nvbench/zip_axis_space.cxx @@ -31,22 +31,18 @@ zip_axis_space::~zip_axis_space() = default; detail::axis_space_iterator zip_axis_space::do_get_iterator(axes_info info) const { - auto update_func = [=](std::size_t inc_index, - axes_info::iterator start, - axes_info::iterator end) { - for (; start != end; ++start) - { - start->index = inc_index; - } - }; + auto update_func = + [=](std::size_t inc_index, axes_info::iterator start, axes_info::iterator end) { + for (; start != end; ++start) + { + start->index = inc_index; + } + }; return detail::axis_space_iterator(info, info[0].size, update_func); } -std::size_t zip_axis_space::do_get_size(const axes_info &info) const -{ - return info[0].size; -} +std::size_t zip_axis_space::do_get_size(const axes_info &info) const { return info[0].size; } std::size_t zip_axis_space::do_get_active_count(const axes_info &info) const { diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index 710434ac..a068480c 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -68,8 +68,7 @@ struct rezippable_benchmark final : public nvbench::benchmark_base using type_axes = TypeAxes; using type_configs = nvbench::tl::cartesian_product; - static constexpr std::size_t num_type_configs = - nvbench::tl::size{}; + static constexpr std::size_t num_type_configs = nvbench::tl::size{}; rezippable_benchmark() : benchmark_base(type_axes{}) @@ -95,21 +94,16 @@ private: }; template -void template_no_op_generator(nvbench::state &state, - nvbench::type_list) +void template_no_op_generator(nvbench::state &state, nvbench::type_list) { - ASSERT(nvbench::type_strings::input_string() == - state.get_string("Integer")); - ASSERT(nvbench::type_strings::input_string() == - state.get_string("Float")); - ASSERT(nvbench::type_strings::input_string() == - state.get_string("Other")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("Integer")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("Float")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("Other")); // Enum params using non-templated version: no_op_generator(state); } -NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, - template_no_op_callable); +NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, template_no_op_callable); void test_zip_axes() { @@ -134,9 +128,8 @@ void test_zip_unequal_length() using benchmark_type = nvbench::benchmark; benchmark_type bench; - ASSERT_THROWS_ANY( - bench.add_zip_axes(nvbench::float64_axis("F64 Axis", 
{0., .1, .25, .5, 1.}), - nvbench::int64_axis("I64 Axis", {1, 3, 2}))); + ASSERT_THROWS_ANY(bench.add_zip_axes(nvbench::float64_axis("F64 Axis", {0., .1, .25, .5, 1.}), + nvbench::int64_axis("I64 Axis", {1, 3, 2}))); } void test_zip_clone() @@ -146,8 +139,7 @@ void test_zip_clone() bench.set_devices(std::vector{}); bench.add_int64_power_of_two_axis("I64 POT Axis", {10, 20}); bench.add_int64_axis("I64 Axis", {10, 20}); - bench.add_zip_axes(nvbench::string_axis("Strings", - {"string a", "string b", "string c"}), + bench.add_zip_axes(nvbench::string_axis("Strings", {"string a", "string b", "string c"}), nvbench::float64_axis("F64 Axis", {0., .1, .25})); const auto expected_count = bench.get_config_count(); @@ -199,8 +191,7 @@ struct under_diag final : nvbench::user_axis_space nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const { // generate our increment function - auto adv_func = [&, info](std::size_t &inc_index, - std::size_t /*len*/) -> bool { + auto adv_func = [&, info](std::size_t &inc_index, std::size_t /*len*/) -> bool { inc_index++; x_pos++; if (x_pos == info[0].size) @@ -213,19 +204,15 @@ struct under_diag final : nvbench::user_axis_space }; // our update function - auto diag_under = - [&, info](std::size_t, - std::vector::iterator start, - std::vector::iterator end) { - start->index = x_pos; - end->index = y_pos; - }; + auto diag_under = [&, info](std::size_t, + std::vector::iterator start, + std::vector::iterator end) { + start->index = x_pos; + end->index = y_pos; + }; const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); - return nvbench::detail::axis_space_iterator(info, - iteration_length, - adv_func, - diag_under); + return nvbench::detail::axis_space_iterator(info, iteration_length, adv_func, diag_under); } std::size_t do_get_size(const axes_info &info) const diff --git a/testing/benchmark.cu b/testing/benchmark.cu index a97b4da1..7d1f5606 100644 --- a/testing/benchmark.cu +++ b/testing/benchmark.cu @@ -285,11 +285,11 @@ void test_get_config_count() bench.set_type_axes_names({"Integer", "Float", "Other"}); bench.get_axes().get_type_axis(0).set_active_inputs({"I16", "I32"}); // 2, 2 bench.get_axes().get_type_axis(1).set_active_inputs({"F32", "F64"}); // 2, 4 - bench.get_axes().get_type_axis(2).set_active_inputs({"bool"}); // 1, 4 - bench.add_float64_axis("foo", {0.4, 2.3, 4.3}); // 3, 12 - bench.add_int64_axis("bar", {4, 6, 15}); // 3, 36 - bench.add_string_axis("baz", {"str", "ing"}); // 2, 72 - bench.add_string_axis("fez", {"single"}); // 1, 72 + bench.get_axes().get_type_axis(2).set_active_inputs({"bool"}); // 1, 4 + bench.add_float64_axis("foo", {0.4, 2.3, 4.3}); // 3, 12 + bench.add_int64_axis("bar", {4, 6, 15}); // 3, 36 + bench.add_string_axis("baz", {"str", "ing"}); // 2, 72 + bench.add_string_axis("fez", {"single"}); // 1, 72 auto const num_devices = std::max(std::size_t(1), bench.get_devices().size()); diff --git a/testing/option_parser.cu b/testing/option_parser.cu index 1339a405..a6bf9f55 100644 --- a/testing/option_parser.cu +++ b/testing/option_parser.cu @@ -23,6 +23,7 @@ #include #include + #include "test_asserts.cuh" //============================================================================== From 4bd5690cd3f5ff6c2a961556beb1ed8c6f4be098 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Thu, 1 May 2025 17:27:41 +0000 Subject: [PATCH 35/41] Fix warnings on clang. 
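
Two warnings show up under clang in the new code: the num_values capture in the copy example
looks unused to clang because its only use is inside the chevron kernel launch, and the
iterator arithmetic in axis_space_iterator::update_indices implicitly converts a std::size_t
to the iterator's signed difference type. Both fixes are the usual idioms; a sketch (the
exact warning flags are my guess, not taken from a build log):

    (void)num_values; // silence the unused-lambda-capture diagnostic

    using diff_t = typename axes_info::difference_type;
    axes_info::iterator start = end - static_cast<diff_t>(m_info.size()); // no implicit sign conversion
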
--- examples/custom_iteration_spaces.cu | 5 +++-- nvbench/detail/axes_iterator.cuh | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index a1d1b099..be1869ab 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -33,8 +33,8 @@ void copy_sweep_grid_shape(nvbench::state &state) { // Get current parameters: - const int block_size = static_cast(state.get_int64("BlockSize")); - const int num_blocks = static_cast(state.get_int64("NumBlocks")); + const auto block_size = static_cast(state.get_int64("BlockSize")); + const auto num_blocks = static_cast(state.get_int64("NumBlocks")); // Number of int32s in 256 MiB: const std::size_t num_values = 256 * 1024 * 1024 / sizeof(nvbench::int32_t); @@ -53,6 +53,7 @@ void copy_sweep_grid_shape(nvbench::state &state) num_values, in_ptr = thrust::raw_pointer_cast(in.data()), out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { + (void)num_values; // clang thinks this is unused. nvbench::copy_kernel<<>>(in_ptr, out_ptr, num_values); diff --git a/nvbench/detail/axes_iterator.cuh b/nvbench/detail/axes_iterator.cuh index c4444cb1..9d413ee8 100644 --- a/nvbench/detail/axes_iterator.cuh +++ b/nvbench/detail/axes_iterator.cuh @@ -83,9 +83,10 @@ struct axis_space_iterator void update_indices(std::vector &indices) const { + using diff_t = typename axes_info::difference_type; indices.insert(indices.end(), m_info.begin(), m_info.end()); axes_info::iterator end = indices.end(); - axes_info::iterator start = end - m_info.size(); + axes_info::iterator start = end - static_cast(m_info.size()); this->m_update(m_current_index, start, end); } From cca9308dd6169ea2ef7d32b0a0055838df4e3f99 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Thu, 1 May 2025 17:33:08 +0000 Subject: [PATCH 36/41] Update docs, whitespace. --- nvbench/iteration_space_base.cuh | 4 +--- nvbench/iteration_space_base.cxx | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/nvbench/iteration_space_base.cuh b/nvbench/iteration_space_base.cuh index 15420e85..bfb02ef3 100644 --- a/nvbench/iteration_space_base.cuh +++ b/nvbench/iteration_space_base.cuh @@ -64,8 +64,6 @@ struct iteration_space_base * what axes they should query from axes_metadata and where each of those map to in the output * iteration space. * @param[input_indices] recorded indices of each axis from the axes metadata value space - * @param[output_indices] requested indices of each axis for output when iterating the type+value - * space */ iteration_space_base(std::vector input_indices); virtual ~iteration_space_base(); @@ -90,7 +88,7 @@ struct iteration_space_base [[nodiscard]] std::size_t get_size(const axes_type &axes) const; /*! 
- * Returns the number of active elements the iterator will over when + * Returns the number of active elements the iterator will have when * executed over @a axes * * Note: diff --git a/nvbench/iteration_space_base.cxx b/nvbench/iteration_space_base.cxx index 262cdc6b..fbb9ef3a 100644 --- a/nvbench/iteration_space_base.cxx +++ b/nvbench/iteration_space_base.cxx @@ -53,7 +53,6 @@ get_axes_info(const nvbench::iteration_space_base::axes_type &axes, detail::axis_space_iterator iteration_space_base::get_iterator(const axes_type &axes) const { - return this->do_get_iterator(get_axes_info(axes, m_input_indices)); } From 498c73d4e8797179310995df51389bf91f529382 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Fri, 2 May 2025 17:43:18 +0000 Subject: [PATCH 37/41] Updated some docs. --- nvbench/user_axis_space.cuh | 39 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/nvbench/user_axis_space.cuh b/nvbench/user_axis_space.cuh index 2152e5bc..7a151877 100644 --- a/nvbench/user_axis_space.cuh +++ b/nvbench/user_axis_space.cuh @@ -24,45 +24,44 @@ namespace nvbench { /*! - * Provides user defined iteration over multiple axes + * Provides user defined iteration over one or more axes * - * Consider two axes with the following values: - * { 0, 1, 2, 3, 4, 5 } - * { 0, 1, 2, 3, 4, 5 } - * - * If we wanted to provide an axis space that skipped every third value - * We would implement it like this: + * If we wanted to provide an axis space that only returns every third + * value in an axis we would implement it like this: * * struct every_third final : nvbench::user_axis_space * { - * every_third(std::vector input_indices) + * explicit every_third(std::vector input_indices) * : nvbench::user_axis_space(std::move(input_indices)) * {} * * nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const * { * // our increment function - * auto adv_func = [&, info](std::size_t &inc_index, - * std::size_t len) -> bool { - * inc_index += 3; return inc_index >= len; + * auto adv_func = [](std::size_t &inc_index, + * std::size_t len) -> bool { + * inc_index += 3; + * return inc_index >= len; * }; * * // our update function - * auto update_func = [=](std::size_t inc_index, - * axes_info::iterator start, - * axes_info::iterator end) { - * for (; start != end; ++start) { - * start->index = inc_index; - * } + * auto update_func = [](std::size_t inc_index, + * axes_info::iterator start, + * axes_info::iterator end) { + * for (; start != end; ++start) { + * start->index = inc_index; + * } * }; * return detail::axis_space_iterator(info, (info[0].size/3), * adv_func, update_func); * } * - * std::size_t do_get_size(const axes_info &info) const { return - * (info[0].size/3); } + * std::size_t do_get_size(const axes_info &info) const + * { + * return (info[0].size/3); + * } * ... 
- *
+ * };
  */
 struct user_axis_space : iteration_space_base
 {

From 498c73d4e8797179310995df51389bf91f529382 Mon Sep 17 00:00:00 2001
From: Allison Piper
Date: Fri, 2 May 2025 17:43:35 +0000
Subject: [PATCH 38/41] Rename header to match class name

---
 nvbench/detail/{axes_iterator.cuh => axis_space_iterator.cuh} | 0
 nvbench/detail/state_generator.cuh                            | 2 +-
 nvbench/iteration_space_base.cuh                              | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename nvbench/detail/{axes_iterator.cuh => axis_space_iterator.cuh} (100%)

diff --git a/nvbench/detail/axes_iterator.cuh b/nvbench/detail/axis_space_iterator.cuh
similarity index 100%
rename from nvbench/detail/axes_iterator.cuh
rename to nvbench/detail/axis_space_iterator.cuh
diff --git a/nvbench/detail/state_generator.cuh b/nvbench/detail/state_generator.cuh
index e1e14117..c6faef18 100644
--- a/nvbench/detail/state_generator.cuh
+++ b/nvbench/detail/state_generator.cuh
@@ -20,7 +20,7 @@
 #include
 #include
 
-#include <nvbench/detail/axes_iterator.cuh>
+#include <nvbench/detail/axis_space_iterator.cuh>
 
 #include
 #include
diff --git a/nvbench/iteration_space_base.cuh b/nvbench/iteration_space_base.cuh
index bfb02ef3..eb1e24ee 100644
--- a/nvbench/iteration_space_base.cuh
+++ b/nvbench/iteration_space_base.cuh
@@ -18,7 +18,7 @@
 
 #pragma once
 
-#include <nvbench/detail/axes_iterator.cuh>
+#include <nvbench/detail/axis_space_iterator.cuh>
 
 namespace nvbench
 {

From d7989ddf1e014970ed7d9ce5361dd296c45eda75 Mon Sep 17 00:00:00 2001
From: Allison Piper
Date: Fri, 2 May 2025 17:59:03 +0000
Subject: [PATCH 39/41] Docs update.

---
 nvbench/zip_axis_space.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nvbench/zip_axis_space.cuh b/nvbench/zip_axis_space.cuh
index 9017d345..139eb17b 100644
--- a/nvbench/zip_axis_space.cuh
+++ b/nvbench/zip_axis_space.cuh
@@ -32,7 +32,7 @@ namespace nvbench
  *
  * Using a zip_axis_space over these two axes will generate 6 values
  * ( {0,0}, {1,1}, {2,2}, ... ) instead of the default 36 values
- * ( {0,0}, {0,1}, {0,2}, ...).
+ * ( {0,0}, {0,1}, {0,2}, ... ).
  *
  */
 struct zip_axis_space final : iteration_space_base

From a2bf266e16f4f499f1e7a53b3eccdfd82cdad3d1 Mon Sep 17 00:00:00 2001
From: Allison Piper
Date: Fri, 2 May 2025 18:10:29 +0000
Subject: [PATCH 40/41] Rename some space -> spaces for clarity.

---
 nvbench/axes_metadata.cuh          | 50 +++++++++++++++---------------
 nvbench/axes_metadata.cxx          | 34 ++++++++++----------
 nvbench/benchmark_base.cxx         |  8 ++---
 nvbench/detail/state_generator.cxx | 20 +++++++-----
 4 files changed, 58 insertions(+), 54 deletions(-)

diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh
index fe38eb7c..89e183ee 100644
--- a/nvbench/axes_metadata.cuh
+++ b/nvbench/axes_metadata.cuh
@@ -41,8 +41,8 @@ namespace nvbench
 
 // Holds dynamic axes information.
struct axes_metadata { - using axes_type = std::vector>; - using iteration_space_type = std::vector>; + using axes_type = std::vector>; + using iteration_spaces_type = std::vector>; template explicit axes_metadata(nvbench::type_list); @@ -71,7 +71,6 @@ struct axes_metadata { const std::size_t start = this->m_axes.size(); const std::size_t count = sizeof...(Args); - // (this->add_axis(std::forward(args)), ...); (m_axes.push_back(args.clone()), ...); this->add_zip_space(start, count); } @@ -86,13 +85,13 @@ struct axes_metadata this->add_user_iteration_space(std::move(make), start, count); } - [[nodiscard]] const iteration_space_type &get_type_iteration_space() const + [[nodiscard]] const iteration_spaces_type &get_type_iteration_spaces() const { - return m_type_space; + return m_type_iteration_spaces; } - [[nodiscard]] const iteration_space_type &get_value_iteration_space() const + [[nodiscard]] const iteration_spaces_type &get_value_iteration_spaces() const { - return m_value_space; + return m_value_iteration_spaces; } [[nodiscard]] const nvbench::int64_axis &get_int64_axis(std::string_view name) const; @@ -126,8 +125,8 @@ struct axes_metadata private: axes_type m_axes; std::size_t m_type_axe_count = 0; - iteration_space_type m_type_space; - iteration_space_type m_value_space; + iteration_spaces_type m_type_iteration_spaces; + iteration_spaces_type m_value_iteration_spaces; void add_zip_space(std::size_t first_index, std::size_t count); void add_user_iteration_space(std::function make, @@ -144,22 +143,23 @@ axes_metadata::axes_metadata(nvbench::type_list) auto names = axes_metadata::generate_default_type_axis_names(num_type_axes); auto names_iter = names.begin(); // contents will be moved from - nvbench::tl::foreach( - [&axes = m_axes, &spaces = m_type_space, &names_iter]([[maybe_unused]] auto wrapped_type) { - // This is always called before other axes are added, so the length of the - // axes vector will be the type axis index: - const std::size_t type_axis_index = axes.size(); - - spaces.push_back(std::make_unique(type_axis_index)); - - // Note: - // The word "type" appears 6 times in the next line. - // Every. Single. Token. - typedef typename decltype(wrapped_type)::type type_list; - auto axis = std::make_unique(std::move(*names_iter++), type_axis_index); - axis->template set_inputs(); - axes.push_back(std::move(axis)); - }); + nvbench::tl::foreach([&axes = m_axes, + &spaces = m_type_iteration_spaces, + &names_iter]([[maybe_unused]] auto wrapped_type) { + // This is always called before other axes are added, so the length of the + // axes vector will be the type axis index: + const std::size_t type_axis_index = axes.size(); + + spaces.push_back(std::make_unique(type_axis_index)); + + // Note: + // The word "type" appears 6 times in the next line. + // Every. Single. Token. 
+ typedef typename decltype(wrapped_type)::type type_list; + auto axis = std::make_unique(std::move(*names_iter++), type_axis_index); + axis->template set_inputs(); + axes.push_back(std::move(axis)); + }); m_type_axe_count = m_axes.size(); } diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index 0742f6b1..c9ddb496 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -41,16 +41,16 @@ axes_metadata::axes_metadata(const axes_metadata &other) } m_type_axe_count = other.m_type_axe_count; - m_type_space.reserve(other.m_type_space.size()); - for (const auto &iter : other.m_type_space) + m_type_iteration_spaces.reserve(other.m_type_iteration_spaces.size()); + for (const auto &iter : other.m_type_iteration_spaces) { - m_type_space.push_back(iter->clone()); + m_type_iteration_spaces.push_back(iter->clone()); } - m_value_space.reserve(other.m_value_space.size()); - for (const auto &iter : other.m_value_space) + m_value_iteration_spaces.reserve(other.m_value_iteration_spaces.size()); + for (const auto &iter : other.m_value_iteration_spaces) { - m_value_space.push_back(iter->clone()); + m_value_iteration_spaces.push_back(iter->clone()); } } @@ -65,18 +65,18 @@ axes_metadata &axes_metadata::operator=(const axes_metadata &other) m_type_axe_count = other.m_type_axe_count; - m_type_space.clear(); - m_type_space.reserve(other.m_type_space.size()); - for (const auto &iter : other.m_type_space) + m_type_iteration_spaces.clear(); + m_type_iteration_spaces.reserve(other.m_type_iteration_spaces.size()); + for (const auto &iter : other.m_type_iteration_spaces) { - m_type_space.push_back(iter->clone()); + m_type_iteration_spaces.push_back(iter->clone()); } - m_value_space.clear(); - m_value_space.reserve(other.m_value_space.size()); - for (const auto &iter : other.m_value_space) + m_value_iteration_spaces.clear(); + m_value_iteration_spaces.reserve(other.m_value_iteration_spaces.size()); + for (const auto &iter : other.m_value_iteration_spaces) { - m_value_space.push_back(iter->clone()); + m_value_iteration_spaces.push_back(iter->clone()); } return *this; @@ -128,7 +128,7 @@ void axes_metadata::add_string_axis(std::string name, std::vector d void axes_metadata::add_axis(const axis_base &axis) { - m_value_space.push_back(std::make_unique(m_axes.size())); + m_value_iteration_spaces.push_back(std::make_unique(m_axes.size())); m_axes.push_back(axis.clone()); } @@ -161,7 +161,7 @@ void axes_metadata::add_zip_space(std::size_t first_index, std::size_t count) // add the new tied iteration space auto tied = std::make_unique(std::move(input_indices)); - m_value_space.push_back(std::move(tied)); + m_value_iteration_spaces.push_back(std::move(tied)); } void axes_metadata::add_user_iteration_space(std::function make, @@ -182,7 +182,7 @@ void axes_metadata::add_user_iteration_space(std::function{}, [&axes](const auto &space) { return space->get_size(axes); }); const std::size_t type_count = nvbench::detail::transform_reduce( - m_axes.get_type_iteration_space().cbegin(), - m_axes.get_type_iteration_space().cend(), + m_axes.get_type_iteration_spaces().cbegin(), + m_axes.get_type_iteration_spaces().cend(), std::size_t{1}, std::multiplies<>{}, [&axes](const auto &space) { return space->get_active_count(axes); }); diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index c1355782..202a5579 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -92,8 +92,8 @@ state_generator::state_generator(const benchmark_base &bench) void 
state_generator::build_axis_configs() { const axes_metadata &axes = m_benchmark.get_axes(); - const auto &type_space = axes.get_type_iteration_space(); - const auto &value_space = axes.get_value_iteration_space(); + const auto &type_spaces = axes.get_type_iteration_spaces(); + const auto &value_spaces = axes.get_value_iteration_spaces(); state_iterator ti; state_iterator vi; @@ -105,12 +105,16 @@ void state_generator::build_axis_configs() // instantiations. { const auto &axes_vec = axes.get_axes(); - std::for_each(type_space.crbegin(), type_space.crend(), [&ti, &axes_vec](const auto &space) { - ti.add_iteration_space(space->get_iterator(axes_vec)); - }); - std::for_each(value_space.begin(), value_space.end(), [&vi, &axes_vec](const auto &space) { - vi.add_iteration_space(space->get_iterator(axes_vec)); - }); + std::for_each(type_spaces.crbegin(), // + type_spaces.crend(), + [&ti, &axes_vec](const auto &space) { + ti.add_iteration_space(space->get_iterator(axes_vec)); + }); + std::for_each(value_spaces.begin(), // + value_spaces.end(), + [&vi, &axes_vec](const auto &space) { + vi.add_iteration_space(space->get_iterator(axes_vec)); + }); } m_type_axis_configs.clear(); From c8909c7d1bb24bd05c5a1024e7820867bde0d021 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Fri, 2 May 2025 20:30:23 +0000 Subject: [PATCH 41/41] Refactoring / renaming. --- examples/custom_iteration_spaces.cu | 49 +++++------ nvbench/detail/axis_space_iterator.cuh | 110 ++++++++++++++----------- nvbench/detail/state_generator.cuh | 5 +- nvbench/detail/state_generator.cxx | 69 +++++++++------- nvbench/iteration_space_base.cuh | 30 +++---- nvbench/iteration_space_base.cxx | 22 ++--- nvbench/linear_axis_space.cuh | 9 +- nvbench/linear_axis_space.cxx | 24 +++--- nvbench/user_axis_space.cuh | 12 +-- nvbench/zip_axis_space.cuh | 8 +- nvbench/zip_axis_space.cxx | 46 +++++++---- testing/axes_iteration_space.cu | 25 +++--- testing/state_generator.cu | 21 ++--- 13 files changed, 231 insertions(+), 199 deletions(-) diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu index be1869ab..2fd16bb9 100644 --- a/examples/custom_iteration_spaces.cu +++ b/examples/custom_iteration_spaces.cu @@ -96,13 +96,13 @@ struct under_diag final : nvbench::user_axis_space mutable std::size_t y_pos = 0; mutable std::size_t x_start = 0; - nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const + nvbench::detail::axis_space_iterator do_get_iterator(axis_value_indices info) const { // generate our increment function auto adv_func = [&, info](std::size_t &inc_index, std::size_t /*len*/) -> bool { inc_index++; x_pos++; - if (x_pos == info[0].size) + if (x_pos == info[0].axis_size) { x_pos = ++x_start; y_pos = x_start; @@ -112,25 +112,24 @@ struct under_diag final : nvbench::user_axis_space }; // our update function - auto diag_under = [&, info](std::size_t, - std::vector::iterator start, - std::vector::iterator end) { - start->index = x_pos; - end->index = y_pos; - }; + auto diag_under = + [&, info](std::size_t, axis_value_indices::iterator start, axis_value_indices::iterator end) { + start->value_index = x_pos; + end->value_index = y_pos; + }; - const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); + const size_t iteration_length = ((info[0].axis_size * (info[1].axis_size + 1)) / 2); return nvbench::detail::axis_space_iterator(info, iteration_length, adv_func, diag_under); } - std::size_t do_get_size(const axes_info &info) const + std::size_t do_get_size(const axis_value_indices &info) 
const { - return ((info[0].size * (info[1].size + 1)) / 2); + return ((info[0].axis_size * (info[1].axis_size + 1)) / 2); } - std::size_t do_get_active_count(const axes_info &info) const + std::size_t do_get_active_count(const axis_value_indices &info) const { - return ((info[0].size * (info[1].size + 1)) / 2); + return ((info[0].axis_size * (info[1].axis_size + 1)) / 2); } std::unique_ptr do_clone() const @@ -160,36 +159,38 @@ struct gauss final : nvbench::user_axis_space : nvbench::user_axis_space(std::move(input_indices)) {} - nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const + nvbench::detail::axis_space_iterator do_get_iterator(axis_value_indices info) const { - const double mid_point = static_cast((info[0].size / 2)); + const double mid_point = static_cast((info[0].axis_size / 2)); std::random_device rd{}; std::mt19937 gen{rd()}; std::normal_distribution<> d{mid_point, 2}; - const size_t iteration_length = info[0].size; + const size_t iteration_length = info[0].axis_size; std::vector gauss_indices(iteration_length); for (auto &g : gauss_indices) { - auto v = std::min(static_cast(info[0].size), d(gen)); + auto v = std::min(static_cast(info[0].axis_size), d(gen)); v = std::max(0.0, v); g = static_cast(v); } // our update function - auto gauss_func = [=](std::size_t index, - std::vector::iterator start, - std::vector::iterator) { - start->index = gauss_indices[index]; - }; + auto gauss_func = + [=](std::size_t index, axis_value_indices::iterator start, axis_value_indices::iterator) { + start->value_index = gauss_indices[index]; + }; return nvbench::detail::axis_space_iterator(info, iteration_length, gauss_func); } - std::size_t do_get_size(const axes_info &info) const { return info[0].size; } + std::size_t do_get_size(const axis_value_indices &info) const { return info[0].axis_size; } - std::size_t do_get_active_count(const axes_info &info) const { return info[0].size; } + std::size_t do_get_active_count(const axis_value_indices &info) const + { + return info[0].axis_size; + } std::unique_ptr do_clone() const { return std::make_unique(*this); } }; diff --git a/nvbench/detail/axis_space_iterator.cuh b/nvbench/detail/axis_space_iterator.cuh index 9d413ee8..bd86c3ef 100644 --- a/nvbench/detail/axis_space_iterator.cuh +++ b/nvbench/detail/axis_space_iterator.cuh @@ -30,76 +30,86 @@ namespace nvbench namespace detail { -struct axis_index +// Tracks current value and axis information used while iterating through axes. +struct axis_value_index { - axis_index() = default; - - explicit axis_index(const axis_base *axis) - : index(0) - , name(axis->get_name()) - , type(axis->get_type()) - , size(axis->get_size()) - , active_size(axis->get_size()) - { - if (type == nvbench::axis_type::type) - { - active_size = static_cast(axis)->get_active_count(); - } - } - std::size_t index; - std::string name; - nvbench::axis_type type; - std::size_t size; - std::size_t active_size; + axis_value_index() = default; + + explicit axis_value_index(const axis_base *axis) + : value_index(0) + , axis_name(axis->get_name()) + , axis_type(axis->get_type()) + , axis_size(axis->get_size()) + , axis_active_size(axis_type == nvbench::axis_type::type + ? 
static_cast(axis)->get_active_count() + : axis->get_size()) + {} + + std::size_t value_index; + std::string axis_name; + nvbench::axis_type axis_type; + std::size_t axis_size; + std::size_t axis_active_size; }; struct axis_space_iterator { - using axes_info = std::vector; - using AdvanceSignature = bool(std::size_t ¤t_index, std::size_t length); - using UpdateSignature = void(std::size_t index, - axes_info::iterator start, - axes_info::iterator end); + using axis_value_indices = std::vector; + using advance_signature = bool(std::size_t ¤t_iteration, std::size_t iteration_size); + using update_signature = void(std::size_t current_iteration, + axis_value_indices::iterator start_axis_value_info, + axis_value_indices::iterator end_axis_value_info); - axis_space_iterator(std::vector info, - std::size_t iter_count, - std::function &&advance, - std::function &&update) - : m_info(info) - , m_iteration_size(iter_count) + axis_space_iterator(axis_value_indices info, + std::size_t iteration_size, + std::function &&advance, + std::function &&update) + : m_iteration_size(iteration_size) + , m_axis_value_indices(std::move(info)) , m_advance(std::move(advance)) , m_update(std::move(update)) {} - axis_space_iterator(std::vector info, + axis_space_iterator(axis_value_indices info, std::size_t iter_count, - std::function &&update) - : m_info(info) - , m_iteration_size(iter_count) + std::function &&update) + : m_iteration_size(iter_count) + , m_axis_value_indices(std::move(info)) , m_update(std::move(update)) {} - [[nodiscard]] bool next() { return this->m_advance(m_current_index, m_iteration_size); } + [[nodiscard]] bool next() { return m_advance(m_current_iteration, m_iteration_size); } - void update_indices(std::vector &indices) const + void update_axis_value_indices(axis_value_indices &info) const { - using diff_t = typename axes_info::difference_type; - indices.insert(indices.end(), m_info.begin(), m_info.end()); - axes_info::iterator end = indices.end(); - axes_info::iterator start = end - static_cast(m_info.size()); - this->m_update(m_current_index, start, end); + using diff_t = typename axis_value_indices::difference_type; + info.insert(info.end(), m_axis_value_indices.begin(), m_axis_value_indices.end()); + axis_value_indices::iterator end = info.end(); + axis_value_indices::iterator start = end - static_cast(m_axis_value_indices.size()); + m_update(m_current_iteration, start, end); } - axes_info m_info; - std::size_t m_iteration_size = 1; - std::function m_advance = [](std::size_t ¤t_index, std::size_t length) { - (current_index + 1 == length) ? current_index = 0 : current_index++; - return (current_index == 0); // we rolled over - }; - std::function m_update = nullptr; + [[nodiscard]] const axis_value_indices &get_axis_value_indices() const + { + return m_axis_value_indices; + } + [[nodiscard]] axis_value_indices &get_axis_value_indices() { return m_axis_value_indices; } + + [[nodiscard]] std::size_t get_iteration_size() const { return m_iteration_size; } private: - std::size_t m_current_index = 0; + std::size_t m_current_iteration = 0; + std::size_t m_iteration_size = 1; + + axis_value_indices m_axis_value_indices; + + std::function m_advance = [](std::size_t ¤t_iteration, + std::size_t iteration_size) { + (current_iteration + 1 == iteration_size) ? 
current_iteration = 0 : current_iteration++; + return (current_iteration == 0); // we rolled over + }; + + std::function m_update = nullptr; }; } // namespace detail diff --git a/nvbench/detail/state_generator.cuh b/nvbench/detail/state_generator.cuh index c6faef18..62ce8bcb 100644 --- a/nvbench/detail/state_generator.cuh +++ b/nvbench/detail/state_generator.cuh @@ -78,13 +78,12 @@ struct state_iterator [[nodiscard]] std::size_t get_number_of_states() const; void init(); - [[nodiscard]] std::vector get_current_indices() const; + [[nodiscard]] std::vector get_current_axis_value_indices() const; [[nodiscard]] bool iter_valid() const; void next(); - std::vector m_space; + std::vector m_axis_space_iterators; std::size_t m_axes_count = 0; - std::size_t m_current_space = 0; std::size_t m_current_iteration = 0; std::size_t m_max_iteration = 1; }; diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index 202a5579..7bc61d9a 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #include #include +#include #include #include @@ -34,10 +36,10 @@ namespace nvbench::detail void state_iterator::add_iteration_space(const nvbench::detail::axis_space_iterator &iter) { - m_axes_count += iter.m_info.size(); - m_max_iteration *= iter.m_iteration_size; + m_axes_count += iter.get_axis_value_indices().size(); + m_max_iteration *= iter.get_iteration_size(); - m_space.push_back(std::move(iter)); + m_axis_space_iterators.push_back(std::move(iter)); } [[nodiscard]] std::size_t state_iterator::get_number_of_states() const @@ -45,22 +47,26 @@ void state_iterator::add_iteration_space(const nvbench::detail::axis_space_itera return this->m_max_iteration; } -void state_iterator::init() -{ - m_current_space = 0; - m_current_iteration = 0; -} +void state_iterator::init() { m_current_iteration = 0; } -[[nodiscard]] std::vector state_iterator::get_current_indices() const +[[nodiscard]] std::vector state_iterator::get_current_axis_value_indices() const { - std::vector indices; - indices.reserve(m_axes_count); - for (auto &m : m_space) + std::vector info; + info.reserve(m_axes_count); + for (auto &iter : m_axis_space_iterators) + { + iter.update_axis_value_indices(info); + } + + if (info.size() != m_axes_count) { - m.update_indices(indices); + NVBENCH_THROW(std::runtime_error, + "Internal error: State iterator has {} axes, but only {} were updated.", + m_axes_count, + info.size()); } - // verify length - return indices; + + return info; } [[nodiscard]] bool state_iterator::iter_valid() const @@ -72,9 +78,9 @@ void state_iterator::next() { m_current_iteration++; - for (auto &&space : this->m_space) + for (auto &iter : this->m_axis_space_iterators) { - auto rolled_over = space.next(); + const auto rolled_over = iter.next(); if (rolled_over) { continue; @@ -128,13 +134,13 @@ void state_generator::build_axis_configs() auto &[config, active_mask] = m_type_axis_configs.emplace_back(std::make_pair(nvbench::named_values{}, true)); - for (const auto &axis_info : ti.get_current_indices()) + for (const auto &info : ti.get_current_axis_value_indices()) { - const auto &axis = axes.get_type_axis(axis_info.name); + const auto &axis = axes.get_type_axis(info.axis_name); - active_mask &= axis.get_is_active(axis_info.index); + active_mask &= axis.get_is_active(info.value_index); - config.set_string(axis.get_name(), axis.get_input_string(axis_info.index)); + config.set_string(axis.get_name(), 
axis.get_input_string(info.value_index)); } } @@ -143,30 +149,33 @@ void state_generator::build_axis_configs() auto &config = m_non_type_axis_configs.emplace_back(); // Add non-type parameters to state: - for (const auto &axis_info : vi.get_current_indices()) + for (const auto &axis_value : vi.get_current_axis_value_indices()) { - switch (axis_info.type) + switch (axis_value.axis_type) { default: case axis_type::type: assert("unreachable." && false); break; case axis_type::int64: - config.set_int64(axis_info.name, - axes.get_int64_axis(axis_info.name).get_value(axis_info.index)); + config.set_int64( + axis_value.axis_name, + axes.get_int64_axis(axis_value.axis_name).get_value(axis_value.value_index)); break; case axis_type::float64: - config.set_float64(axis_info.name, - axes.get_float64_axis(axis_info.name).get_value(axis_info.index)); + config.set_float64( + axis_value.axis_name, + axes.get_float64_axis(axis_value.axis_name).get_value(axis_value.value_index)); break; case axis_type::string: - config.set_string(axis_info.name, - axes.get_string_axis(axis_info.name).get_value(axis_info.index)); + config.set_string( + axis_value.axis_name, + axes.get_string_axis(axis_value.axis_name).get_value(axis_value.value_index)); break; } // switch (type) - } // for (axis_info : current_indices) + } // for (axis_values) } if (m_type_axis_configs.empty()) diff --git a/nvbench/iteration_space_base.cuh b/nvbench/iteration_space_base.cuh index eb1e24ee..8250eea4 100644 --- a/nvbench/iteration_space_base.cuh +++ b/nvbench/iteration_space_base.cuh @@ -51,30 +51,24 @@ namespace nvbench */ struct iteration_space_base { - using axes_type = std::vector>; - using axes_info = std::vector; + using axes_type = std::vector>; + using axis_value_indices = std::vector; - using AdvanceSignature = nvbench::detail::axis_space_iterator::AdvanceSignature; - using UpdateSignature = nvbench::detail::axis_space_iterator::UpdateSignature; + using advance_signature = nvbench::detail::axis_space_iterator::advance_signature; + using update_signature = nvbench::detail::axis_space_iterator::update_signature; /*! * Construct a new derived iteration_space * - * The input_indices and output_indices combine together to allow the iteration space to know - * what axes they should query from axes_metadata and where each of those map to in the output - * iteration space. - * @param[input_indices] recorded indices of each axis from the axes metadata value space + * @param[input_axis_indices] Index of each associated axis in axes_metadata. */ - iteration_space_base(std::vector input_indices); + iteration_space_base(std::vector input_axis_indices); virtual ~iteration_space_base(); [[nodiscard]] std::unique_ptr clone() const; /*! 
-   * Returns the iterator over the @a axis provided
-   *
-   * @param[axes]
-   *
+   * Returns the iterator over the @a axes provided
    */
   [[nodiscard]] detail::axis_space_iterator get_iterator(const axes_type &axes) const;
 
@@ -97,12 +91,12 @@ struct iteration_space_base
   [[nodiscard]] std::size_t get_active_count(const axes_type &axes) const;
 
 protected:
-  std::vector<std::size_t> m_input_indices;
+  std::vector<std::size_t> m_axis_indices;
 
-  virtual std::unique_ptr<iteration_space_base> do_clone() const = 0;
-  virtual detail::axis_space_iterator do_get_iterator(axes_info info) const = 0;
-  virtual std::size_t do_get_size(const axes_info &info) const = 0;
-  virtual std::size_t do_get_active_count(const axes_info &info) const = 0;
+  virtual std::unique_ptr<iteration_space_base> do_clone() const = 0;
+  virtual detail::axis_space_iterator do_get_iterator(axis_value_indices info) const = 0;
+  virtual std::size_t do_get_size(const axis_value_indices &info) const = 0;
+  virtual std::size_t do_get_active_count(const axis_value_indices &info) const = 0;
 };
 
 } // namespace nvbench
diff --git a/nvbench/iteration_space_base.cxx b/nvbench/iteration_space_base.cxx
index fbb9ef3a..590f9648 100644
--- a/nvbench/iteration_space_base.cxx
+++ b/nvbench/iteration_space_base.cxx
@@ -23,8 +23,8 @@
 namespace nvbench
 {
 
-iteration_space_base::iteration_space_base(std::vector<std::size_t> input_indices)
-  : m_input_indices(std::move(input_indices))
+iteration_space_base::iteration_space_base(std::vector<std::size_t> input_axis_indices)
+  : m_axis_indices(std::move(input_axis_indices))
 {}
 
 iteration_space_base::~iteration_space_base() = default;
@@ -37,15 +37,15 @@ std::unique_ptr<iteration_space_base> iteration_space_base::clone() const
 namespace
 {
 
-nvbench::iteration_space_base::axes_info
-get_axes_info(const nvbench::iteration_space_base::axes_type &axes,
-              const std::vector<std::size_t> &indices)
+nvbench::iteration_space_base::axis_value_indices
+get_axis_value_indices(const nvbench::iteration_space_base::axes_type &axes,
+                       const std::vector<std::size_t> &indices)
 {
-  nvbench::iteration_space_base::axes_info info;
+  nvbench::iteration_space_base::axis_value_indices info;
   info.reserve(indices.size());
-  for (auto &n : indices)
+  for (auto &idx : indices)
   {
-    info.emplace_back(axes[n].get());
+    info.emplace_back(axes[idx].get());
   }
   return info;
 }
@@ -53,16 +53,16 @@
 detail::axis_space_iterator iteration_space_base::get_iterator(const axes_type &axes) const
 {
-  return this->do_get_iterator(get_axes_info(axes, m_input_indices));
+  return this->do_get_iterator(get_axis_value_indices(axes, m_axis_indices));
 }
 
 std::size_t iteration_space_base::get_size(const axes_type &axes) const
 {
-  return this->do_get_size(get_axes_info(axes, m_input_indices));
+  return this->do_get_size(get_axis_value_indices(axes, m_axis_indices));
 }
 
 std::size_t iteration_space_base::get_active_count(const axes_type &axes) const
 {
-  return this->do_get_active_count(get_axes_info(axes, m_input_indices));
+  return this->do_get_active_count(get_axis_value_indices(axes, m_axis_indices));
 }
 } // namespace nvbench
diff --git a/nvbench/linear_axis_space.cuh b/nvbench/linear_axis_space.cuh
index 4adf385d..fbb28a38 100644
--- a/nvbench/linear_axis_space.cuh
+++ b/nvbench/linear_axis_space.cuh
@@ -27,17 +27,16 @@ namespace nvbench
  * Provides linear forward iteration over a single axis.
* * The default for all axes added to a benchmark - * */ struct linear_axis_space final : iteration_space_base { - linear_axis_space(std::size_t in); + linear_axis_space(std::size_t axis_index); ~linear_axis_space(); std::unique_ptr do_clone() const override; - detail::axis_space_iterator do_get_iterator(axes_info info) const override; - std::size_t do_get_size(const axes_info &info) const override; - std::size_t do_get_active_count(const axes_info &info) const override; + detail::axis_space_iterator do_get_iterator(axis_value_indices info) const override; + std::size_t do_get_size(const axis_value_indices &info) const override; + std::size_t do_get_active_count(const axis_value_indices &info) const override; }; } // namespace nvbench diff --git a/nvbench/linear_axis_space.cxx b/nvbench/linear_axis_space.cxx index 90e4ba98..134e7c15 100644 --- a/nvbench/linear_axis_space.cxx +++ b/nvbench/linear_axis_space.cxx @@ -23,26 +23,30 @@ namespace nvbench { -linear_axis_space::linear_axis_space(std::size_t in_index) - : iteration_space_base({in_index}) +linear_axis_space::linear_axis_space(std::size_t axis_index) + : iteration_space_base({axis_index}) {} linear_axis_space::~linear_axis_space() = default; -detail::axis_space_iterator linear_axis_space::do_get_iterator(axes_info info) const +detail::axis_space_iterator linear_axis_space::do_get_iterator(axis_value_indices info) const { - auto update_func = [=](std::size_t inc_index, axes_info::iterator start, axes_info::iterator) { - start->index = inc_index; - }; + auto update_func = [](std::size_t current_iteration, + axis_value_indices::iterator start, + axis_value_indices::iterator) { start->value_index = current_iteration; }; - return detail::axis_space_iterator(info, info[0].size, update_func); + const auto axis_size = info[0].axis_size; + return detail::axis_space_iterator(std::move(info), axis_size, update_func); } -std::size_t linear_axis_space::do_get_size(const axes_info &info) const { return info[0].size; } +std::size_t linear_axis_space::do_get_size(const axis_value_indices &info) const +{ + return info[0].axis_size; +} -std::size_t linear_axis_space::do_get_active_count(const axes_info &info) const +std::size_t linear_axis_space::do_get_active_count(const axis_value_indices &info) const { - return info[0].active_size; + return info[0].axis_active_size; } std::unique_ptr linear_axis_space::do_clone() const diff --git a/nvbench/user_axis_space.cuh b/nvbench/user_axis_space.cuh index 7a151877..e15bf74e 100644 --- a/nvbench/user_axis_space.cuh +++ b/nvbench/user_axis_space.cuh @@ -35,7 +35,7 @@ namespace nvbench * : nvbench::user_axis_space(std::move(input_indices)) * {} * - * nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const + * nvbench::detail::axis_space_iterator do_get_iterator(axis_value_indices info) const * { * // our increment function * auto adv_func = [](std::size_t &inc_index, @@ -46,19 +46,19 @@ namespace nvbench * * // our update function * auto update_func = [](std::size_t inc_index, - * axes_info::iterator start, - * axes_info::iterator end) { + * axis_value_indices::iterator start, + * axis_value_indices::iterator end) { * for (; start != end; ++start) { * start->index = inc_index; * } * }; - * return detail::axis_space_iterator(info, (info[0].size/3), + * return detail::axis_space_iterator(info, (info[0].axis_size/3), * adv_func, update_func); * } * - * std::size_t do_get_size(const axes_info &info) const + * std::size_t do_get_size(const axis_value_indices &info) const * { - * return 
(info[0].size/3); + * return (info[0].axis_size/3); * } * ... * }; diff --git a/nvbench/zip_axis_space.cuh b/nvbench/zip_axis_space.cuh index 139eb17b..71df1159 100644 --- a/nvbench/zip_axis_space.cuh +++ b/nvbench/zip_axis_space.cuh @@ -37,13 +37,13 @@ namespace nvbench */ struct zip_axis_space final : iteration_space_base { - zip_axis_space(std::vector input_indices); + zip_axis_space(std::vector input_axis_indices); ~zip_axis_space(); std::unique_ptr do_clone() const override; - detail::axis_space_iterator do_get_iterator(axes_info info) const override; - std::size_t do_get_size(const axes_info &info) const override; - std::size_t do_get_active_count(const axes_info &info) const override; + detail::axis_space_iterator do_get_iterator(axis_value_indices info) const override; + std::size_t do_get_size(const axis_value_indices &info) const override; + std::size_t do_get_active_count(const axis_value_indices &info) const override; }; } // namespace nvbench diff --git a/nvbench/zip_axis_space.cxx b/nvbench/zip_axis_space.cxx index 5b06390b..c4e229d7 100644 --- a/nvbench/zip_axis_space.cxx +++ b/nvbench/zip_axis_space.cxx @@ -18,35 +18,51 @@ #include "zip_axis_space.cuh" +#include #include +#include + namespace nvbench { -zip_axis_space::zip_axis_space(std::vector input_indices) - : iteration_space_base(std::move(input_indices)) +zip_axis_space::zip_axis_space(std::vector input_axis_indices) + : iteration_space_base(std::move(input_axis_indices)) {} zip_axis_space::~zip_axis_space() = default; -detail::axis_space_iterator zip_axis_space::do_get_iterator(axes_info info) const +detail::axis_space_iterator zip_axis_space::do_get_iterator(axis_value_indices info) const { - auto update_func = - [=](std::size_t inc_index, axes_info::iterator start, axes_info::iterator end) { - for (; start != end; ++start) - { - start->index = inc_index; - } - }; - - return detail::axis_space_iterator(info, info[0].size, update_func); + const auto axis_size = info[0].axis_size; + for (const auto &axis : info) + { + if (axis.axis_active_size != axis_size) + { + NVBENCH_THROW(std::runtime_error, "%s", "All zipped axes must have the same size."); + } + } + + auto update_func = [](std::size_t current_iteration, + axis_value_indices::iterator start_axis_value_info, + axis_value_indices::iterator end_axis_value_info) { + for (; start_axis_value_info != end_axis_value_info; ++start_axis_value_info) + { + start_axis_value_info->value_index = current_iteration; + } + }; + + return detail::axis_space_iterator(std::move(info), axis_size, update_func); } -std::size_t zip_axis_space::do_get_size(const axes_info &info) const { return info[0].size; } +std::size_t zip_axis_space::do_get_size(const axis_value_indices &info) const +{ + return info[0].axis_size; +} -std::size_t zip_axis_space::do_get_active_count(const axes_info &info) const +std::size_t zip_axis_space::do_get_active_count(const axis_value_indices &info) const { - return info[0].active_size; + return info[0].axis_active_size; } std::unique_ptr zip_axis_space::do_clone() const diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu index a068480c..a2b79d41 100644 --- a/testing/axes_iteration_space.cu +++ b/testing/axes_iteration_space.cu @@ -188,13 +188,13 @@ struct under_diag final : nvbench::user_axis_space mutable std::size_t y_pos = 0; mutable std::size_t x_start = 0; - nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const + nvbench::detail::axis_space_iterator do_get_iterator(axis_value_indices info) const { // generate 
our increment function auto adv_func = [&, info](std::size_t &inc_index, std::size_t /*len*/) -> bool { inc_index++; x_pos++; - if (x_pos == info[0].size) + if (x_pos == info[0].axis_size) { x_pos = ++x_start; y_pos = x_start; @@ -204,25 +204,24 @@ struct under_diag final : nvbench::user_axis_space }; // our update function - auto diag_under = [&, info](std::size_t, - std::vector::iterator start, - std::vector::iterator end) { - start->index = x_pos; - end->index = y_pos; - }; + auto diag_under = + [&, info](std::size_t, axis_value_indices::iterator start, axis_value_indices::iterator end) { + start->value_index = x_pos; + end->value_index = y_pos; + }; - const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); + const size_t iteration_length = ((info[0].axis_size * (info[1].axis_size + 1)) / 2); return nvbench::detail::axis_space_iterator(info, iteration_length, adv_func, diag_under); } - std::size_t do_get_size(const axes_info &info) const + std::size_t do_get_size(const axis_value_indices &info) const { - return ((info[0].size * (info[1].size + 1)) / 2); + return ((info[0].axis_size * (info[1].axis_size + 1)) / 2); } - std::size_t do_get_active_count(const axes_info &info) const + std::size_t do_get_active_count(const axis_value_indices &info) const { - return ((info[0].size * (info[1].size + 1)) / 2); + return ((info[0].axis_size * (info[1].axis_size + 1)) / 2); } std::unique_ptr do_clone() const diff --git a/testing/state_generator.cu b/testing/state_generator.cu index d0f1a3df..3bba2fab 100644 --- a/testing/state_generator.cu +++ b/testing/state_generator.cu @@ -65,11 +65,12 @@ void test_single_state() ASSERT(sg.get_number_of_states() == 1); sg.init(); ASSERT(sg.iter_valid()); - ASSERT(sg.get_current_indices().size() == 1); - ASSERT(sg.get_current_indices()[0].name == "OnlyAxis"); - ASSERT(sg.get_current_indices()[0].index == 0); - ASSERT(sg.get_current_indices()[0].size == 1); - ASSERT(sg.get_current_indices()[0].type == nvbench::axis_type::string); + ASSERT(sg.get_current_axis_value_indices().size() == 1); + ASSERT(sg.get_current_axis_value_indices()[0].axis_name == "OnlyAxis"); + ASSERT(sg.get_current_axis_value_indices()[0].axis_size == 1); + ASSERT(sg.get_current_axis_value_indices()[0].axis_active_size == 1); + ASSERT(sg.get_current_axis_value_indices()[0].axis_type == nvbench::axis_type::string); + ASSERT(sg.get_current_axis_value_indices()[0].value_index == 0); sg.next(); ASSERT(!sg.iter_valid()); @@ -112,14 +113,14 @@ void test_basic() { line.clear(); fmt::format_to(std::back_inserter(line), "| {:^2}", line_num++); - for (auto &axis_index : sg.get_current_indices()) + for (auto &axis_value : sg.get_current_axis_value_indices()) { - ASSERT(axis_index.type == nvbench::axis_type::string); + ASSERT(axis_value.axis_type == nvbench::axis_type::string); fmt::format_to(std::back_inserter(line), " | {}: {}/{}", - axis_index.name, - axis_index.index, - axis_index.size); + axis_value.axis_name, + axis_value.value_index, + axis_value.axis_size); } fmt::format_to(std::back_inserter(buffer), "{} |\n", fmt::to_string(line)); }