diff --git a/benchmark/0011.containers/deque/0001.push_back/fast_io_reverse.cc b/benchmark/0011.containers/deque/0001.push_back/fast_io_reverse.cc index 8b2f91f1a..066c96549 100644 --- a/benchmark/0011.containers/deque/0001.push_back/fast_io_reverse.cc +++ b/benchmark/0011.containers/deque/0001.push_back/fast_io_reverse.cc @@ -23,5 +23,5 @@ int main() sum += e; } } - ::fast_io::io::perrln("sum=",sum); + ::fast_io::io::perrln("sum=", sum); } diff --git a/include/fast_io_dsal/impl/deque.h b/include/fast_io_dsal/impl/deque.h index da04c2dbe..e07464054 100644 --- a/include/fast_io_dsal/impl/deque.h +++ b/include/fast_io_dsal/impl/deque.h @@ -436,7 +436,23 @@ inline constexpr void deque_grow_to_new_blocks_count_impl(dequecontroltype &cont ::std::size_t const old_back_block_ptr_pos{static_cast<::std::size_t>(controller.back_block.controller_ptr - old_start_ptr)}; using block_typed_allocator = ::fast_io::typed_generic_allocator_adapter; - auto [new_start_ptr, new_blocks_count] = block_typed_allocator::allocate_at_least(new_blocks_count_least + 1zu); + +#if (defined(__GNUC__) || defined(__clang__)) + ::std::size_t new_blocks_count_least_p1; + if (__builtin_add_overflow(new_blocks_count_least, 1zu, __builtin_addressof(new_blocks_count_least_p1))) [[unlikely]] + { + ::fast_io::fast_terminate(); + } +#else + constexpr ::std::size_t mx{::std::numeric_limits<::std::size_t>::max()}; + ::std::size_t new_blocks_count_least_p1{new_blocks_count_least}; + if (mx == new_blocks_count_least) + { + ::fast_io::fast_terminate(); + } + ++new_blocks_count_least_p1; +#endif + auto [new_start_ptr, new_blocks_count] = block_typed_allocator::allocate_at_least(new_blocks_count_least_p1); auto const old_reserved_blocks_count{ static_cast<::std::size_t>(old_after_reserved_ptr - old_start_reserved_ptr)}; @@ -567,12 +583,21 @@ template inline constexpr void deque_allocate_on_empty_common_with_n_impl(dequecontroltype &controller, ::std::size_t align, ::std::size_t bytes, ::std::size_t initial_allocated_block_counts) noexcept { +#if (defined(__GNUC__) || defined(__clang__)) + ::std::size_t initial_allocated_block_counts_with_sentinel; + if (__builtin_add_overflow(initial_allocated_block_counts, 1u, + __builtin_addressof(initial_allocated_block_counts_with_sentinel))) + { + ::fast_io::fast_terminate(); + } +#else constexpr ::std::size_t maxval{::std::numeric_limits<::std::size_t>::max()}; if (initial_allocated_block_counts == maxval) [[unlikely]] { ::fast_io::fast_terminate(); } ::std::size_t initial_allocated_block_counts_with_sentinel{initial_allocated_block_counts + 1u}; +#endif using block_typed_allocator = ::fast_io::typed_generic_allocator_adapter; auto [allocated_blocks_ptr, allocated_blocks_count] = block_typed_allocator::allocate_at_least(initial_allocated_block_counts_with_sentinel); // we need a null terminator as sentinel like c style string does @@ -1205,27 +1230,202 @@ deque_erase_common_trivial_impl(::fast_io::containers::details::deque_controller return first; } -#if 0 +#if 1 + template -inline constexpr void deque_reserve_back_spaces_impl(dequecontroltype &controller, ::std::size_t n, ::std::size_t align, ::std::size_t blockbytes) noexcept +inline constexpr void deque_rebalance_or_grow_insertation_impl(dequecontroltype &controller, ::std::size_t extrablocks) noexcept { - ::std::size_t const nb{n/blockbytes}; + // ignore overchecked first + auto const used_blocks_count{ + static_cast<::std::size_t>(controller.back_block.controller_ptr - controller.front_block.controller_ptr) + 1zu}; + auto const total_slots_count{ + static_cast<::std::size_t>(controller.controller_block.controller_after_ptr - controller.controller_block.controller_start_ptr)}; + auto const half_slots_count{static_cast<::std::size_t>(total_slots_count >> 1u)}; +#if defined(__GNUC__) || defined(__clang__) + ::std::size_t new_used_blocks_count; + if (__builtin_add_overflow(used_blocks_count, extrablocks, __builtin_addressof(new_used_blocks_count))) [[unlikely]] + { + ::fast_io::fast_terminate(); + } +#else + constexpr ::std::size_t mx{::std::numeric_limits<::std::size_t>::max()}; + ::std::size_t const mx_sub_extrablocks{mx - extrablocks}; + if (mx_sub_extrablocks < used_blocks_count) + { + ::fast_io::fast_terminate(); + } + + auto const new_used_blocks_count{used_blocks_count + extrablocks}; +#endif + + if (half_slots_count < new_used_blocks_count) // grow blocks + { +#if defined(__GNUC__) || defined(__clang__) + ::std::size_t doubleslotsextra; + if (__builtin_add_overflow(total_slots_count, extrablocks, __builtin_addressof(doubleslotsextra))) + { + ::fast_io::fast_terminate(); + } + if (__builtin_add_overflow(doubleslotsextra, doubleslotsextra, __builtin_addressof(doubleslotsextra))) + { + ::fast_io::fast_terminate(); + } +#else + ::std::size_t mx_total_slots{mx - extrablocks}; + if (mx_total_slots < total_slots_count) + { + ::fast_io::fast_terminate(); + } + ::std::size_t doubleslotsextra{extrablocks + total_slots_count}; + constexpr ::std::size_t mxdv2m1{(mx >> 1u)}; + if (mxdv2m1 < doubleslotsextra) + { + ::fast_io::fast_terminate(); + } + doubleslotsextra <<= 1u; +#endif + ::fast_io::containers::details::deque_grow_to_new_blocks_count_impl(controller, doubleslotsextra); + } + else + { +#if 0 + ::fast_io::iomnp::debug_println(::std::source_location::current()); +#endif + // balance blocks + auto start_reserved_ptr{controller.controller_block.controller_start_reserved_ptr}; + auto after_reserved_ptr{controller.controller_block.controller_after_reserved_ptr}; + auto const reserved_blocks_count{ + static_cast<::std::size_t>(after_reserved_ptr - start_reserved_ptr)}; + auto const half_reserved_blocks_count{ + static_cast<::std::size_t>(reserved_blocks_count >> 1u)}; + auto reserved_pivot{start_reserved_ptr + half_reserved_blocks_count}; + auto const half_used_blocks_count{ + static_cast<::std::size_t>(new_used_blocks_count >> 1u)}; + auto used_blocks_pivot{controller.front_block.controller_ptr + half_used_blocks_count}; + if (used_blocks_pivot != reserved_pivot) + { + ::std::ptrdiff_t diff{reserved_pivot - used_blocks_pivot}; +#if 0 + ::fast_io::iomnp::debug_println(::std::source_location::current(), + "\tdiff=",diff); +#endif + auto rotate_pivot{diff < 0 ? start_reserved_ptr : after_reserved_ptr}; + rotate_pivot -= diff; + ::std::rotate(start_reserved_ptr, rotate_pivot, after_reserved_ptr); + controller.front_block.controller_ptr += diff; + controller.back_block.controller_ptr += diff; + } + + auto slots_pivot{controller.controller_block.controller_start_ptr + half_slots_count}; + if (slots_pivot != reserved_pivot) + { +#if 0 + ::fast_io::iomnp::debug_println(::std::source_location::current()); +#endif + ::std::ptrdiff_t diff{slots_pivot - reserved_pivot}; + ::fast_io::freestanding::overlapped_copy(start_reserved_ptr, + after_reserved_ptr, start_reserved_ptr + diff); + controller.front_block.controller_ptr += diff; + controller.back_block.controller_ptr += diff; + controller.controller_block.controller_start_reserved_ptr += diff; + *(controller.controller_block.controller_after_reserved_ptr += diff) = nullptr; + } + } +} + +template +inline constexpr void deque_reserve_back_blocks_impl(dequecontroltype &controller, ::std::size_t nb, ::std::size_t align, ::std::size_t blockbytes) noexcept +{ if (controller.controller_block.controller_start_ptr == nullptr) { ::fast_io::containers::details::deque_allocate_on_empty_common_with_n_impl( controller, align, blockbytes, nb); return; } - + + using replacetype = typename dequecontroltype::replacetype; + using begin_ptrtype = replacetype *; + + std::size_t diff_to_after_ptr = + static_cast( + controller.controller_block.controller_after_reserved_ptr - + controller.back_block.controller_ptr); + if (diff_to_after_ptr <= nb) + { + std::size_t distance_back_to_reserve{ + static_cast(controller.controller_block.controller_after_reserved_ptr - + controller.back_block.controller_ptr)}; + if (distance_back_to_reserve < nb) + { + ::fast_io::containers::details::deque_rebalance_or_grow_insertation_impl(controller, nb); + } + std::size_t diff_to_after_ptr2 = + static_cast( + controller.controller_block.controller_after_reserved_ptr - + controller.back_block.controller_ptr); + if (diff_to_after_ptr2 <= nb) + { + ::std::size_t front_reserved_blocks{ + static_cast<::std::size_t>(controller.front_block.controller_ptr - controller.controller_block.controller_start_reserved_ptr)}; + + ::std::size_t front_borrowed_blocks_count{front_reserved_blocks}; + ::std::size_t to_allocate_blocks{nb}; + if (nb < front_reserved_blocks) + { + front_borrowed_blocks_count = nb; + to_allocate_blocks = 0u; + } + else + { + to_allocate_blocks -= front_borrowed_blocks_count; + } + + auto controller_start_reserved_ptr{ + controller.controller_block.controller_start_reserved_ptr}; + + auto pos{ + controller.controller_block.controller_after_reserved_ptr}; + pos = ::fast_io::freestanding::non_overlapped_copy_n(controller_start_reserved_ptr, + front_borrowed_blocks_count, + pos); + controller.controller_block.controller_start_reserved_ptr = + controller_start_reserved_ptr + front_borrowed_blocks_count; + + for (auto e{pos + to_allocate_blocks}; pos != e; ++pos) + { + ::std::construct_at(pos, static_cast(allocator::allocate_aligned(align, blockbytes))); + } + *pos = nullptr; + controller.controller_block.controller_after_reserved_ptr = pos; + } + } + + if (controller.back_block.controller_ptr == controller.front_block.controller_ptr && controller.front_block.curr_ptr == controller.front_end_ptr) + { + auto front_block_controller_ptr{controller.front_block.controller_ptr + 1}; + controller.front_block.controller_ptr = front_block_controller_ptr; + auto front_begin_ptr = static_cast(*front_block_controller_ptr); + controller.front_block.curr_ptr = controller.front_block.begin_ptr = front_begin_ptr; + controller.front_end_ptr = front_begin_ptr + blockbytes; + } + + controller.back_block.controller_ptr += nb; + auto begin_ptr = + static_cast(*controller.back_block.controller_ptr); + + controller.back_block.begin_ptr = begin_ptr; + controller.back_block.curr_ptr = begin_ptr; + controller.back_end_ptr = begin_ptr + blockbytes; } +#if 0 template inline constexpr void deque_reserve_back_spaces(dequecontroltype &controller, ::std::size_t n) { - } #endif +#endif } // namespace details diff --git a/include/fast_io_legacy_impl/c/wincrt.h b/include/fast_io_legacy_impl/c/wincrt.h index d35ff022c..7bad68fd3 100644 --- a/include/fast_io_legacy_impl/c/wincrt.h +++ b/include/fast_io_legacy_impl/c/wincrt.h @@ -120,7 +120,7 @@ CRT heap debugging does not exist on mingw-w64 inline void wincrt_fp_allocate_buffer_impl(FILE *__restrict fpp) noexcept { - crt_iobuf *fp{reinterpret_cast(fpp)}; + ::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)}; if (fp->_bufsiz < 4) { fp->_bufsiz = wincrt_internal_buffer_size; @@ -145,7 +145,7 @@ inline void wincrt_fp_write_cold_malloc_case_impl(FILE *__restrict fpp, char con return; } - crt_iobuf *fp{reinterpret_cast(fpp)}; + ::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)}; ::std::size_t allocated_buffer_size{wincrt_internal_buffer_size}; @@ -173,7 +173,7 @@ inline void wincrt_fp_write_cold_malloc_case_impl(FILE *__restrict fpp, char con inline void wincrt_fp_write_cold_normal_case_impl(FILE *__restrict fpp, char const *__restrict first, ::std::size_t diff) { - crt_iobuf *fp{reinterpret_cast(fpp)}; + ::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)}; fp->_flag |= crt_dirty_value; if (::std::size_t const remain{static_cast<::std::size_t>(static_cast<::std::uint_least32_t>(fp->_cnt))}; diff < remain) @@ -210,7 +210,7 @@ inline void wincrt_fp_write_cold_normal_case_impl(FILE *__restrict fpp, char con inline void wincrt_fp_write_cold_impl(FILE *__restrict fp, char const *first, char const *last) { ::std::size_t diff{static_cast<::std::size_t>(last - first)}; - crt_iobuf *fpp{reinterpret_cast(fp)}; + ::fast_io::details::crt_iobuf *fpp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fp)}; if (fpp->_base == nullptr) { if (auto const fd{fpp->_file}; fd == ::fast_io::posix_stderr_number) @@ -237,7 +237,7 @@ template <::std::integral char_type> #endif inline void wincrt_fp_overflow_impl(FILE *__restrict fpp, char_type ch) { - crt_iobuf *fp{reinterpret_cast(fpp)}; + ::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)}; if (fp->_base == nullptr) { wincrt_fp_allocate_buffer_impl(fpp); @@ -259,7 +259,7 @@ inline void wincrt_fp_overflow_impl(FILE *__restrict fpp, char_type ch) #endif inline void wincrt_fp_flush_stdout_impl() { - crt_iobuf *fp{reinterpret_cast(::fast_io::win32::wincrt_acrt_iob_func(1))}; + ::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(::fast_io::win32::wincrt_acrt_iob_func(1))}; if (fp->_ptr == fp->_base) [[unlikely]] { return; @@ -278,7 +278,7 @@ inline char *wincrt_fp_read_cold_impl(FILE *__restrict fpp, char *first, ::std:: { wincrt_fp_flush_stdout_impl(); } - crt_iobuf *fp{reinterpret_cast(fpp)}; + ::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)}; ::std::size_t cnt{static_cast<::std::size_t>(static_cast<::std::uint_least32_t>(fp->_cnt))}; non_overlapped_copy_n(fp->_ptr, cnt, first); first += cnt; @@ -332,7 +332,7 @@ inline bool wincrt_fp_underflow_impl(FILE *__restrict fpp) { wincrt_fp_flush_stdout_impl(); } - crt_iobuf *fp{reinterpret_cast(fpp)}; + ::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)}; if (fp->_base == nullptr) { wincrt_fp_allocate_buffer_impl(fpp); @@ -362,7 +362,7 @@ template inline T *wincrt_get_buffer_ptr_impl(FILE *__restrict fpp) noexcept { static_assert(num < 4); - crt_iobuf *fp{reinterpret_cast(fpp)}; + ::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)}; if constexpr (num == 0) { return reinterpret_cast(fp->_base); @@ -383,7 +383,7 @@ inline void wincrt_set_buffer_curr_ptr_impl(FILE *__restrict fpp, #endif void *ptr) noexcept { - crt_iobuf *fp{reinterpret_cast(fpp)}; + ::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)}; fp->_cnt -= static_cast<::std::int_least32_t>( static_cast<::std::uint_least32_t>(static_cast<::std::size_t>(reinterpret_cast(ptr) - fp->_ptr))); fp->_ptr = reinterpret_cast(ptr); @@ -395,12 +395,12 @@ WINE has not correctly implemented this yet. I am submitting patches. inline void ucrt_lock_file(FILE *__restrict fp) noexcept { char *fp2{reinterpret_cast(fp)}; - ::fast_io::win32::EnterCriticalSection(fp2 + sizeof(crt_iobuf)); + ::fast_io::win32::EnterCriticalSection(fp2 + sizeof(::fast_io::details::crt_iobuf)); } inline void ucrt_unlock_file(FILE *__restrict fp) noexcept { char *fp2{reinterpret_cast(fp)}; - ::fast_io::win32::LeaveCriticalSection(fp2 + sizeof(crt_iobuf)); + ::fast_io::win32::LeaveCriticalSection(fp2 + sizeof(::fast_io::details::crt_iobuf)); } #endif } // namespace details @@ -487,7 +487,7 @@ template <::std::integral char_type> inline ::std::byte *read_some_bytes_underflow_define(::fast_io::basic_c_io_observer_unlocked ciob, ::std::byte *first, ::std::byte *last) { - return reinterpret_cast<::std::byte *>(::fast_io::details::wincrt_fp_read_cold_impl(ciob.fp, + return reinterpret_cast<::std::byte *>(::fast_io::details::wincrt_fp_read_cold_impl(ciob.fp, reinterpret_cast(first), reinterpret_cast(last))); }