From 1a1abaf95368419fe6f051f172c8b0c4aa663720 Mon Sep 17 00:00:00 2001 From: Min-Gu Yoo Date: Wed, 18 Feb 2026 12:28:10 -0800 Subject: [PATCH 1/5] refactor(core): rename _mark_untracked! to _record_type_touch! and add named constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tier 1 renames to match evolved architecture after Phase 3/5 optimizations: - _mark_untracked! → _record_type_touch! (records per-type bitmask, not a boolean) - _untracked_fixed_masks → _touched_type_masks (struct field) - _untracked_has_others → _touched_has_others (struct field) Add named constants replacing magic hex literals: - _LAZY_MODE_BIT (0x8000), _TYPED_LAZY_BIT (0x4000) - _MODE_BITS_MASK (0xC000), _TYPE_BITS_MASK (0x00FF) --- src/acquire.jl | 59 +++++++++++++++++++++++----------------------- src/convenience.jl | 56 +++++++++++++++++++++---------------------- src/types.jl | 25 ++++++++++++++------ 3 files changed, 76 insertions(+), 64 deletions(-) diff --git a/src/acquire.jl b/src/acquire.jl index 716517d..b0326f1 100644 --- a/src/acquire.jl +++ b/src/acquire.jl @@ -160,53 +160,54 @@ Get an N-dimensional view via `reshape` (zero creation cost). end # ============================================================================== -# Untracked Acquire Detection +# Type Touch Recording (for selective rewind) # ============================================================================== """ - _mark_untracked!(pool::AbstractArrayPool, ::Type{T}) + _record_type_touch!(pool::AbstractArrayPool, ::Type{T}) -Mark that an untracked acquire of type `T` has occurred at the current checkpoint depth. -Called by `acquire!` wrapper; macro-transformed calls use `_acquire_impl!` directly. +Record that type `T` was touched (acquired) at the current checkpoint depth. +Called by `acquire!` and convenience wrappers; macro-transformed calls use +`_acquire_impl!` directly (bypassing this for zero overhead). 
-For fixed-slot types, sets the corresponding bit in `_untracked_fixed_masks`. -For non-fixed-slot types, sets `_untracked_has_others` flag. +For fixed-slot types, sets the corresponding bit in `_touched_type_masks`. +For non-fixed-slot types, sets `_touched_has_others` flag. """ -@inline function _mark_untracked!(pool::AbstractArrayPool, ::Type{T}) where {T} +@inline function _record_type_touch!(pool::AbstractArrayPool, ::Type{T}) where {T} depth = pool._current_depth b = _fixed_slot_bit(T) if b == UInt16(0) - @inbounds pool._untracked_has_others[depth] = true + @inbounds pool._touched_has_others[depth] = true else - @inbounds pool._untracked_fixed_masks[depth] |= b + @inbounds pool._touched_type_masks[depth] |= b end nothing end -# CPU-specific override: adds lazy first-touch checkpoint in dynamic-selective mode +# CPU-specific override: adds lazy first-touch checkpoint in lazy mode # and typed-lazy mode. -# Bit 15 of _untracked_fixed_masks[depth] == 1 ↔ depth entered via _depth_only_checkpoint! -# Bit 14 of _untracked_fixed_masks[depth] == 1 ↔ depth entered via _typed_checkpoint_with_lazy! +# _LAZY_MODE_BIT (bit 15) in _touched_type_masks[depth] ↔ depth entered via _lazy_checkpoint! +# _TYPED_LAZY_BIT (bit 14) in _touched_type_masks[depth] ↔ depth entered via _typed_lazy_checkpoint! # On the first acquire of each fixed-slot type T at that depth, we retroactively save # n_active BEFORE the acquire (current value is still the parent's count), so that # the subsequent rewind can restore the parent's state correctly. 
-@inline function _mark_untracked!(pool::AdaptiveArrayPool, ::Type{T}) where {T} +@inline function _record_type_touch!(pool::AdaptiveArrayPool, ::Type{T}) where {T} depth = pool._current_depth b = _fixed_slot_bit(T) if b == UInt16(0) - @inbounds pool._untracked_has_others[depth] = true + @inbounds pool._touched_has_others[depth] = true else - current_mask = @inbounds pool._untracked_fixed_masks[depth] - # Lazy checkpoint: dynamic mode (bit 15) OR typed lazy mode (bit 14), AND first touch. + current_mask = @inbounds pool._touched_type_masks[depth] + # Lazy checkpoint: lazy mode (bit 15) OR typed lazy mode (bit 14), AND first touch. # Guard: skip if already checkpointed at this depth (prevents double-push when a - # tracked type is also acquired by a helper via acquire! → _mark_untracked!). - if (current_mask & 0xC000) != 0 && (current_mask & b) == 0 + # tracked type is also acquired by a helper via acquire! → _record_type_touch!). + if (current_mask & _MODE_BITS_MASK) != 0 && (current_mask & b) == 0 tp = get_typed_pool!(pool, T) if @inbounds(tp._checkpoint_depths[end]) != depth _checkpoint_typed_pool!(tp, depth) end end - @inbounds pool._untracked_fixed_masks[depth] = current_mask | b + @inbounds pool._touched_type_masks[depth] = current_mask | b end nothing end @@ -220,7 +221,7 @@ end _acquire_impl!(pool, Type{T}, dims...) -> ReshapedArray{T,N,...} Internal implementation of acquire!. Called directly by macro-transformed code -(no untracked marking). User code calls `acquire!` which adds marking. +(no type touch recording). User code calls `acquire!` which adds recording. 
""" @inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T} tp = get_typed_pool!(pool, T) @@ -263,7 +264,7 @@ end @inline _unsafe_acquire_impl!(pool::AbstractArrayPool, x::AbstractArray) = _unsafe_acquire_impl!(pool, eltype(x), size(x)) # ============================================================================== -# Acquisition API (User-facing with untracked marking) +# Acquisition API (User-facing with type touch recording) # ============================================================================== """ @@ -299,19 +300,19 @@ end See also: [`unsafe_acquire!`](@ref) for native array access. """ @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _acquire_impl!(pool, T, n) end # Multi-dimensional support (zero-allocation with N-D cache) @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _acquire_impl!(pool, T, dims...) end # Tuple support: allows acquire!(pool, T, size(A)) where size(A) returns NTuple{N,Int} @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _acquire_impl!(pool, T, dims...) end @@ -331,7 +332,7 @@ end ``` """ @inline function acquire!(pool::AbstractArrayPool, x::AbstractArray) - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _acquire_impl!(pool, eltype(x), size(x)) end @@ -386,18 +387,18 @@ end See also: [`acquire!`](@ref) for view-based access. 
""" @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_acquire_impl!(pool, T, n) end @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_acquire_impl!(pool, T, dims...) end # Tuple support @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_acquire_impl!(pool, T, dims) end @@ -417,7 +418,7 @@ end ``` """ @inline function unsafe_acquire!(pool::AbstractArrayPool, x::AbstractArray) - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _unsafe_acquire_impl!(pool, eltype(x), size(x)) end diff --git a/src/convenience.jl b/src/convenience.jl index 62d919f..053f8cb 100644 --- a/src/convenience.jl +++ b/src/convenience.jl @@ -43,22 +43,22 @@ end See also: [`ones!`](@ref), [`similar!`](@ref), [`acquire!`](@ref) """ @inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _zeros_impl!(pool, T, dims...) end @inline function zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _zeros_impl!(pool, default_eltype(pool), dims...) end @inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _zeros_impl!(pool, T, dims...) end @inline function zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _zeros_impl!(pool, default_eltype(pool), dims...) 
end @@ -116,22 +116,22 @@ end See also: [`zeros!`](@ref), [`similar!`](@ref), [`acquire!`](@ref) """ @inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _ones_impl!(pool, T, dims...) end @inline function ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _ones_impl!(pool, default_eltype(pool), dims...) end @inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _ones_impl!(pool, T, dims...) end @inline function ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _ones_impl!(pool, default_eltype(pool), dims...) end @@ -186,11 +186,11 @@ end See also: [`falses!`](@ref), [`ones!`](@ref), [`acquire!`](@ref) """ @inline function trues!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, Bit) + _record_type_touch!(pool, Bit) _trues_impl!(pool, dims...) end @inline function trues!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, Bit) + _record_type_touch!(pool, Bit) _trues_impl!(pool, dims...) end @@ -226,11 +226,11 @@ end See also: [`trues!`](@ref), [`zeros!`](@ref), [`acquire!`](@ref) """ @inline function falses!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, Bit) + _record_type_touch!(pool, Bit) _falses_impl!(pool, dims...) end @inline function falses!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, Bit) + _record_type_touch!(pool, Bit) _falses_impl!(pool, dims...) 
end @@ -273,22 +273,22 @@ end See also: [`zeros!`](@ref), [`ones!`](@ref), [`acquire!`](@ref) """ @inline function similar!(pool::AbstractArrayPool, x::AbstractArray) - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _similar_impl!(pool, x) end @inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _similar_impl!(pool, x, T) end @inline function similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _similar_impl!(pool, x, dims...) end @inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _similar_impl!(pool, x, T, dims...) end @@ -336,22 +336,22 @@ end See also: [`unsafe_ones!`](@ref), [`zeros!`](@ref), [`unsafe_acquire!`](@ref) """ @inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_zeros_impl!(pool, T, dims...) end @inline function unsafe_zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) end @inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_zeros_impl!(pool, T, dims...) end @inline function unsafe_zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) 
end @@ -403,22 +403,22 @@ end See also: [`unsafe_zeros!`](@ref), [`ones!`](@ref), [`unsafe_acquire!`](@ref) """ @inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_ones_impl!(pool, T, dims...) end @inline function unsafe_ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _unsafe_ones_impl!(pool, default_eltype(pool), dims...) end @inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_ones_impl!(pool, T, dims...) end @inline function unsafe_ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _unsafe_ones_impl!(pool, default_eltype(pool), dims...) end @@ -473,22 +473,22 @@ end See also: [`similar!`](@ref), [`unsafe_acquire!`](@ref) """ @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray) - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _unsafe_similar_impl!(pool, x) end @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_similar_impl!(pool, x, T) end @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _unsafe_similar_impl!(pool, x, dims...) end @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_similar_impl!(pool, x, T, dims...) 
end diff --git a/src/types.jl b/src/types.jl index 72b7c1e..847c1d9 100644 --- a/src/types.jl +++ b/src/types.jl @@ -367,7 +367,18 @@ Tests verify synchronization automatically. const FIXED_SLOT_FIELDS = (:float64, :float32, :int64, :int32, :complexf64, :complexf32, :bool, :bits) # ============================================================================== -# Fixed-Slot Bit Mapping (for typed untracked tracking) +# Bitmask Mode Constants +# ============================================================================== +# Bits 0-7: fixed-slot type touch tracking (one bit per type) +# Bits 14-15: mode flags set during checkpoint to control lazy behavior + +const _LAZY_MODE_BIT = UInt16(0x8000) # bit 15: lazy (dynamic-selective) checkpoint mode +const _TYPED_LAZY_BIT = UInt16(0x4000) # bit 14: typed lazy-fallback mode +const _MODE_BITS_MASK = UInt16(0xC000) # bits 14-15: all mode flags +const _TYPE_BITS_MASK = UInt16(0x00FF) # bits 0-7: fixed-slot type bits + +# ============================================================================== +# Fixed-Slot Bit Mapping (for type touch tracking) # ============================================================================== # Maps each fixed-slot type to a unique bit in a UInt16 bitmask. # Bit ordering matches FIXED_SLOT_FIELDS. Non-fixed types return UInt16(0). @@ -382,7 +393,7 @@ const FIXED_SLOT_FIELDS = (:float64, :float32, :int64, :int32, :complexf64, :com @inline _fixed_slot_bit(::Type{Bit}) = UInt16(1) << 7 @inline _fixed_slot_bit(::Type) = UInt16(0) # non-fixed-slot → triggers has_others -# Check whether a type's bit is set in a bitmask (e.g. _untracked_fixed_masks or combined). +# Check whether a type's bit is set in a bitmask (e.g. _touched_type_masks or combined). 
@inline _has_bit(mask::UInt16, ::Type{T}) where {T} = (mask & _fixed_slot_bit(T)) != 0 # ============================================================================== @@ -409,10 +420,10 @@ mutable struct AdaptiveArrayPool <: AbstractArrayPool # Fallback: rare types others::IdDict{DataType, Any} - # Untracked acquire detection (1-based sentinel pattern) + # Type touch tracking (1-based sentinel pattern) _current_depth::Int # Current scope depth (1 = global scope) - _untracked_fixed_masks::Vector{UInt16} # Per-depth: which fixed slots had untracked acquires - _untracked_has_others::Vector{Bool} # Per-depth: any non-fixed-slot untracked acquire? + _touched_type_masks::Vector{UInt16} # Per-depth: which fixed slots were touched + mode flags + _touched_has_others::Vector{Bool} # Per-depth: any non-fixed-slot type touched? end function AdaptiveArrayPool() @@ -427,8 +438,8 @@ function AdaptiveArrayPool() BitTypedPool(), IdDict{DataType, Any}(), 1, # _current_depth: 1 = global scope (sentinel) - [UInt16(0)], # _untracked_fixed_masks: sentinel (no bits set) - [false] # _untracked_has_others: sentinel (no others) + [UInt16(0)], # _touched_type_masks: sentinel (no bits set) + [false] # _touched_has_others: sentinel (no others) ) end From 0229de1596dde1ae6d8e319b37a61ebe8a17a841 Mon Sep 17 00:00:00 2001 From: Min-Gu Yoo Date: Wed, 18 Feb 2026 12:28:15 -0800 Subject: [PATCH 2/5] refactor(state): rename mode functions to lazy checkpoint/rewind naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tier 2 renames for checkpoint/rewind mode functions: - _depth_only_checkpoint! → _lazy_checkpoint! - _dynamic_selective_rewind! → _lazy_rewind! - _typed_checkpoint_with_lazy! → _typed_lazy_checkpoint! - _typed_selective_rewind! → _typed_lazy_rewind! - _generate_dynamic_selective_*_call → _generate_lazy_*_call Replace magic hex (0x8000, 0x4000, 0xC000, 0x00FF) with named constants. 
--- src/macros.jl | 70 ++++++++++++------------- src/state.jl | 140 +++++++++++++++++++++++++------------------------- 2 files changed, 105 insertions(+), 105 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index 28fba7a..21a7599 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -337,20 +337,20 @@ function _generate_pool_code(pool_name, expr, force_enable; source::Union{LineNu # Use typed checkpoint/rewind if all types are static, otherwise fallback to full use_typed = !has_dynamic && !isempty(static_types) - # For typed path: transform acquire! → _acquire_impl! (bypasses untracked marking) - # For dynamic path: keep acquire! untransformed so _mark_untracked! is called + # For typed path: transform acquire! → _acquire_impl! (bypasses type touch recording) + # For dynamic path: keep acquire! untransformed so _record_type_touch! is called transformed_expr = use_typed ? _transform_acquire_calls(expr, pool_name) : expr if use_typed checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types) else - checkpoint_call = _generate_dynamic_selective_checkpoint_call(esc(pool_name)) + checkpoint_call = _generate_lazy_checkpoint_call(esc(pool_name)) end if use_typed rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types) else - rewind_call = _generate_dynamic_selective_rewind_call(esc(pool_name)) + rewind_call = _generate_lazy_rewind_call(esc(pool_name)) end if force_enable @@ -429,8 +429,8 @@ function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, forc local_vars = _extract_local_assignments(expr) static_types, has_dynamic = _filter_static_types(all_types, local_vars) use_typed = !has_dynamic && !isempty(static_types) - # For typed path: transform acquire! → _acquire_impl! (bypasses untracked marking) - # For dynamic path: keep acquire! untransformed so _mark_untracked! is called + # For typed path: transform acquire! → _acquire_impl! (bypasses type touch recording) + # For dynamic path: keep acquire! 
untransformed so _record_type_touch! is called transformed_expr = use_typed ? _transform_acquire_calls(expr, pool_name) : expr pool_getter = :($_get_pool_for_backend($(Val{backend}()))) @@ -438,8 +438,8 @@ function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, forc checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types) rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types) else - checkpoint_call = _generate_dynamic_selective_checkpoint_call(esc(pool_name)) - rewind_call = _generate_dynamic_selective_rewind_call(esc(pool_name)) + checkpoint_call = _generate_lazy_checkpoint_call(esc(pool_name)) + rewind_call = _generate_lazy_rewind_call(esc(pool_name)) end return quote @@ -475,8 +475,8 @@ function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, forc # Use typed checkpoint/rewind if all types are static, otherwise fallback to full use_typed = !has_dynamic && !isempty(static_types) - # For typed path: transform acquire! → _acquire_impl! (bypasses untracked marking) - # For dynamic path: keep acquire! untransformed so _mark_untracked! is called + # For typed path: transform acquire! → _acquire_impl! (bypasses type touch recording) + # For dynamic path: keep acquire! untransformed so _record_type_touch! is called transformed_expr = use_typed ? 
_transform_acquire_calls(expr, pool_name) : expr # Use Val{backend}() for compile-time dispatch - fully inlinable @@ -485,13 +485,13 @@ function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, forc if use_typed checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types) else - checkpoint_call = _generate_dynamic_selective_checkpoint_call(esc(pool_name)) + checkpoint_call = _generate_lazy_checkpoint_call(esc(pool_name)) end if use_typed rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types) else - rewind_call = _generate_dynamic_selective_rewind_call(esc(pool_name)) + rewind_call = _generate_lazy_rewind_call(esc(pool_name)) end return quote @@ -537,8 +537,8 @@ function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, f static_types, has_dynamic = _filter_static_types(all_types, local_vars) use_typed = !has_dynamic && !isempty(static_types) - # For typed path: transform acquire! → _acquire_impl! (bypasses untracked marking) - # For dynamic path: keep acquire! untransformed so _mark_untracked! is called + # For typed path: transform acquire! → _acquire_impl! (bypasses type touch recording) + # For dynamic path: keep acquire! untransformed so _record_type_touch! is called transformed_body = use_typed ? 
_transform_acquire_calls(body, pool_name) : body # Use Val{backend}() for compile-time dispatch @@ -547,13 +547,13 @@ function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, f if use_typed checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types) else - checkpoint_call = _generate_dynamic_selective_checkpoint_call(esc(pool_name)) + checkpoint_call = _generate_lazy_checkpoint_call(esc(pool_name)) end if use_typed rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types) else - rewind_call = _generate_dynamic_selective_rewind_call(esc(pool_name)) + rewind_call = _generate_lazy_rewind_call(esc(pool_name)) end new_body = quote @@ -594,20 +594,20 @@ function _generate_function_pool_code(pool_name, func_def, force_enable, disable static_types, has_dynamic = _filter_static_types(all_types, local_vars) use_typed = !has_dynamic && !isempty(static_types) - # For typed path: transform acquire! → _acquire_impl! (bypasses untracked marking) - # For dynamic path: keep acquire! untransformed so _mark_untracked! is called + # For typed path: transform acquire! → _acquire_impl! (bypasses type touch recording) + # For dynamic path: keep acquire! untransformed so _record_type_touch! is called transformed_body = use_typed ? _transform_acquire_calls(body, pool_name) : body if use_typed checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types) else - checkpoint_call = _generate_dynamic_selective_checkpoint_call(esc(pool_name)) + checkpoint_call = _generate_lazy_checkpoint_call(esc(pool_name)) end if use_typed rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types) else - rewind_call = _generate_dynamic_selective_rewind_call(esc(pool_name)) + rewind_call = _generate_lazy_rewind_call(esc(pool_name)) end if force_enable @@ -910,8 +910,8 @@ end Generate bitmask-aware checkpoint call. 
When types are known at compile time, emits a conditional: -- if untracked types ⊆ tracked types → typed checkpoint (fast path) -- otherwise → `_typed_checkpoint_with_lazy!` (typed checkpoint + set bit 14 for +- if touched types ⊆ tracked types → typed checkpoint (fast path) +- otherwise → `_typed_lazy_checkpoint!` (typed checkpoint + set bit 14 for lazy first-touch checkpointing of extra types touched by helpers) """ function _generate_typed_checkpoint_call(pool_expr, types) @@ -920,7 +920,7 @@ function _generate_typed_checkpoint_call(pool_expr, types) else escaped_types = [esc(t) for t in types] typed_call = :($checkpoint!($pool_expr, $(escaped_types...))) - lazy_call = :($_typed_checkpoint_with_lazy!($pool_expr, $(escaped_types...))) + lazy_call = :($_typed_lazy_checkpoint!($pool_expr, $(escaped_types...))) return quote if $_can_use_typed_path($pool_expr, $_tracked_mask_for_types($(escaped_types...))) $typed_call @@ -936,8 +936,8 @@ end Generate bitmask-aware rewind call. When types are known at compile time, emits a conditional: -- if untracked types ⊆ tracked types → typed rewind (fast path) -- otherwise → `_typed_selective_rewind!` (rewinds tracked | untracked mask; +- if touched types ⊆ tracked types → typed rewind (fast path) +- otherwise → `_typed_lazy_rewind!` (rewinds tracked | touched mask; all touched types have Case A checkpoints via bit 14 lazy mode) """ function _generate_typed_rewind_call(pool_expr, types) @@ -946,7 +946,7 @@ function _generate_typed_rewind_call(pool_expr, types) else escaped_types = [esc(t) for t in types] typed_call = :($rewind!($pool_expr, $(escaped_types...))) - selective_call = :($_typed_selective_rewind!($pool_expr, + selective_call = :($_typed_lazy_rewind!($pool_expr, $_tracked_mask_for_types($(escaped_types...)))) return quote if $_can_use_typed_path($pool_expr, $_tracked_mask_for_types($(escaped_types...))) @@ -959,25 +959,25 @@ function _generate_typed_rewind_call(pool_expr, types) end """ - 
_generate_dynamic_selective_checkpoint_call(pool_expr) + _generate_lazy_checkpoint_call(pool_expr) Generate a depth-only checkpoint call for dynamic-selective mode (`use_typed=false`). Much lighter than full `checkpoint!`: only increments depth and pushes bitmask sentinels. """ -function _generate_dynamic_selective_checkpoint_call(pool_expr) - return :($_depth_only_checkpoint!($pool_expr)) +function _generate_lazy_checkpoint_call(pool_expr) + return :($_lazy_checkpoint!($pool_expr)) end """ - _generate_dynamic_selective_rewind_call(pool_expr) + _generate_lazy_rewind_call(pool_expr) Generate selective rewind code for dynamic-selective mode (`use_typed=false`). -Delegates to `_dynamic_selective_rewind!` — a single function call, symmetric -with `_depth_only_checkpoint!` for checkpoint. This avoids `let`-block overhead +Delegates to `_lazy_rewind!` — a single function call, symmetric +with `_lazy_checkpoint!` for checkpoint. This avoids `let`-block overhead in `finally` clauses (which can impair Julia's type inference and cause boxing). """ -function _generate_dynamic_selective_rewind_call(pool_expr) - return :($_dynamic_selective_rewind!($pool_expr)) +function _generate_lazy_rewind_call(pool_expr) + return :($_lazy_rewind!($pool_expr)) end @@ -991,7 +991,7 @@ end Transform acquire!/unsafe_acquire!/convenience function calls to their _impl! counterparts. Only transforms calls where the first argument matches `pool_name`. -This allows macro-transformed code to bypass the untracked marking overhead, +This allows macro-transformed code to bypass the type touch recording overhead, since the macro already knows about these calls at compile time. Transformation rules: diff --git a/src/state.jl b/src/state.jl index 119319c..0eaea1e 100644 --- a/src/state.jl +++ b/src/state.jl @@ -13,10 +13,10 @@ After warmup, this function has **zero allocation**. 
See also: [`rewind!`](@ref), [`@with_pool`](@ref) """ function checkpoint!(pool::AdaptiveArrayPool) - # Increment depth and initialize untracked bitmask state + # Increment depth and initialize type touch tracking state pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) depth = pool._current_depth # Fixed slots - zero allocation via @generated iteration @@ -38,14 +38,14 @@ end Save state for a specific type only. Used by optimized macros that know which types will be used at compile time. -Also updates _current_depth and bitmask state for untracked acquire detection. +Also updates _current_depth and bitmask state for type touch tracking. ~77% faster than full checkpoint! when only one type is used. """ @inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where T pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) _checkpoint_typed_pool!(get_typed_pool!(pool, T), pool._current_depth) nothing end @@ -69,8 +69,8 @@ compile-time unrolling. Increments _current_depth once for all types. checkpoint_exprs = [:(_checkpoint_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices] quote pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) $(checkpoint_exprs...) nothing end @@ -84,14 +84,14 @@ end end """ - _depth_only_checkpoint!(pool::AdaptiveArrayPool) + _lazy_checkpoint!(pool::AdaptiveArrayPool) -Lightweight checkpoint for dynamic-selective mode (`use_typed=false` macro path). +Lightweight checkpoint for lazy mode (`use_typed=false` macro path). 
Increments `_current_depth` and pushes bitmask sentinels — but does **not** save -`n_active` for any fixed-slot typed pool. The mode flag (bit 15) in -`_untracked_fixed_masks` marks this depth as dynamic-selective so that -`_mark_untracked!` can trigger lazy first-touch checkpoints. +`n_active` for any fixed-slot typed pool. The `_LAZY_MODE_BIT` (bit 15) in +`_touched_type_masks` marks this depth as lazy mode so that +`_record_type_touch!` can trigger lazy first-touch checkpoints. Existing `others` entries are eagerly checkpointed since there is no per-type tracking for non-fixed-slot pools; Case B in `_rewind_typed_pool!` handles any @@ -99,17 +99,17 @@ new `others` entries created during the scope (n_active starts at 0 = sentinel). Performance: ~2ns vs ~540ns for full `checkpoint!`. """ -@inline function _depth_only_checkpoint!(pool::AdaptiveArrayPool) +@inline function _lazy_checkpoint!(pool::AdaptiveArrayPool) pool._current_depth += 1 - # Bit 15 = dynamic-selective mode flag (bits 0–7 are fixed-slot bits) - push!(pool._untracked_fixed_masks, UInt16(0x8000)) - push!(pool._untracked_has_others, false) + # _LAZY_MODE_BIT = lazy mode flag (bits 0–7 are fixed-slot type bits) + push!(pool._touched_type_masks, _LAZY_MODE_BIT) + push!(pool._touched_has_others, false) depth = pool._current_depth # Eagerly checkpoint any pre-existing others entries. # New others types created during the scope start at n_active=0 (sentinel covers them). for p in values(pool.others) _checkpoint_typed_pool!(p, depth) - @inbounds pool._untracked_has_others[depth] = true + @inbounds pool._touched_has_others[depth] = true end nothing end @@ -125,7 +125,7 @@ Restore the pool state (n_active counters) from internal stacks. Uses _checkpoint_depths to accurately determine which entries to pop vs restore. Only the counters are restored; allocated memory remains for reuse. -Handles untracked acquires by checking _checkpoint_depths for accurate restoration. 
+Handles touched types by checking _checkpoint_depths for accurate restoration. **Safety**: If called at global scope (depth=1, no pending checkpoints), automatically delegates to `reset!` to safely clear all n_active counters. @@ -152,8 +152,8 @@ function rewind!(pool::AdaptiveArrayPool) _rewind_typed_pool!(tp, cur_depth) end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 return nothing @@ -172,8 +172,8 @@ Also updates _current_depth and bitmask state. return nothing end _rewind_typed_pool!(get_typed_pool!(pool, T), pool._current_depth) - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -203,8 +203,8 @@ Decrements _current_depth once after all types are rewound. return nothing end $(rewind_exprs...) - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -239,89 +239,89 @@ end end """ - _dynamic_selective_rewind!(pool::AdaptiveArrayPool) + _lazy_rewind!(pool::AdaptiveArrayPool) -Complete rewind for dynamic-selective mode (`use_typed=false` macro path). +Complete rewind for lazy mode (`use_typed=false` macro path). Reads the combined mask at the current depth, rewinds only the fixed-slot pools whose bits are set, handles any `others` entries, then pops the depth metadata. Called directly from the macro-generated `finally` clause as a single function call -(matching the structure of `_depth_only_checkpoint!` for symmetry and performance). +(matching the structure of `_lazy_checkpoint!` for symmetry and performance). 
""" -@inline function _dynamic_selective_rewind!(pool::AdaptiveArrayPool) +@inline function _lazy_rewind!(pool::AdaptiveArrayPool) d = pool._current_depth - bits = @inbounds(pool._untracked_fixed_masks[d]) & UInt16(0x00FF) + bits = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK _selective_rewind_fixed_slots!(pool, bits) - if @inbounds(pool._untracked_has_others[d]) + if @inbounds(pool._touched_has_others[d]) for tp in values(pool.others) _rewind_typed_pool!(tp, d) end end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end """ - _typed_checkpoint_with_lazy!(pool::AdaptiveArrayPool, types::Type...) + _typed_lazy_checkpoint!(pool::AdaptiveArrayPool, types::Type...) Typed checkpoint that enables lazy first-touch checkpointing for extra types touched by helpers (`use_typed=true`, `_can_use_typed_path=false` path). Calls `checkpoint!(pool, types...)` (checkpoints only the statically-known types), -then sets bit 14 (`0x4000`) in `_untracked_fixed_masks[depth]` to signal typed lazy mode. +then sets `_TYPED_LAZY_BIT` (bit 14) in `_touched_type_masks[depth]` to signal typed lazy mode. -`_mark_untracked!` checks `(mask & 0xC000) != 0` (bit 14 OR bit 15) to trigger a +`_record_type_touch!` checks `(mask & _MODE_BITS_MASK) != 0` (bit 14 OR bit 15) to trigger a lazy first-touch checkpoint for each extra type on first acquire, ensuring Case A (not Case B) applies at rewind and parent `n_active` is preserved correctly. """ -@inline function _typed_checkpoint_with_lazy!(pool::AdaptiveArrayPool, types::Type...) +@inline function _typed_lazy_checkpoint!(pool::AdaptiveArrayPool, types::Type...) checkpoint!(pool, types...) d = pool._current_depth - @inbounds pool._untracked_fixed_masks[d] |= UInt16(0x4000) # set bit 14 - # Eagerly snapshot pre-existing others entries — mirrors _depth_only_checkpoint!. - # _mark_untracked! 
cannot lazy-checkpoint others types (b==0 branch, no per-type bit). + @inbounds pool._touched_type_masks[d] |= _TYPED_LAZY_BIT + # Eagerly snapshot pre-existing others entries — mirrors _lazy_checkpoint!. + # _record_type_touch! cannot lazy-checkpoint others types (b==0 branch, no per-type bit). # Without this, a helper that re-acquires an already-active others type triggers Case B # at rewind and restores the wrong parent n_active value. # - # Also set has_others=true when pool.others is non-empty, so _typed_selective_rewind! + # Also set has_others=true when pool.others is non-empty, so _typed_lazy_rewind! # enters the others loop even for tracked non-fixed-slot types (e.g. CPU Float16) that - # used _acquire_impl! (bypassing _mark_untracked!, leaving has_others=false otherwise). + # used _acquire_impl! (bypassing _record_type_touch!, leaving has_others=false otherwise). # Skip re-snapshot for entries already checkpointed at d by checkpoint!(pool, types...) # (e.g. Float16 in types... was just checkpointed above — avoid double-push). for p in values(pool.others) if @inbounds(p._checkpoint_depths[end]) != d _checkpoint_typed_pool!(p, d) end - @inbounds pool._untracked_has_others[d] = true + @inbounds pool._touched_has_others[d] = true end nothing end """ - _typed_selective_rewind!(pool::AdaptiveArrayPool, tracked_mask::UInt16) + _typed_lazy_rewind!(pool::AdaptiveArrayPool, tracked_mask::UInt16) Selective rewind for typed mode (`use_typed=true`) fallback path. Called when `_can_use_typed_path` returns false (helpers touched types beyond the statically-tracked set). Rewinds only pools whose bits are set in -`tracked_mask | untracked_mask`. All touched types have Case A checkpoints, -guaranteed by the bit 14 lazy mode set in `_typed_checkpoint_with_lazy!`. +`tracked_mask | touched_mask`. All touched types have Case A checkpoints, +guaranteed by the `_TYPED_LAZY_BIT` mode set in `_typed_lazy_checkpoint!`. 
""" -@inline function _typed_selective_rewind!(pool::AdaptiveArrayPool, tracked_mask::UInt16) +@inline function _typed_lazy_rewind!(pool::AdaptiveArrayPool, tracked_mask::UInt16) d = pool._current_depth - untracked = @inbounds(pool._untracked_fixed_masks[d]) & UInt16(0x00FF) + untracked = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK combined = tracked_mask | untracked _selective_rewind_fixed_slots!(pool, combined) - if @inbounds(pool._untracked_has_others[d]) + if @inbounds(pool._touched_has_others[d]) for tp in values(pool.others) _rewind_typed_pool!(tp, d) end end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -333,7 +333,7 @@ Rewind only the fixed-slot typed pools whose bits are set in `mask`. Each of the 8 fixed-slot pools maps to bits 0–7 (same encoding as `_fixed_slot_bit`). Bits 8–15 (mode flags) are **not** checked here — callers must strip them -before passing the mask (e.g. `mask & UInt16(0x00FF)`). +before passing the mask (e.g. `mask & _TYPE_BITS_MASK`). Unset bits are skipped entirely: for pools that were acquired without a matching checkpoint, `_rewind_typed_pool!` Case B safely restores from the parent checkpoint. 
@@ -432,12 +432,12 @@ function Base.empty!(pool::AdaptiveArrayPool) end empty!(pool.others) - # Reset untracked detection state (1-based sentinel pattern) + # Reset type touch tracking state (1-based sentinel pattern) pool._current_depth = 1 # 1 = global scope (sentinel) - empty!(pool._untracked_fixed_masks) - push!(pool._untracked_fixed_masks, UInt16(0)) # Sentinel: no bits set - empty!(pool._untracked_has_others) - push!(pool._untracked_has_others, false) # Sentinel: no others + empty!(pool._touched_type_masks) + push!(pool._touched_type_masks, UInt16(0)) # Sentinel: no bits set + empty!(pool._touched_has_others) + push!(pool._touched_has_others, false) # Sentinel: no others return pool end @@ -470,7 +470,7 @@ Reset pool state without clearing allocated storage. This function: - Resets all `n_active` counters to 0 - Restores all checkpoint stacks to sentinel state -- Resets `_current_depth` and untracked bitmask state +- Resets `_current_depth` and type touch tracking state Unlike `empty!`, this **preserves** all allocated vectors, views, and N-D arrays for reuse, avoiding reallocation costs. @@ -513,12 +513,12 @@ function reset!(pool::AdaptiveArrayPool) reset!(tp) end - # Reset untracked detection state (1-based sentinel pattern) + # Reset type touch tracking state (1-based sentinel pattern) pool._current_depth = 1 # 1 = global scope (sentinel) - empty!(pool._untracked_fixed_masks) - push!(pool._untracked_fixed_masks, UInt16(0)) # Sentinel: no bits set - empty!(pool._untracked_has_others) - push!(pool._untracked_has_others, false) # Sentinel: no others + empty!(pool._touched_type_masks) + push!(pool._touched_type_masks, UInt16(0)) # Sentinel: no bits set + empty!(pool._touched_has_others) + push!(pool._touched_has_others, false) # Sentinel: no others return pool end @@ -579,17 +579,17 @@ end Check if the typed (fast) checkpoint/rewind path is safe to use. 
-Returns `true` when all untracked acquires at the current depth are a subset -of the tracked types (bitmask subset check) AND no non-fixed-slot types were used. +Returns `true` when all touched types at the current depth are a subset +of the tracked types (bitmask subset check) AND no non-fixed-slot types were touched. -The subset check: `(untracked_mask & ~tracked_mask) == 0` means every bit set -in `untracked_mask` is also set in `tracked_mask`. +The subset check: `(touched_mask & ~tracked_mask) == 0` means every bit set +in `touched_mask` is also set in `tracked_mask`. """ @inline function _can_use_typed_path(pool::AbstractArrayPool, tracked_mask::UInt16) depth = pool._current_depth - untracked_mask = @inbounds pool._untracked_fixed_masks[depth] - has_others = @inbounds pool._untracked_has_others[depth] - return (untracked_mask & ~tracked_mask) == UInt16(0) && !has_others + touched_mask = @inbounds pool._touched_type_masks[depth] + has_others = @inbounds pool._touched_has_others[depth] + return (touched_mask & ~tracked_mask) == UInt16(0) && !has_others end # ============================================================================== From 4cfedbdf46e7f464659a00b95e324de752d9f6a3 Mon Sep 17 00:00:00 2001 From: Min-Gu Yoo Date: Wed, 18 Feb 2026 12:28:20 -0800 Subject: [PATCH 3/5] refactor(cuda): mirror all naming changes in CUDA extension Apply Tier 1 + Tier 2 renames to CUDA extension: - Struct fields, function overrides, imports, and magic numbers - Float16 special handling preserved (bit 7 reassignment) --- ext/AdaptiveArrayPoolsCUDAExt/acquire.jl | 21 ++--- ext/AdaptiveArrayPoolsCUDAExt/state.jl | 101 ++++++++++++----------- ext/AdaptiveArrayPoolsCUDAExt/types.jl | 8 +- 3 files changed, 66 insertions(+), 64 deletions(-) diff --git a/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl b/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl index 23cbb36..437d516 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl @@ -28,7 +28,8 @@ # 
============================================================================== using AdaptiveArrayPools: get_view!, get_nd_view!, get_nd_array!, allocate_vector, safe_prod, - _mark_untracked!, _fixed_slot_bit, _checkpoint_typed_pool! + _record_type_touch!, _fixed_slot_bit, _checkpoint_typed_pool!, + _MODE_BITS_MASK """ get_view!(tp::CuTypedPool{T}, n::Int) -> CuVector{T} @@ -165,44 +166,44 @@ Used by `unsafe_acquire!` - same zero-allocation behavior as `acquire!`. end # ============================================================================== -# CUDA _mark_untracked! override (Issue #2 / #2a fix) +# CUDA _record_type_touch! override (Issue #2 / #2a fix) # ============================================================================== # Float16 on CUDA: direct struct field with _fixed_slot_bit(Float16)=0. # We track Float16 via bit 7 (CUDA reassignment; CPU uses bit 7 for Bit type, absent on GPU). # This gives Float16 lazy first-touch checkpointing in bit-14 (typed lazy) and bit-15 (dynamic) # modes, ensuring Case A (not Case B) fires at rewind and parent n_active is preserved. -@inline function AdaptiveArrayPools._mark_untracked!(pool::CuAdaptiveArrayPool, ::Type{T}) where {T} +@inline function AdaptiveArrayPools._record_type_touch!(pool::CuAdaptiveArrayPool, ::Type{T}) where {T} depth = pool._current_depth b = _fixed_slot_bit(T) if b == UInt16(0) if T === Float16 # Float16: CUDA direct field tracked via bit 7 (not in pool.others dict). b16 = UInt16(1) << 7 - current_mask = @inbounds pool._untracked_fixed_masks[depth] + current_mask = @inbounds pool._touched_type_masks[depth] # Lazy first-touch checkpoint: bit 14 (typed lazy) OR bit 15 (dynamic), first touch only. # Guard: skip if already checkpointed at this depth (prevents double-push). 
- if (current_mask & 0xC000) != 0 && (current_mask & b16) == 0 + if (current_mask & _MODE_BITS_MASK) != 0 && (current_mask & b16) == 0 if @inbounds(pool.float16._checkpoint_depths[end]) != depth _checkpoint_typed_pool!(pool.float16, depth) end end - @inbounds pool._untracked_fixed_masks[depth] = current_mask | b16 + @inbounds pool._touched_type_masks[depth] = current_mask | b16 else # Genuine others type (UInt8, Int8, etc.) — eagerly snapshotted at scope entry. - @inbounds pool._untracked_has_others[depth] = true + @inbounds pool._touched_has_others[depth] = true end else - current_mask = @inbounds pool._untracked_fixed_masks[depth] + current_mask = @inbounds pool._touched_type_masks[depth] # Lazy first-touch checkpoint for fixed-slot types in bit 14/15 modes. # Guard: skip if already checkpointed at this depth (prevents double-push). - if (current_mask & 0xC000) != 0 && (current_mask & b) == 0 + if (current_mask & _MODE_BITS_MASK) != 0 && (current_mask & b) == 0 tp = AdaptiveArrayPools.get_typed_pool!(pool, T) if @inbounds(tp._checkpoint_depths[end]) != depth _checkpoint_typed_pool!(tp, depth) end end - @inbounds pool._untracked_fixed_masks[depth] = current_mask | b + @inbounds pool._touched_type_masks[depth] = current_mask | b end nothing end diff --git a/ext/AdaptiveArrayPoolsCUDAExt/state.jl b/ext/AdaptiveArrayPoolsCUDAExt/state.jl index 23d4ba6..e800c9f 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/state.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/state.jl @@ -6,7 +6,8 @@ # AbstractTypedPool, so they work for CuTypedPool automatically. 
using AdaptiveArrayPools: checkpoint!, rewind!, reset!, - _checkpoint_typed_pool!, _rewind_typed_pool!, _has_bit + _checkpoint_typed_pool!, _rewind_typed_pool!, _has_bit, + _LAZY_MODE_BIT, _TYPED_LAZY_BIT, _TYPE_BITS_MASK # ============================================================================== # GPU Fixed Slot Iteration @@ -33,8 +34,8 @@ end function AdaptiveArrayPools.checkpoint!(pool::CuAdaptiveArrayPool) # Increment depth and initialize untracked bitmask state pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) depth = pool._current_depth # Fixed slots - zero allocation via @generated iteration @@ -53,8 +54,8 @@ end # Type-specific checkpoint (single type) @inline function AdaptiveArrayPools.checkpoint!(pool::CuAdaptiveArrayPool, ::Type{T}) where {T} pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) _checkpoint_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, T), pool._current_depth) nothing end @@ -72,8 +73,8 @@ end checkpoint_exprs = [:(_checkpoint_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices] quote pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) $(checkpoint_exprs...) 
nothing end @@ -102,8 +103,8 @@ function AdaptiveArrayPools.rewind!(pool::CuAdaptiveArrayPool) _rewind_typed_pool!(tp, cur_depth) end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 return nothing @@ -116,8 +117,8 @@ end return nothing end _rewind_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, T), pool._current_depth) - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -140,17 +141,17 @@ end return nothing end $(rewind_exprs...) - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end end # ============================================================================== -# Dynamic-Selective Mode for CuAdaptiveArrayPool (use_typed=false path) +# Lazy Mode for CuAdaptiveArrayPool (use_typed=false path) # ============================================================================== -# Mirrors CPU _depth_only_checkpoint! / _dynamic_selective_rewind! in src/state.jl. +# Mirrors CPU _lazy_checkpoint! / _lazy_rewind! in src/state.jl. # # Float16 on CUDA: direct struct field (not in pool.others dict), but _fixed_slot_bit(Float16)=0. # We reassign Float16 to bit 7 (unused on CUDA; CPU uses bit 7 for Bit type which has no GPU equivalent). @@ -160,25 +161,25 @@ end # Bit 7 on CUDA is reserved for Float16 (CPU uses it for Bit; Bit type does not exist on GPU). 
@inline _cuda_float16_bit() = UInt16(1) << 7 -@inline function AdaptiveArrayPools._depth_only_checkpoint!(pool::CuAdaptiveArrayPool) +@inline function AdaptiveArrayPools._lazy_checkpoint!(pool::CuAdaptiveArrayPool) pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0x8000)) # bit 15: dynamic-selective mode - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, _LAZY_MODE_BIT) # lazy mode flag + push!(pool._touched_has_others, false) depth = pool._current_depth - # Eagerly checkpoint pre-existing others entries — same as CPU _depth_only_checkpoint!. + # Eagerly checkpoint pre-existing others entries — same as CPU _lazy_checkpoint!. # New types created during the scope start at n_active=0 (sentinel covers them, Case B safe). # Pre-existing types need their count saved now so Case A fires correctly at rewind. for p in values(pool.others) _checkpoint_typed_pool!(p, depth) - @inbounds pool._untracked_has_others[depth] = true + @inbounds pool._touched_has_others[depth] = true end - # Float16 uses lazy first-touch via bit 7 in _mark_untracked! — no eager checkpoint needed. + # Float16 uses lazy first-touch via bit 7 in _record_type_touch! — no eager checkpoint needed. 
nothing end -@inline function AdaptiveArrayPools._dynamic_selective_rewind!(pool::CuAdaptiveArrayPool) +@inline function AdaptiveArrayPools._lazy_rewind!(pool::CuAdaptiveArrayPool) d = pool._current_depth - mask = @inbounds(pool._untracked_fixed_masks[d]) & UInt16(0x00FF) + mask = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK _has_bit(mask, Float64) && _rewind_typed_pool!(pool.float64, d) _has_bit(mask, Float32) && _rewind_typed_pool!(pool.float32, d) _has_bit(mask, Int64) && _rewind_typed_pool!(pool.int64, d) @@ -188,13 +189,13 @@ end _has_bit(mask, Bool) && _rewind_typed_pool!(pool.bool, d) # Bit 7: Float16 (CUDA reassignment — _fixed_slot_bit(Float16)==0, must use explicit bit check) mask & _cuda_float16_bit() != 0 && _rewind_typed_pool!(pool.float16, d) - if @inbounds(pool._untracked_has_others[d]) + if @inbounds(pool._touched_has_others[d]) for tp in values(pool.others) _rewind_typed_pool!(tp, d) end end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -203,32 +204,32 @@ end # Typed-Fallback Helpers for CuAdaptiveArrayPool (Phase 5 parity) # ============================================================================== -# _typed_checkpoint_with_lazy!: typed checkpoint + set bit 14 for lazy extra-type tracking. +# _typed_lazy_checkpoint!: typed checkpoint + set bit 14 for lazy extra-type tracking. # Also eagerly snapshots pre-existing others entries (mirrors CPU fix for Issue #3). -@inline function AdaptiveArrayPools._typed_checkpoint_with_lazy!(pool::CuAdaptiveArrayPool, types::Type...) +@inline function AdaptiveArrayPools._typed_lazy_checkpoint!(pool::CuAdaptiveArrayPool, types::Type...) checkpoint!(pool, types...) d = pool._current_depth - @inbounds pool._untracked_fixed_masks[d] |= UInt16(0x4000) # set bit 14 - # Eagerly snapshot pre-existing others entries — same reasoning as _depth_only_checkpoint!. 
+ @inbounds pool._touched_type_masks[d] |= _TYPED_LAZY_BIT + # Eagerly snapshot pre-existing others entries — same reasoning as _lazy_checkpoint!. # Skip re-snapshot for entries already checkpointed at d by checkpoint!(pool, types...) # (e.g. Float16 in types... was just checkpointed above — avoid double-push). for p in values(pool.others) if @inbounds(p._checkpoint_depths[end]) != d _checkpoint_typed_pool!(p, d) end - @inbounds pool._untracked_has_others[d] = true + @inbounds pool._touched_has_others[d] = true end - # Float16 uses lazy first-touch via bit 7 in _mark_untracked! — no eager checkpoint needed. + # Float16 uses lazy first-touch via bit 7 in _record_type_touch! — no eager checkpoint needed. nothing end -# _typed_selective_rewind!: selective rewind of (tracked | untracked) mask. +# _typed_lazy_rewind!: selective rewind of (tracked | touched) mask. # Uses direct field access with bit checks — foreach_fixed_slot is single-argument (no bit yield). -# Bit 7: Float16 (CUDA-specific; lazy-checkpointed on first touch by _mark_untracked!). +# Bit 7: Float16 (CUDA-specific; lazy-checkpointed on first touch by _record_type_touch!). # has_others: genuine others types (UInt8, Int8, etc.) — eagerly checkpointed at scope entry. 
-@inline function AdaptiveArrayPools._typed_selective_rewind!(pool::CuAdaptiveArrayPool, tracked_mask::UInt16) +@inline function AdaptiveArrayPools._typed_lazy_rewind!(pool::CuAdaptiveArrayPool, tracked_mask::UInt16) d = pool._current_depth - untracked = @inbounds(pool._untracked_fixed_masks[d]) & UInt16(0x00FF) + untracked = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK combined = tracked_mask | untracked _has_bit(combined, Float64) && _rewind_typed_pool!(pool.float64, d) _has_bit(combined, Float32) && _rewind_typed_pool!(pool.float32, d) @@ -237,23 +238,23 @@ end _has_bit(combined, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d) _has_bit(combined, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d) _has_bit(combined, Bool) && _rewind_typed_pool!(pool.bool, d) - # Float16: bit 7 is set by _mark_untracked! on first untracked touch (lazy first-touch). - # Also rewind when Float16 was a *tracked* type in the macro: _typed_checkpoint_with_lazy! + # Float16: bit 7 is set by _record_type_touch! on first untracked touch (lazy first-touch). + # Also rewind when Float16 was a *tracked* type in the macro: _typed_lazy_checkpoint! # calls checkpoint!(pool, Float16) which pushes a checkpoint at depth d, but _acquire_impl! - # (macro transform) bypasses _mark_untracked!, leaving bit 7 = 0. + # (macro transform) bypasses _record_type_touch!, leaving bit 7 = 0. # _tracked_mask_for_types(Float16) == 0 (since _fixed_slot_bit(Float16) == 0), so # tracked_mask carries no bit for Float16 either. # Solution: check _checkpoint_depths to detect "Float16 was checkpointed at this depth". 
if combined & _cuda_float16_bit() != 0 || @inbounds(pool.float16._checkpoint_depths[end]) == d _rewind_typed_pool!(pool.float16, d) end - if @inbounds(pool._untracked_has_others[d]) + if @inbounds(pool._touched_has_others[d]) for tp in values(pool.others) _rewind_typed_pool!(tp, d) end end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -275,10 +276,10 @@ function AdaptiveArrayPools.reset!(pool::CuAdaptiveArrayPool) # Reset depth and bitmask sentinel state pool._current_depth = 1 - empty!(pool._untracked_fixed_masks) - push!(pool._untracked_fixed_masks, UInt16(0)) # Sentinel: no bits set - empty!(pool._untracked_has_others) - push!(pool._untracked_has_others, false) # Sentinel: no others + empty!(pool._touched_type_masks) + push!(pool._touched_type_masks, UInt16(0)) # Sentinel: no bits set + empty!(pool._touched_has_others) + push!(pool._touched_has_others, false) # Sentinel: no others return pool end @@ -334,10 +335,10 @@ function Base.empty!(pool::CuAdaptiveArrayPool) # Reset depth and bitmask sentinel state pool._current_depth = 1 - empty!(pool._untracked_fixed_masks) - push!(pool._untracked_fixed_masks, UInt16(0)) # Sentinel: no bits set - empty!(pool._untracked_has_others) - push!(pool._untracked_has_others, false) # Sentinel: no others + empty!(pool._touched_type_masks) + push!(pool._touched_type_masks, UInt16(0)) # Sentinel: no bits set + empty!(pool._touched_has_others) + push!(pool._touched_has_others, false) # Sentinel: no others return pool end diff --git a/ext/AdaptiveArrayPoolsCUDAExt/types.jl b/ext/AdaptiveArrayPoolsCUDAExt/types.jl index 056bd18..a3673f2 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/types.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/types.jl @@ -112,8 +112,8 @@ mutable struct CuAdaptiveArrayPool <: AbstractArrayPool # State management (same as CPU) _current_depth::Int - _untracked_fixed_masks::Vector{UInt16} # 
Per-depth: which fixed slots had untracked acquires - _untracked_has_others::Vector{Bool} # Per-depth: any non-fixed-slot untracked acquire? + _touched_type_masks::Vector{UInt16} # Per-depth: which fixed slots were touched + mode flags + _touched_has_others::Vector{Bool} # Per-depth: any non-fixed-slot type touched? # Device tracking (safety) device_id::Int @@ -132,8 +132,8 @@ function CuAdaptiveArrayPool() CuTypedPool{Bool}(), IdDict{DataType, Any}(), 1, # _current_depth (1 = global scope) - [UInt16(0)], # _untracked_fixed_masks: sentinel (no bits set) - [false], # _untracked_has_others: sentinel (no others) + [UInt16(0)], # _touched_type_masks: sentinel (no bits set) + [false], # _touched_has_others: sentinel (no others) CUDA.deviceid(dev) # Use public API ) end From 6ac4b23381352a4b0fd37fcccc177833be268c36 Mon Sep 17 00:00:00 2001 From: Min-Gu Yoo Date: Wed, 18 Feb 2026 12:28:25 -0800 Subject: [PATCH 4/5] refactor(tests,docs): update all tests and docs for naming changes - Update imports, function calls, field refs, and string assertions in tests - Update architecture docs with new function names and diagrams - Add naming_refactor_proposal.md as design record --- docs/design/naming_refactor_proposal.md | 429 +++++++++++++++++++++++ docs/src/architecture/macro-internals.md | 30 +- test/test_backend_macro_expansion.jl | 4 +- test/test_macro_expansion.jl | 42 +-- test/test_macro_internals.jl | 56 +-- test/test_state.jl | 384 ++++++++++---------- 6 files changed, 687 insertions(+), 258 deletions(-) create mode 100644 docs/design/naming_refactor_proposal.md diff --git a/docs/design/naming_refactor_proposal.md b/docs/design/naming_refactor_proposal.md new file mode 100644 index 0000000..28085c6 --- /dev/null +++ b/docs/design/naming_refactor_proposal.md @@ -0,0 +1,429 @@ +# Naming & Refactoring Proposal: Post-Optimization Cleanup + +> **Status**: Brainstorm / RFC +> **Context**: After Phase 3 (dynamic-selective) and Phase 5 (typed-fallback) optimizations, +> many 
internal function names still reflect the original "untracked acquire detection" mental +> model. This document proposes renaming to match the evolved architecture. + +--- + +## 1. Current Architecture: Three Execution Modes + +The `@with_pool` macro generates one of three checkpoint/rewind strategies: + +| Mode | Checkpoint | Acquire | Rewind | When | +|------|-----------|---------|--------|------| +| **Typed** | `checkpoint!(pool, T...)` | `_acquire_impl!(pool, T, ...)` | `rewind!(pool, T...)` | All types statically known | +| **Dynamic-Selective** | `_depth_only_checkpoint!(pool)` | `acquire!(pool, T, ...)` *(unchanged)* | `_dynamic_selective_rewind!(pool)` | Types only known at runtime | +| **Full** (manual) | `checkpoint!(pool)` | `acquire!(pool, T, ...)` | `rewind!(pool)` | User calls manually | + +Additionally, the **Typed** mode has a runtime fallback: +- If `_can_use_typed_path()` is false → `_typed_checkpoint_with_lazy!` + `_typed_selective_rewind!` + +--- + +## 2. Naming Tensions + +### 2.1 `_mark_untracked!` — The Core Irony + +**Current name**: `_mark_untracked!` +**What it does**: Records type usage in bitmask. Triggers lazy checkpoint on first touch. + +The word "untracked" is **doubly misleading**: +1. The function **tracks** type usage (sets bitmask bits) +2. The data it records is used to **selectively rewind** (the opposite of "untracked") + +The original semantics: "mark that this acquire happened in a path the macro doesn't track." +The actual semantics now: "record that type T was touched at this depth, and lazily checkpoint if needed." 
+ +#### Candidates + +| Candidate | Pros | Cons | +|-----------|------|------| +| `_record_type_touch!` | "touch" captures first-touch/lazy-checkpoint semantics; action-oriented | Doesn't convey the bitmask mechanism | +| `_track_type_usage!` | Most literal description of what happens | "track" is overloaded (macro "tracks" types too) | +| `_notify_acquire!` | Observer-pattern feel; captures side-effect (lazy checkpoint) | Too generic; doesn't convey type-specificity | +| `_register_type!` | Clean, idiomatic ("register X in a registry") | Doesn't convey the "at this depth" scoping | +| `_touch_type!` | Shortest; "touch" is a Unix/DB idiom for "first access triggers action" | Might be too terse for complex semantics | +| `_mark_type_used!` | Simple and accurate | Still has "mark" which is vague | + +### 2.2 `_acquire_impl!` — The "Fast Path" Naming + +**Current name**: `_acquire_impl!` +**What it does**: Core acquire logic without type tracking. Called by macro-transformed code. + +The `_impl!` suffix is conventional but **non-descriptive**. It doesn't convey *why* this variant +exists (to skip tracking overhead when the macro already knows the types). + +#### Candidates + +| Candidate | Pros | Cons | +|-----------|------|------| +| `_acquire_direct!` | "direct" = no intermediary tracking step | Might imply "direct memory access" | +| `_acquire_bare!` | "bare" = stripped of wrapper logic | Non-standard terminology | +| `_acquire_core!` | "core" = the essential operation | Generic; doesn't explain *why* it's separate | +| `_acquire_scoped!` | "scoped" = macro already manages this scope | Misleading — the function itself isn't scoped | +| Keep `_acquire_impl!` | Well-understood `_impl` convention in Julia | Doesn't explain the tracking bypass | + +### 2.3 `_untracked_fixed_masks` / `_untracked_has_others` — Field Names + +**Current names**: Pool fields storing per-depth bitmask data. +**What they store**: Which types were acquired at each depth (for selective rewind). 
+ +These fields are the **runtime type tracking** data structure, yet named "untracked." + +#### Candidates + +| Candidate | Pros | Cons | +|-----------|------|------| +| `_touched_fixed_masks` / `_touched_has_others` | "touched" matches first-touch semantics | Might confuse with "dirty" bit patterns | +| `_used_fixed_masks` / `_used_has_others` | Simplest, most literal | Too generic | +| `_acquired_fixed_masks` / `_acquired_has_others` | Directly describes the event (acquire happened) | Slightly long | +| `_runtime_fixed_masks` / `_runtime_has_others` | Contrasts with "compile-time" tracked types | Doesn't describe *what* is tracked | +| Keep current names | Consistency with existing code, comments, tests | Perpetuates the "untracked" confusion | + +### 2.4 Mode-Specific Functions — Consistency + +The three modes don't follow a consistent naming pattern: + +``` +Typed: checkpoint!(pool, T...) / rewind!(pool, T...) + + fallback: _typed_checkpoint_with_lazy!(pool, T...) / _typed_selective_rewind!(pool, mask) +Dynamic-Selective: _depth_only_checkpoint!(pool) / _dynamic_selective_rewind!(pool) +Full: checkpoint!(pool) / rewind!(pool) +``` + +**Observation**: The "typed" fallback functions have long compound names that mix the *mode* +(`typed`) with the *mechanism* (`with_lazy`, `selective`). + +#### Possible Consistent Scheme + +Option A — Mode prefix (proposed name → current function): +``` +_typed_checkpoint! → checkpoint!(pool, T...) (already clean) +_typed_lazy_checkpoint! → _typed_checkpoint_with_lazy!(pool, T...) +_typed_selective_rewind! → _typed_selective_rewind!(pool, mask) (already clean) +_dynamic_checkpoint! → _depth_only_checkpoint!(pool) +_dynamic_rewind! → _dynamic_selective_rewind!(pool) +``` + +Option B — Mechanism suffix: +``` +_checkpoint_typed! (checkpoint the typed pools) +_checkpoint_lazy! (checkpoint with lazy first-touch) +_checkpoint_depth_only! (only increment depth) +_rewind_typed! (rewind typed pools) +_rewind_selective! (rewind based on bitmask) +_rewind_dynamic! 
(dynamic bitmask rewind) +``` + +--- + +## 3. Holistic Renaming Proposals + +### Proposal A: "Touch" Metaphor (First-Touch Semantics) + +The architecture's key insight is **first-touch tracking**: when a type is first used at a depth, +it gets recorded (and lazily checkpointed). The "touch" metaphor captures this cleanly. + +``` +# Type tracking +_mark_untracked!(pool, T) → _touch_type!(pool, T) + +# Pool fields +_untracked_fixed_masks → _touched_fixed_masks +_untracked_has_others → _touched_has_others + +# Acquire internals (keep _impl convention) +_acquire_impl!(pool, T, n) → (keep as is, or _acquire_core!) +_unsafe_acquire_impl!(...) → (keep as is, or _unsafe_acquire_core!) + +# Dynamic mode (rename for symmetry) +_depth_only_checkpoint!(pool) → _lazy_checkpoint!(pool) # "lazy" = defers to first touch +_dynamic_selective_rewind!(pool) → _lazy_rewind!(pool) # symmetric with checkpoint + +# Typed fallback (simplify) +_typed_checkpoint_with_lazy! → _typed_lazy_checkpoint! # adjective before noun +_typed_selective_rewind! → _typed_lazy_rewind! # symmetric pair + +# Macro generators (follow function names) +_generate_dynamic_selective_checkpoint_call → _generate_lazy_checkpoint_call +_generate_dynamic_selective_rewind_call → _generate_lazy_rewind_call + +# Guards +_can_use_typed_path → (keep as is — already clear) +_tracked_mask_for_types → (keep as is — already clear) +``` + +**Pros**: Concise, consistent metaphor, captures the core mechanism. +**Cons**: "lazy" is overloaded in CS (lazy evaluation, lazy initialization). + +### Proposal B: "Record/Direct" Pair (Action-Based) + +Focus on what each function *does* as an action: + +``` +# Type tracking +_mark_untracked!(pool, T) → _record_type_touch!(pool, T) + +# Pool fields +_untracked_fixed_masks → _acquired_type_masks +_untracked_has_others → _acquired_has_others + +# Acquire internals +_acquire_impl!(pool, T, n) → _acquire_direct!(pool, T, n) # "direct" = no recording +_unsafe_acquire_impl!(...) 
→ _unsafe_acquire_direct!(...) + +# All convenience _impl! follow: +_zeros_impl! → _zeros_direct! +_ones_impl! → _ones_direct! +_similar_impl! → _similar_direct! + +# Dynamic mode +_depth_only_checkpoint!(pool) → _deferred_checkpoint!(pool) # "deferred" = save later +_dynamic_selective_rewind!(pool) → _deferred_selective_rewind!(pool) # rewind what was deferred + +# Typed fallback +_typed_checkpoint_with_lazy! → _typed_deferred_checkpoint! # typed + deferred for extras +_typed_selective_rewind! → (keep — already descriptive) + +# Macro generators +_generate_dynamic_selective_* → _generate_deferred_* +``` + +**Pros**: Very descriptive, each name tells you exactly what happens. +**Cons**: Longer names, "deferred" is less intuitive than "lazy." + +### Proposal C: "Scope" Metaphor (Inside/Outside Macro Scope) + +Frame the naming around the key architectural distinction: code inside `@with_pool` scope +(macro-managed) vs outside (self-tracking): + +``` +# Type tracking — called from "outside scope" or "dynamic scope" +_mark_untracked!(pool, T) → _track_type!(pool, T) + +# Pool fields +_untracked_fixed_masks → _scope_type_masks # per-scope tracking +_untracked_has_others → _scope_has_others + +# Acquire internals — used by "in-scope" (macro-managed) code +_acquire_impl!(pool, T, n) → _scoped_acquire!(pool, T, n) # "scoped" = macro handles tracking +_unsafe_acquire_impl!(...) → _scoped_unsafe_acquire!(...) + +# Dynamic mode +_depth_only_checkpoint!(pool) → _open_scope!(pool) # "open" a new tracking scope +_dynamic_selective_rewind!(pool) → _close_scope!(pool) # "close" and rewind the scope + +# Typed fallback +_typed_checkpoint_with_lazy! → _open_typed_scope_with_fallback! +_typed_selective_rewind! → _close_typed_scope_with_fallback! + +# Guards +_can_use_typed_path → _scope_is_typed_only +``` + +**Pros**: Captures the architectural mental model cleanly. +**Cons**: "scope" semantics might clash with Julia's lexical scoping concepts. 
+ +### Proposal D: Minimal Rename (Conservative) + +Only rename the most confusing items, keep everything else: + +``` +# The one truly misleading name: +_mark_untracked!(pool, T) → _record_type_touch!(pool, T) + +# The confusing fields: +_untracked_fixed_masks → _touched_type_masks +_untracked_has_others → _touched_has_others + +# Everything else stays as-is +_acquire_impl! → (keep) +_depth_only_checkpoint! → (keep) +_dynamic_selective_rewind! → (keep) +_typed_checkpoint_with_lazy! → (keep) +_typed_selective_rewind! → (keep) +``` + +**Pros**: Minimal churn, only fixes the genuinely confusing names. +**Cons**: Misses the opportunity for holistic consistency. + +--- + +## 4. Cross-Cutting Concerns + +### 4.1 Public API — Should NOT Change + +These are stable public APIs and should **never** be renamed: +- `acquire!`, `unsafe_acquire!`, `acquire_view!`, `acquire_array!` +- `checkpoint!`, `rewind!`, `reset!`, `empty!` +- `zeros!`, `ones!`, `trues!`, `falses!`, `similar!` +- `@with_pool`, `@maybe_with_pool` +- `get_task_local_pool` + +### 4.2 CUDA Extension Parity + +Any rename must be mirrored in: +- `ext/AdaptiveArrayPoolsCUDAExt/types.jl` +- `ext/AdaptiveArrayPoolsCUDAExt/state.jl` +- `ext/AdaptiveArrayPoolsCUDAExt/acquire.jl` + +### 4.3 Test Impact + +Renaming internal functions affects: +- `test/test_macro_internals.jl` (directly calls `_depth_only_checkpoint!`, `_dynamic_selective_rewind!`, etc.) +- `test/test_state.jl` (checkpoint/rewind tests) +- `test/test_macro_expansion.jl` (checks expanded code contains specific function names) +- Any benchmarks referencing internal functions + +### 4.4 Bit 15 / Bit 14 Constants + +Currently the mode flags are raw hex literals (`0x8000`, `0x4000`).
A related cleanup: +```julia +const _LAZY_MODE_BIT = UInt16(0x8000) # bit 15 (lazy / dynamic-selective mode) +const _TYPED_LAZY_BIT = UInt16(0x4000) # bit 14 (typed-lazy mode) +const _MODE_BITS_MASK = UInt16(0xC000) # bits 14-15 +const _TYPE_BITS_MASK = UInt16(0x00FF) # bits 0-7 +``` + +These constants would replace scattered magic numbers throughout `state.jl` and `acquire.jl`. + +### 4.5 The `_impl!` Convention — Keep or Replace? + +The `_impl!` suffix is a **widely understood Julia convention** (e.g., `Base._similar_impl`). +Replacing it with `_direct!`, `_core!`, or `_scoped!` trades familiarity for specificity. + +Arguments for keeping `_impl!`: +- Julia developers immediately understand it as "internal implementation" +- No ambiguity about the function's role as a building block +- Grep-friendly: `_*_impl!` finds all implementation functions + +Arguments for replacing: +- `_impl!` doesn't explain *why* the split exists (tracking bypass) +- New developers might not realize the critical difference between `acquire!` and `_acquire_impl!` + +--- + +## 5.
Recommended Changes (for Discussion) + +### Tier 1: High Impact, Low Risk (Do First) + +| Current | Proposed | Rationale | +|---------|----------|-----------| +| `_mark_untracked!` | `_record_type_touch!` | Most misleading name; "touch" captures first-touch + lazy checkpoint | +| `_untracked_fixed_masks` | `_touched_type_masks` | Field stores which types were *touched*, not which are "untracked" | +| `_untracked_has_others` | `_touched_has_others` | Consistent with above | + +### Tier 2: Medium Impact, Medium Risk + +| Current | Proposed | Rationale | +|---------|----------|-----------| +| `_depth_only_checkpoint!` | `_lazy_checkpoint!` | "lazy" captures the deferred-to-first-touch semantics | +| `_dynamic_selective_rewind!` | `_lazy_rewind!` | Symmetric with `_lazy_checkpoint!` | +| `_typed_checkpoint_with_lazy!` | `_typed_lazy_checkpoint!` | Cleaner word order | +| Magic numbers `0x8000`, `0x4000`, `0xC000`, `0x00FF` | Named constants (see 4.4) | Self-documenting code | + +### Tier 3: Low Impact, Higher Risk (Optional) + +| Current | Proposed | Rationale | +|---------|----------|-----------| +| `_acquire_impl!` | Keep as `_acquire_impl!` | Julia convention, well-understood | +| `_generate_dynamic_selective_*` | `_generate_lazy_*` | Follows Tier 2 rename | +| `_typed_selective_rewind!` | `_typed_lazy_rewind!` | Consistent with pair | + +--- + +## 6. Alternative: Do Nothing + +**Case for not renaming**: The current names work. They're documented. Tests pass. +"Untracked" has a clear historical meaning in the codebase, and commit history explains +the evolution. Renaming has a nonzero risk of introducing bugs (missed references, +CUDA extension drift) and makes git blame harder to follow. + +**Counter-argument**: The package is pre-1.0 and has few external users. Now is the +cheapest time to fix naming before the API surface solidifies. + +--- + +## 7. Open Questions + +1. 
**Should `_impl!` functions be renamed?** They work fine as a convention, but + `_acquire_direct!` or `_acquire_core!` would be more self-documenting. + +2. **Is "lazy" the right word?** In Julia, `lazy` is associated with `Lazy.jl` and + lazy evaluation. "Deferred" is more precise but longer. + +3. **Should mode names be formalized?** Currently modes are described in comments + as "typed", "dynamic-selective", "full". Should there be an enum or named constants? + +4. **How deep should the rename go?** Renaming `_mark_untracked!` alone fixes 80% + of the confusion. Is a holistic rename worth the churn? + +5. **Should the bitmask `_touched_type_masks` also track mode bits?** Currently + bits 0-7 = types, bits 14-15 = mode flags, all in the same field. Should mode + flags be a separate field for clarity? + +--- + +## Appendix: Complete Current → Proposed Mapping (Proposal A: "Touch/Lazy") + +``` +# === Type Tracking === +_mark_untracked!(pool, T) → _record_type_touch!(pool, T) + +# === Pool Fields === +_untracked_fixed_masks → _touched_type_masks +_untracked_has_others → _touched_has_others + +# === Acquire (keep _impl convention) === +_acquire_impl!(pool, T, n) → (no change) +_unsafe_acquire_impl!(pool, T, n) → (no change) +_zeros_impl!(pool, T, dims...) → (no change) +_ones_impl!(pool, T, dims...) → (no change) +_similar_impl!(pool, T, dims...) → (no change) + +# === Dynamic-Selective Mode === +_depth_only_checkpoint!(pool) → _lazy_checkpoint!(pool) +_dynamic_selective_rewind!(pool) → _lazy_rewind!(pool) + +# === Typed Fallback === +_typed_checkpoint_with_lazy!(pool, T...) → _typed_lazy_checkpoint!(pool, T...) +_typed_selective_rewind!(pool, mask) → _typed_lazy_rewind!(pool, mask) + +# === Selective Rewind Helper === +_selective_rewind_fixed_slots!(pool, mask) → (no change — already descriptive) + +# === Guards === +_can_use_typed_path(pool, mask) → (no change) +_tracked_mask_for_types(T...) 
→ (no change) + +# === Macro Generators === +_generate_dynamic_selective_checkpoint_call → _generate_lazy_checkpoint_call +_generate_dynamic_selective_rewind_call → _generate_lazy_rewind_call +_generate_typed_checkpoint_call → (no change) +_generate_typed_rewind_call → (no change) + +# === Constants (new) === +(raw 0x8000) → _LAZY_MODE_BIT (or _DYNAMIC_MODE_BIT) +(raw 0x4000) → _TYPED_LAZY_BIT +(raw 0xC000) → _MODE_BITS_MASK +(raw 0x00FF) → _TYPE_BITS_MASK +``` + +### Files Affected + +| File | Changes | +|------|---------| +| `src/types.jl` | Field renames: `_untracked_*` → `_touched_*` | +| `src/acquire.jl` | `_mark_untracked!` → `_record_type_touch!` | +| `src/state.jl` | Mode functions, field references, constants | +| `src/macros.jl` | Generator function renames, field references | +| `src/convenience.jl` | `_mark_untracked!` calls | +| `src/task_local_pool.jl` | (unlikely changes) | +| `src/utils.jl` | (unlikely changes) | +| `ext/AdaptiveArrayPoolsCUDAExt/*.jl` | Mirror all renames | +| `test/test_macro_internals.jl` | Direct calls to renamed functions | +| `test/test_state.jl` | Field references | +| `test/test_macro_expansion.jl` | String matching on expanded names | +| `test/test_allocation.jl` | (unlikely changes) | diff --git a/docs/src/architecture/macro-internals.md b/docs/src/architecture/macro-internals.md index 4c2c920..d983237 100644 --- a/docs/src/architecture/macro-internals.md +++ b/docs/src/architecture/macro-internals.md @@ -115,24 +115,24 @@ end # If only checkpoint!(pool, Int64), Float64 arrays won't be rewound!
``` -### The Solution: Bitmask-Based Untracked Tracking +### The Solution: Bitmask-Based Type Touch Tracking -Every `acquire!` call (and convenience functions) marks itself as "untracked" with type-specific bitmask information: +Every `acquire!` call (and convenience functions) records the type touch with type-specific bitmask information: ```julia # Public API (called from user code outside macro) @inline function acquire!(pool, ::Type{T}, n::Int) where {T} - _mark_untracked!(pool, T) # ← Sets type-specific bitmask! + _record_type_touch!(pool, T) # ← Records type-specific bitmask! _acquire_impl!(pool, T, n) end -# Macro-transformed calls skip the marking +# Macro-transformed calls skip the recording # (because macro already knows about them) -_acquire_impl!(pool, T, n) # ← No marking +_acquire_impl!(pool, T, n) # ← No recording ``` Each fixed-slot type maps to a bit in a `UInt16` bitmask via `_fixed_slot_bit(T)`. -Non-fixed-slot types set a separate `_untracked_has_others` flag. +Non-fixed-slot types set a separate `_touched_has_others` flag. ### Flow Diagram @@ -144,7 +144,7 @@ Non-fixed-slot types set a separate `_untracked_has_others` flag. │ A = _acquire_impl!(...) (macro-transformed, no mark) │ B = helper!(pool) │ └─► zeros!(pool, Float64, N) - │ └─► _mark_untracked!(pool, Float64) + │ └─► _record_type_touch!(pool, Float64) │ masks[2] |= 0x0001 (Float64 bit) ←───┐ │ │ │ ... more code ... │ @@ -161,9 +161,9 @@ end ### Why This Works -1. **Macro-tracked calls**: Transformed to `_acquire_impl!` → no bitmask mark → typed path -2. **Untracked calls**: Use public API → sets type-specific bitmask → subset check at rewind -3. **Subset optimization**: If untracked types are a subset of tracked types, the typed path is still safe +1. **Macro-tracked calls**: Transformed to `_acquire_impl!` → no bitmask touch → typed path +2. **External calls**: Use public API → records type-specific bitmask → subset check at rewind +3. 
**Subset optimization**: If touched types are a subset of tracked types, the typed path is still safe 4. **Result**: Always safe, with finer-grained optimization than a single boolean flag ## Nested `@with_pool` Handling @@ -191,14 +191,14 @@ end depth: 2 → 1, bitmask checked struct AdaptiveArrayPool # ... type pools ... _current_depth::Int # Current scope depth (1 = global) - _untracked_fixed_masks::Vector{UInt16} # Per-depth: which fixed slots untracked - _untracked_has_others::Vector{Bool} # Per-depth: any non-fixed-slot untracked + _touched_type_masks::Vector{UInt16} # Per-depth: which fixed slots were touched + _touched_has_others::Vector{Bool} # Per-depth: any non-fixed-slot type touched end # Initialized with sentinel: _current_depth = 1 # Global scope -_untracked_fixed_masks = [UInt16(0)] # Sentinel for depth=1 -_untracked_has_others = [false] # Sentinel for depth=1 +_touched_type_masks = [UInt16(0)] # Sentinel for depth=1 +_touched_has_others = [false] # Sentinel for depth=1 ``` ## Performance Impact @@ -256,7 +256,7 @@ end | `_extract_acquire_types(expr, pool_name)` | AST walk to find types | | `_filter_static_types(types, local_vars)` | Filter out locally-defined types | | `_transform_acquire_calls(expr, pool_name)` | Replace `acquire!` → `_acquire_impl!` | -| `_mark_untracked!(pool, T)` | Set type-specific bitmask for current depth | +| `_record_type_touch!(pool, T)` | Record type touch in bitmask for current depth | | `_can_use_typed_path(pool, mask)` | Bitmask subset check for typed vs full path | | `_tracked_mask_for_types(T...)` | Compile-time bitmask for tracked types | | `_generate_typed_checkpoint_call(pool, types)` | Generate bitmask-aware checkpoint | diff --git a/test/test_backend_macro_expansion.jl b/test/test_backend_macro_expansion.jl index f6bd192..9721cb0 100644 --- a/test/test_backend_macro_expansion.jl +++ b/test/test_backend_macro_expansion.jl @@ -58,8 +58,8 @@ @test occursin("_get_pool_for_backend", expr_str) @test 
occursin("Val{:cuda}", expr_str) # Empty body → use_typed=false → dynamic selective mode - @test occursin("_depth_only_checkpoint!", expr_str) - @test occursin("_dynamic_selective_rewind!", expr_str) + @test occursin("_lazy_checkpoint!", expr_str) + @test occursin("_lazy_rewind!", expr_str) end @testset "Type extraction" begin diff --git a/test/test_macro_expansion.jl b/test/test_macro_expansion.jl index dd8ef8a..91cdc33 100644 --- a/test/test_macro_expansion.jl +++ b/test/test_macro_expansion.jl @@ -102,8 +102,8 @@ # Should still have pool management (with gensym name). # Empty body → no acquire types → use_typed=false → dynamic selective mode. @test occursin("get_task_local_pool", expr_str) - @test occursin("_depth_only_checkpoint!", expr_str) - @test occursin("_dynamic_selective_rewind!", expr_str) + @test occursin("_lazy_checkpoint!", expr_str) + @test occursin("_lazy_rewind!", expr_str) end # Test @maybe_with_pool 1-arg @@ -149,9 +149,9 @@ expr_str = string(expr) # local_arr is detected as local → falls back to dynamic selective mode. - # Checkpoint is lightweight (_depth_only_checkpoint!), rewind is selective. - @test occursin("_depth_only_checkpoint!", expr_str) - @test occursin("_dynamic_selective_rewind!", expr_str) + # Checkpoint is lightweight (_lazy_checkpoint!), rewind is selective. + @test occursin("_lazy_checkpoint!", expr_str) + @test occursin("_lazy_rewind!", expr_str) # In dynamic mode acquire! is NOT transformed to _acquire_impl! @test !occursin("_acquire_impl!", expr_str) end @@ -789,9 +789,9 @@ end @testset "Dynamic selective mode: macro expansion" begin - @testset "use_typed=false generates _depth_only_checkpoint! (dynamic selective)" begin + @testset "use_typed=false generates _lazy_checkpoint! (dynamic selective)" begin # Phase 3: when the macro cannot extract static types (local var), it uses - # _depth_only_checkpoint! instead of a full checkpoint of all 8 slots. + # _lazy_checkpoint! instead of a full checkpoint of all 8 slots. 
expr = @macroexpand @with_pool pool begin local_arr = rand(10) v = acquire!(pool, local_arr) # eltype(local_arr) is dynamic → use_typed=false @@ -801,13 +801,13 @@ end expr_str = string(expr) # Phase 3 behavior: depth-only checkpoint, selective rewind - @test occursin("_depth_only_checkpoint!", expr_str) + @test occursin("_lazy_checkpoint!", expr_str) @test !occursin("_can_use_typed_path", expr_str) # only in typed path end @testset "use_typed=false does NOT transform acquire! → _acquire_impl! (dynamic mode)" begin # Phase 3: _transform_acquire_calls is skipped for dynamic-selective mode. - # acquire! stays as-is so _mark_untracked! is called and the selective rewind + # acquire! stays as-is so _record_type_touch! is called and the selective rewind # can see which types were actually touched. expr = @macroexpand @with_pool pool begin local_arr = rand(10) @@ -838,8 +838,8 @@ end # RED tests: desired macro behavior after Phase 3. # —————————————————————————————————————————————————————————————— - @testset "GREEN: use_typed=false uses _depth_only_checkpoint!" begin - # Phase 3 complete: dynamic path emits _depth_only_checkpoint! instead of + @testset "GREEN: use_typed=false uses _lazy_checkpoint!" begin + # Phase 3 complete: dynamic path emits _lazy_checkpoint! instead of # the full checkpoint!(pool). This avoids the ~540ns full checkpoint cost. expr = @macroexpand @with_pool pool begin local_arr = rand(10) @@ -849,13 +849,13 @@ end expr_str = string(expr) - @test occursin("_depth_only_checkpoint!", expr_str) + @test occursin("_lazy_checkpoint!", expr_str) # Full (eager) checkpoint must NOT appear; depth-only is the entry point @test !occursin("AdaptiveArrayPools.checkpoint!", expr_str) end - @testset "GREEN: use_typed=false uses _dynamic_selective_rewind!" begin - # Phase 3 complete: dynamic rewind path uses _dynamic_selective_rewind!, + @testset "GREEN: use_typed=false uses _lazy_rewind!" 
begin + # Phase 3 complete: dynamic rewind path uses _lazy_rewind!, # which selectively rewinds only typed pools that were actually touched. expr = @macroexpand @with_pool pool begin local_arr = rand(10) @@ -865,7 +865,7 @@ end expr_str = string(expr) - @test occursin("_dynamic_selective_rewind!", expr_str) + @test occursin("_lazy_rewind!", expr_str) # Full rewind must NOT appear; selective rewind is the only rewind call @test !occursin("AdaptiveArrayPools.rewind!", expr_str) end @@ -874,9 +874,9 @@ end # Phase 5: Typed-Fallback Optimization expansion tests (RED) # ========================================================================= - @testset "Phase 5: use_typed=true false-branch emits _typed_checkpoint_with_lazy!" begin + @testset "Phase 5: use_typed=true false-branch emits _typed_lazy_checkpoint!" begin # After Phase 5: when _can_use_typed_path=false at runtime, the checkpoint - # side calls _typed_checkpoint_with_lazy! instead of full checkpoint!(pool). + # side calls _typed_lazy_checkpoint! instead of full checkpoint!(pool). expr = @macroexpand @with_pool pool begin v = acquire!(pool, Float64, 10) # static type Float64 → use_typed=true v .= 1.0 @@ -884,13 +884,13 @@ end expr_str = string(expr) # Phase 5: else-branch uses lazy checkpoint - @test occursin("_typed_checkpoint_with_lazy!", expr_str) + @test occursin("_typed_lazy_checkpoint!", expr_str) # Full no-arg checkpoint!(pool) must NOT appear @test !occursin("AdaptiveArrayPools.checkpoint!(pool)", expr_str) end - @testset "Phase 5: use_typed=true false-branch emits _typed_selective_rewind!" begin - # After Phase 5: the rewind else-branch uses _typed_selective_rewind! instead of full rewind!(pool). + @testset "Phase 5: use_typed=true false-branch emits _typed_lazy_rewind!" begin + # After Phase 5: the rewind else-branch uses _typed_lazy_rewind! instead of full rewind!(pool). 
expr = @macroexpand @with_pool pool begin v = acquire!(pool, Float64, 10) v .= 1.0 @@ -898,7 +898,7 @@ end expr_str = string(expr) # Phase 5: else-branch uses selective rewind - @test occursin("_typed_selective_rewind!", expr_str) + @test occursin("_typed_lazy_rewind!", expr_str) # Full no-arg rewind!(pool) must NOT appear @test !occursin("AdaptiveArrayPools.rewind!(pool)", expr_str) end diff --git a/test/test_macro_internals.jl b/test/test_macro_internals.jl index e0ccf7b..ca3faeb 100644 --- a/test/test_macro_internals.jl +++ b/test/test_macro_internals.jl @@ -6,8 +6,8 @@ # to ensure correct type extraction and filtering for optimized checkpoint/rewind. import AdaptiveArrayPools: _extract_local_assignments, _filter_static_types, _extract_acquire_types, _uses_local_var -import AdaptiveArrayPools: _depth_only_checkpoint!, _dynamic_selective_rewind! -import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind!, _tracked_mask_for_types +import AdaptiveArrayPools: _lazy_checkpoint!, _lazy_rewind! +import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracked_mask_for_types @testset "Macro Internals" begin @@ -1420,9 +1420,9 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # ========================================================================== # Dynamic selective mode: runtime correctness - # Phase 3: ensure n_active == 0 after _dynamic_selective_rewind! exits scope. + # Phase 3: ensure n_active == 0 after _lazy_rewind! exits scope. # - # NOTE: Uses _depth_only_checkpoint! + _dynamic_selective_rewind! directly + # NOTE: Uses _lazy_checkpoint! + _lazy_rewind! directly # with explicit fresh AdaptiveArrayPool() instances to avoid task-local pool # contamination from other tests. This mirrors what the macro generates for # the use_typed=false path, testing the state layer in isolation. 
@@ -1432,49 +1432,49 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @testset "Single type (Float64): n_active restored after dynamic scope" begin # Simulates: @with_pool pool begin; v = acquire!(pool, eltype(arr), 10); end - # where arr is a local var → macro emits _depth_only_checkpoint! + - # _dynamic_selective_rewind! (no _acquire_impl! transformation). + # where arr is a local var → macro emits _lazy_checkpoint! + + # _lazy_rewind! (no _acquire_impl! transformation). pool = AdaptiveArrayPool() local_arr = rand(Float64, 10) - _depth_only_checkpoint!(pool) + _lazy_checkpoint!(pool) try - v = acquire!(pool, eltype(local_arr), 10) # _mark_untracked!(pool, Float64) + v = acquire!(pool, eltype(local_arr), 10) # _record_type_touch!(pool, Float64) v .= 1.0 @test pool.float64.n_active == 1 finally - _dynamic_selective_rewind!(pool) + _lazy_rewind!(pool) end @test pool.float64.n_active == 0 end @testset "similar!(pool, Float32 ref): n_active restored after dynamic scope" begin - # similar! calls _mark_untracked!(pool, eltype(ref)) directly, so the + # similar! calls _record_type_touch!(pool, eltype(ref)) directly, so the # dynamic selective rewind sees the type even without acquire! wrapping. pool = AdaptiveArrayPool() ref = rand(Float32, 5, 5) - _depth_only_checkpoint!(pool) + _lazy_checkpoint!(pool) try - m = similar!(pool, ref) # _mark_untracked!(pool, Float32) + _acquire_impl! + m = similar!(pool, ref) # _record_type_touch!(pool, Float32) + _acquire_impl! m .= 0.0f0 @test pool.float32.n_active == 1 finally - _dynamic_selective_rewind!(pool) + _lazy_rewind!(pool) end @test pool.float32.n_active == 0 end @testset "Mixed types (Float64 + Float32): both n_active restored" begin # Simulates dynamic-mode block with two types: macro does NOT transform - # acquire! calls, so _mark_untracked! is called for each type via acquire!. + # acquire! calls, so _record_type_touch! is called for each type via acquire!. 
pool = AdaptiveArrayPool() local_arr = rand(Float32, 8) - _depth_only_checkpoint!(pool) + _lazy_checkpoint!(pool) try - v1 = acquire!(pool, Float64, 10) # _mark_untracked!(pool, Float64) - v2 = acquire!(pool, eltype(local_arr), 8) # _mark_untracked!(pool, Float32) + v1 = acquire!(pool, Float64, 10) # _record_type_touch!(pool, Float64) + v2 = acquire!(pool, eltype(local_arr), 8) # _record_type_touch!(pool, Float32) v1 .= 0.0; v2 .= 0.0f0 finally - _dynamic_selective_rewind!(pool) + _lazy_rewind!(pool) end @test pool.float64.n_active == 0 @test pool.float32.n_active == 0 @@ -1484,26 +1484,26 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Inner scope must only rewind its own depth entry, leaving the parent # scope's n_active intact until the outer scope calls its own rewind. pool = AdaptiveArrayPool() - _depth_only_checkpoint!(pool) # outer scope, depth 2 + _lazy_checkpoint!(pool) # outer scope, depth 2 try outer_v = acquire!(pool, Float64, 10) # lazy checkpoint for float64 outer_v .= 3.14 @test pool.float64.n_active == 1 - _depth_only_checkpoint!(pool) # inner scope, depth 3 + _lazy_checkpoint!(pool) # inner scope, depth 3 try inner_v = acquire!(pool, Float64, 5) # lazy checkpoint (first touch at depth 3) inner_v .= 0.0 @test all(outer_v .== 3.14) # parent array must survive @test pool.float64.n_active == 2 finally - _dynamic_selective_rewind!(pool) # inner rewind: depth 3 → 2 + _lazy_rewind!(pool) # inner rewind: depth 3 → 2 end @test all(outer_v .== 3.14) # outer_v survives inner rewind @test pool.float64.n_active == 1 # only outer_v remains finally - _dynamic_selective_rewind!(pool) # outer rewind: depth 2 → 1 + _lazy_rewind!(pool) # outer rewind: depth 2 → 1 end @test pool.float64.n_active == 0 end @@ -1513,12 +1513,12 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # (NOT pool.bits, which is for BitArrays acquired via acquire!(pool, Bit, ...)) pool = AdaptiveArrayPool() ref_bv = trues(64) 
# BitVector, eltype = Bool - _depth_only_checkpoint!(pool) + _lazy_checkpoint!(pool) try - v = similar!(pool, ref_bv) # _mark_untracked!(pool, Bool) + v = similar!(pool, ref_bv) # _record_type_touch!(pool, Bool) v .= false finally - _dynamic_selective_rewind!(pool) + _lazy_rewind!(pool) end @test pool.bool.n_active == 0 end @@ -1545,8 +1545,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test pool.int64.n_active == 1 # Child scope: typed lazy checkpoint (Float64 tracked, but helper touches Int64) - # Simulates: _can_use_typed_path=false, macro emits _typed_checkpoint_with_lazy! - _typed_checkpoint_with_lazy!(pool, Float64) + # Simulates: _can_use_typed_path=false, macro emits _typed_lazy_checkpoint! + _typed_lazy_checkpoint!(pool, Float64) try child_float = acquire!(pool, Float64, 5) _phase5_extra_int64_helper!(pool) # touches Int64 (untracked in child) @@ -1554,7 +1554,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test pool.float64.n_active >= 1 finally tracked_mask = _tracked_mask_for_types(Float64) - _typed_selective_rewind!(pool, tracked_mask) + _typed_lazy_rewind!(pool, tracked_mask) end # Parent's Int64 must be intact (= 1) diff --git a/test/test_state.jl b/test/test_state.jl index d8f77fb..0b352ec 100644 --- a/test/test_state.jl +++ b/test/test_state.jl @@ -1,5 +1,5 @@ # Phase 5 internal functions used in tests below -import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind!, _tracked_mask_for_types +import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracked_mask_for_types @testset "State Management" begin @@ -309,8 +309,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind reset!(pool) @test pool._current_depth == 1 - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] 
@test pool.float64._checkpoint_n_active == [0] # Sentinel only @test pool.float64._checkpoint_depths == [0] # Sentinel only end @@ -497,7 +497,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind rewind!(pool) @test pool.float64.n_active == 0 @test pool._current_depth == 1 - @test pool._untracked_fixed_masks == [UInt16(0)] + @test pool._touched_type_masks == [UInt16(0)] end @testset "rewind! after reset!" begin @@ -753,7 +753,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind v_parent = acquire!(pool, Int64, 10) v_parent .= 42 # Initialize @test pool.int64.n_active == 1 - @test pool._untracked_fixed_masks[1] == AdaptiveArrayPools._fixed_slot_bit(Int64) + @test pool._touched_type_masks[1] == AdaptiveArrayPools._fixed_slot_bit(Int64) # Enter @with_pool - full checkpoint protects parent's Int64 arrays @with_pool pool begin @@ -779,7 +779,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind v_parent = acquire!(pool, Int32, 7) v_parent .= Int32(123) @test pool.int32.n_active == 1 - @test pool._untracked_fixed_masks[1] == AdaptiveArrayPools._fixed_slot_bit(Int32) + @test pool._touched_type_masks[1] == AdaptiveArrayPools._fixed_slot_bit(Int32) # Helper for Int32 function int32_helper(p) @@ -830,7 +830,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() # No global untracked acquire - @test pool._untracked_fixed_masks[1] == UInt16(0) + @test pool._touched_type_masks[1] == UInt16(0) # Checkpoint/rewind with typed - should work normally checkpoint!(pool) @@ -1005,7 +1005,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test pool.bool.n_active == 0 @test pool.complexf64.n_active == 0 @test pool._current_depth == 1 - @test pool._untracked_fixed_masks == [UInt16(0)] + @test pool._touched_type_masks == [UInt16(0)] empty!(pool) end @@ -1329,14 +1329,14 @@ import AdaptiveArrayPools: 
_typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() # New fields exist - @test hasfield(AdaptiveArrayPool, :_untracked_fixed_masks) - @test hasfield(AdaptiveArrayPool, :_untracked_has_others) + @test hasfield(AdaptiveArrayPool, :_touched_type_masks) + @test hasfield(AdaptiveArrayPool, :_touched_has_others) # Sentinel values at depth=1 (global scope) - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] - @test length(pool._untracked_fixed_masks) == 1 - @test length(pool._untracked_has_others) == 1 + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] + @test length(pool._touched_type_masks) == 1 + @test length(pool._touched_has_others) == 1 end @testset "Bitmask metadata: checkpoint! pushes sentinels" begin @@ -1344,15 +1344,15 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Full checkpoint checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 2 - @test length(pool._untracked_has_others) == 2 - @test pool._untracked_fixed_masks[2] == UInt16(0) - @test pool._untracked_has_others[2] == false + @test length(pool._touched_type_masks) == 2 + @test length(pool._touched_has_others) == 2 + @test pool._touched_type_masks[2] == UInt16(0) + @test pool._touched_has_others[2] == false # Another checkpoint checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 3 - @test length(pool._untracked_has_others) == 3 + @test length(pool._touched_type_masks) == 3 + @test length(pool._touched_has_others) == 3 # Cleanup rewind!(pool) @@ -1364,18 +1364,18 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Single-type checkpoint checkpoint!(pool, Float64) - @test length(pool._untracked_fixed_masks) == 2 - @test length(pool._untracked_has_others) == 2 - @test pool._untracked_fixed_masks[2] == UInt16(0) - @test pool._untracked_has_others[2] == false + @test length(pool._touched_type_masks) == 
2 + @test length(pool._touched_has_others) == 2 + @test pool._touched_type_masks[2] == UInt16(0) + @test pool._touched_has_others[2] == false rewind!(pool, Float64) # Multi-type checkpoint checkpoint!(pool, Float64, Float32) - @test length(pool._untracked_fixed_masks) == 2 - @test length(pool._untracked_has_others) == 2 - @test pool._untracked_fixed_masks[2] == UInt16(0) - @test pool._untracked_has_others[2] == false + @test length(pool._touched_type_masks) == 2 + @test length(pool._touched_has_others) == 2 + @test pool._touched_type_masks[2] == UInt16(0) + @test pool._touched_has_others[2] == false rewind!(pool, Float64, Float32) end @@ -1383,33 +1383,33 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 2 - @test length(pool._untracked_has_others) == 2 + @test length(pool._touched_type_masks) == 2 + @test length(pool._touched_has_others) == 2 rewind!(pool) - @test length(pool._untracked_fixed_masks) == 1 - @test length(pool._untracked_has_others) == 1 + @test length(pool._touched_type_masks) == 1 + @test length(pool._touched_has_others) == 1 # Sentinel preserved - @test pool._untracked_fixed_masks[1] == UInt16(0) - @test pool._untracked_has_others[1] == false + @test pool._touched_type_masks[1] == UInt16(0) + @test pool._touched_has_others[1] == false end @testset "Bitmask metadata: typed rewind! 
pops" begin pool = AdaptiveArrayPool() checkpoint!(pool, Float64) - @test length(pool._untracked_fixed_masks) == 2 + @test length(pool._touched_type_masks) == 2 rewind!(pool, Float64) - @test length(pool._untracked_fixed_masks) == 1 - @test length(pool._untracked_has_others) == 1 + @test length(pool._touched_type_masks) == 1 + @test length(pool._touched_has_others) == 1 # Multi-type checkpoint!(pool, Float64, Int64) - @test length(pool._untracked_fixed_masks) == 2 + @test length(pool._touched_type_masks) == 2 rewind!(pool, Float64, Int64) - @test length(pool._untracked_fixed_masks) == 1 + @test length(pool._touched_type_masks) == 1 end @testset "Bitmask metadata: reset! restores sentinel" begin @@ -1418,11 +1418,11 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Build up state checkpoint!(pool) checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 3 + @test length(pool._touched_type_masks) == 3 reset!(pool) - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] @test pool._current_depth == 1 end @@ -1433,11 +1433,11 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind checkpoint!(pool) acquire!(pool, Float64, 10) checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 3 + @test length(pool._touched_type_masks) == 3 empty!(pool) - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] @test pool._current_depth == 1 end @@ -1450,8 +1450,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind end # No stack leaks — should be back to sentinel only - @test length(pool._untracked_fixed_masks) == 1 - @test length(pool._untracked_has_others) == 1 + @test length(pool._touched_type_masks) == 1 + 
@test length(pool._touched_has_others) == 1 @test pool._current_depth == 1 end @@ -1460,25 +1460,25 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Depth 2 checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 2 + @test length(pool._touched_type_masks) == 2 # Depth 3 checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 3 + @test length(pool._touched_type_masks) == 3 # Depth 4 checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 4 + @test length(pool._touched_type_masks) == 4 # Pop back rewind!(pool) - @test length(pool._untracked_fixed_masks) == 3 + @test length(pool._touched_type_masks) == 3 rewind!(pool) - @test length(pool._untracked_fixed_masks) == 2 + @test length(pool._touched_type_masks) == 2 rewind!(pool) - @test length(pool._untracked_fixed_masks) == 1 + @test length(pool._touched_type_masks) == 1 end # ========================================================================== @@ -1510,78 +1510,78 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test all(b -> b != UInt16(0), bits) end - @testset "Typed _mark_untracked!: fixed-slot types set mask bits" begin - using AdaptiveArrayPools: _mark_untracked!, _fixed_slot_bit + @testset "Typed _record_type_touch!: fixed-slot types set mask bits" begin + using AdaptiveArrayPools: _record_type_touch!, _fixed_slot_bit pool = AdaptiveArrayPool() checkpoint!(pool) # depth=2 # Mark Float64 untracked - _mark_untracked!(pool, Float64) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) - @test pool._untracked_has_others[2] == false + _record_type_touch!(pool, Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) + @test pool._touched_has_others[2] == false # Mark Float32 additionally — bits accumulate - _mark_untracked!(pool, Float32) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) - @test pool._untracked_has_others[2] == false + 
_record_type_touch!(pool, Float32) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) + @test pool._touched_has_others[2] == false # Mark Float64 again — idempotent - _mark_untracked!(pool, Float64) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) + _record_type_touch!(pool, Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) rewind!(pool) end - @testset "Typed _mark_untracked!: non-fixed-slot types set has_others" begin - using AdaptiveArrayPools: _mark_untracked!, _fixed_slot_bit + @testset "Typed _record_type_touch!: non-fixed-slot types set has_others" begin + using AdaptiveArrayPools: _record_type_touch!, _fixed_slot_bit pool = AdaptiveArrayPool() checkpoint!(pool) # depth=2 # Mark UInt8 (not a fixed slot) - _mark_untracked!(pool, UInt8) - @test pool._untracked_fixed_masks[2] == UInt16(0) # mask unchanged - @test pool._untracked_has_others[2] == true + _record_type_touch!(pool, UInt8) + @test pool._touched_type_masks[2] == UInt16(0) # mask unchanged + @test pool._touched_has_others[2] == true rewind!(pool) end - @testset "Typed _mark_untracked!: mixed fixed + others" begin - using AdaptiveArrayPools: _mark_untracked!, _fixed_slot_bit + @testset "Typed _record_type_touch!: mixed fixed + others" begin + using AdaptiveArrayPools: _record_type_touch!, _fixed_slot_bit pool = AdaptiveArrayPool() checkpoint!(pool) - _mark_untracked!(pool, Float64) - _mark_untracked!(pool, UInt8) # others - _mark_untracked!(pool, Int64) + _record_type_touch!(pool, Float64) + _record_type_touch!(pool, UInt8) # others + _record_type_touch!(pool, Int64) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Int64) - @test pool._untracked_has_others[2] == true + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Int64) + @test pool._touched_has_others[2] == true rewind!(pool) end - @testset "Typed 
_mark_untracked!: nested depth isolation" begin - using AdaptiveArrayPools: _mark_untracked!, _fixed_slot_bit + @testset "Typed _record_type_touch!: nested depth isolation" begin + using AdaptiveArrayPools: _record_type_touch!, _fixed_slot_bit pool = AdaptiveArrayPool() # Depth 2 checkpoint!(pool) - _mark_untracked!(pool, Float64) + _record_type_touch!(pool, Float64) # Depth 3 checkpoint!(pool) - _mark_untracked!(pool, Int32) + _record_type_touch!(pool, Int32) # Depth 3 has only Int32 - @test pool._untracked_fixed_masks[3] == _fixed_slot_bit(Int32) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) + @test pool._touched_type_masks[3] == _fixed_slot_bit(Int32) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) # Depth 1 (sentinel) untouched - @test pool._untracked_fixed_masks[1] == UInt16(0) + @test pool._touched_type_masks[1] == UInt16(0) rewind!(pool) rewind!(pool) @@ -1593,12 +1593,12 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() checkpoint!(pool) # depth=2 - # acquire! outside @with_pool calls _mark_untracked!(pool, T) + # acquire! 
outside @with_pool calls _record_type_touch!(pool, T) acquire!(pool, Float64, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) acquire!(pool, Int64, 5) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Int64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Int64) rewind!(pool) end @@ -1610,7 +1610,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind checkpoint!(pool) unsafe_acquire!(pool, Float32, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float32) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float32) rewind!(pool) end @@ -1623,37 +1623,37 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # zeros! with explicit type checkpoint!(pool) zeros!(pool, Float64, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) rewind!(pool) # zeros! without type → default_eltype → Float64 checkpoint!(pool) zeros!(pool, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) rewind!(pool) # ones! with type checkpoint!(pool) ones!(pool, Int32, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Int32) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Int32) rewind!(pool) # trues! → Bit type checkpoint!(pool) trues!(pool, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Bit) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Bit) rewind!(pool) # falses! → Bit type checkpoint!(pool) falses!(pool, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Bit) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Bit) rewind!(pool) # similar! 
with template array checkpoint!(pool) similar!(pool, rand(Float32, 5)) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float32) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float32) rewind!(pool) end @@ -1662,8 +1662,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind checkpoint!(pool) zeros!(pool, UInt8, 10) - @test pool._untracked_has_others[2] == true - @test pool._untracked_fixed_masks[2] == UInt16(0) + @test pool._touched_has_others[2] == true + @test pool._touched_type_masks[2] == UInt16(0) rewind!(pool) end @@ -1712,27 +1712,27 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == true # Case 2: untracked Float64, tracked includes Float64 → subset → OK - pool._untracked_fixed_masks[2] = _fixed_slot_bit(Float64) + pool._touched_type_masks[2] = _fixed_slot_bit(Float64) @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == true # Case 3: untracked Float64, tracked is Float32 only → NOT subset → full @test _can_use_typed_path(pool, _tracked_mask_for_types(Float32)) == false # Case 4: untracked Float64|Float32, tracked Float64 only → partial → full - pool._untracked_fixed_masks[2] = _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) + pool._touched_type_masks[2] = _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == false # Case 5: untracked Float64|Float32, tracked Float64|Float32 → exact match → OK @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64, Float32)) == true # Case 6: untracked Float64 + has_others → always full - pool._untracked_fixed_masks[2] = _fixed_slot_bit(Float64) - pool._untracked_has_others[2] = true + pool._touched_type_masks[2] = _fixed_slot_bit(Float64) + pool._touched_has_others[2] = true @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == false # Case 7: no fixed untracked but 
has_others → always full - pool._untracked_fixed_masks[2] = UInt16(0) - pool._untracked_has_others[2] = true + pool._touched_type_masks[2] = UInt16(0) + pool._touched_has_others[2] = true @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == false rewind!(pool) @@ -1765,7 +1765,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @testset "Scenario B: selective rewind when untracked NOT ⊆ tracked" begin # Helper acquires Float32 while @with_pool only tracks Float64. - # Phase 5: _can_use_typed_path=false → _typed_selective_rewind! covers + # Phase 5: _can_use_typed_path=false → _typed_lazy_rewind! covers # tracked (Float64) | untracked (Float32), so both are rewound correctly. function _scenario_b_helper!(pool) acquire!(pool, Float32, 5) @@ -1876,59 +1876,59 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # ================================================================== @testset "Phase 4: _untracked_flags field removed from AdaptiveArrayPool" begin # The legacy boolean _untracked_flags field has been replaced by - # bitmask-based tracking (_untracked_fixed_masks + _untracked_has_others). + # bitmask-based tracking (_touched_type_masks + _touched_has_others). # Verify it no longer exists as a struct field. 
@test !(:_untracked_flags in fieldnames(AdaptiveArrayPool)) # Verify the bitmask fields ARE present (they are the replacement) - @test :_untracked_fixed_masks in fieldnames(AdaptiveArrayPool) - @test :_untracked_has_others in fieldnames(AdaptiveArrayPool) + @test :_touched_type_masks in fieldnames(AdaptiveArrayPool) + @test :_touched_has_others in fieldnames(AdaptiveArrayPool) end @testset "Phase 4: bitmask stacks have no stale state after lifecycle ops" begin pool = AdaptiveArrayPool() # Initial sentinel state - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] # Checkpoint → mark → rewind cycle leaves no stale bits checkpoint!(pool) - _mark_untracked!(pool, Float64) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) + _record_type_touch!(pool, Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) rewind!(pool) - @test pool._untracked_fixed_masks == [UInt16(0)] # back to sentinel - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] # back to sentinel + @test pool._touched_has_others == [false] # Nested checkpoint → mark others → rewind cleans up checkpoint!(pool) # depth 2 checkpoint!(pool) # depth 3 - _mark_untracked!(pool, UInt8) # others at depth 3 - @test pool._untracked_has_others[3] == true + _record_type_touch!(pool, UInt8) # others at depth 3 + @test pool._touched_has_others[3] == true rewind!(pool) # back to depth 2 - @test length(pool._untracked_has_others) == 2 - @test pool._untracked_has_others[2] == false # depth 2 clean + @test length(pool._touched_has_others) == 2 + @test pool._touched_has_others[2] == false # depth 2 clean rewind!(pool) # back to depth 1 - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] 
# reset! restores sentinel state after deep nesting checkpoint!(pool) checkpoint!(pool) - _mark_untracked!(pool, Float32) - _mark_untracked!(pool, Int64) + _record_type_touch!(pool, Float32) + _record_type_touch!(pool, Int64) reset!(pool) - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] @test pool._current_depth == 1 # empty! also restores sentinel state checkpoint!(pool) - _mark_untracked!(pool, ComplexF64) - _mark_untracked!(pool, UInt16) + _record_type_touch!(pool, ComplexF64) + _record_type_touch!(pool, UInt16) empty!(pool) - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] @test pool._current_depth == 1 end @@ -1936,21 +1936,21 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Dynamic Selective Mode — Phase 1: Characterization & Safety Locks # ================================================================== - @testset "Dynamic selective mode: _acquire_impl! bypasses _mark_untracked!" begin + @testset "Dynamic selective mode: _acquire_impl! bypasses _record_type_touch!" begin using AdaptiveArrayPools: _acquire_impl!, _fixed_slot_bit pool = AdaptiveArrayPool() checkpoint!(pool) depth = pool._current_depth # = 2 - # Internal _acquire_impl! does NOT call _mark_untracked! (by design). + # Internal _acquire_impl! does NOT call _record_type_touch! (by design). # This is the key reason a simple "combined mask" approach is insufficient: # macro-transformed calls won't appear in untracked bitmasks. _acquire_impl!(pool, Float64, 5) - @test pool._untracked_fixed_masks[depth] == UInt16(0) # mask unchanged + @test pool._touched_type_masks[depth] == UInt16(0) # mask unchanged - # Public acquire! DOES call _mark_untracked! + # Public acquire! 
DOES call _record_type_touch! acquire!(pool, Float32, 5) - @test pool._untracked_fixed_masks[depth] == _fixed_slot_bit(Float32) + @test pool._touched_type_masks[depth] == _fixed_slot_bit(Float32) rewind!(pool) end @@ -2000,8 +2000,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind depth = pool._current_depth acquire!(pool, UInt8, 5) - @test pool._untracked_has_others[depth] == true - @test pool._untracked_fixed_masks[depth] == UInt16(0) + @test pool._touched_has_others[depth] == true + @test pool._touched_type_masks[depth] == UInt16(0) rewind!(pool) @test get_typed_pool!(pool, UInt8).n_active == 0 @@ -2016,7 +2016,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Record the stack length BEFORE entering the inner scope. # (global-scope bitmask at index 1 may be non-zero due to the acquire above.) - mask_before = pool._untracked_fixed_masks[1] + mask_before = pool._touched_type_masks[1] checkpoint!(pool) # no acquires in scope @@ -2025,10 +2025,10 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test pool.float64.n_active == n_before @test pool._current_depth == 1 # Stack has returned to exactly the sentinel (length 1) - @test length(pool._untracked_fixed_masks) == 1 - @test length(pool._untracked_has_others) == 1 + @test length(pool._touched_type_masks) == 1 + @test length(pool._touched_has_others) == 1 # Global-scope bitmask is unchanged from before we entered/exited the scope - @test pool._untracked_fixed_masks[1] == mask_before + @test pool._touched_type_masks[1] == mask_before end # —————————————————————————————————————————————————————————————— @@ -2036,22 +2036,22 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # These will FAIL until Phase 2 is complete. # —————————————————————————————————————————————————————————————— - @testset "DESIRED [RED]: _depth_only_checkpoint! 
is exported/defined" begin - # Phase 2 will add _depth_only_checkpoint! to src/state.jl. + @testset "DESIRED [RED]: _lazy_checkpoint! is exported/defined" begin + # Phase 2 will add _lazy_checkpoint! to src/state.jl. # This test explicitly signals the missing implementation. - @test isdefined(AdaptiveArrayPools, :_depth_only_checkpoint!) + @test isdefined(AdaptiveArrayPools, :_lazy_checkpoint!) end - @testset "DESIRED [RED]: _depth_only_checkpoint! does not eagerly checkpoint typed pools" begin + @testset "DESIRED [RED]: _lazy_checkpoint! does not eagerly checkpoint typed pools" begin # A depth-only checkpoint should increment _current_depth and push bitmask # sentinels, but NOT save n_active for any typed pool. # The sentinel in _checkpoint_depths is always depth=0, so if no checkpoint # was saved at the current depth, _checkpoint_depths[end] will be < current_depth. - if !isdefined(AdaptiveArrayPools, :_depth_only_checkpoint!) + if !isdefined(AdaptiveArrayPools, :_lazy_checkpoint!) @test false # RED: function not yet defined else pool = AdaptiveArrayPool() - AdaptiveArrayPools._depth_only_checkpoint!(pool) + AdaptiveArrayPools._lazy_checkpoint!(pool) depth = pool._current_depth # = 2 # No typed pool should have an eager checkpoint at this depth @@ -2062,21 +2062,21 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # But depth metadata IS updated @test pool._current_depth == 2 - @test length(pool._untracked_fixed_masks) == 2 - @test length(pool._untracked_has_others) == 2 + @test length(pool._touched_type_masks) == 2 + @test length(pool._touched_has_others) == 2 end end @testset "DESIRED [RED]: lazy first-touch checkpoint on acquire! in dynamic mode" begin - # In dynamic-selective mode, _mark_untracked! should lazily call + # In dynamic-selective mode, _record_type_touch! should lazily call # _checkpoint_typed_pool! on the FIRST acquire of each type per depth. # Only the touched pool gets checkpointed; others remain untouched. 
- if !isdefined(AdaptiveArrayPools, :_depth_only_checkpoint!) + if !isdefined(AdaptiveArrayPools, :_lazy_checkpoint!) @test false # RED: prerequisite not implemented else - using AdaptiveArrayPools: _depth_only_checkpoint! + using AdaptiveArrayPools: _lazy_checkpoint! pool = AdaptiveArrayPool() - _depth_only_checkpoint!(pool) # lightweight enter + _lazy_checkpoint!(pool) # lightweight enter depth = pool._current_depth # = 2 # Before any acquire: no checkpoint for any pool at this depth @@ -2097,14 +2097,14 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Phase 5: Typed-Fallback Optimization # ================================================================== - @testset "Phase 5: _typed_checkpoint_with_lazy! sets bit 14 and checkpoints known types" begin - # _typed_checkpoint_with_lazy! must checkpoint known types AND set bit 14 for lazy mode. + @testset "Phase 5: _typed_lazy_checkpoint! sets bit 14 and checkpoints known types" begin + # _typed_lazy_checkpoint! must checkpoint known types AND set bit 14 for lazy mode. 
pool = AdaptiveArrayPool() - _typed_checkpoint_with_lazy!(pool, Float64) + _typed_lazy_checkpoint!(pool, Float64) d = pool._current_depth # Bit 14 (0x4000) must be set; bits 0-7 must be 0 (no acquires yet) - @test (pool._untracked_fixed_masks[d] & UInt16(0x4000)) != 0 - @test (pool._untracked_fixed_masks[d] & UInt16(0x00FF)) == 0 + @test (pool._touched_type_masks[d] & UInt16(0x4000)) != 0 + @test (pool._touched_type_masks[d] & UInt16(0x00FF)) == 0 # Float64 should be checkpointed at this depth @test pool.float64._checkpoint_depths[end] == d # Float32 should NOT be checkpointed at this depth @@ -2132,14 +2132,14 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Child scope: typed checkpoint for Float64 only, but helper touches Int64 # Simulates @with_pool with static type Float64 but _can_use_typed_path = false - _typed_checkpoint_with_lazy!(pool, Float64) + _typed_lazy_checkpoint!(pool, Float64) acquire!(pool, Float64, 5) # tracked type _p0_helper_int64!(pool) # untracked Int64 → triggers lazy first-touch checkpoint @test pool.int64.n_active == 2 # parent's 1 + helper's 1 # Child scope exits via selective rewind tracked_mask = _tracked_mask_for_types(Float64) - _typed_selective_rewind!(pool, tracked_mask) + _typed_lazy_rewind!(pool, tracked_mask) # Parent's Int64 count must be restored to 1 (NOT 0) @test pool.int64.n_active == 1 @@ -2150,10 +2150,10 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind end @testset "Phase 5: bit 14 enables lazy first-touch checkpoint for extra types" begin - # _mark_untracked! condition is (current_mask & 0xC000) != 0. + # _record_type_touch! condition is (current_mask & 0xC000) != 0. # With bit 14 set (typed lazy mode), extra-type first touch triggers _checkpoint_typed_pool!. 
pool = AdaptiveArrayPool() - _typed_checkpoint_with_lazy!(pool, Float64) # typed chk + set bit 14 + _typed_lazy_checkpoint!(pool, Float64) # typed chk + set bit 14 d = pool._current_depth # Before acquiring Int64: no Int64 checkpoint at this depth @@ -2171,8 +2171,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @testset "Phase 5 (Issue #3): typed lazy mode preserves parent n_active for others types" begin # If a parent scope has an active others-type (UInt8) and a child uses - # _typed_checkpoint_with_lazy!, helpers touching the same type must NOT corrupt - # the parent's n_active. _typed_checkpoint_with_lazy! eagerly snapshots pool.others + # _typed_lazy_checkpoint!, helpers touching the same type must NOT corrupt + # the parent's n_active. _typed_lazy_checkpoint! eagerly snapshots pool.others # so Case A fires at rewind (not Case B with the wrong sentinel value). function _p5_helper_uint8!(pool) acquire!(pool, UInt8, 7) @@ -2187,16 +2187,16 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test parent_others_pool.n_active == 1 # Child scope: typed checkpoint for Float64 only; helper touches UInt8 (others) - # Without the fix: _typed_checkpoint_with_lazy! doesn't snapshot pool.others → + # Without the fix: _typed_lazy_checkpoint! doesn't snapshot pool.others → # rewind hits Case B → parent UInt8.n_active corrupted to 0. 
- _typed_checkpoint_with_lazy!(pool, Float64) + _typed_lazy_checkpoint!(pool, Float64) try acquire!(pool, Float64, 5) # tracked type _p5_helper_uint8!(pool) # untracked others type @test pool.others[UInt8].n_active == 2 # parent's 1 + helper's 1 finally tracked_mask = _tracked_mask_for_types(Float64) - _typed_selective_rewind!(pool, tracked_mask) + _typed_lazy_rewind!(pool, tracked_mask) end # Parent's UInt8 count must be preserved (= 1, NOT 0) @@ -2212,12 +2212,12 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # They should FAIL before the fix and PASS after. # ================================================================== - @testset "Issue #1: _depth_only_checkpoint! orphaned others stack leak" begin - # Bug: _depth_only_checkpoint! eagerly checkpoints pool.others entries, - # but sets _untracked_has_others[depth] = false. On _dynamic_selective_rewind!, + @testset "Issue #1: _lazy_checkpoint! orphaned others stack leak" begin + # Bug: _lazy_checkpoint! eagerly checkpoints pool.others entries, + # but sets _touched_has_others[depth] = false. On _lazy_rewind!, # the others loop is skipped (flag is false), leaving orphaned checkpoint entries. # In a loop, each iteration pushes one more stale entry → unbounded stack growth. - using AdaptiveArrayPools: _depth_only_checkpoint!, _dynamic_selective_rewind! + using AdaptiveArrayPools: _lazy_checkpoint!, _lazy_rewind! 
pool = AdaptiveArrayPool() @@ -2231,8 +2231,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Run 10 iterations of dynamic-selective scope without acquiring any others type for _ in 1:10 - _depth_only_checkpoint!(pool) # pushes checkpoint for others entries - _dynamic_selective_rewind!(pool) # should pop it back + _lazy_checkpoint!(pool) # pushes checkpoint for others entries + _lazy_rewind!(pool) # should pop it back end # Checkpoint stack must NOT have grown (each entry should be popped by rewind) @@ -2243,14 +2243,14 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @testset "Issue #2: double-checkpoint hazard when tracked type used by helper" begin # Bug: In typed-lazy mode (bit 14), when a tracked type T is: - # 1. Checkpointed by _typed_checkpoint_with_lazy!(pool, T) (saves n_active=0) - # 2. Acquired by macro-transformed _acquire_impl! (n_active → 1, no _mark_untracked!) - # 3. Re-acquired by a helper via acquire! → _mark_untracked! + # 1. Checkpointed by _typed_lazy_checkpoint!(pool, T) (saves n_active=0) + # 2. Acquired by macro-transformed _acquire_impl! (n_active → 1, no _record_type_touch!) + # 3. Re-acquired by a helper via acquire! → _record_type_touch! # Step 3 sees bit 14 set + T's bit unset → calls _checkpoint_typed_pool! again # with n_active=1 (wrong!). On rewind, restores n_active=1 instead of 0. using AdaptiveArrayPools: _acquire_impl! - # Helper that uses acquire! (goes through _mark_untracked!) + # Helper that uses acquire! (goes through _record_type_touch!) function _issue2_helper!(pool) acquire!(pool, Float64, 3) end @@ -2258,20 +2258,20 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() # Enter typed-lazy mode for Float64 - _typed_checkpoint_with_lazy!(pool, Float64) + _typed_lazy_checkpoint!(pool, Float64) try - # Simulate macro-transformed code: bypasses _mark_untracked! 
+ # Simulate macro-transformed code: bypasses _record_type_touch! _acquire_impl!(pool, Float64, 5) @test pool.float64.n_active == 1 - # Helper: goes through acquire! → _mark_untracked! - # BUG: _mark_untracked! sees bit 14 + Float64 bit not yet set + # Helper: goes through acquire! → _record_type_touch! + # BUG: _record_type_touch! sees bit 14 + Float64 bit not yet set # → redundant _checkpoint_typed_pool! with n_active=1 _issue2_helper!(pool) @test pool.float64.n_active == 2 finally tracked_mask = _tracked_mask_for_types(Float64) - _typed_selective_rewind!(pool, tracked_mask) + _typed_lazy_rewind!(pool, tracked_mask) end # After rewind, n_active should be 0 (parent state before scope entry) @@ -2292,13 +2292,13 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() initial_f32_stack = length(pool.float32._checkpoint_depths) # 1 (sentinel) - _typed_checkpoint_with_lazy!(pool, Float32) + _typed_lazy_checkpoint!(pool, Float32) try - _acquire_impl!(pool, Float32, 5) # n_active=1, no _mark_untracked! - _issue2b_helper!(pool) # acquire! → _mark_untracked! → double checkpoint + _acquire_impl!(pool, Float32, 5) # n_active=1, no _record_type_touch! + _issue2b_helper!(pool) # acquire! → _record_type_touch! → double checkpoint finally tracked_mask = _tracked_mask_for_types(Float32) - _typed_selective_rewind!(pool, tracked_mask) + _typed_lazy_rewind!(pool, tracked_mask) end # The checkpoint stack should return to its initial length (sentinel only) @@ -2325,25 +2325,25 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind end end - @testset "Issue #4: CUDA _depth_only_checkpoint! parity (has_others flag)" begin - # Bug: CUDA _depth_only_checkpoint! eagerly checkpoints pool.others but - # does NOT set _untracked_has_others = true, same as CPU Issue #1. + @testset "Issue #4: CUDA _lazy_checkpoint! parity (has_others flag)" begin + # Bug: CUDA _lazy_checkpoint! 
eagerly checkpoints pool.others but + # does NOT set _touched_has_others = true, same as CPU Issue #1. # Verify via source code inspection (no GPU needed). cuda_state_path = joinpath(@__DIR__, "..", "ext", "AdaptiveArrayPoolsCUDAExt", "state.jl") if isfile(cuda_state_path) code = read(cuda_state_path, String) - # Extract _depth_only_checkpoint! function body + # Extract _lazy_checkpoint! function body func_match = match( - r"function\s+AdaptiveArrayPools\._depth_only_checkpoint!\(pool::CuAdaptiveArrayPool\).*?^end"ms, + r"function\s+AdaptiveArrayPools\._lazy_checkpoint!\(pool::CuAdaptiveArrayPool\).*?^end"ms, code ) @test func_match !== nothing if func_match !== nothing func_body = func_match.match # If it eagerly checkpoints others (has `for p in values(pool.others)`), - # then it MUST also set _untracked_has_others[...] = true within the loop + # then it MUST also set _touched_has_others[...] = true within the loop if contains(func_body, "values(pool.others)") - @test occursin(r"_untracked_has_others\[.*\]\s*=\s*true", func_body) + @test occursin(r"_touched_has_others\[.*\]\s*=\s*true", func_body) end end else @@ -2351,15 +2351,15 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind end end - @testset "Issue #5: CUDA _typed_checkpoint_with_lazy! parity" begin + @testset "Issue #5: CUDA _typed_lazy_checkpoint! parity" begin # Bug: CUDA version is missing two features present in CPU version: # 1. Double-checkpoint guard: `_checkpoint_depths[end] != d` - # 2. has_others flag: `_untracked_has_others[d] = true` + # 2. 
has_others flag: `_touched_has_others[d] = true` cuda_state_path = joinpath(@__DIR__, "..", "ext", "AdaptiveArrayPoolsCUDAExt", "state.jl") if isfile(cuda_state_path) code = read(cuda_state_path, String) func_match = match( - r"function\s+AdaptiveArrayPools\._typed_checkpoint_with_lazy!\(pool::CuAdaptiveArrayPool.*?^end"ms, + r"function\s+AdaptiveArrayPools\._typed_lazy_checkpoint!\(pool::CuAdaptiveArrayPool.*?^end"ms, code ) @test func_match !== nothing @@ -2369,8 +2369,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Must have double-checkpoint guard (like CPU version) @test contains(func_body, "_checkpoint_depths[end]") - # Must set _untracked_has_others flag (like CPU version) - @test contains(func_body, "_untracked_has_others") + # Must set _touched_has_others flag (like CPU version) + @test contains(func_body, "_touched_has_others") end else @warn "CUDA extension not found, skipping parity test" From e645a1c13500c5b8462f9ca668bdc75f114ee269 Mon Sep 17 00:00:00 2001 From: Min-Gu Yoo Date: Wed, 18 Feb 2026 13:00:17 -0800 Subject: [PATCH 5/5] refactor(state): update comments and variable names to reflect type-touch tracking --- docs/design/naming_refactor_proposal.md | 429 ------------------------ ext/AdaptiveArrayPoolsCUDAExt/state.jl | 10 +- src/state.jl | 6 +- test/test_state.jl | 62 ++++ 4 files changed, 70 insertions(+), 437 deletions(-) delete mode 100644 docs/design/naming_refactor_proposal.md diff --git a/docs/design/naming_refactor_proposal.md b/docs/design/naming_refactor_proposal.md deleted file mode 100644 index 28085c6..0000000 --- a/docs/design/naming_refactor_proposal.md +++ /dev/null @@ -1,429 +0,0 @@ -# Naming & Refactoring Proposal: Post-Optimization Cleanup - -> **Status**: Brainstorm / RFC -> **Context**: After Phase 3 (dynamic-selective) and Phase 5 (typed-fallback) optimizations, -> many internal function names still reflect the original "untracked acquire detection" mental -> model. 
This document proposes renaming to match the evolved architecture. - ---- - -## 1. Current Architecture: Three Execution Modes - -The `@with_pool` macro generates one of three checkpoint/rewind strategies: - -| Mode | Checkpoint | Acquire | Rewind | When | -|------|-----------|---------|--------|------| -| **Typed** | `checkpoint!(pool, T...)` | `_acquire_impl!(pool, T, ...)` | `rewind!(pool, T...)` | All types statically known | -| **Dynamic-Selective** | `_depth_only_checkpoint!(pool)` | `acquire!(pool, T, ...)` *(unchanged)* | `_dynamic_selective_rewind!(pool)` | Types only known at runtime | -| **Full** (manual) | `checkpoint!(pool)` | `acquire!(pool, T, ...)` | `rewind!(pool)` | User calls manually | - -Additionally, the **Typed** mode has a runtime fallback: -- If `_can_use_typed_path()` is false → `_typed_checkpoint_with_lazy!` + `_typed_selective_rewind!` - ---- - -## 2. Naming Tensions - -### 2.1 `_mark_untracked!` — The Core Irony - -**Current name**: `_mark_untracked!` -**What it does**: Records type usage in bitmask. Triggers lazy checkpoint on first touch. - -The word "untracked" is **doubly misleading**: -1. The function **tracks** type usage (sets bitmask bits) -2. The data it records is used to **selectively rewind** (the opposite of "untracked") - -The original semantics: "mark that this acquire happened in a path the macro doesn't track." -The actual semantics now: "record that type T was touched at this depth, and lazily checkpoint if needed." 
- -#### Candidates - -| Candidate | Pros | Cons | -|-----------|------|------| -| `_record_type_touch!` | "touch" captures first-touch/lazy-checkpoint semantics; action-oriented | Doesn't convey the bitmask mechanism | -| `_track_type_usage!` | Most literal description of what happens | "track" is overloaded (macro "tracks" types too) | -| `_notify_acquire!` | Observer-pattern feel; captures side-effect (lazy checkpoint) | Too generic; doesn't convey type-specificity | -| `_register_type!` | Clean, idiomatic ("register X in a registry") | Doesn't convey the "at this depth" scoping | -| `_touch_type!` | Shortest; "touch" is a Unix/DB idiom for "first access triggers action" | Might be too terse for complex semantics | -| `_mark_type_used!` | Simple and accurate | Still has "mark" which is vague | - -### 2.2 `_acquire_impl!` — The "Fast Path" Naming - -**Current name**: `_acquire_impl!` -**What it does**: Core acquire logic without type tracking. Called by macro-transformed code. - -The `_impl!` suffix is conventional but **non-descriptive**. It doesn't convey *why* this variant -exists (to skip tracking overhead when the macro already knows the types). - -#### Candidates - -| Candidate | Pros | Cons | -|-----------|------|------| -| `_acquire_direct!` | "direct" = no intermediary tracking step | Might imply "direct memory access" | -| `_acquire_bare!` | "bare" = stripped of wrapper logic | Non-standard terminology | -| `_acquire_core!` | "core" = the essential operation | Generic; doesn't explain *why* it's separate | -| `_acquire_scoped!` | "scoped" = macro already manages this scope | Misleading — the function itself isn't scoped | -| Keep `_acquire_impl!` | Well-understood `_impl` convention in Julia | Doesn't explain the tracking bypass | - -### 2.3 `_untracked_fixed_masks` / `_untracked_has_others` — Field Names - -**Current names**: Pool fields storing per-depth bitmask data. -**What they store**: Which types were acquired at each depth (for selective rewind). 
- -These fields are the **runtime type tracking** data structure, yet named "untracked." - -#### Candidates - -| Candidate | Pros | Cons | -|-----------|------|------| -| `_touched_fixed_masks` / `_touched_has_others` | "touched" matches first-touch semantics | Might confuse with "dirty" bit patterns | -| `_used_fixed_masks` / `_used_has_others` | Simplest, most literal | Too generic | -| `_acquired_fixed_masks` / `_acquired_has_others` | Directly describes the event (acquire happened) | Slightly long | -| `_runtime_fixed_masks` / `_runtime_has_others` | Contrasts with "compile-time" tracked types | Doesn't describe *what* is tracked | -| Keep current names | Consistency with existing code, comments, tests | Perpetuates the "untracked" confusion | - -### 2.4 Mode-Specific Functions — Consistency - -The three modes don't follow a consistent naming pattern: - -``` -Typed: checkpoint!(pool, T...) / rewind!(pool, T...) - + fallback: _typed_checkpoint_with_lazy!(pool, T...) / _typed_selective_rewind!(pool, mask) -Dynamic-Selective: _depth_only_checkpoint!(pool) / _dynamic_selective_rewind!(pool) -Full: checkpoint!(pool) / rewind!(pool) -``` - -**Observation**: The "typed" fallback functions have long compound names that mix the *mode* -(`typed`) with the *mechanism* (`with_lazy`, `selective`). - -#### Possible Consistent Scheme - -Option A — Mode prefix: -``` -_typed_checkpoint! → checkpoint!(pool, T...) (already clean) -_typed_lazy_checkpoint! → _typed_checkpoint_with_lazy!(pool, T...) -_typed_selective_rewind! → _typed_selective_rewind!(pool, mask) (already clean) -_dynamic_checkpoint! → _depth_only_checkpoint!(pool) -_dynamic_rewind! → _dynamic_selective_rewind!(pool) -``` - -Option B — Mechanism suffix: -``` -_checkpoint_typed! (checkpoint the typed pools) -_checkpoint_lazy! (checkpoint with lazy first-touch) -_checkpoint_depth_only! (only increment depth) -_rewind_typed! (rewind typed pools) -_rewind_selective! (rewind based on bitmask) -_rewind_dynamic! 
(dynamic bitmask rewind) -``` - ---- - -## 3. Holistic Renaming Proposals - -### Proposal A: "Touch" Metaphor (First-Touch Semantics) - -The architecture's key insight is **first-touch tracking**: when a type is first used at a depth, -it gets recorded (and lazily checkpointed). The "touch" metaphor captures this cleanly. - -``` -# Type tracking -_mark_untracked!(pool, T) → _touch_type!(pool, T) - -# Pool fields -_untracked_fixed_masks → _touched_fixed_masks -_untracked_has_others → _touched_has_others - -# Acquire internals (keep _impl convention) -_acquire_impl!(pool, T, n) → (keep as is, or _acquire_core!) -_unsafe_acquire_impl!(...) → (keep as is, or _unsafe_acquire_core!) - -# Dynamic mode (rename for symmetry) -_depth_only_checkpoint!(pool) → _lazy_checkpoint!(pool) # "lazy" = defers to first touch -_dynamic_selective_rewind!(pool) → _lazy_rewind!(pool) # symmetric with checkpoint - -# Typed fallback (simplify) -_typed_checkpoint_with_lazy! → _typed_lazy_checkpoint! # adjective before noun -_typed_selective_rewind! → _typed_lazy_rewind! # symmetric pair - -# Macro generators (follow function names) -_generate_dynamic_selective_checkpoint_call → _generate_lazy_checkpoint_call -_generate_dynamic_selective_rewind_call → _generate_lazy_rewind_call - -# Guards -_can_use_typed_path → (keep as is — already clear) -_tracked_mask_for_types → (keep as is — already clear) -``` - -**Pros**: Concise, consistent metaphor, captures the core mechanism. -**Cons**: "lazy" is overloaded in CS (lazy evaluation, lazy initialization). - -### Proposal B: "Record/Direct" Pair (Action-Based) - -Focus on what each function *does* as an action: - -``` -# Type tracking -_mark_untracked!(pool, T) → _record_type_touch!(pool, T) - -# Pool fields -_untracked_fixed_masks → _acquired_type_masks -_untracked_has_others → _acquired_has_others - -# Acquire internals -_acquire_impl!(pool, T, n) → _acquire_direct!(pool, T, n) # "direct" = no recording -_unsafe_acquire_impl!(...) 
→ _unsafe_acquire_direct!(...) - -# All convenience _impl! follow: -_zeros_impl! → _zeros_direct! -_ones_impl! → _ones_direct! -_similar_impl! → _similar_direct! - -# Dynamic mode -_depth_only_checkpoint!(pool) → _deferred_checkpoint!(pool) # "deferred" = save later -_dynamic_selective_rewind!(pool) → _deferred_selective_rewind!(pool) # rewind what was deferred - -# Typed fallback -_typed_checkpoint_with_lazy! → _typed_deferred_checkpoint! # typed + deferred for extras -_typed_selective_rewind! → (keep — already descriptive) - -# Macro generators -_generate_dynamic_selective_* → _generate_deferred_* -``` - -**Pros**: Very descriptive, each name tells you exactly what happens. -**Cons**: Longer names, "deferred" is less intuitive than "lazy." - -### Proposal C: "Scope" Metaphor (Inside/Outside Macro Scope) - -Frame the naming around the key architectural distinction: code inside `@with_pool` scope -(macro-managed) vs outside (self-tracking): - -``` -# Type tracking — called from "outside scope" or "dynamic scope" -_mark_untracked!(pool, T) → _track_type!(pool, T) - -# Pool fields -_untracked_fixed_masks → _scope_type_masks # per-scope tracking -_untracked_has_others → _scope_has_others - -# Acquire internals — used by "in-scope" (macro-managed) code -_acquire_impl!(pool, T, n) → _scoped_acquire!(pool, T, n) # "scoped" = macro handles tracking -_unsafe_acquire_impl!(...) → _scoped_unsafe_acquire!(...) - -# Dynamic mode -_depth_only_checkpoint!(pool) → _open_scope!(pool) # "open" a new tracking scope -_dynamic_selective_rewind!(pool) → _close_scope!(pool) # "close" and rewind the scope - -# Typed fallback -_typed_checkpoint_with_lazy! → _open_typed_scope_with_fallback! -_typed_selective_rewind! → _close_typed_scope_with_fallback! - -# Guards -_can_use_typed_path → _scope_is_typed_only -``` - -**Pros**: Captures the architectural mental model cleanly. -**Cons**: "scope" semantics might clash with Julia's lexical scoping concepts. 
- -### Proposal D: Minimal Rename (Conservative) - -Only rename the most confusing items, keep everything else: - -``` -# The one truly misleading name: -_mark_untracked!(pool, T) → _record_type_touch!(pool, T) - -# The confusing fields: -_untracked_fixed_masks → _touched_type_masks -_untracked_has_others → _touched_has_others - -# Everything else stays as-is -_acquire_impl! → (keep) -_depth_only_checkpoint! → (keep) -_dynamic_selective_rewind! → (keep) -_typed_checkpoint_with_lazy! → (keep) -_typed_selective_rewind! → (keep) -``` - -**Pros**: Minimal churn, only fixes the genuinely confusing names. -**Cons**: Misses the opportunity for holistic consistency. - ---- - -## 4. Cross-Cutting Concerns - -### 4.1 Public API — Should NOT Change - -These are stable public APIs and should **never** be renamed: -- `acquire!`, `unsafe_acquire!`, `acquire_view!`, `acquire_array!` -- `checkpoint!`, `rewind!`, `reset!`, `empty!` -- `zeros!`, `ones!`, `trues!`, `falses!`, `similar!` -- `@with_pool`, `@maybe_with_pool` -- `get_task_local_pool` - -### 4.2 CUDA Extension Parity - -Any rename must be mirrored in: -- `ext/AdaptiveArrayPoolsCUDAExt/types.jl` -- `ext/AdaptiveArrayPoolsCUDAExt/state.jl` -- `ext/AdaptiveArrayPoolsCUDAExt/acquire.jl` - -### 4.3 Test Impact - -Renaming internal functions affects: -- `test/test_macro_internals.jl` (directly calls `_depth_only_checkpoint!`, `_dynamic_selective_rewind!`, etc.) -- `test/test_state.jl` (checkpoint/rewind tests) -- `test/test_macroexpand.jl` (checks expanded code contains specific function names) -- Any benchmarks referencing internal functions - -### 4.4 `Bit` 15 / Bit 14 Constants - -Currently the mode flags are raw hex literals (`0x8000`, `0x4000`). 
A related cleanup: -```julia -const _DYNAMIC_MODE_BIT = UInt16(0x8000) # bit 15 -const _LAZY_MODE_BIT = UInt16(0x4000) # bit 14 -const _MODE_BITS_MASK = UInt16(0xC000) # bits 14-15 -const _TYPE_BITS_MASK = UInt16(0x00FF) # bits 0-7 -``` - -These constants would replace scattered magic numbers throughout `state.jl` and `acquire.jl`. - -### 4.5 The `_impl!` Convention — Keep or Replace? - -The `_impl!` suffix is a **widely understood Julia convention** (e.g., `Base._similar_impl`). -Replacing it with `_direct!`, `_core!`, or `_scoped!` trades familiarity for specificity. - -Arguments for keeping `_impl!`: -- Julia developers immediately understand it as "internal implementation" -- No ambiguity about the function's role as a building block -- Grep-friendly: `_*_impl!` finds all implementation functions - -Arguments for replacing: -- `_impl!` doesn't explain *why* the split exists (tracking bypass) -- New developers might not realize the critical difference between `acquire!` and `_acquire_impl!` - ---- - -## 5. 
Recommended Changes (for Discussion) - -### Tier 1: High Impact, Low Risk (Do First) - -| Current | Proposed | Rationale | -|---------|----------|-----------| -| `_mark_untracked!` | `_record_type_touch!` | Most misleading name; "touch" captures first-touch + lazy checkpoint | -| `_untracked_fixed_masks` | `_touched_type_masks` | Field stores which types were *touched*, not which are "untracked" | -| `_untracked_has_others` | `_touched_has_others` | Consistent with above | - -### Tier 2: Medium Impact, Medium Risk - -| Current | Proposed | Rationale | -|---------|----------|-----------| -| `_depth_only_checkpoint!` | `_lazy_checkpoint!` | "lazy" captures the deferred-to-first-touch semantics | -| `_dynamic_selective_rewind!` | `_lazy_rewind!` | Symmetric with `_lazy_checkpoint!` | -| `_typed_checkpoint_with_lazy!` | `_typed_lazy_checkpoint!` | Cleaner word order | -| Magic numbers `0x8000`, `0x4000`, `0xC000`, `0x00FF` | Named constants (see 4.4) | Self-documenting code | - -### Tier 3: Low Impact, Higher Risk (Optional) - -| Current | Proposed | Rationale | -|---------|----------|-----------| -| `_acquire_impl!` | Keep as `_acquire_impl!` | Julia convention, well-understood | -| `_generate_dynamic_selective_*` | `_generate_lazy_*` | Follows Tier 2 rename | -| `_typed_selective_rewind!` | `_typed_lazy_rewind!` | Consistent with pair | - ---- - -## 6. Alternative: Do Nothing - -**Case for not renaming**: The current names work. They're documented. Tests pass. -"Untracked" has a clear historical meaning in the codebase, and commit history explains -the evolution. Renaming has a nonzero risk of introducing bugs (missed references, -CUDA extension drift) and makes git blame harder to follow. - -**Counter-argument**: The package is pre-1.0 and has few external users. Now is the -cheapest time to fix naming before the API surface solidifies. - ---- - -## 7. Open Questions - -1. 
**Should `_impl!` functions be renamed?** They work fine as a convention, but - `_acquire_direct!` or `_acquire_core!` would be more self-documenting. - -2. **Is "lazy" the right word?** In Julia, `lazy` is associated with `Lazy.jl` and - lazy evaluation. "Deferred" is more precise but longer. - -3. **Should mode names be formalized?** Currently modes are described in comments - as "typed", "dynamic-selective", "full". Should there be an enum or named constants? - -4. **How deep should the rename go?** Renaming `_mark_untracked!` alone fixes 80% - of the confusion. Is a holistic rename worth the churn? - -5. **Should the bitmask `_touched_type_masks` also track mode bits?** Currently - bits 0-7 = types, bits 14-15 = mode flags, all in the same field. Should mode - flags be a separate field for clarity? - ---- - -## Appendix: Complete Current → Proposed Mapping (Proposal A: "Touch/Lazy") - -``` -# === Type Tracking === -_mark_untracked!(pool, T) → _record_type_touch!(pool, T) - -# === Pool Fields === -_untracked_fixed_masks → _touched_type_masks -_untracked_has_others → _touched_has_others - -# === Acquire (keep _impl convention) === -_acquire_impl!(pool, T, n) → (no change) -_unsafe_acquire_impl!(pool, T, n) → (no change) -_zeros_impl!(pool, T, dims...) → (no change) -_ones_impl!(pool, T, dims...) → (no change) -_similar_impl!(pool, T, dims...) → (no change) - -# === Dynamic-Selective Mode === -_depth_only_checkpoint!(pool) → _lazy_checkpoint!(pool) -_dynamic_selective_rewind!(pool) → _lazy_rewind!(pool) - -# === Typed Fallback === -_typed_checkpoint_with_lazy!(pool, T...) → _typed_lazy_checkpoint!(pool, T...) -_typed_selective_rewind!(pool, mask) → _typed_lazy_rewind!(pool, mask) - -# === Selective Rewind Helper === -_selective_rewind_fixed_slots!(pool, mask) → (no change — already descriptive) - -# === Guards === -_can_use_typed_path(pool, mask) → (no change) -_tracked_mask_for_types(T...) 
→ (no change) - -# === Macro Generators === -_generate_dynamic_selective_checkpoint_call → _generate_lazy_checkpoint_call -_generate_dynamic_selective_rewind_call → _generate_lazy_rewind_call -_generate_typed_checkpoint_call → (no change) -_generate_typed_rewind_call → (no change) - -# === Constants (new) === -(raw 0x8000) → _LAZY_MODE_BIT (or _DYNAMIC_MODE_BIT) -(raw 0x4000) → _TYPED_LAZY_BIT -(raw 0xC000) → _MODE_BITS_MASK -(raw 0x00FF) → _TYPE_BITS_MASK -``` - -### Files Affected - -| File | Changes | -|------|---------| -| `src/types.jl` | Field renames: `_untracked_*` → `_touched_*` | -| `src/acquire.jl` | `_mark_untracked!` → `_record_type_touch!` | -| `src/state.jl` | Mode functions, field references, constants | -| `src/macros.jl` | Generator function renames, field references | -| `src/convenience.jl` | `_mark_untracked!` calls | -| `src/task_local_pool.jl` | (unlikely changes) | -| `src/utils.jl` | (unlikely changes) | -| `ext/AdaptiveArrayPoolsCUDAExt/*.jl` | Mirror all renames | -| `test/test_macro_internals.jl` | Direct calls to renamed functions | -| `test/test_state.jl` | Field references | -| `test/test_macroexpand.jl` | String matching on expanded names | -| `test/test_allocation.jl` | (unlikely changes) | diff --git a/ext/AdaptiveArrayPoolsCUDAExt/state.jl b/ext/AdaptiveArrayPoolsCUDAExt/state.jl index e800c9f..f5e572f 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/state.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/state.jl @@ -32,7 +32,7 @@ end # ============================================================================== function AdaptiveArrayPools.checkpoint!(pool::CuAdaptiveArrayPool) - # Increment depth and initialize untracked bitmask state + # Increment depth and initialize type-touch tracking state pool._current_depth += 1 push!(pool._touched_type_masks, UInt16(0)) push!(pool._touched_has_others, false) @@ -223,14 +223,14 @@ end nothing end -# _typed_lazy_rewind!: selective rewind of (tracked | untracked) mask. 
+# _typed_lazy_rewind!: selective rewind of (tracked | touched) mask. # Uses direct field access with bit checks — foreach_fixed_slot is single-argument (no bit yield). # Bit 7: Float16 (CUDA-specific; lazy-checkpointed on first touch by _record_type_touch!). # has_others: genuine others types (UInt8, Int8, etc.) — eagerly checkpointed at scope entry. @inline function AdaptiveArrayPools._typed_lazy_rewind!(pool::CuAdaptiveArrayPool, tracked_mask::UInt16) d = pool._current_depth - untracked = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK - combined = tracked_mask | untracked + touched = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK + combined = tracked_mask | touched _has_bit(combined, Float64) && _rewind_typed_pool!(pool.float64, d) _has_bit(combined, Float32) && _rewind_typed_pool!(pool.float32, d) _has_bit(combined, Int64) && _rewind_typed_pool!(pool.int64, d) @@ -238,7 +238,7 @@ end _has_bit(combined, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d) _has_bit(combined, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d) _has_bit(combined, Bool) && _rewind_typed_pool!(pool.bool, d) - # Float16: bit 7 is set by _record_type_touch! on first untracked touch (lazy first-touch). + # Float16: bit 7 is set by _record_type_touch! on first touch (lazy first-touch). # Also rewind when Float16 was a *tracked* type in the macro: _typed_lazy_checkpoint! # calls checkpoint!(pool, Float16) which pushes a checkpoint at depth d, but _acquire_impl! # (macro transform) bypasses _record_type_touch!, leaving bit 7 = 0. diff --git a/src/state.jl b/src/state.jl index 0eaea1e..86b27d9 100644 --- a/src/state.jl +++ b/src/state.jl @@ -312,8 +312,8 @@ guaranteed by the `_TYPED_LAZY_BIT` mode set in `_typed_lazy_checkpoint!`. 
""" @inline function _typed_lazy_rewind!(pool::AdaptiveArrayPool, tracked_mask::UInt16) d = pool._current_depth - untracked = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK - combined = tracked_mask | untracked + touched = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK + combined = tracked_mask | touched _selective_rewind_fixed_slots!(pool, combined) if @inbounds(pool._touched_has_others[d]) for tp in values(pool.others) @@ -587,7 +587,7 @@ in `touched_mask` is also set in `tracked_mask`. """ @inline function _can_use_typed_path(pool::AbstractArrayPool, tracked_mask::UInt16) depth = pool._current_depth - touched_mask = @inbounds pool._touched_type_masks[depth] + touched_mask = @inbounds(pool._touched_type_masks[depth]) & _TYPE_BITS_MASK has_others = @inbounds pool._touched_has_others[depth] return (touched_mask & ~tracked_mask) == UInt16(0) && !has_others end diff --git a/test/test_state.jl b/test/test_state.jl index 0b352ec..a0f67b3 100644 --- a/test/test_state.jl +++ b/test/test_state.jl @@ -1738,6 +1738,68 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke rewind!(pool) end + @testset "_can_use_typed_path: mode bits do not pollute subset check" begin + # Issue: _can_use_typed_path reads raw _touched_type_masks[depth] which may + # contain mode bits (14-15) from _lazy_checkpoint! or _typed_lazy_checkpoint!. + # These mode bits leak into the subset check `(touched_mask & ~tracked_mask) == 0`, + # causing false negatives: the typed fast path is rejected even when only + # tracked types were touched. + using AdaptiveArrayPools: _can_use_typed_path, _tracked_mask_for_types, + _lazy_checkpoint!, _lazy_rewind!, _LAZY_MODE_BIT, _TYPED_LAZY_BIT, + _acquire_impl! 
+ + # --- Case 1: _LAZY_MODE_BIT (bit 15) should be ignored --- + pool = AdaptiveArrayPool() + checkpoint!(pool) # depth 2 + pool._touched_type_masks[2] = _LAZY_MODE_BIT # simulate lazy parent scope + # Only mode bit set, no type bits → typed path should be safe + @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == true + + # Mode bit + tracked type bit → still safe (type is tracked) + pool._touched_type_masks[2] = _LAZY_MODE_BIT | _fixed_slot_bit(Float64) + @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == true + + # Mode bit + untracked type bit → correctly fails + pool._touched_type_masks[2] = _LAZY_MODE_BIT | _fixed_slot_bit(Int32) + @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == false + rewind!(pool) + + # --- Case 2: _TYPED_LAZY_BIT (bit 14) should be ignored --- + pool2 = AdaptiveArrayPool() + checkpoint!(pool2) + pool2._touched_type_masks[2] = _TYPED_LAZY_BIT + @test _can_use_typed_path(pool2, _tracked_mask_for_types(Float64)) == true + + pool2._touched_type_masks[2] = _TYPED_LAZY_BIT | _fixed_slot_bit(Float64) + @test _can_use_typed_path(pool2, _tracked_mask_for_types(Float64)) == true + rewind!(pool2) + + # --- Case 3: Both mode bits set (bits 14+15) should be ignored --- + pool3 = AdaptiveArrayPool() + checkpoint!(pool3) + pool3._touched_type_masks[2] = _LAZY_MODE_BIT | _TYPED_LAZY_BIT + @test _can_use_typed_path(pool3, _tracked_mask_for_types(Float64)) == true + rewind!(pool3) + + # --- Case 4: End-to-end — nested typed scope inside lazy scope --- + pool4 = AdaptiveArrayPool() + _lazy_checkpoint!(pool4) # outer lazy scope (depth 2, mask has _LAZY_MODE_BIT) + + # Before entering inner typed scope, macro calls _can_use_typed_path at parent depth + tracked_mask = _tracked_mask_for_types(Float64) + @test _can_use_typed_path(pool4, tracked_mask) == true # parent has no extra type bits + + # Enter inner typed scope + checkpoint!(pool4, Float64) # depth 3 + a = _acquire_impl!(pool4, Float64, 10) + a .= 
1.0 + # At rewind time: inner mask is clean (no mode bits from checkpoint!) + @test _can_use_typed_path(pool4, tracked_mask) == true + + rewind!(pool4, Float64) + _lazy_rewind!(pool4) + end + # ================================================================== # Phase 3: End-to-end runtime scenarios # ==================================================================