diff --git a/docs/src/architecture/macro-internals.md b/docs/src/architecture/macro-internals.md index 4c2c920..d983237 100644 --- a/docs/src/architecture/macro-internals.md +++ b/docs/src/architecture/macro-internals.md @@ -115,24 +115,24 @@ end # If only checkpoint!(pool, Int64), Float64 arrays won't be rewound! ``` -### The Solution: Bitmask-Based Untracked Tracking +### The Solution: Bitmask-Based Type Touch Tracking -Every `acquire!` call (and convenience functions) marks itself as "untracked" with type-specific bitmask information: +Every `acquire!` call (and convenience functions) records the type touch with type-specific bitmask information: ```julia # Public API (called from user code outside macro) @inline function acquire!(pool, ::Type{T}, n::Int) where {T} - _mark_untracked!(pool, T) # ← Sets type-specific bitmask! + _record_type_touch!(pool, T) # ← Records type-specific bitmask! _acquire_impl!(pool, T, n) end -# Macro-transformed calls skip the marking +# Macro-transformed calls skip the recording # (because macro already knows about them) -_acquire_impl!(pool, T, n) # ← No marking +_acquire_impl!(pool, T, n) # ← No recording ``` Each fixed-slot type maps to a bit in a `UInt16` bitmask via `_fixed_slot_bit(T)`. -Non-fixed-slot types set a separate `_untracked_has_others` flag. +Non-fixed-slot types set a separate `_touched_has_others` flag. ### Flow Diagram @@ -144,7 +144,7 @@ Non-fixed-slot types set a separate `_untracked_has_others` flag. │ A = _acquire_impl!(...) (macro-transformed, no mark) │ B = helper!(pool) │ └─► zeros!(pool, Float64, N) - │ └─► _mark_untracked!(pool, Float64) + │ └─► _record_type_touch!(pool, Float64) │ masks[2] |= 0x0001 (Float64 bit) ←───┐ │ │ │ ... more code ... │ @@ -161,9 +161,9 @@ end ### Why This Works -1. **Macro-tracked calls**: Transformed to `_acquire_impl!` → no bitmask mark → typed path -2. **Untracked calls**: Use public API → sets type-specific bitmask → subset check at rewind -3. **Subset optimization**: If untracked types are a subset of tracked types, the typed path is still safe +1. **Macro-tracked calls**: Transformed to `_acquire_impl!` → no bitmask touch → typed path +2. **External calls**: Use public API → records type-specific bitmask → subset check at rewind +3. **Subset optimization**: If touched types are a subset of tracked types, the typed path is still safe 4. **Result**: Always safe, with finer-grained optimization than a single boolean flag ## Nested `@with_pool` Handling @@ -191,14 +191,14 @@ end depth: 2 → 1, bitmask checked struct AdaptiveArrayPool # ... type pools ... _current_depth::Int # Current scope depth (1 = global) - _untracked_fixed_masks::Vector{UInt16} # Per-depth: which fixed slots untracked - _untracked_has_others::Vector{Bool} # Per-depth: any non-fixed-slot untracked + _touched_type_masks::Vector{UInt16} # Per-depth: which fixed slots were touched + _touched_has_others::Vector{Bool} # Per-depth: any non-fixed-slot type touched end # Initialized with sentinel: _current_depth = 1 # Global scope -_untracked_fixed_masks = [UInt16(0)] # Sentinel for depth=1 -_untracked_has_others = [false] # Sentinel for depth=1 +_touched_type_masks = [UInt16(0)] # Sentinel for depth=1 +_touched_has_others = [false] # Sentinel for depth=1 ``` ## Performance Impact @@ -256,7 +256,7 @@ end | `_extract_acquire_types(expr, pool_name)` | AST walk to find types | | `_filter_static_types(types, local_vars)` | Filter out locally-defined types | | `_transform_acquire_calls(expr, pool_name)` | Replace `acquire!` → `_acquire_impl!` | -| `_mark_untracked!(pool, T)` | Set type-specific bitmask for current depth | +| `_record_type_touch!(pool, T)` | Record type touch in bitmask for current depth | | `_can_use_typed_path(pool, mask)` | Bitmask subset check for typed vs full path | | `_tracked_mask_for_types(T...)` | Compile-time bitmask for tracked types | | `_generate_typed_checkpoint_call(pool, types)` | Generate bitmask-aware checkpoint | diff --git a/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl b/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl index 23cbb36..437d516 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl @@ -28,7 +28,8 @@ # ============================================================================== using AdaptiveArrayPools: get_view!, get_nd_view!, get_nd_array!, allocate_vector, safe_prod, - _mark_untracked!, _fixed_slot_bit, _checkpoint_typed_pool! + _record_type_touch!, _fixed_slot_bit, _checkpoint_typed_pool!, + _MODE_BITS_MASK """ get_view!(tp::CuTypedPool{T}, n::Int) -> CuVector{T} @@ -165,44 +166,44 @@ Used by `unsafe_acquire!` - same zero-allocation behavior as `acquire!`. end # ============================================================================== -# CUDA _mark_untracked! override (Issue #2 / #2a fix) +# CUDA _record_type_touch! override (Issue #2 / #2a fix) # ============================================================================== # Float16 on CUDA: direct struct field with _fixed_slot_bit(Float16)=0. # We track Float16 via bit 7 (CUDA reassignment; CPU uses bit 7 for Bit type, absent on GPU). # This gives Float16 lazy first-touch checkpointing in bit-14 (typed lazy) and bit-15 (dynamic) # modes, ensuring Case A (not Case B) fires at rewind and parent n_active is preserved. -@inline function AdaptiveArrayPools._mark_untracked!(pool::CuAdaptiveArrayPool, ::Type{T}) where {T} +@inline function AdaptiveArrayPools._record_type_touch!(pool::CuAdaptiveArrayPool, ::Type{T}) where {T} depth = pool._current_depth b = _fixed_slot_bit(T) if b == UInt16(0) if T === Float16 # Float16: CUDA direct field tracked via bit 7 (not in pool.others dict). b16 = UInt16(1) << 7 - current_mask = @inbounds pool._untracked_fixed_masks[depth] + current_mask = @inbounds pool._touched_type_masks[depth] # Lazy first-touch checkpoint: bit 14 (typed lazy) OR bit 15 (dynamic), first touch only. # Guard: skip if already checkpointed at this depth (prevents double-push). - if (current_mask & 0xC000) != 0 && (current_mask & b16) == 0 + if (current_mask & _MODE_BITS_MASK) != 0 && (current_mask & b16) == 0 if @inbounds(pool.float16._checkpoint_depths[end]) != depth _checkpoint_typed_pool!(pool.float16, depth) end end - @inbounds pool._untracked_fixed_masks[depth] = current_mask | b16 + @inbounds pool._touched_type_masks[depth] = current_mask | b16 else # Genuine others type (UInt8, Int8, etc.) — eagerly snapshotted at scope entry. - @inbounds pool._untracked_has_others[depth] = true + @inbounds pool._touched_has_others[depth] = true end else - current_mask = @inbounds pool._untracked_fixed_masks[depth] + current_mask = @inbounds pool._touched_type_masks[depth] # Lazy first-touch checkpoint for fixed-slot types in bit 14/15 modes. # Guard: skip if already checkpointed at this depth (prevents double-push). - if (current_mask & 0xC000) != 0 && (current_mask & b) == 0 + if (current_mask & _MODE_BITS_MASK) != 0 && (current_mask & b) == 0 tp = AdaptiveArrayPools.get_typed_pool!(pool, T) if @inbounds(tp._checkpoint_depths[end]) != depth _checkpoint_typed_pool!(tp, depth) end end - @inbounds pool._untracked_fixed_masks[depth] = current_mask | b + @inbounds pool._touched_type_masks[depth] = current_mask | b end nothing end diff --git a/ext/AdaptiveArrayPoolsCUDAExt/state.jl b/ext/AdaptiveArrayPoolsCUDAExt/state.jl index 23d4ba6..f5e572f 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/state.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/state.jl @@ -6,7 +6,8 @@ # AbstractTypedPool, so they work for CuTypedPool automatically. using AdaptiveArrayPools: checkpoint!, rewind!, reset!, - _checkpoint_typed_pool!, _rewind_typed_pool!, _has_bit + _checkpoint_typed_pool!, _rewind_typed_pool!, _has_bit, + _LAZY_MODE_BIT, _TYPED_LAZY_BIT, _TYPE_BITS_MASK # ============================================================================== # GPU Fixed Slot Iteration @@ -31,10 +32,10 @@ end # ============================================================================== function AdaptiveArrayPools.checkpoint!(pool::CuAdaptiveArrayPool) - # Increment depth and initialize untracked bitmask state + # Increment depth and initialize type-touch tracking state pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) depth = pool._current_depth # Fixed slots - zero allocation via @generated iteration @@ -53,8 +54,8 @@ end # Type-specific checkpoint (single type) @inline function AdaptiveArrayPools.checkpoint!(pool::CuAdaptiveArrayPool, ::Type{T}) where {T} pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) _checkpoint_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, T), pool._current_depth) nothing end @@ -72,8 +73,8 @@ end checkpoint_exprs = [:(_checkpoint_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices] quote pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) $(checkpoint_exprs...) nothing end @@ -102,8 +103,8 @@ function AdaptiveArrayPools.rewind!(pool::CuAdaptiveArrayPool) _rewind_typed_pool!(tp, cur_depth) end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 return nothing @@ -116,8 +117,8 @@ end return nothing end _rewind_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, T), pool._current_depth) - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -140,17 +141,17 @@ end return nothing end $(rewind_exprs...) - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end end # ============================================================================== -# Dynamic-Selective Mode for CuAdaptiveArrayPool (use_typed=false path) +# Lazy Mode for CuAdaptiveArrayPool (use_typed=false path) # ============================================================================== -# Mirrors CPU _depth_only_checkpoint! / _dynamic_selective_rewind! in src/state.jl. +# Mirrors CPU _lazy_checkpoint! / _lazy_rewind! in src/state.jl. # # Float16 on CUDA: direct struct field (not in pool.others dict), but _fixed_slot_bit(Float16)=0. # We reassign Float16 to bit 7 (unused on CUDA; CPU uses bit 7 for Bit type which has no GPU equivalent). @@ -160,25 +161,25 @@ end # Bit 7 on CUDA is reserved for Float16 (CPU uses it for Bit; Bit type does not exist on GPU). @inline _cuda_float16_bit() = UInt16(1) << 7 -@inline function AdaptiveArrayPools._depth_only_checkpoint!(pool::CuAdaptiveArrayPool) +@inline function AdaptiveArrayPools._lazy_checkpoint!(pool::CuAdaptiveArrayPool) pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0x8000)) # bit 15: dynamic-selective mode - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, _LAZY_MODE_BIT) # lazy mode flag + push!(pool._touched_has_others, false) depth = pool._current_depth - # Eagerly checkpoint pre-existing others entries — same as CPU _depth_only_checkpoint!. + # Eagerly checkpoint pre-existing others entries — same as CPU _lazy_checkpoint!. # New types created during the scope start at n_active=0 (sentinel covers them, Case B safe). # Pre-existing types need their count saved now so Case A fires correctly at rewind. for p in values(pool.others) _checkpoint_typed_pool!(p, depth) - @inbounds pool._untracked_has_others[depth] = true + @inbounds pool._touched_has_others[depth] = true end - # Float16 uses lazy first-touch via bit 7 in _mark_untracked! — no eager checkpoint needed. + # Float16 uses lazy first-touch via bit 7 in _record_type_touch! — no eager checkpoint needed. nothing end -@inline function AdaptiveArrayPools._dynamic_selective_rewind!(pool::CuAdaptiveArrayPool) +@inline function AdaptiveArrayPools._lazy_rewind!(pool::CuAdaptiveArrayPool) d = pool._current_depth - mask = @inbounds(pool._untracked_fixed_masks[d]) & UInt16(0x00FF) + mask = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK _has_bit(mask, Float64) && _rewind_typed_pool!(pool.float64, d) _has_bit(mask, Float32) && _rewind_typed_pool!(pool.float32, d) _has_bit(mask, Int64) && _rewind_typed_pool!(pool.int64, d) @@ -188,13 +189,13 @@ end _has_bit(mask, Bool) && _rewind_typed_pool!(pool.bool, d) # Bit 7: Float16 (CUDA reassignment — _fixed_slot_bit(Float16)==0, must use explicit bit check) mask & _cuda_float16_bit() != 0 && _rewind_typed_pool!(pool.float16, d) - if @inbounds(pool._untracked_has_others[d]) + if @inbounds(pool._touched_has_others[d]) for tp in values(pool.others) _rewind_typed_pool!(tp, d) end end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -203,33 +204,33 @@ end # Typed-Fallback Helpers for CuAdaptiveArrayPool (Phase 5 parity) # ============================================================================== -# _typed_checkpoint_with_lazy!: typed checkpoint + set bit 14 for lazy extra-type tracking. +# _typed_lazy_checkpoint!: typed checkpoint + set bit 14 for lazy extra-type tracking. # Also eagerly snapshots pre-existing others entries (mirrors CPU fix for Issue #3). -@inline function AdaptiveArrayPools._typed_checkpoint_with_lazy!(pool::CuAdaptiveArrayPool, types::Type...) +@inline function AdaptiveArrayPools._typed_lazy_checkpoint!(pool::CuAdaptiveArrayPool, types::Type...) checkpoint!(pool, types...) d = pool._current_depth - @inbounds pool._untracked_fixed_masks[d] |= UInt16(0x4000) # set bit 14 - # Eagerly snapshot pre-existing others entries — same reasoning as _depth_only_checkpoint!. + @inbounds pool._touched_type_masks[d] |= _TYPED_LAZY_BIT + # Eagerly snapshot pre-existing others entries — same reasoning as _lazy_checkpoint!. # Skip re-snapshot for entries already checkpointed at d by checkpoint!(pool, types...) # (e.g. Float16 in types... was just checkpointed above — avoid double-push). for p in values(pool.others) if @inbounds(p._checkpoint_depths[end]) != d _checkpoint_typed_pool!(p, d) end - @inbounds pool._untracked_has_others[d] = true + @inbounds pool._touched_has_others[d] = true end - # Float16 uses lazy first-touch via bit 7 in _mark_untracked! — no eager checkpoint needed. + # Float16 uses lazy first-touch via bit 7 in _record_type_touch! — no eager checkpoint needed. nothing end -# _typed_selective_rewind!: selective rewind of (tracked | untracked) mask. +# _typed_lazy_rewind!: selective rewind of (tracked | touched) mask. # Uses direct field access with bit checks — foreach_fixed_slot is single-argument (no bit yield). -# Bit 7: Float16 (CUDA-specific; lazy-checkpointed on first touch by _mark_untracked!). +# Bit 7: Float16 (CUDA-specific; lazy-checkpointed on first touch by _record_type_touch!). # has_others: genuine others types (UInt8, Int8, etc.) — eagerly checkpointed at scope entry. -@inline function AdaptiveArrayPools._typed_selective_rewind!(pool::CuAdaptiveArrayPool, tracked_mask::UInt16) +@inline function AdaptiveArrayPools._typed_lazy_rewind!(pool::CuAdaptiveArrayPool, tracked_mask::UInt16) d = pool._current_depth - untracked = @inbounds(pool._untracked_fixed_masks[d]) & UInt16(0x00FF) - combined = tracked_mask | untracked + touched = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK + combined = tracked_mask | touched _has_bit(combined, Float64) && _rewind_typed_pool!(pool.float64, d) _has_bit(combined, Float32) && _rewind_typed_pool!(pool.float32, d) _has_bit(combined, Int64) && _rewind_typed_pool!(pool.int64, d) @@ -237,23 +238,23 @@ end _has_bit(combined, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d) _has_bit(combined, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d) _has_bit(combined, Bool) && _rewind_typed_pool!(pool.bool, d) - # Float16: bit 7 is set by _mark_untracked! on first untracked touch (lazy first-touch). - # Also rewind when Float16 was a *tracked* type in the macro: _typed_checkpoint_with_lazy! + # Float16: bit 7 is set by _record_type_touch! on first touch (lazy first-touch). + # Also rewind when Float16 was a *tracked* type in the macro: _typed_lazy_checkpoint! # calls checkpoint!(pool, Float16) which pushes a checkpoint at depth d, but _acquire_impl! - # (macro transform) bypasses _mark_untracked!, leaving bit 7 = 0. + # (macro transform) bypasses _record_type_touch!, leaving bit 7 = 0. # _tracked_mask_for_types(Float16) == 0 (since _fixed_slot_bit(Float16) == 0), so # tracked_mask carries no bit for Float16 either. # Solution: check _checkpoint_depths to detect "Float16 was checkpointed at this depth". if combined & _cuda_float16_bit() != 0 || @inbounds(pool.float16._checkpoint_depths[end]) == d _rewind_typed_pool!(pool.float16, d) end - if @inbounds(pool._untracked_has_others[d]) + if @inbounds(pool._touched_has_others[d]) for tp in values(pool.others) _rewind_typed_pool!(tp, d) end end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -275,10 +276,10 @@ function AdaptiveArrayPools.reset!(pool::CuAdaptiveArrayPool) # Reset depth and bitmask sentinel state pool._current_depth = 1 - empty!(pool._untracked_fixed_masks) - push!(pool._untracked_fixed_masks, UInt16(0)) # Sentinel: no bits set - empty!(pool._untracked_has_others) - push!(pool._untracked_has_others, false) # Sentinel: no others + empty!(pool._touched_type_masks) + push!(pool._touched_type_masks, UInt16(0)) # Sentinel: no bits set + empty!(pool._touched_has_others) + push!(pool._touched_has_others, false) # Sentinel: no others return pool end @@ -334,10 +335,10 @@ function Base.empty!(pool::CuAdaptiveArrayPool) # Reset depth and bitmask sentinel state pool._current_depth = 1 - empty!(pool._untracked_fixed_masks) - push!(pool._untracked_fixed_masks, UInt16(0)) # Sentinel: no bits set - empty!(pool._untracked_has_others) - push!(pool._untracked_has_others, false) # Sentinel: no others + empty!(pool._touched_type_masks) + push!(pool._touched_type_masks, UInt16(0)) # Sentinel: no bits set + empty!(pool._touched_has_others) + push!(pool._touched_has_others, false) # Sentinel: no others return pool end diff --git a/ext/AdaptiveArrayPoolsCUDAExt/types.jl b/ext/AdaptiveArrayPoolsCUDAExt/types.jl index 056bd18..a3673f2 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/types.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/types.jl @@ -112,8 +112,8 @@ mutable struct CuAdaptiveArrayPool <: AbstractArrayPool # State management (same as CPU) _current_depth::Int - _untracked_fixed_masks::Vector{UInt16} # Per-depth: which fixed slots had untracked acquires - _untracked_has_others::Vector{Bool} # Per-depth: any non-fixed-slot untracked acquire? + _touched_type_masks::Vector{UInt16} # Per-depth: which fixed slots were touched + mode flags + _touched_has_others::Vector{Bool} # Per-depth: any non-fixed-slot type touched? # Device tracking (safety) device_id::Int @@ -132,8 +132,8 @@ function CuAdaptiveArrayPool() CuTypedPool{Bool}(), IdDict{DataType, Any}(), 1, # _current_depth (1 = global scope) - [UInt16(0)], # _untracked_fixed_masks: sentinel (no bits set) - [false], # _untracked_has_others: sentinel (no others) + [UInt16(0)], # _touched_type_masks: sentinel (no bits set) + [false], # _touched_has_others: sentinel (no others) CUDA.deviceid(dev) # Use public API ) end diff --git a/src/acquire.jl b/src/acquire.jl index 716517d..b0326f1 100644 --- a/src/acquire.jl +++ b/src/acquire.jl @@ -160,53 +160,54 @@ Get an N-dimensional view via `reshape` (zero creation cost). end # ============================================================================== -# Untracked Acquire Detection +# Type Touch Recording (for selective rewind) # ============================================================================== """ - _mark_untracked!(pool::AbstractArrayPool, ::Type{T}) + _record_type_touch!(pool::AbstractArrayPool, ::Type{T}) -Mark that an untracked acquire of type `T` has occurred at the current checkpoint depth. -Called by `acquire!` wrapper; macro-transformed calls use `_acquire_impl!` directly. +Record that type `T` was touched (acquired) at the current checkpoint depth. +Called by `acquire!` and convenience wrappers; macro-transformed calls use +`_acquire_impl!` directly (bypassing this for zero overhead). -For fixed-slot types, sets the corresponding bit in `_untracked_fixed_masks`. -For non-fixed-slot types, sets `_untracked_has_others` flag. +For fixed-slot types, sets the corresponding bit in `_touched_type_masks`. +For non-fixed-slot types, sets `_touched_has_others` flag. """ -@inline function _mark_untracked!(pool::AbstractArrayPool, ::Type{T}) where {T} +@inline function _record_type_touch!(pool::AbstractArrayPool, ::Type{T}) where {T} depth = pool._current_depth b = _fixed_slot_bit(T) if b == UInt16(0) - @inbounds pool._untracked_has_others[depth] = true + @inbounds pool._touched_has_others[depth] = true else - @inbounds pool._untracked_fixed_masks[depth] |= b + @inbounds pool._touched_type_masks[depth] |= b end nothing end -# CPU-specific override: adds lazy first-touch checkpoint in dynamic-selective mode +# CPU-specific override: adds lazy first-touch checkpoint in lazy mode # and typed-lazy mode. -# Bit 15 of _untracked_fixed_masks[depth] == 1 ↔ depth entered via _depth_only_checkpoint! -# Bit 14 of _untracked_fixed_masks[depth] == 1 ↔ depth entered via _typed_checkpoint_with_lazy! +# _LAZY_MODE_BIT (bit 15) in _touched_type_masks[depth] ↔ depth entered via _lazy_checkpoint! +# _TYPED_LAZY_BIT (bit 14) in _touched_type_masks[depth] ↔ depth entered via _typed_lazy_checkpoint! # On the first acquire of each fixed-slot type T at that depth, we retroactively save # n_active BEFORE the acquire (current value is still the parent's count), so that # the subsequent rewind can restore the parent's state correctly. -@inline function _mark_untracked!(pool::AdaptiveArrayPool, ::Type{T}) where {T} +@inline function _record_type_touch!(pool::AdaptiveArrayPool, ::Type{T}) where {T} depth = pool._current_depth b = _fixed_slot_bit(T) if b == UInt16(0) - @inbounds pool._untracked_has_others[depth] = true + @inbounds pool._touched_has_others[depth] = true else - current_mask = @inbounds pool._untracked_fixed_masks[depth] - # Lazy checkpoint: dynamic mode (bit 15) OR typed lazy mode (bit 14), AND first touch. + current_mask = @inbounds pool._touched_type_masks[depth] + # Lazy checkpoint: lazy mode (bit 15) OR typed lazy mode (bit 14), AND first touch. # Guard: skip if already checkpointed at this depth (prevents double-push when a - # tracked type is also acquired by a helper via acquire! → _mark_untracked!). - if (current_mask & 0xC000) != 0 && (current_mask & b) == 0 + # tracked type is also acquired by a helper via acquire! → _record_type_touch!). + if (current_mask & _MODE_BITS_MASK) != 0 && (current_mask & b) == 0 tp = get_typed_pool!(pool, T) if @inbounds(tp._checkpoint_depths[end]) != depth _checkpoint_typed_pool!(tp, depth) end end - @inbounds pool._untracked_fixed_masks[depth] = current_mask | b + @inbounds pool._touched_type_masks[depth] = current_mask | b end nothing end @@ -220,7 +221,7 @@ end _acquire_impl!(pool, Type{T}, dims...) -> ReshapedArray{T,N,...} Internal implementation of acquire!. Called directly by macro-transformed code -(no untracked marking). User code calls `acquire!` which adds marking. +(no type touch recording). User code calls `acquire!` which adds recording. """ @inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T} tp = get_typed_pool!(pool, T) @@ -263,7 +264,7 @@ end @inline _unsafe_acquire_impl!(pool::AbstractArrayPool, x::AbstractArray) = _unsafe_acquire_impl!(pool, eltype(x), size(x)) # ============================================================================== -# Acquisition API (User-facing with untracked marking) +# Acquisition API (User-facing with type touch recording) # ============================================================================== """ @@ -299,19 +300,19 @@ end See also: [`unsafe_acquire!`](@ref) for native array access. """ @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _acquire_impl!(pool, T, n) end # Multi-dimensional support (zero-allocation with N-D cache) @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _acquire_impl!(pool, T, dims...) end # Tuple support: allows acquire!(pool, T, size(A)) where size(A) returns NTuple{N,Int} @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _acquire_impl!(pool, T, dims...) end @@ -331,7 +332,7 @@ end ``` """ @inline function acquire!(pool::AbstractArrayPool, x::AbstractArray) - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _acquire_impl!(pool, eltype(x), size(x)) end @@ -386,18 +387,18 @@ end See also: [`acquire!`](@ref) for view-based access. """ @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_acquire_impl!(pool, T, n) end @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_acquire_impl!(pool, T, dims...) end # Tuple support @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_acquire_impl!(pool, T, dims) end @@ -417,7 +418,7 @@ end ``` """ @inline function unsafe_acquire!(pool::AbstractArrayPool, x::AbstractArray) - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _unsafe_acquire_impl!(pool, eltype(x), size(x)) end diff --git a/src/convenience.jl b/src/convenience.jl index 62d919f..053f8cb 100644 --- a/src/convenience.jl +++ b/src/convenience.jl @@ -43,22 +43,22 @@ end See also: [`ones!`](@ref), [`similar!`](@ref), [`acquire!`](@ref) """ @inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _zeros_impl!(pool, T, dims...) end @inline function zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _zeros_impl!(pool, default_eltype(pool), dims...) end @inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _zeros_impl!(pool, T, dims...) end @inline function zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _zeros_impl!(pool, default_eltype(pool), dims...) end @@ -116,22 +116,22 @@ end See also: [`zeros!`](@ref), [`similar!`](@ref), [`acquire!`](@ref) """ @inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _ones_impl!(pool, T, dims...) end @inline function ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _ones_impl!(pool, default_eltype(pool), dims...) end @inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _ones_impl!(pool, T, dims...) end @inline function ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _ones_impl!(pool, default_eltype(pool), dims...) end @@ -186,11 +186,11 @@ end See also: [`falses!`](@ref), [`ones!`](@ref), [`acquire!`](@ref) """ @inline function trues!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, Bit) + _record_type_touch!(pool, Bit) _trues_impl!(pool, dims...) end @inline function trues!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, Bit) + _record_type_touch!(pool, Bit) _trues_impl!(pool, dims...) end @@ -226,11 +226,11 @@ end See also: [`trues!`](@ref), [`zeros!`](@ref), [`acquire!`](@ref) """ @inline function falses!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, Bit) + _record_type_touch!(pool, Bit) _falses_impl!(pool, dims...) end @inline function falses!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, Bit) + _record_type_touch!(pool, Bit) _falses_impl!(pool, dims...) end @@ -273,22 +273,22 @@ end See also: [`zeros!`](@ref), [`ones!`](@ref), [`acquire!`](@ref) """ @inline function similar!(pool::AbstractArrayPool, x::AbstractArray) - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _similar_impl!(pool, x) end @inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _similar_impl!(pool, x, T) end @inline function similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _similar_impl!(pool, x, dims...) end @inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _similar_impl!(pool, x, T, dims...) end @@ -336,22 +336,22 @@ end See also: [`unsafe_ones!`](@ref), [`zeros!`](@ref), [`unsafe_acquire!`](@ref) """ @inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_zeros_impl!(pool, T, dims...) end @inline function unsafe_zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) end @inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_zeros_impl!(pool, T, dims...) end @inline function unsafe_zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) end @@ -403,22 +403,22 @@ end See also: [`unsafe_zeros!`](@ref), [`ones!`](@ref), [`unsafe_acquire!`](@ref) """ @inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_ones_impl!(pool, T, dims...) end @inline function unsafe_ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _unsafe_ones_impl!(pool, default_eltype(pool), dims...) end @inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_ones_impl!(pool, T, dims...) end @inline function unsafe_ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _mark_untracked!(pool, default_eltype(pool)) + _record_type_touch!(pool, default_eltype(pool)) _unsafe_ones_impl!(pool, default_eltype(pool), dims...) end @@ -473,22 +473,22 @@ end See also: [`similar!`](@ref), [`unsafe_acquire!`](@ref) """ @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray) - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _unsafe_similar_impl!(pool, x) end @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_similar_impl!(pool, x, T) end @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} - _mark_untracked!(pool, eltype(x)) + _record_type_touch!(pool, eltype(x)) _unsafe_similar_impl!(pool, x, dims...) end @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _mark_untracked!(pool, T) + _record_type_touch!(pool, T) _unsafe_similar_impl!(pool, x, T, dims...) end diff --git a/src/macros.jl b/src/macros.jl index 28fba7a..21a7599 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -337,20 +337,20 @@ function _generate_pool_code(pool_name, expr, force_enable; source::Union{LineNu # Use typed checkpoint/rewind if all types are static, otherwise fallback to full use_typed = !has_dynamic && !isempty(static_types) - # For typed path: transform acquire! → _acquire_impl! (bypasses untracked marking) - # For dynamic path: keep acquire! untransformed so _mark_untracked! is called + # For typed path: transform acquire! → _acquire_impl! (bypasses type touch recording) + # For dynamic path: keep acquire! untransformed so _record_type_touch! is called transformed_expr = use_typed ? _transform_acquire_calls(expr, pool_name) : expr if use_typed checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types) else - checkpoint_call = _generate_dynamic_selective_checkpoint_call(esc(pool_name)) + checkpoint_call = _generate_lazy_checkpoint_call(esc(pool_name)) end if use_typed rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types) else - rewind_call = _generate_dynamic_selective_rewind_call(esc(pool_name)) + rewind_call = _generate_lazy_rewind_call(esc(pool_name)) end if force_enable @@ -429,8 +429,8 @@ function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, forc local_vars = _extract_local_assignments(expr) static_types, has_dynamic = _filter_static_types(all_types, local_vars) use_typed = !has_dynamic && !isempty(static_types) - # For typed path: transform acquire! → _acquire_impl! (bypasses untracked marking) - # For dynamic path: keep acquire! untransformed so _mark_untracked! is called + # For typed path: transform acquire! → _acquire_impl! (bypasses type touch recording) + # For dynamic path: keep acquire! untransformed so _record_type_touch! is called transformed_expr = use_typed ? _transform_acquire_calls(expr, pool_name) : expr pool_getter = :($_get_pool_for_backend($(Val{backend}()))) @@ -438,8 +438,8 @@ function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, forc checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types) rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types) else - checkpoint_call = _generate_dynamic_selective_checkpoint_call(esc(pool_name)) - rewind_call = _generate_dynamic_selective_rewind_call(esc(pool_name)) + checkpoint_call = _generate_lazy_checkpoint_call(esc(pool_name)) + rewind_call = _generate_lazy_rewind_call(esc(pool_name)) end return quote @@ -475,8 +475,8 @@ function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, forc # Use typed checkpoint/rewind if all types are static, otherwise fallback to full use_typed = !has_dynamic && !isempty(static_types) - # For typed path: transform acquire! → _acquire_impl! (bypasses untracked marking) - # For dynamic path: keep acquire! untransformed so _mark_untracked! is called + # For typed path: transform acquire! → _acquire_impl! (bypasses type touch recording) + # For dynamic path: keep acquire! untransformed so _record_type_touch! is called transformed_expr = use_typed ? _transform_acquire_calls(expr, pool_name) : expr # Use Val{backend}() for compile-time dispatch - fully inlinable @@ -485,13 +485,13 @@ function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, forc if use_typed checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types) else - checkpoint_call = _generate_dynamic_selective_checkpoint_call(esc(pool_name)) + checkpoint_call = _generate_lazy_checkpoint_call(esc(pool_name)) end if use_typed rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types) else - rewind_call = _generate_dynamic_selective_rewind_call(esc(pool_name)) + rewind_call = _generate_lazy_rewind_call(esc(pool_name)) end return quote @@ -537,8 +537,8 @@ function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, f static_types, has_dynamic = _filter_static_types(all_types, local_vars) use_typed = !has_dynamic && !isempty(static_types) - # For typed path: transform acquire! → _acquire_impl! (bypasses untracked marking) - # For dynamic path: keep acquire! untransformed so _mark_untracked! is called + # For typed path: transform acquire! → _acquire_impl! (bypasses type touch recording) + # For dynamic path: keep acquire! untransformed so _record_type_touch! is called transformed_body = use_typed ? _transform_acquire_calls(body, pool_name) : body # Use Val{backend}() for compile-time dispatch @@ -547,13 +547,13 @@ function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, f if use_typed checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types) else - checkpoint_call = _generate_dynamic_selective_checkpoint_call(esc(pool_name)) + checkpoint_call = _generate_lazy_checkpoint_call(esc(pool_name)) end if use_typed rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types) else - rewind_call = _generate_dynamic_selective_rewind_call(esc(pool_name)) + rewind_call = _generate_lazy_rewind_call(esc(pool_name)) end new_body = quote @@ -594,20 +594,20 @@ function _generate_function_pool_code(pool_name, func_def, force_enable, disable static_types, has_dynamic = _filter_static_types(all_types, local_vars) use_typed = !has_dynamic && !isempty(static_types) - # For typed path: transform acquire! → _acquire_impl! (bypasses untracked marking) - # For dynamic path: keep acquire! untransformed so _mark_untracked! is called + # For typed path: transform acquire! → _acquire_impl! (bypasses type touch recording) + # For dynamic path: keep acquire! untransformed so _record_type_touch! is called transformed_body = use_typed ? _transform_acquire_calls(body, pool_name) : body if use_typed checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types) else - checkpoint_call = _generate_dynamic_selective_checkpoint_call(esc(pool_name)) + checkpoint_call = _generate_lazy_checkpoint_call(esc(pool_name)) end if use_typed rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types) else - rewind_call = _generate_dynamic_selective_rewind_call(esc(pool_name)) + rewind_call = _generate_lazy_rewind_call(esc(pool_name)) end if force_enable @@ -910,8 +910,8 @@ end Generate bitmask-aware checkpoint call. When types are known at compile time, emits a conditional: -- if untracked types ⊆ tracked types → typed checkpoint (fast path) -- otherwise → `_typed_checkpoint_with_lazy!` (typed checkpoint + set bit 14 for +- if touched types ⊆ tracked types → typed checkpoint (fast path) +- otherwise → `_typed_lazy_checkpoint!` (typed checkpoint + set bit 14 for lazy first-touch checkpointing of extra types touched by helpers) """ function _generate_typed_checkpoint_call(pool_expr, types) @@ -920,7 +920,7 @@ function _generate_typed_checkpoint_call(pool_expr, types) else escaped_types = [esc(t) for t in types] typed_call = :($checkpoint!($pool_expr, $(escaped_types...))) - lazy_call = :($_typed_checkpoint_with_lazy!($pool_expr, $(escaped_types...))) + lazy_call = :($_typed_lazy_checkpoint!($pool_expr, $(escaped_types...))) return quote if $_can_use_typed_path($pool_expr, $_tracked_mask_for_types($(escaped_types...))) $typed_call @@ -936,8 +936,8 @@ end Generate bitmask-aware rewind call. When types are known at compile time, emits a conditional: -- if untracked types ⊆ tracked types → typed rewind (fast path) -- otherwise → `_typed_selective_rewind!` (rewinds tracked | untracked mask; +- if touched types ⊆ tracked types → typed rewind (fast path) +- otherwise → `_typed_lazy_rewind!` (rewinds tracked | touched mask; all touched types have Case A checkpoints via bit 14 lazy mode) """ function _generate_typed_rewind_call(pool_expr, types) @@ -946,7 +946,7 @@ function _generate_typed_rewind_call(pool_expr, types) else escaped_types = [esc(t) for t in types] typed_call = :($rewind!($pool_expr, $(escaped_types...))) - selective_call = :($_typed_selective_rewind!($pool_expr, + selective_call = :($_typed_lazy_rewind!($pool_expr, $_tracked_mask_for_types($(escaped_types...)))) return quote if $_can_use_typed_path($pool_expr, $_tracked_mask_for_types($(escaped_types...))) @@ -959,25 +959,25 @@ function _generate_typed_rewind_call(pool_expr, types) end """ - _generate_dynamic_selective_checkpoint_call(pool_expr) + _generate_lazy_checkpoint_call(pool_expr) Generate a depth-only checkpoint call for dynamic-selective mode (`use_typed=false`). Much lighter than full `checkpoint!`: only increments depth and pushes bitmask sentinels. """ -function _generate_dynamic_selective_checkpoint_call(pool_expr) - return :($_depth_only_checkpoint!($pool_expr)) +function _generate_lazy_checkpoint_call(pool_expr) + return :($_lazy_checkpoint!($pool_expr)) end """ - _generate_dynamic_selective_rewind_call(pool_expr) + _generate_lazy_rewind_call(pool_expr) Generate selective rewind code for dynamic-selective mode (`use_typed=false`). -Delegates to `_dynamic_selective_rewind!` — a single function call, symmetric -with `_depth_only_checkpoint!` for checkpoint. This avoids `let`-block overhead +Delegates to `_lazy_rewind!` — a single function call, symmetric +with `_lazy_checkpoint!` for checkpoint. This avoids `let`-block overhead in `finally` clauses (which can impair Julia's type inference and cause boxing). """ -function _generate_dynamic_selective_rewind_call(pool_expr) - return :($_dynamic_selective_rewind!($pool_expr)) +function _generate_lazy_rewind_call(pool_expr) + return :($_lazy_rewind!($pool_expr)) end @@ -991,7 +991,7 @@ end Transform acquire!/unsafe_acquire!/convenience function calls to their _impl! counterparts. Only transforms calls where the first argument matches `pool_name`. -This allows macro-transformed code to bypass the untracked marking overhead, +This allows macro-transformed code to bypass the type touch recording overhead, since the macro already knows about these calls at compile time. Transformation rules: diff --git a/src/state.jl b/src/state.jl index 119319c..86b27d9 100644 --- a/src/state.jl +++ b/src/state.jl @@ -13,10 +13,10 @@ After warmup, this function has **zero allocation**. See also: [`rewind!`](@ref), [`@with_pool`](@ref) """ function checkpoint!(pool::AdaptiveArrayPool) - # Increment depth and initialize untracked bitmask state + # Increment depth and initialize type touch tracking state pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) depth = pool._current_depth # Fixed slots - zero allocation via @generated iteration @@ -38,14 +38,14 @@ end Save state for a specific type only. Used by optimized macros that know which types will be used at compile time. -Also updates _current_depth and bitmask state for untracked acquire detection. +Also updates _current_depth and bitmask state for type touch tracking. ~77% faster than full checkpoint! when only one type is used. """ @inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where T pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) _checkpoint_typed_pool!(get_typed_pool!(pool, T), pool._current_depth) nothing end @@ -69,8 +69,8 @@ compile-time unrolling. Increments _current_depth once for all types. checkpoint_exprs = [:(_checkpoint_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices] quote pool._current_depth += 1 - push!(pool._untracked_fixed_masks, UInt16(0)) - push!(pool._untracked_has_others, false) + push!(pool._touched_type_masks, UInt16(0)) + push!(pool._touched_has_others, false) $(checkpoint_exprs...) nothing end @@ -84,14 +84,14 @@ end end """ - _depth_only_checkpoint!(pool::AdaptiveArrayPool) + _lazy_checkpoint!(pool::AdaptiveArrayPool) -Lightweight checkpoint for dynamic-selective mode (`use_typed=false` macro path). +Lightweight checkpoint for lazy mode (`use_typed=false` macro path). Increments `_current_depth` and pushes bitmask sentinels — but does **not** save -`n_active` for any fixed-slot typed pool. The mode flag (bit 15) in -`_untracked_fixed_masks` marks this depth as dynamic-selective so that -`_mark_untracked!` can trigger lazy first-touch checkpoints. +`n_active` for any fixed-slot typed pool. The `_LAZY_MODE_BIT` (bit 15) in +`_touched_type_masks` marks this depth as lazy mode so that +`_record_type_touch!` can trigger lazy first-touch checkpoints. Existing `others` entries are eagerly checkpointed since there is no per-type tracking for non-fixed-slot pools; Case B in `_rewind_typed_pool!` handles any @@ -99,17 +99,17 @@ new `others` entries created during the scope (n_active starts at 0 = sentinel). Performance: ~2ns vs ~540ns for full `checkpoint!`. """ -@inline function _depth_only_checkpoint!(pool::AdaptiveArrayPool) +@inline function _lazy_checkpoint!(pool::AdaptiveArrayPool) pool._current_depth += 1 - # Bit 15 = dynamic-selective mode flag (bits 0–7 are fixed-slot bits) - push!(pool._untracked_fixed_masks, UInt16(0x8000)) - push!(pool._untracked_has_others, false) + # _LAZY_MODE_BIT = lazy mode flag (bits 0–7 are fixed-slot type bits) + push!(pool._touched_type_masks, _LAZY_MODE_BIT) + push!(pool._touched_has_others, false) depth = pool._current_depth # Eagerly checkpoint any pre-existing others entries. # New others types created during the scope start at n_active=0 (sentinel covers them). for p in values(pool.others) _checkpoint_typed_pool!(p, depth) - @inbounds pool._untracked_has_others[depth] = true + @inbounds pool._touched_has_others[depth] = true end nothing end @@ -125,7 +125,7 @@ Restore the pool state (n_active counters) from internal stacks. Uses _checkpoint_depths to accurately determine which entries to pop vs restore. Only the counters are restored; allocated memory remains for reuse. -Handles untracked acquires by checking _checkpoint_depths for accurate restoration. +Handles touched types by checking _checkpoint_depths for accurate restoration. **Safety**: If called at global scope (depth=1, no pending checkpoints), automatically delegates to `reset!` to safely clear all n_active counters. @@ -152,8 +152,8 @@ function rewind!(pool::AdaptiveArrayPool) _rewind_typed_pool!(tp, cur_depth) end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 return nothing @@ -172,8 +172,8 @@ Also updates _current_depth and bitmask state. return nothing end _rewind_typed_pool!(get_typed_pool!(pool, T), pool._current_depth) - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -203,8 +203,8 @@ Decrements _current_depth once after all types are rewound. return nothing end $(rewind_exprs...) - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -239,89 +239,89 @@ end end """ - _dynamic_selective_rewind!(pool::AdaptiveArrayPool) + _lazy_rewind!(pool::AdaptiveArrayPool) -Complete rewind for dynamic-selective mode (`use_typed=false` macro path). +Complete rewind for lazy mode (`use_typed=false` macro path). Reads the combined mask at the current depth, rewinds only the fixed-slot pools whose bits are set, handles any `others` entries, then pops the depth metadata. Called directly from the macro-generated `finally` clause as a single function call -(matching the structure of `_depth_only_checkpoint!` for symmetry and performance). +(matching the structure of `_lazy_checkpoint!` for symmetry and performance). """ -@inline function _dynamic_selective_rewind!(pool::AdaptiveArrayPool) +@inline function _lazy_rewind!(pool::AdaptiveArrayPool) d = pool._current_depth - bits = @inbounds(pool._untracked_fixed_masks[d]) & UInt16(0x00FF) + bits = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK _selective_rewind_fixed_slots!(pool, bits) - if @inbounds(pool._untracked_has_others[d]) + if @inbounds(pool._touched_has_others[d]) for tp in values(pool.others) _rewind_typed_pool!(tp, d) end end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end """ - _typed_checkpoint_with_lazy!(pool::AdaptiveArrayPool, types::Type...) + _typed_lazy_checkpoint!(pool::AdaptiveArrayPool, types::Type...) Typed checkpoint that enables lazy first-touch checkpointing for extra types touched by helpers (`use_typed=true`, `_can_use_typed_path=false` path). Calls `checkpoint!(pool, types...)` (checkpoints only the statically-known types), -then sets bit 14 (`0x4000`) in `_untracked_fixed_masks[depth]` to signal typed lazy mode. +then sets `_TYPED_LAZY_BIT` (bit 14) in `_touched_type_masks[depth]` to signal typed lazy mode. -`_mark_untracked!` checks `(mask & 0xC000) != 0` (bit 14 OR bit 15) to trigger a +`_record_type_touch!` checks `(mask & _MODE_BITS_MASK) != 0` (bit 14 OR bit 15) to trigger a lazy first-touch checkpoint for each extra type on first acquire, ensuring Case A (not Case B) applies at rewind and parent `n_active` is preserved correctly. """ -@inline function _typed_checkpoint_with_lazy!(pool::AdaptiveArrayPool, types::Type...) +@inline function _typed_lazy_checkpoint!(pool::AdaptiveArrayPool, types::Type...) checkpoint!(pool, types...) d = pool._current_depth - @inbounds pool._untracked_fixed_masks[d] |= UInt16(0x4000) # set bit 14 - # Eagerly snapshot pre-existing others entries — mirrors _depth_only_checkpoint!. - # _mark_untracked! cannot lazy-checkpoint others types (b==0 branch, no per-type bit). + @inbounds pool._touched_type_masks[d] |= _TYPED_LAZY_BIT + # Eagerly snapshot pre-existing others entries — mirrors _lazy_checkpoint!. + # _record_type_touch! cannot lazy-checkpoint others types (b==0 branch, no per-type bit). # Without this, a helper that re-acquires an already-active others type triggers Case B # at rewind and restores the wrong parent n_active value. # - # Also set has_others=true when pool.others is non-empty, so _typed_selective_rewind! + # Also set has_others=true when pool.others is non-empty, so _typed_lazy_rewind! # enters the others loop even for tracked non-fixed-slot types (e.g. CPU Float16) that - # used _acquire_impl! (bypassing _mark_untracked!, leaving has_others=false otherwise). + # used _acquire_impl! (bypassing _record_type_touch!, leaving has_others=false otherwise). # Skip re-snapshot for entries already checkpointed at d by checkpoint!(pool, types...) # (e.g. Float16 in types... was just checkpointed above — avoid double-push). for p in values(pool.others) if @inbounds(p._checkpoint_depths[end]) != d _checkpoint_typed_pool!(p, d) end - @inbounds pool._untracked_has_others[d] = true + @inbounds pool._touched_has_others[d] = true end nothing end """ - _typed_selective_rewind!(pool::AdaptiveArrayPool, tracked_mask::UInt16) + _typed_lazy_rewind!(pool::AdaptiveArrayPool, tracked_mask::UInt16) Selective rewind for typed mode (`use_typed=true`) fallback path. Called when `_can_use_typed_path` returns false (helpers touched types beyond the statically-tracked set). Rewinds only pools whose bits are set in -`tracked_mask | untracked_mask`. All touched types have Case A checkpoints, -guaranteed by the bit 14 lazy mode set in `_typed_checkpoint_with_lazy!`. +`tracked_mask | touched_mask`. All touched types have Case A checkpoints, +guaranteed by the `_TYPED_LAZY_BIT` mode set in `_typed_lazy_checkpoint!`. """ -@inline function _typed_selective_rewind!(pool::AdaptiveArrayPool, tracked_mask::UInt16) +@inline function _typed_lazy_rewind!(pool::AdaptiveArrayPool, tracked_mask::UInt16) d = pool._current_depth - untracked = @inbounds(pool._untracked_fixed_masks[d]) & UInt16(0x00FF) - combined = tracked_mask | untracked + touched = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK + combined = tracked_mask | touched _selective_rewind_fixed_slots!(pool, combined) - if @inbounds(pool._untracked_has_others[d]) + if @inbounds(pool._touched_has_others[d]) for tp in values(pool.others) _rewind_typed_pool!(tp, d) end end - pop!(pool._untracked_fixed_masks) - pop!(pool._untracked_has_others) + pop!(pool._touched_type_masks) + pop!(pool._touched_has_others) pool._current_depth -= 1 nothing end @@ -333,7 +333,7 @@ Rewind only the fixed-slot typed pools whose bits are set in `mask`. Each of the 8 fixed-slot pools maps to bits 0–7 (same encoding as `_fixed_slot_bit`). Bits 8–15 (mode flags) are **not** checked here — callers must strip them -before passing the mask (e.g. `mask & UInt16(0x00FF)`). +before passing the mask (e.g. `mask & _TYPE_BITS_MASK`). Unset bits are skipped entirely: for pools that were acquired without a matching checkpoint, `_rewind_typed_pool!` Case B safely restores from the parent checkpoint. @@ -432,12 +432,12 @@ function Base.empty!(pool::AdaptiveArrayPool) end empty!(pool.others) - # Reset untracked detection state (1-based sentinel pattern) + # Reset type touch tracking state (1-based sentinel pattern) pool._current_depth = 1 # 1 = global scope (sentinel) - empty!(pool._untracked_fixed_masks) - push!(pool._untracked_fixed_masks, UInt16(0)) # Sentinel: no bits set - empty!(pool._untracked_has_others) - push!(pool._untracked_has_others, false) # Sentinel: no others + empty!(pool._touched_type_masks) + push!(pool._touched_type_masks, UInt16(0)) # Sentinel: no bits set + empty!(pool._touched_has_others) + push!(pool._touched_has_others, false) # Sentinel: no others return pool end @@ -470,7 +470,7 @@ Reset pool state without clearing allocated storage. This function: - Resets all `n_active` counters to 0 - Restores all checkpoint stacks to sentinel state -- Resets `_current_depth` and untracked bitmask state +- Resets `_current_depth` and type touch tracking state Unlike `empty!`, this **preserves** all allocated vectors, views, and N-D arrays for reuse, avoiding reallocation costs. @@ -513,12 +513,12 @@ function reset!(pool::AdaptiveArrayPool) reset!(tp) end - # Reset untracked detection state (1-based sentinel pattern) + # Reset type touch tracking state (1-based sentinel pattern) pool._current_depth = 1 # 1 = global scope (sentinel) - empty!(pool._untracked_fixed_masks) - push!(pool._untracked_fixed_masks, UInt16(0)) # Sentinel: no bits set - empty!(pool._untracked_has_others) - push!(pool._untracked_has_others, false) # Sentinel: no others + empty!(pool._touched_type_masks) + push!(pool._touched_type_masks, UInt16(0)) # Sentinel: no bits set + empty!(pool._touched_has_others) + push!(pool._touched_has_others, false) # Sentinel: no others return pool end @@ -579,17 +579,17 @@ end Check if the typed (fast) checkpoint/rewind path is safe to use. -Returns `true` when all untracked acquires at the current depth are a subset -of the tracked types (bitmask subset check) AND no non-fixed-slot types were used. +Returns `true` when all touched types at the current depth are a subset +of the tracked types (bitmask subset check) AND no non-fixed-slot types were touched. -The subset check: `(untracked_mask & ~tracked_mask) == 0` means every bit set -in `untracked_mask` is also set in `tracked_mask`. +The subset check: `(touched_mask & ~tracked_mask) == 0` means every bit set +in `touched_mask` is also set in `tracked_mask`. """ @inline function _can_use_typed_path(pool::AbstractArrayPool, tracked_mask::UInt16) depth = pool._current_depth - untracked_mask = @inbounds pool._untracked_fixed_masks[depth] - has_others = @inbounds pool._untracked_has_others[depth] - return (untracked_mask & ~tracked_mask) == UInt16(0) && !has_others + touched_mask = @inbounds(pool._touched_type_masks[depth]) & _TYPE_BITS_MASK + has_others = @inbounds pool._touched_has_others[depth] + return (touched_mask & ~tracked_mask) == UInt16(0) && !has_others end # ============================================================================== diff --git a/src/types.jl b/src/types.jl index 72b7c1e..847c1d9 100644 --- a/src/types.jl +++ b/src/types.jl @@ -367,7 +367,18 @@ Tests verify synchronization automatically. const FIXED_SLOT_FIELDS = (:float64, :float32, :int64, :int32, :complexf64, :complexf32, :bool, :bits) # ============================================================================== -# Fixed-Slot Bit Mapping (for typed untracked tracking) +# Bitmask Mode Constants +# ============================================================================== +# Bits 0-7: fixed-slot type touch tracking (one bit per type) +# Bits 14-15: mode flags set during checkpoint to control lazy behavior + +const _LAZY_MODE_BIT = UInt16(0x8000) # bit 15: lazy (dynamic-selective) checkpoint mode +const _TYPED_LAZY_BIT = UInt16(0x4000) # bit 14: typed lazy-fallback mode +const _MODE_BITS_MASK = UInt16(0xC000) # bits 14-15: all mode flags +const _TYPE_BITS_MASK = UInt16(0x00FF) # bits 0-7: fixed-slot type bits + +# ============================================================================== +# Fixed-Slot Bit Mapping (for type touch tracking) # ============================================================================== # Maps each fixed-slot type to a unique bit in a UInt16 bitmask. # Bit ordering matches FIXED_SLOT_FIELDS. Non-fixed types return UInt16(0). @@ -382,7 +393,7 @@ const FIXED_SLOT_FIELDS = (:float64, :float32, :int64, :int32, :complexf64, :com @inline _fixed_slot_bit(::Type{Bit}) = UInt16(1) << 7 @inline _fixed_slot_bit(::Type) = UInt16(0) # non-fixed-slot → triggers has_others -# Check whether a type's bit is set in a bitmask (e.g. _untracked_fixed_masks or combined). +# Check whether a type's bit is set in a bitmask (e.g. _touched_type_masks or combined). @inline _has_bit(mask::UInt16, ::Type{T}) where {T} = (mask & _fixed_slot_bit(T)) != 0 # ============================================================================== @@ -409,10 +420,10 @@ mutable struct AdaptiveArrayPool <: AbstractArrayPool # Fallback: rare types others::IdDict{DataType, Any} - # Untracked acquire detection (1-based sentinel pattern) + # Type touch tracking (1-based sentinel pattern) _current_depth::Int # Current scope depth (1 = global scope) - _untracked_fixed_masks::Vector{UInt16} # Per-depth: which fixed slots had untracked acquires - _untracked_has_others::Vector{Bool} # Per-depth: any non-fixed-slot untracked acquire? + _touched_type_masks::Vector{UInt16} # Per-depth: which fixed slots were touched + mode flags + _touched_has_others::Vector{Bool} # Per-depth: any non-fixed-slot type touched? end function AdaptiveArrayPool() @@ -427,8 +438,8 @@ function AdaptiveArrayPool() BitTypedPool(), IdDict{DataType, Any}(), 1, # _current_depth: 1 = global scope (sentinel) - [UInt16(0)], # _untracked_fixed_masks: sentinel (no bits set) - [false] # _untracked_has_others: sentinel (no others) + [UInt16(0)], # _touched_type_masks: sentinel (no bits set) + [false] # _touched_has_others: sentinel (no others) ) end diff --git a/test/test_backend_macro_expansion.jl b/test/test_backend_macro_expansion.jl index f6bd192..9721cb0 100644 --- a/test/test_backend_macro_expansion.jl +++ b/test/test_backend_macro_expansion.jl @@ -58,8 +58,8 @@ @test occursin("_get_pool_for_backend", expr_str) @test occursin("Val{:cuda}", expr_str) # Empty body → use_typed=false → dynamic selective mode - @test occursin("_depth_only_checkpoint!", expr_str) - @test occursin("_dynamic_selective_rewind!", expr_str) + @test occursin("_lazy_checkpoint!", expr_str) + @test occursin("_lazy_rewind!", expr_str) end @testset "Type extraction" begin diff --git a/test/test_macro_expansion.jl b/test/test_macro_expansion.jl index dd8ef8a..91cdc33 100644 --- a/test/test_macro_expansion.jl +++ b/test/test_macro_expansion.jl @@ -102,8 +102,8 @@ # Should still have pool management (with gensym name). # Empty body → no acquire types → use_typed=false → dynamic selective mode. @test occursin("get_task_local_pool", expr_str) - @test occursin("_depth_only_checkpoint!", expr_str) - @test occursin("_dynamic_selective_rewind!", expr_str) + @test occursin("_lazy_checkpoint!", expr_str) + @test occursin("_lazy_rewind!", expr_str) end # Test @maybe_with_pool 1-arg @@ -149,9 +149,9 @@ expr_str = string(expr) # local_arr is detected as local → falls back to dynamic selective mode. - # Checkpoint is lightweight (_depth_only_checkpoint!), rewind is selective. - @test occursin("_depth_only_checkpoint!", expr_str) - @test occursin("_dynamic_selective_rewind!", expr_str) + # Checkpoint is lightweight (_lazy_checkpoint!), rewind is selective. + @test occursin("_lazy_checkpoint!", expr_str) + @test occursin("_lazy_rewind!", expr_str) # In dynamic mode acquire! is NOT transformed to _acquire_impl! @test !occursin("_acquire_impl!", expr_str) end @@ -789,9 +789,9 @@ end @testset "Dynamic selective mode: macro expansion" begin - @testset "use_typed=false generates _depth_only_checkpoint! (dynamic selective)" begin + @testset "use_typed=false generates _lazy_checkpoint! (dynamic selective)" begin # Phase 3: when the macro cannot extract static types (local var), it uses - # _depth_only_checkpoint! instead of a full checkpoint of all 8 slots. + # _lazy_checkpoint! instead of a full checkpoint of all 8 slots. expr = @macroexpand @with_pool pool begin local_arr = rand(10) v = acquire!(pool, local_arr) # eltype(local_arr) is dynamic → use_typed=false @@ -801,13 +801,13 @@ end expr_str = string(expr) # Phase 3 behavior: depth-only checkpoint, selective rewind - @test occursin("_depth_only_checkpoint!", expr_str) + @test occursin("_lazy_checkpoint!", expr_str) @test !occursin("_can_use_typed_path", expr_str) # only in typed path end @testset "use_typed=false does NOT transform acquire! → _acquire_impl! (dynamic mode)" begin # Phase 3: _transform_acquire_calls is skipped for dynamic-selective mode. - # acquire! stays as-is so _mark_untracked! is called and the selective rewind + # acquire! stays as-is so _record_type_touch! is called and the selective rewind # can see which types were actually touched. expr = @macroexpand @with_pool pool begin local_arr = rand(10) @@ -838,8 +838,8 @@ end # RED tests: desired macro behavior after Phase 3. # —————————————————————————————————————————————————————————————— - @testset "GREEN: use_typed=false uses _depth_only_checkpoint!" begin - # Phase 3 complete: dynamic path emits _depth_only_checkpoint! instead of + @testset "GREEN: use_typed=false uses _lazy_checkpoint!" begin + # Phase 3 complete: dynamic path emits _lazy_checkpoint! instead of # the full checkpoint!(pool). This avoids the ~540ns full checkpoint cost. expr = @macroexpand @with_pool pool begin local_arr = rand(10) @@ -849,13 +849,13 @@ end expr_str = string(expr) - @test occursin("_depth_only_checkpoint!", expr_str) + @test occursin("_lazy_checkpoint!", expr_str) # Full (eager) checkpoint must NOT appear; depth-only is the entry point @test !occursin("AdaptiveArrayPools.checkpoint!", expr_str) end - @testset "GREEN: use_typed=false uses _dynamic_selective_rewind!" begin - # Phase 3 complete: dynamic rewind path uses _dynamic_selective_rewind!, + @testset "GREEN: use_typed=false uses _lazy_rewind!" begin + # Phase 3 complete: dynamic rewind path uses _lazy_rewind!, # which selectively rewinds only typed pools that were actually touched. expr = @macroexpand @with_pool pool begin local_arr = rand(10) @@ -865,7 +865,7 @@ end expr_str = string(expr) - @test occursin("_dynamic_selective_rewind!", expr_str) + @test occursin("_lazy_rewind!", expr_str) # Full rewind must NOT appear; selective rewind is the only rewind call @test !occursin("AdaptiveArrayPools.rewind!", expr_str) end @@ -874,9 +874,9 @@ end # Phase 5: Typed-Fallback Optimization expansion tests (RED) # ========================================================================= - @testset "Phase 5: use_typed=true false-branch emits _typed_checkpoint_with_lazy!" begin + @testset "Phase 5: use_typed=true false-branch emits _typed_lazy_checkpoint!" begin # After Phase 5: when _can_use_typed_path=false at runtime, the checkpoint - # side calls _typed_checkpoint_with_lazy! instead of full checkpoint!(pool). + # side calls _typed_lazy_checkpoint! instead of full checkpoint!(pool). expr = @macroexpand @with_pool pool begin v = acquire!(pool, Float64, 10) # static type Float64 → use_typed=true v .= 1.0 @@ -884,13 +884,13 @@ end expr_str = string(expr) # Phase 5: else-branch uses lazy checkpoint - @test occursin("_typed_checkpoint_with_lazy!", expr_str) + @test occursin("_typed_lazy_checkpoint!", expr_str) # Full no-arg checkpoint!(pool) must NOT appear @test !occursin("AdaptiveArrayPools.checkpoint!(pool)", expr_str) end - @testset "Phase 5: use_typed=true false-branch emits _typed_selective_rewind!" begin - # After Phase 5: the rewind else-branch uses _typed_selective_rewind! instead of full rewind!(pool). + @testset "Phase 5: use_typed=true false-branch emits _typed_lazy_rewind!" begin + # After Phase 5: the rewind else-branch uses _typed_lazy_rewind! instead of full rewind!(pool). expr = @macroexpand @with_pool pool begin v = acquire!(pool, Float64, 10) v .= 1.0 @@ -898,7 +898,7 @@ end expr_str = string(expr) # Phase 5: else-branch uses selective rewind - @test occursin("_typed_selective_rewind!", expr_str) + @test occursin("_typed_lazy_rewind!", expr_str) # Full no-arg rewind!(pool) must NOT appear @test !occursin("AdaptiveArrayPools.rewind!(pool)", expr_str) end diff --git a/test/test_macro_internals.jl b/test/test_macro_internals.jl index e0ccf7b..ca3faeb 100644 --- a/test/test_macro_internals.jl +++ b/test/test_macro_internals.jl @@ -6,8 +6,8 @@ # to ensure correct type extraction and filtering for optimized checkpoint/rewind. import AdaptiveArrayPools: _extract_local_assignments, _filter_static_types, _extract_acquire_types, _uses_local_var -import AdaptiveArrayPools: _depth_only_checkpoint!, _dynamic_selective_rewind! -import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind!, _tracked_mask_for_types +import AdaptiveArrayPools: _lazy_checkpoint!, _lazy_rewind! +import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracked_mask_for_types @testset "Macro Internals" begin @@ -1420,9 +1420,9 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # ========================================================================== # Dynamic selective mode: runtime correctness - # Phase 3: ensure n_active == 0 after _dynamic_selective_rewind! exits scope. + # Phase 3: ensure n_active == 0 after _lazy_rewind! exits scope. # - # NOTE: Uses _depth_only_checkpoint! + _dynamic_selective_rewind! directly + # NOTE: Uses _lazy_checkpoint! + _lazy_rewind! directly # with explicit fresh AdaptiveArrayPool() instances to avoid task-local pool # contamination from other tests. This mirrors what the macro generates for # the use_typed=false path, testing the state layer in isolation. @@ -1432,49 +1432,49 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @testset "Single type (Float64): n_active restored after dynamic scope" begin # Simulates: @with_pool pool begin; v = acquire!(pool, eltype(arr), 10); end - # where arr is a local var → macro emits _depth_only_checkpoint! + - # _dynamic_selective_rewind! (no _acquire_impl! transformation). + # where arr is a local var → macro emits _lazy_checkpoint! + + # _lazy_rewind! (no _acquire_impl! transformation). pool = AdaptiveArrayPool() local_arr = rand(Float64, 10) - _depth_only_checkpoint!(pool) + _lazy_checkpoint!(pool) try - v = acquire!(pool, eltype(local_arr), 10) # _mark_untracked!(pool, Float64) + v = acquire!(pool, eltype(local_arr), 10) # _record_type_touch!(pool, Float64) v .= 1.0 @test pool.float64.n_active == 1 finally - _dynamic_selective_rewind!(pool) + _lazy_rewind!(pool) end @test pool.float64.n_active == 0 end @testset "similar!(pool, Float32 ref): n_active restored after dynamic scope" begin - # similar! calls _mark_untracked!(pool, eltype(ref)) directly, so the + # similar! calls _record_type_touch!(pool, eltype(ref)) directly, so the # dynamic selective rewind sees the type even without acquire! wrapping. pool = AdaptiveArrayPool() ref = rand(Float32, 5, 5) - _depth_only_checkpoint!(pool) + _lazy_checkpoint!(pool) try - m = similar!(pool, ref) # _mark_untracked!(pool, Float32) + _acquire_impl! + m = similar!(pool, ref) # _record_type_touch!(pool, Float32) + _acquire_impl! m .= 0.0f0 @test pool.float32.n_active == 1 finally - _dynamic_selective_rewind!(pool) + _lazy_rewind!(pool) end @test pool.float32.n_active == 0 end @testset "Mixed types (Float64 + Float32): both n_active restored" begin # Simulates dynamic-mode block with two types: macro does NOT transform - # acquire! calls, so _mark_untracked! is called for each type via acquire!. + # acquire! calls, so _record_type_touch! is called for each type via acquire!. pool = AdaptiveArrayPool() local_arr = rand(Float32, 8) - _depth_only_checkpoint!(pool) + _lazy_checkpoint!(pool) try - v1 = acquire!(pool, Float64, 10) # _mark_untracked!(pool, Float64) - v2 = acquire!(pool, eltype(local_arr), 8) # _mark_untracked!(pool, Float32) + v1 = acquire!(pool, Float64, 10) # _record_type_touch!(pool, Float64) + v2 = acquire!(pool, eltype(local_arr), 8) # _record_type_touch!(pool, Float32) v1 .= 0.0; v2 .= 0.0f0 finally - _dynamic_selective_rewind!(pool) + _lazy_rewind!(pool) end @test pool.float64.n_active == 0 @test pool.float32.n_active == 0 @@ -1484,26 +1484,26 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Inner scope must only rewind its own depth entry, leaving the parent # scope's n_active intact until the outer scope calls its own rewind. pool = AdaptiveArrayPool() - _depth_only_checkpoint!(pool) # outer scope, depth 2 + _lazy_checkpoint!(pool) # outer scope, depth 2 try outer_v = acquire!(pool, Float64, 10) # lazy checkpoint for float64 outer_v .= 3.14 @test pool.float64.n_active == 1 - _depth_only_checkpoint!(pool) # inner scope, depth 3 + _lazy_checkpoint!(pool) # inner scope, depth 3 try inner_v = acquire!(pool, Float64, 5) # lazy checkpoint (first touch at depth 3) inner_v .= 0.0 @test all(outer_v .== 3.14) # parent array must survive @test pool.float64.n_active == 2 finally - _dynamic_selective_rewind!(pool) # inner rewind: depth 3 → 2 + _lazy_rewind!(pool) # inner rewind: depth 3 → 2 end @test all(outer_v .== 3.14) # outer_v survives inner rewind @test pool.float64.n_active == 1 # only outer_v remains finally - _dynamic_selective_rewind!(pool) # outer rewind: depth 2 → 1 + _lazy_rewind!(pool) # outer rewind: depth 2 → 1 end @test pool.float64.n_active == 0 end @@ -1513,12 +1513,12 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # (NOT pool.bits, which is for BitArrays acquired via acquire!(pool, Bit, ...)) pool = AdaptiveArrayPool() ref_bv = trues(64) # BitVector, eltype = Bool - _depth_only_checkpoint!(pool) + _lazy_checkpoint!(pool) try - v = similar!(pool, ref_bv) # _mark_untracked!(pool, Bool) + v = similar!(pool, ref_bv) # _record_type_touch!(pool, Bool) v .= false finally - _dynamic_selective_rewind!(pool) + _lazy_rewind!(pool) end @test pool.bool.n_active == 0 end @@ -1545,8 +1545,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test pool.int64.n_active == 1 # Child scope: typed lazy checkpoint (Float64 tracked, but helper touches Int64) - # Simulates: _can_use_typed_path=false, macro emits _typed_checkpoint_with_lazy! - _typed_checkpoint_with_lazy!(pool, Float64) + # Simulates: _can_use_typed_path=false, macro emits _typed_lazy_checkpoint! + _typed_lazy_checkpoint!(pool, Float64) try child_float = acquire!(pool, Float64, 5) _phase5_extra_int64_helper!(pool) # touches Int64 (untracked in child) @@ -1554,7 +1554,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test pool.float64.n_active >= 1 finally tracked_mask = _tracked_mask_for_types(Float64) - _typed_selective_rewind!(pool, tracked_mask) + _typed_lazy_rewind!(pool, tracked_mask) end # Parent's Int64 must be intact (= 1) diff --git a/test/test_state.jl b/test/test_state.jl index d8f77fb..a0f67b3 100644 --- a/test/test_state.jl +++ b/test/test_state.jl @@ -1,5 +1,5 @@ # Phase 5 internal functions used in tests below -import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind!, _tracked_mask_for_types +import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracked_mask_for_types @testset "State Management" begin @@ -309,8 +309,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind reset!(pool) @test pool._current_depth == 1 - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] @test pool.float64._checkpoint_n_active == [0] # Sentinel only @test pool.float64._checkpoint_depths == [0] # Sentinel only end @@ -497,7 +497,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind rewind!(pool) @test pool.float64.n_active == 0 @test pool._current_depth == 1 - @test pool._untracked_fixed_masks == [UInt16(0)] + @test pool._touched_type_masks == [UInt16(0)] end @testset "rewind! after reset!" begin @@ -753,7 +753,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind v_parent = acquire!(pool, Int64, 10) v_parent .= 42 # Initialize @test pool.int64.n_active == 1 - @test pool._untracked_fixed_masks[1] == AdaptiveArrayPools._fixed_slot_bit(Int64) + @test pool._touched_type_masks[1] == AdaptiveArrayPools._fixed_slot_bit(Int64) # Enter @with_pool - full checkpoint protects parent's Int64 arrays @with_pool pool begin @@ -779,7 +779,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind v_parent = acquire!(pool, Int32, 7) v_parent .= Int32(123) @test pool.int32.n_active == 1 - @test pool._untracked_fixed_masks[1] == AdaptiveArrayPools._fixed_slot_bit(Int32) + @test pool._touched_type_masks[1] == AdaptiveArrayPools._fixed_slot_bit(Int32) # Helper for Int32 function int32_helper(p) @@ -830,7 +830,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() # No global untracked acquire - @test pool._untracked_fixed_masks[1] == UInt16(0) + @test pool._touched_type_masks[1] == UInt16(0) # Checkpoint/rewind with typed - should work normally checkpoint!(pool) @@ -1005,7 +1005,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test pool.bool.n_active == 0 @test pool.complexf64.n_active == 0 @test pool._current_depth == 1 - @test pool._untracked_fixed_masks == [UInt16(0)] + @test pool._touched_type_masks == [UInt16(0)] empty!(pool) end @@ -1329,14 +1329,14 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() # New fields exist - @test hasfield(AdaptiveArrayPool, :_untracked_fixed_masks) - @test hasfield(AdaptiveArrayPool, :_untracked_has_others) + @test hasfield(AdaptiveArrayPool, :_touched_type_masks) + @test hasfield(AdaptiveArrayPool, :_touched_has_others) # Sentinel values at depth=1 (global scope) - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] - @test length(pool._untracked_fixed_masks) == 1 - @test length(pool._untracked_has_others) == 1 + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] + @test length(pool._touched_type_masks) == 1 + @test length(pool._touched_has_others) == 1 end @testset "Bitmask metadata: checkpoint! pushes sentinels" begin @@ -1344,15 +1344,15 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Full checkpoint checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 2 - @test length(pool._untracked_has_others) == 2 - @test pool._untracked_fixed_masks[2] == UInt16(0) - @test pool._untracked_has_others[2] == false + @test length(pool._touched_type_masks) == 2 + @test length(pool._touched_has_others) == 2 + @test pool._touched_type_masks[2] == UInt16(0) + @test pool._touched_has_others[2] == false # Another checkpoint checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 3 - @test length(pool._untracked_has_others) == 3 + @test length(pool._touched_type_masks) == 3 + @test length(pool._touched_has_others) == 3 # Cleanup rewind!(pool) @@ -1364,18 +1364,18 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Single-type checkpoint checkpoint!(pool, Float64) - @test length(pool._untracked_fixed_masks) == 2 - @test length(pool._untracked_has_others) == 2 - @test pool._untracked_fixed_masks[2] == UInt16(0) - @test pool._untracked_has_others[2] == false + @test length(pool._touched_type_masks) == 2 + @test length(pool._touched_has_others) == 2 + @test pool._touched_type_masks[2] == UInt16(0) + @test pool._touched_has_others[2] == false rewind!(pool, Float64) # Multi-type checkpoint checkpoint!(pool, Float64, Float32) - @test length(pool._untracked_fixed_masks) == 2 - @test length(pool._untracked_has_others) == 2 - @test pool._untracked_fixed_masks[2] == UInt16(0) - @test pool._untracked_has_others[2] == false + @test length(pool._touched_type_masks) == 2 + @test length(pool._touched_has_others) == 2 + @test pool._touched_type_masks[2] == UInt16(0) + @test pool._touched_has_others[2] == false rewind!(pool, Float64, Float32) end @@ -1383,33 +1383,33 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 2 - @test length(pool._untracked_has_others) == 2 + @test length(pool._touched_type_masks) == 2 + @test length(pool._touched_has_others) == 2 rewind!(pool) - @test length(pool._untracked_fixed_masks) == 1 - @test length(pool._untracked_has_others) == 1 + @test length(pool._touched_type_masks) == 1 + @test length(pool._touched_has_others) == 1 # Sentinel preserved - @test pool._untracked_fixed_masks[1] == UInt16(0) - @test pool._untracked_has_others[1] == false + @test pool._touched_type_masks[1] == UInt16(0) + @test pool._touched_has_others[1] == false end @testset "Bitmask metadata: typed rewind! pops" begin pool = AdaptiveArrayPool() checkpoint!(pool, Float64) - @test length(pool._untracked_fixed_masks) == 2 + @test length(pool._touched_type_masks) == 2 rewind!(pool, Float64) - @test length(pool._untracked_fixed_masks) == 1 - @test length(pool._untracked_has_others) == 1 + @test length(pool._touched_type_masks) == 1 + @test length(pool._touched_has_others) == 1 # Multi-type checkpoint!(pool, Float64, Int64) - @test length(pool._untracked_fixed_masks) == 2 + @test length(pool._touched_type_masks) == 2 rewind!(pool, Float64, Int64) - @test length(pool._untracked_fixed_masks) == 1 + @test length(pool._touched_type_masks) == 1 end @testset "Bitmask metadata: reset! restores sentinel" begin @@ -1418,11 +1418,11 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Build up state checkpoint!(pool) checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 3 + @test length(pool._touched_type_masks) == 3 reset!(pool) - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] @test pool._current_depth == 1 end @@ -1433,11 +1433,11 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind checkpoint!(pool) acquire!(pool, Float64, 10) checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 3 + @test length(pool._touched_type_masks) == 3 empty!(pool) - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] @test pool._current_depth == 1 end @@ -1450,8 +1450,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind end # No stack leaks — should be back to sentinel only - @test length(pool._untracked_fixed_masks) == 1 - @test length(pool._untracked_has_others) == 1 + @test length(pool._touched_type_masks) == 1 + @test length(pool._touched_has_others) == 1 @test pool._current_depth == 1 end @@ -1460,25 +1460,25 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Depth 2 checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 2 + @test length(pool._touched_type_masks) == 2 # Depth 3 checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 3 + @test length(pool._touched_type_masks) == 3 # Depth 4 checkpoint!(pool) - @test length(pool._untracked_fixed_masks) == 4 + @test length(pool._touched_type_masks) == 4 # Pop back rewind!(pool) - @test length(pool._untracked_fixed_masks) == 3 + @test length(pool._touched_type_masks) == 3 rewind!(pool) - @test length(pool._untracked_fixed_masks) == 2 + @test length(pool._touched_type_masks) == 2 rewind!(pool) - @test length(pool._untracked_fixed_masks) == 1 + @test length(pool._touched_type_masks) == 1 end # ========================================================================== @@ -1510,78 +1510,78 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test all(b -> b != UInt16(0), bits) end - @testset "Typed _mark_untracked!: fixed-slot types set mask bits" begin - using AdaptiveArrayPools: _mark_untracked!, _fixed_slot_bit + @testset "Typed _record_type_touch!: fixed-slot types set mask bits" begin + using AdaptiveArrayPools: _record_type_touch!, _fixed_slot_bit pool = AdaptiveArrayPool() checkpoint!(pool) # depth=2 # Mark Float64 untracked - _mark_untracked!(pool, Float64) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) - @test pool._untracked_has_others[2] == false + _record_type_touch!(pool, Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) + @test pool._touched_has_others[2] == false # Mark Float32 additionally — bits accumulate - _mark_untracked!(pool, Float32) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) - @test pool._untracked_has_others[2] == false + _record_type_touch!(pool, Float32) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) + @test pool._touched_has_others[2] == false # Mark Float64 again — idempotent - _mark_untracked!(pool, Float64) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) + _record_type_touch!(pool, Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) rewind!(pool) end - @testset "Typed _mark_untracked!: non-fixed-slot types set has_others" begin - using AdaptiveArrayPools: _mark_untracked!, _fixed_slot_bit + @testset "Typed _record_type_touch!: non-fixed-slot types set has_others" begin + using AdaptiveArrayPools: _record_type_touch!, _fixed_slot_bit pool = AdaptiveArrayPool() checkpoint!(pool) # depth=2 # Mark UInt8 (not a fixed slot) - _mark_untracked!(pool, UInt8) - @test pool._untracked_fixed_masks[2] == UInt16(0) # mask unchanged - @test pool._untracked_has_others[2] == true + _record_type_touch!(pool, UInt8) + @test pool._touched_type_masks[2] == UInt16(0) # mask unchanged + @test pool._touched_has_others[2] == true rewind!(pool) end - @testset "Typed _mark_untracked!: mixed fixed + others" begin - using AdaptiveArrayPools: _mark_untracked!, _fixed_slot_bit + @testset "Typed _record_type_touch!: mixed fixed + others" begin + using AdaptiveArrayPools: _record_type_touch!, _fixed_slot_bit pool = AdaptiveArrayPool() checkpoint!(pool) - _mark_untracked!(pool, Float64) - _mark_untracked!(pool, UInt8) # others - _mark_untracked!(pool, Int64) + _record_type_touch!(pool, Float64) + _record_type_touch!(pool, UInt8) # others + _record_type_touch!(pool, Int64) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Int64) - @test pool._untracked_has_others[2] == true + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Int64) + @test pool._touched_has_others[2] == true rewind!(pool) end - @testset "Typed _mark_untracked!: nested depth isolation" begin - using AdaptiveArrayPools: _mark_untracked!, _fixed_slot_bit + @testset "Typed _record_type_touch!: nested depth isolation" begin + using AdaptiveArrayPools: _record_type_touch!, _fixed_slot_bit pool = AdaptiveArrayPool() # Depth 2 checkpoint!(pool) - _mark_untracked!(pool, Float64) + _record_type_touch!(pool, Float64) # Depth 3 checkpoint!(pool) - _mark_untracked!(pool, Int32) + _record_type_touch!(pool, Int32) # Depth 3 has only Int32 - @test pool._untracked_fixed_masks[3] == _fixed_slot_bit(Int32) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) + @test pool._touched_type_masks[3] == _fixed_slot_bit(Int32) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) # Depth 1 (sentinel) untouched - @test pool._untracked_fixed_masks[1] == UInt16(0) + @test pool._touched_type_masks[1] == UInt16(0) rewind!(pool) rewind!(pool) @@ -1593,12 +1593,12 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() checkpoint!(pool) # depth=2 - # acquire! outside @with_pool calls _mark_untracked!(pool, T) + # acquire! outside @with_pool calls _record_type_touch!(pool, T) acquire!(pool, Float64, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) acquire!(pool, Int64, 5) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Int64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) | _fixed_slot_bit(Int64) rewind!(pool) end @@ -1610,7 +1610,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind checkpoint!(pool) unsafe_acquire!(pool, Float32, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float32) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float32) rewind!(pool) end @@ -1623,37 +1623,37 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # zeros! with explicit type checkpoint!(pool) zeros!(pool, Float64, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) rewind!(pool) # zeros! without type → default_eltype → Float64 checkpoint!(pool) zeros!(pool, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) rewind!(pool) # ones! with type checkpoint!(pool) ones!(pool, Int32, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Int32) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Int32) rewind!(pool) # trues! → Bit type checkpoint!(pool) trues!(pool, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Bit) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Bit) rewind!(pool) # falses! → Bit type checkpoint!(pool) falses!(pool, 10) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Bit) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Bit) rewind!(pool) # similar! with template array checkpoint!(pool) similar!(pool, rand(Float32, 5)) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float32) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float32) rewind!(pool) end @@ -1662,8 +1662,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind checkpoint!(pool) zeros!(pool, UInt8, 10) - @test pool._untracked_has_others[2] == true - @test pool._untracked_fixed_masks[2] == UInt16(0) + @test pool._touched_has_others[2] == true + @test pool._touched_type_masks[2] == UInt16(0) rewind!(pool) end @@ -1712,32 +1712,94 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == true # Case 2: untracked Float64, tracked includes Float64 → subset → OK - pool._untracked_fixed_masks[2] = _fixed_slot_bit(Float64) + pool._touched_type_masks[2] = _fixed_slot_bit(Float64) @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == true # Case 3: untracked Float64, tracked is Float32 only → NOT subset → full @test _can_use_typed_path(pool, _tracked_mask_for_types(Float32)) == false # Case 4: untracked Float64|Float32, tracked Float64 only → partial → full - pool._untracked_fixed_masks[2] = _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) + pool._touched_type_masks[2] = _fixed_slot_bit(Float64) | _fixed_slot_bit(Float32) @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == false # Case 5: untracked Float64|Float32, tracked Float64|Float32 → exact match → OK @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64, Float32)) == true # Case 6: untracked Float64 + has_others → always full - pool._untracked_fixed_masks[2] = _fixed_slot_bit(Float64) - pool._untracked_has_others[2] = true + pool._touched_type_masks[2] = _fixed_slot_bit(Float64) + pool._touched_has_others[2] = true @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == false # Case 7: no fixed untracked but has_others → always full - pool._untracked_fixed_masks[2] = UInt16(0) - pool._untracked_has_others[2] = true + pool._touched_type_masks[2] = UInt16(0) + pool._touched_has_others[2] = true @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == false rewind!(pool) end + @testset "_can_use_typed_path: mode bits do not pollute subset check" begin + # Issue: _can_use_typed_path reads raw _touched_type_masks[depth] which may + # contain mode bits (14-15) from _lazy_checkpoint! or _typed_lazy_checkpoint!. + # These mode bits leak into the subset check `(touched_mask & ~tracked_mask) == 0`, + # causing false negatives: the typed fast path is rejected even when only + # tracked types were touched. + using AdaptiveArrayPools: _can_use_typed_path, _tracked_mask_for_types, + _lazy_checkpoint!, _lazy_rewind!, _LAZY_MODE_BIT, _TYPED_LAZY_BIT, + _acquire_impl! + + # --- Case 1: _LAZY_MODE_BIT (bit 15) should be ignored --- + pool = AdaptiveArrayPool() + checkpoint!(pool) # depth 2 + pool._touched_type_masks[2] = _LAZY_MODE_BIT # simulate lazy parent scope + # Only mode bit set, no type bits → typed path should be safe + @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == true + + # Mode bit + tracked type bit → still safe (type is tracked) + pool._touched_type_masks[2] = _LAZY_MODE_BIT | _fixed_slot_bit(Float64) + @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == true + + # Mode bit + untracked type bit → correctly fails + pool._touched_type_masks[2] = _LAZY_MODE_BIT | _fixed_slot_bit(Int32) + @test _can_use_typed_path(pool, _tracked_mask_for_types(Float64)) == false + rewind!(pool) + + # --- Case 2: _TYPED_LAZY_BIT (bit 14) should be ignored --- + pool2 = AdaptiveArrayPool() + checkpoint!(pool2) + pool2._touched_type_masks[2] = _TYPED_LAZY_BIT + @test _can_use_typed_path(pool2, _tracked_mask_for_types(Float64)) == true + + pool2._touched_type_masks[2] = _TYPED_LAZY_BIT | _fixed_slot_bit(Float64) + @test _can_use_typed_path(pool2, _tracked_mask_for_types(Float64)) == true + rewind!(pool2) + + # --- Case 3: Both mode bits set (bits 14+15) should be ignored --- + pool3 = AdaptiveArrayPool() + checkpoint!(pool3) + pool3._touched_type_masks[2] = _LAZY_MODE_BIT | _TYPED_LAZY_BIT + @test _can_use_typed_path(pool3, _tracked_mask_for_types(Float64)) == true + rewind!(pool3) + + # --- Case 4: End-to-end — nested typed scope inside lazy scope --- + pool4 = AdaptiveArrayPool() + _lazy_checkpoint!(pool4) # outer lazy scope (depth 2, mask has _LAZY_MODE_BIT) + + # Before entering inner typed scope, macro calls _can_use_typed_path at parent depth + tracked_mask = _tracked_mask_for_types(Float64) + @test _can_use_typed_path(pool4, tracked_mask) == true # parent has no extra type bits + + # Enter inner typed scope + checkpoint!(pool4, Float64) # depth 3 + a = _acquire_impl!(pool4, Float64, 10) + a .= 1.0 + # At rewind time: inner mask is clean (no mode bits from checkpoint!) + @test _can_use_typed_path(pool4, tracked_mask) == true + + rewind!(pool4, Float64) + _lazy_rewind!(pool4) + end + # ================================================================== # Phase 3: End-to-end runtime scenarios # ================================================================== @@ -1765,7 +1827,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @testset "Scenario B: selective rewind when untracked NOT ⊆ tracked" begin # Helper acquires Float32 while @with_pool only tracks Float64. - # Phase 5: _can_use_typed_path=false → _typed_selective_rewind! covers + # Phase 5: _can_use_typed_path=false → _typed_lazy_rewind! covers # tracked (Float64) | untracked (Float32), so both are rewound correctly. function _scenario_b_helper!(pool) acquire!(pool, Float32, 5) @@ -1876,59 +1938,59 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # ================================================================== @testset "Phase 4: _untracked_flags field removed from AdaptiveArrayPool" begin # The legacy boolean _untracked_flags field has been replaced by - # bitmask-based tracking (_untracked_fixed_masks + _untracked_has_others). + # bitmask-based tracking (_touched_type_masks + _touched_has_others). # Verify it no longer exists as a struct field. @test !(:_untracked_flags in fieldnames(AdaptiveArrayPool)) # Verify the bitmask fields ARE present (they are the replacement) - @test :_untracked_fixed_masks in fieldnames(AdaptiveArrayPool) - @test :_untracked_has_others in fieldnames(AdaptiveArrayPool) + @test :_touched_type_masks in fieldnames(AdaptiveArrayPool) + @test :_touched_has_others in fieldnames(AdaptiveArrayPool) end @testset "Phase 4: bitmask stacks have no stale state after lifecycle ops" begin pool = AdaptiveArrayPool() # Initial sentinel state - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] # Checkpoint → mark → rewind cycle leaves no stale bits checkpoint!(pool) - _mark_untracked!(pool, Float64) - @test pool._untracked_fixed_masks[2] == _fixed_slot_bit(Float64) + _record_type_touch!(pool, Float64) + @test pool._touched_type_masks[2] == _fixed_slot_bit(Float64) rewind!(pool) - @test pool._untracked_fixed_masks == [UInt16(0)] # back to sentinel - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] # back to sentinel + @test pool._touched_has_others == [false] # Nested checkpoint → mark others → rewind cleans up checkpoint!(pool) # depth 2 checkpoint!(pool) # depth 3 - _mark_untracked!(pool, UInt8) # others at depth 3 - @test pool._untracked_has_others[3] == true + _record_type_touch!(pool, UInt8) # others at depth 3 + @test pool._touched_has_others[3] == true rewind!(pool) # back to depth 2 - @test length(pool._untracked_has_others) == 2 - @test pool._untracked_has_others[2] == false # depth 2 clean + @test length(pool._touched_has_others) == 2 + @test pool._touched_has_others[2] == false # depth 2 clean rewind!(pool) # back to depth 1 - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] # reset! restores sentinel state after deep nesting checkpoint!(pool) checkpoint!(pool) - _mark_untracked!(pool, Float32) - _mark_untracked!(pool, Int64) + _record_type_touch!(pool, Float32) + _record_type_touch!(pool, Int64) reset!(pool) - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] @test pool._current_depth == 1 # empty! also restores sentinel state checkpoint!(pool) - _mark_untracked!(pool, ComplexF64) - _mark_untracked!(pool, UInt16) + _record_type_touch!(pool, ComplexF64) + _record_type_touch!(pool, UInt16) empty!(pool) - @test pool._untracked_fixed_masks == [UInt16(0)] - @test pool._untracked_has_others == [false] + @test pool._touched_type_masks == [UInt16(0)] + @test pool._touched_has_others == [false] @test pool._current_depth == 1 end @@ -1936,21 +1998,21 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Dynamic Selective Mode — Phase 1: Characterization & Safety Locks # ================================================================== - @testset "Dynamic selective mode: _acquire_impl! bypasses _mark_untracked!" begin + @testset "Dynamic selective mode: _acquire_impl! bypasses _record_type_touch!" begin using AdaptiveArrayPools: _acquire_impl!, _fixed_slot_bit pool = AdaptiveArrayPool() checkpoint!(pool) depth = pool._current_depth # = 2 - # Internal _acquire_impl! does NOT call _mark_untracked! (by design). + # Internal _acquire_impl! does NOT call _record_type_touch! (by design). # This is the key reason a simple "combined mask" approach is insufficient: # macro-transformed calls won't appear in untracked bitmasks. _acquire_impl!(pool, Float64, 5) - @test pool._untracked_fixed_masks[depth] == UInt16(0) # mask unchanged + @test pool._touched_type_masks[depth] == UInt16(0) # mask unchanged - # Public acquire! DOES call _mark_untracked! + # Public acquire! DOES call _record_type_touch! acquire!(pool, Float32, 5) - @test pool._untracked_fixed_masks[depth] == _fixed_slot_bit(Float32) + @test pool._touched_type_masks[depth] == _fixed_slot_bit(Float32) rewind!(pool) end @@ -2000,8 +2062,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind depth = pool._current_depth acquire!(pool, UInt8, 5) - @test pool._untracked_has_others[depth] == true - @test pool._untracked_fixed_masks[depth] == UInt16(0) + @test pool._touched_has_others[depth] == true + @test pool._touched_type_masks[depth] == UInt16(0) rewind!(pool) @test get_typed_pool!(pool, UInt8).n_active == 0 @@ -2016,7 +2078,7 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Record the stack length BEFORE entering the inner scope. # (global-scope bitmask at index 1 may be non-zero due to the acquire above.) - mask_before = pool._untracked_fixed_masks[1] + mask_before = pool._touched_type_masks[1] checkpoint!(pool) # no acquires in scope @@ -2025,10 +2087,10 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test pool.float64.n_active == n_before @test pool._current_depth == 1 # Stack has returned to exactly the sentinel (length 1) - @test length(pool._untracked_fixed_masks) == 1 - @test length(pool._untracked_has_others) == 1 + @test length(pool._touched_type_masks) == 1 + @test length(pool._touched_has_others) == 1 # Global-scope bitmask is unchanged from before we entered/exited the scope - @test pool._untracked_fixed_masks[1] == mask_before + @test pool._touched_type_masks[1] == mask_before end # —————————————————————————————————————————————————————————————— @@ -2036,22 +2098,22 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # These will FAIL until Phase 2 is complete. # —————————————————————————————————————————————————————————————— - @testset "DESIRED [RED]: _depth_only_checkpoint! is exported/defined" begin - # Phase 2 will add _depth_only_checkpoint! to src/state.jl. + @testset "DESIRED [RED]: _lazy_checkpoint! is exported/defined" begin + # Phase 2 will add _lazy_checkpoint! to src/state.jl. # This test explicitly signals the missing implementation. - @test isdefined(AdaptiveArrayPools, :_depth_only_checkpoint!) + @test isdefined(AdaptiveArrayPools, :_lazy_checkpoint!) end - @testset "DESIRED [RED]: _depth_only_checkpoint! does not eagerly checkpoint typed pools" begin + @testset "DESIRED [RED]: _lazy_checkpoint! does not eagerly checkpoint typed pools" begin # A depth-only checkpoint should increment _current_depth and push bitmask # sentinels, but NOT save n_active for any typed pool. # The sentinel in _checkpoint_depths is always depth=0, so if no checkpoint # was saved at the current depth, _checkpoint_depths[end] will be < current_depth. - if !isdefined(AdaptiveArrayPools, :_depth_only_checkpoint!) + if !isdefined(AdaptiveArrayPools, :_lazy_checkpoint!) @test false # RED: function not yet defined else pool = AdaptiveArrayPool() - AdaptiveArrayPools._depth_only_checkpoint!(pool) + AdaptiveArrayPools._lazy_checkpoint!(pool) depth = pool._current_depth # = 2 # No typed pool should have an eager checkpoint at this depth @@ -2062,21 +2124,21 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # But depth metadata IS updated @test pool._current_depth == 2 - @test length(pool._untracked_fixed_masks) == 2 - @test length(pool._untracked_has_others) == 2 + @test length(pool._touched_type_masks) == 2 + @test length(pool._touched_has_others) == 2 end end @testset "DESIRED [RED]: lazy first-touch checkpoint on acquire! in dynamic mode" begin - # In dynamic-selective mode, _mark_untracked! should lazily call + # In dynamic-selective mode, _record_type_touch! should lazily call # _checkpoint_typed_pool! on the FIRST acquire of each type per depth. # Only the touched pool gets checkpointed; others remain untouched. - if !isdefined(AdaptiveArrayPools, :_depth_only_checkpoint!) + if !isdefined(AdaptiveArrayPools, :_lazy_checkpoint!) @test false # RED: prerequisite not implemented else - using AdaptiveArrayPools: _depth_only_checkpoint! + using AdaptiveArrayPools: _lazy_checkpoint! pool = AdaptiveArrayPool() - _depth_only_checkpoint!(pool) # lightweight enter + _lazy_checkpoint!(pool) # lightweight enter depth = pool._current_depth # = 2 # Before any acquire: no checkpoint for any pool at this depth @@ -2097,14 +2159,14 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Phase 5: Typed-Fallback Optimization # ================================================================== - @testset "Phase 5: _typed_checkpoint_with_lazy! sets bit 14 and checkpoints known types" begin - # _typed_checkpoint_with_lazy! must checkpoint known types AND set bit 14 for lazy mode. + @testset "Phase 5: _typed_lazy_checkpoint! sets bit 14 and checkpoints known types" begin + # _typed_lazy_checkpoint! must checkpoint known types AND set bit 14 for lazy mode. pool = AdaptiveArrayPool() - _typed_checkpoint_with_lazy!(pool, Float64) + _typed_lazy_checkpoint!(pool, Float64) d = pool._current_depth # Bit 14 (0x4000) must be set; bits 0-7 must be 0 (no acquires yet) - @test (pool._untracked_fixed_masks[d] & UInt16(0x4000)) != 0 - @test (pool._untracked_fixed_masks[d] & UInt16(0x00FF)) == 0 + @test (pool._touched_type_masks[d] & UInt16(0x4000)) != 0 + @test (pool._touched_type_masks[d] & UInt16(0x00FF)) == 0 # Float64 should be checkpointed at this depth @test pool.float64._checkpoint_depths[end] == d # Float32 should NOT be checkpointed at this depth @@ -2132,14 +2194,14 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Child scope: typed checkpoint for Float64 only, but helper touches Int64 # Simulates @with_pool with static type Float64 but _can_use_typed_path = false - _typed_checkpoint_with_lazy!(pool, Float64) + _typed_lazy_checkpoint!(pool, Float64) acquire!(pool, Float64, 5) # tracked type _p0_helper_int64!(pool) # untracked Int64 → triggers lazy first-touch checkpoint @test pool.int64.n_active == 2 # parent's 1 + helper's 1 # Child scope exits via selective rewind tracked_mask = _tracked_mask_for_types(Float64) - _typed_selective_rewind!(pool, tracked_mask) + _typed_lazy_rewind!(pool, tracked_mask) # Parent's Int64 count must be restored to 1 (NOT 0) @test pool.int64.n_active == 1 @@ -2150,10 +2212,10 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind end @testset "Phase 5: bit 14 enables lazy first-touch checkpoint for extra types" begin - # _mark_untracked! condition is (current_mask & 0xC000) != 0. + # _record_type_touch! condition is (current_mask & 0xC000) != 0. # With bit 14 set (typed lazy mode), extra-type first touch triggers _checkpoint_typed_pool!. pool = AdaptiveArrayPool() - _typed_checkpoint_with_lazy!(pool, Float64) # typed chk + set bit 14 + _typed_lazy_checkpoint!(pool, Float64) # typed chk + set bit 14 d = pool._current_depth # Before acquiring Int64: no Int64 checkpoint at this depth @@ -2171,8 +2233,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @testset "Phase 5 (Issue #3): typed lazy mode preserves parent n_active for others types" begin # If a parent scope has an active others-type (UInt8) and a child uses - # _typed_checkpoint_with_lazy!, helpers touching the same type must NOT corrupt - # the parent's n_active. _typed_checkpoint_with_lazy! eagerly snapshots pool.others + # _typed_lazy_checkpoint!, helpers touching the same type must NOT corrupt + # the parent's n_active. _typed_lazy_checkpoint! eagerly snapshots pool.others # so Case A fires at rewind (not Case B with the wrong sentinel value). function _p5_helper_uint8!(pool) acquire!(pool, UInt8, 7) @@ -2187,16 +2249,16 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @test parent_others_pool.n_active == 1 # Child scope: typed checkpoint for Float64 only; helper touches UInt8 (others) - # Without the fix: _typed_checkpoint_with_lazy! doesn't snapshot pool.others → + # Without the fix: _typed_lazy_checkpoint! doesn't snapshot pool.others → # rewind hits Case B → parent UInt8.n_active corrupted to 0. - _typed_checkpoint_with_lazy!(pool, Float64) + _typed_lazy_checkpoint!(pool, Float64) try acquire!(pool, Float64, 5) # tracked type _p5_helper_uint8!(pool) # untracked others type @test pool.others[UInt8].n_active == 2 # parent's 1 + helper's 1 finally tracked_mask = _tracked_mask_for_types(Float64) - _typed_selective_rewind!(pool, tracked_mask) + _typed_lazy_rewind!(pool, tracked_mask) end # Parent's UInt8 count must be preserved (= 1, NOT 0) @@ -2212,12 +2274,12 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # They should FAIL before the fix and PASS after. # ================================================================== - @testset "Issue #1: _depth_only_checkpoint! orphaned others stack leak" begin - # Bug: _depth_only_checkpoint! eagerly checkpoints pool.others entries, - # but sets _untracked_has_others[depth] = false. On _dynamic_selective_rewind!, + @testset "Issue #1: _lazy_checkpoint! orphaned others stack leak" begin + # Bug: _lazy_checkpoint! eagerly checkpoints pool.others entries, + # but sets _touched_has_others[depth] = false. On _lazy_rewind!, # the others loop is skipped (flag is false), leaving orphaned checkpoint entries. # In a loop, each iteration pushes one more stale entry → unbounded stack growth. - using AdaptiveArrayPools: _depth_only_checkpoint!, _dynamic_selective_rewind! + using AdaptiveArrayPools: _lazy_checkpoint!, _lazy_rewind! pool = AdaptiveArrayPool() @@ -2231,8 +2293,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Run 10 iterations of dynamic-selective scope without acquiring any others type for _ in 1:10 - _depth_only_checkpoint!(pool) # pushes checkpoint for others entries - _dynamic_selective_rewind!(pool) # should pop it back + _lazy_checkpoint!(pool) # pushes checkpoint for others entries + _lazy_rewind!(pool) # should pop it back end # Checkpoint stack must NOT have grown (each entry should be popped by rewind) @@ -2243,14 +2305,14 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind @testset "Issue #2: double-checkpoint hazard when tracked type used by helper" begin # Bug: In typed-lazy mode (bit 14), when a tracked type T is: - # 1. Checkpointed by _typed_checkpoint_with_lazy!(pool, T) (saves n_active=0) - # 2. Acquired by macro-transformed _acquire_impl! (n_active → 1, no _mark_untracked!) - # 3. Re-acquired by a helper via acquire! → _mark_untracked! + # 1. Checkpointed by _typed_lazy_checkpoint!(pool, T) (saves n_active=0) + # 2. Acquired by macro-transformed _acquire_impl! (n_active → 1, no _record_type_touch!) + # 3. Re-acquired by a helper via acquire! → _record_type_touch! # Step 3 sees bit 14 set + T's bit unset → calls _checkpoint_typed_pool! again # with n_active=1 (wrong!). On rewind, restores n_active=1 instead of 0. using AdaptiveArrayPools: _acquire_impl! - # Helper that uses acquire! (goes through _mark_untracked!) + # Helper that uses acquire! (goes through _record_type_touch!) function _issue2_helper!(pool) acquire!(pool, Float64, 3) end @@ -2258,20 +2320,20 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() # Enter typed-lazy mode for Float64 - _typed_checkpoint_with_lazy!(pool, Float64) + _typed_lazy_checkpoint!(pool, Float64) try - # Simulate macro-transformed code: bypasses _mark_untracked! + # Simulate macro-transformed code: bypasses _record_type_touch! _acquire_impl!(pool, Float64, 5) @test pool.float64.n_active == 1 - # Helper: goes through acquire! → _mark_untracked! - # BUG: _mark_untracked! sees bit 14 + Float64 bit not yet set + # Helper: goes through acquire! → _record_type_touch! + # BUG: _record_type_touch! sees bit 14 + Float64 bit not yet set # → redundant _checkpoint_typed_pool! with n_active=1 _issue2_helper!(pool) @test pool.float64.n_active == 2 finally tracked_mask = _tracked_mask_for_types(Float64) - _typed_selective_rewind!(pool, tracked_mask) + _typed_lazy_rewind!(pool, tracked_mask) end # After rewind, n_active should be 0 (parent state before scope entry) @@ -2292,13 +2354,13 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind pool = AdaptiveArrayPool() initial_f32_stack = length(pool.float32._checkpoint_depths) # 1 (sentinel) - _typed_checkpoint_with_lazy!(pool, Float32) + _typed_lazy_checkpoint!(pool, Float32) try - _acquire_impl!(pool, Float32, 5) # n_active=1, no _mark_untracked! - _issue2b_helper!(pool) # acquire! → _mark_untracked! → double checkpoint + _acquire_impl!(pool, Float32, 5) # n_active=1, no _record_type_touch! + _issue2b_helper!(pool) # acquire! → _record_type_touch! → double checkpoint finally tracked_mask = _tracked_mask_for_types(Float32) - _typed_selective_rewind!(pool, tracked_mask) + _typed_lazy_rewind!(pool, tracked_mask) end # The checkpoint stack should return to its initial length (sentinel only) @@ -2325,25 +2387,25 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind end end - @testset "Issue #4: CUDA _depth_only_checkpoint! parity (has_others flag)" begin - # Bug: CUDA _depth_only_checkpoint! eagerly checkpoints pool.others but - # does NOT set _untracked_has_others = true, same as CPU Issue #1. + @testset "Issue #4: CUDA _lazy_checkpoint! parity (has_others flag)" begin + # Bug: CUDA _lazy_checkpoint! eagerly checkpoints pool.others but + # does NOT set _touched_has_others = true, same as CPU Issue #1. # Verify via source code inspection (no GPU needed). cuda_state_path = joinpath(@__DIR__, "..", "ext", "AdaptiveArrayPoolsCUDAExt", "state.jl") if isfile(cuda_state_path) code = read(cuda_state_path, String) - # Extract _depth_only_checkpoint! function body + # Extract _lazy_checkpoint! function body func_match = match( - r"function\s+AdaptiveArrayPools\._depth_only_checkpoint!\(pool::CuAdaptiveArrayPool\).*?^end"ms, + r"function\s+AdaptiveArrayPools\._lazy_checkpoint!\(pool::CuAdaptiveArrayPool\).*?^end"ms, code ) @test func_match !== nothing if func_match !== nothing func_body = func_match.match # If it eagerly checkpoints others (has `for p in values(pool.others)`), - # then it MUST also set _untracked_has_others[...] = true within the loop + # then it MUST also set _touched_has_others[...] = true within the loop if contains(func_body, "values(pool.others)") - @test occursin(r"_untracked_has_others\[.*\]\s*=\s*true", func_body) + @test occursin(r"_touched_has_others\[.*\]\s*=\s*true", func_body) end end else @@ -2351,15 +2413,15 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind end end - @testset "Issue #5: CUDA _typed_checkpoint_with_lazy! parity" begin + @testset "Issue #5: CUDA _typed_lazy_checkpoint! parity" begin # Bug: CUDA version is missing two features present in CPU version: # 1. Double-checkpoint guard: `_checkpoint_depths[end] != d` - # 2. has_others flag: `_untracked_has_others[d] = true` + # 2. has_others flag: `_touched_has_others[d] = true` cuda_state_path = joinpath(@__DIR__, "..", "ext", "AdaptiveArrayPoolsCUDAExt", "state.jl") if isfile(cuda_state_path) code = read(cuda_state_path, String) func_match = match( - r"function\s+AdaptiveArrayPools\._typed_checkpoint_with_lazy!\(pool::CuAdaptiveArrayPool.*?^end"ms, + r"function\s+AdaptiveArrayPools\._typed_lazy_checkpoint!\(pool::CuAdaptiveArrayPool.*?^end"ms, code ) @test func_match !== nothing @@ -2369,8 +2431,8 @@ import AdaptiveArrayPools: _typed_checkpoint_with_lazy!, _typed_selective_rewind # Must have double-checkpoint guard (like CPU version) @test contains(func_body, "_checkpoint_depths[end]") - # Must set _untracked_has_others flag (like CPU version) - @test contains(func_body, "_untracked_has_others") + # Must set _touched_has_others flag (like CPU version) + @test contains(func_body, "_touched_has_others") end else @warn "CUDA extension not found, skipping parity test"