From 43c66af5477f22016986cb810b0be77878363ee7 Mon Sep 17 00:00:00 2001 From: Percy Date: Fri, 8 Aug 2025 02:35:51 -0400 Subject: [PATCH 1/5] Support reader.get_working_set_size --- libcachesim/protocols.py | 1 + libcachesim/synthetic_reader.py | 9 +++++++++ libcachesim/trace_reader.py | 7 +++++-- src/export_reader.cpp | 8 ++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/libcachesim/protocols.py b/libcachesim/protocols.py index 74a45f8..9741b78 100644 --- a/libcachesim/protocols.py +++ b/libcachesim/protocols.py @@ -28,6 +28,7 @@ def skip_n_req(self, n: int) -> int: ... def reset(self) -> None: ... def close(self) -> None: ... def clone(self) -> "ReaderProtocol": ... + def get_working_set_size(self) -> tuple[int, int]: ... def __iter__(self) -> Iterator[Request]: ... def __next__(self) -> Request: ... def __len__(self) -> int: ... diff --git a/libcachesim/synthetic_reader.py b/libcachesim/synthetic_reader.py index 936f29d..c313355 100644 --- a/libcachesim/synthetic_reader.py +++ b/libcachesim/synthetic_reader.py @@ -181,6 +181,15 @@ def set_read_pos(self, pos: float) -> None: def get_read_pos(self) -> float: """Get current read position""" return float(self.current_pos) + + def get_working_set_size(self) -> tuple[int, int]: + """Calculate working set size""" + wss_obj, wss_byte = 0, 0 + if self._obj_ids is not None: + unique_ids = np.unique(self._obj_ids[:self.current_pos]) + wss_obj = len(unique_ids) + wss_byte = wss_obj * self.obj_size + return wss_obj, wss_byte def __iter__(self) -> Iterator[Request]: """Iterator implementation""" diff --git a/libcachesim/trace_reader.py b/libcachesim/trace_reader.py index e593dbb..49b11ad 100644 --- a/libcachesim/trace_reader.py +++ b/libcachesim/trace_reader.py @@ -1,12 +1,12 @@ """Wrapper of Reader with S3 support.""" import logging -from typing import overload, Union, Optional +from typing import Tuple, overload, Union, Optional from collections.abc import Iterator from urllib.parse import urlparse from .protocols import ReaderProtocol -from .libcachesim_python import TraceType, SamplerType, Request, ReaderInitParam, Reader, Sampler, ReadDirection +from .libcachesim_python import TraceType, SamplerType, Request, ReaderInitParam, Reader, Sampler, ReadDirection, cal_working_set_size from ._s3_cache import get_data_loader logger = logging.getLogger(__name__) @@ -276,6 +276,9 @@ def go_back_one_req(self) -> None: def set_read_pos(self, pos: float) -> None: self._reader.set_read_pos(pos) + def get_working_set_size(self) -> Tuple[int, int]: + return cal_working_set_size(self._reader) + def __iter__(self) -> Iterator[Request]: self._reader.reset() return self diff --git a/src/export_reader.cpp b/src/export_reader.cpp index 8f286f3..9f7df86 100644 --- a/src/export_reader.cpp +++ b/src/export_reader.cpp @@ -61,7 +61,15 @@ struct SamplerDeleter { } }; + void export_reader(py::module& m) { + /* Helper function(s) */ + m.def("cal_working_set_size", [](reader_t& reader) { + int64_t wss_obj = 0, wss_byte = 0; + cal_working_set_size(&reader, &wss_obj, &wss_byte); + return std::make_tuple(wss_obj, wss_byte); + }, "reader"_a); + // Sampler type enumeration py::enum_(m, "SamplerType") .value("SPATIAL_SAMPLER", sampler_type::SPATIAL_SAMPLER) From f91b0d96d77debc41cba4003abd92f1bbab78f8b Mon Sep 17 00:00:00 2001 From: Percy Date: Fri, 8 Aug 2025 02:37:38 -0400 Subject: [PATCH 2/5] Update libcachesim/synthetic_reader.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- libcachesim/synthetic_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcachesim/synthetic_reader.py b/libcachesim/synthetic_reader.py index c313355..eea0bfc 100644 --- a/libcachesim/synthetic_reader.py +++ b/libcachesim/synthetic_reader.py @@ -182,7 +182,7 @@ def get_read_pos(self) -> float: """Get current read position""" return float(self.current_pos) - def get_working_set_size(self) -> tuple[int, int]: + def get_working_set_size(self) -> Tuple[int, int]: """Calculate working set size""" wss_obj, wss_byte = 0, 0 if self._obj_ids is not None: From c7ed0a154a9daa997ac36b0d7c129fa877ebaece Mon Sep 17 00:00:00 2001 From: Percy Date: Fri, 8 Aug 2025 02:38:51 -0400 Subject: [PATCH 3/5] Update libcachesim/trace_reader.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- libcachesim/trace_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcachesim/trace_reader.py b/libcachesim/trace_reader.py index 49b11ad..151885c 100644 --- a/libcachesim/trace_reader.py +++ b/libcachesim/trace_reader.py @@ -1,7 +1,7 @@ """Wrapper of Reader with S3 support.""" import logging -from typing import Tuple, overload, Union, Optional +from typing import overload, Union, Optional from collections.abc import Iterator from urllib.parse import urlparse From f37dcf26babd18b7ecb4caf3c129bb9ad2a460c9 Mon Sep 17 00:00:00 2001 From: Percy Date: Fri, 8 Aug 2025 02:39:11 -0400 Subject: [PATCH 4/5] Update libcachesim/synthetic_reader.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- libcachesim/synthetic_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcachesim/synthetic_reader.py b/libcachesim/synthetic_reader.py index eea0bfc..c5e7df6 100644 --- a/libcachesim/synthetic_reader.py +++ b/libcachesim/synthetic_reader.py @@ -185,8 +185,8 @@ def get_read_pos(self) -> float: def get_working_set_size(self) -> Tuple[int, int]: """Calculate working set size""" wss_obj, wss_byte = 0, 0 - if self._obj_ids is not None: - unique_ids = np.unique(self._obj_ids[:self.current_pos]) + if self.current_pos > 0: + unique_ids = np.unique(self.obj_ids[:self.current_pos]) wss_obj = len(unique_ids) wss_byte = wss_obj * self.obj_size return wss_obj, wss_byte From 954a9c4eae14494e8248cc946c3949284270b882 Mon Sep 17 00:00:00 2001 From: Percy Date: Fri, 8 Aug 2025 02:39:22 -0400 Subject: [PATCH 5/5] Update libcachesim/trace_reader.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- libcachesim/trace_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcachesim/trace_reader.py b/libcachesim/trace_reader.py index 151885c..deb0312 100644 --- a/libcachesim/trace_reader.py +++ b/libcachesim/trace_reader.py @@ -276,7 +276,7 @@ def go_back_one_req(self) -> None: def set_read_pos(self, pos: float) -> None: self._reader.set_read_pos(pos) - def get_working_set_size(self) -> Tuple[int, int]: + def get_working_set_size(self) -> tuple[int, int]: return cal_working_set_size(self._reader) def __iter__(self) -> Iterator[Request]: