From c02027f7c1358b4495cec4d81be761c9237fd4ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <janole@goedeke.de>
Date: Sat, 13 Dec 2025 23:30:50 +0100
Subject: [PATCH 01/12] Add pytest and tests for CLI and Python module

- Add pytest configuration for test management
- Implement test suite for CLI functionality and Python module
- Update README with testing instructions and badge
- Fix Dockerfile (install pytest, add `python` symlink, build Python module via `make python-build`)
- Create .dockerignore to exclude unnecessary files from Docker builds
- Add GitHub Actions workflows for testing
- Clean up makefile to include test commands
---
 .dockerignore                     |  27 ++
 .github/workflows/pypi-deploy.yml |   4 +
 .github/workflows/test.yml        |  21 ++
 .gitignore                        |   3 +
 Dockerfile                        |  13 +-
 README.md                         |   8 +
 makefile                          |  25 +-
 pytest.ini                        |   9 +
 tests/README.md                   |  30 +++
 tests/test_cli.py                 | 242 +++++++++++++++++
 tests/test_python.py              | 423 ++++++++++++++++++++++++++++++
 11 files changed, 796 insertions(+), 9 deletions(-)
 create mode 100644 .dockerignore
 create mode 100644 .github/workflows/test.yml
 create mode 100644 pytest.ini
 create mode 100644 tests/README.md
 create mode 100644 tests/test_cli.py
 create mode 100644 tests/test_python.py

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..997485c
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,27 @@
+# Build artifacts
+*.o
+*.so
+*.pyc
+imctermite
+main.cpp.cpp
+
+# Python build
+python/build/
+python/dist/
+python/*.so
+python/*.cpp
+python/lib/
+python/LICENSE
+python/README.md
+python/*.egg-info/
+__pycache__/
+
+# Git and editor
+.git/
+.venv/
+*.swp
+*.swo
+*~
+
+# Test outputs
+.pytest_cache/
diff --git a/.github/workflows/pypi-deploy.yml b/.github/workflows/pypi-deploy.yml
index aa02101..aa89ef9 100644
--- a/.github/workflows/pypi-deploy.yml
+++ b/.github/workflows/pypi-deploy.yml
@@ -8,9 +8,13 @@ on:
 
 jobs:
 
+  test:
+    uses: ./.github/workflows/test.yml
+
   build_setup:
     name: Prepare environment for wheel builds
     runs-on: ubuntu-24.04
+    needs: [test]
     steps:
       - uses: actions/checkout@v2
       - name: Prepare wheel build
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..ce243c4
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,23 @@
+name: Run Tests
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+  # required so that pypi-deploy.yml can call this workflow via `uses:`
+  workflow_call:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+    
+    - name: Build Docker image
+      run: docker build -t imctermite .
+    
+    - name: Run tests in container
+      run: docker run --rm imctermite make test
diff --git a/.gitignore b/.gitignore
index b4e57bd..947413f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,6 @@ python/*.soc
 python/lib/
 python/*.cpp
 python/wheelhouse/
+
+__pycache__/
+.pytest_cache/
diff --git a/Dockerfile b/Dockerfile
index e5389a0..836f221 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,21 +1,22 @@
-
-FROM debian:bullseye-20210111
+FROM debian:bullseye
 
 USER root
 
 RUN apt-get update && apt-get install -y \
     build-essential git vim \
     python3 python3-pip
-RUN python3 -m pip install cython
+RUN python3 -m pip install cython pytest
+RUN ln -s /usr/bin/python3 /usr/bin/python
 
 RUN g++ -v
 
-COPY ./ /IMCtermite/
+WORKDIR /IMCtermite
+COPY ./ .
 
 # install CLI tool
-RUN cd /IMCtermite && ls -lh && make install && ls -lh /usr/local/bin/imctermite
+RUN make install
 
 # install Python module
-RUN cd /IMCtermite && ls -lh && make cython-install
+RUN make python-build
 
 CMD ["sleep","infinity"]
diff --git a/README.md b/README.md
index 6c167d7..f1b175d 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 
 [![LICENSE](https://img.shields.io/github/license/RecordEvolution/IMCtermite)](https://img.shields.io/github/license/RecordEvolution/IMCtermite)
 [![STARS](https://img.shields.io/github/stars/RecordEvolution/IMCtermite)](https://img.shields.io/github/stars/RecordEvolution/IMCtermite)
+![Tests](https://github.com/RecordEvolution/IMCtermite/actions/workflows/test.yml/badge.svg)
 ![CI Build Wheel](https://github.com/RecordEvolution/IMCtermite/actions/workflows/pypi-deploy.yml/badge.svg?branch=&event=push)
 [![PYPI](https://img.shields.io/pypi/v/IMCtermite.svg)](https://pypi.org/project/imctermite/)
 
@@ -27,6 +28,7 @@ Python module to integrate the _.raw_  format into any ETL workflow.
 * [File format](#Fileformat)
 * [Build and Installation](#Installation)
 * [Usage and Examples](#Usage)
+* [Testing](#Testing)
 * [References](#References)
 
 ## File format
@@ -217,6 +219,12 @@ A more complete [example](python/examples/usage.py), including the methods for
 obtaining the channels, i.a. their data and/or directly printing them to files,
 can be found in the `python/examples` folder.
 
+## Testing
+
+Run end-to-end tests: `make test`
+
+See [tests/README.md](tests/README.md) for details.
+
 ## References
 
 ### IMC
diff --git a/makefile b/makefile
index 2d88a26..d5e7b01 100644
--- a/makefile
+++ b/makefile
@@ -35,7 +35,7 @@ INST := /usr/local/bin
 # C++ and CLI tool
 
 # build executable
-$(EXE): check-tags $(GVSN) main.o
+$(EXE): check-tags main.o
 	$(CC) $(OPT) main.o -o $@
 
 # build main.cpp and include git version/commit tag
@@ -86,7 +86,7 @@ docker-run:
 #-----------------------------------------------------------------------------#
 # python
 
-python-build: check-tags $(GVSN)
+python-build: check-tags
 	make -C python/ build-inplace
 	cp python/imctermite*.so ./ -v
 
@@ -97,10 +97,33 @@ python-clean:
 python-test:
 	PYTHONPATH=./ python python/examples/usage.py
 
+#-----------------------------------------------------------------------------#
+# tests
+
+# these targets are commands, not files: declare them phony so that a stray
+# file or directory named e.g. "test" cannot make them appear up to date
+.PHONY: test test-cli test-python test-clean
+
+test: $(EXE) python-build
+	@echo "Running all tests..."
+	@PYTHONPATH=./ pytest
+
+test-cli: $(EXE)
+	@echo "Running CLI tests..."
+	@PYTHONPATH=./ pytest tests/test_cli.py
+
+test-python: python-build
+	@echo "Running Python tests..."
+	@PYTHONPATH=./ pytest tests/test_python.py
+
 #-----------------------------------------------------------------------------#
 # clean
 
-clean: cpp-clean python-clean
+test-clean:
+	rm -rf .pytest_cache
+	find tests/ -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
+
+clean: cpp-clean python-clean test-clean
 
 #-----------------------------------------------------------------------------#
 # github actions
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..e0459a4
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,9 @@
+[pytest]
+testpaths = tests
+pythonpath = .
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts = -v --strict-markers --tb=short
+markers =
+    slow: marks tests as slow (deselect with '-m "not slow"')
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..aa8343c
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,30 @@
+# IMCtermite Tests
+
+End-to-end tests for both the CLI tool and Python module.
+
+
+## Running Tests
+
+### All Tests
+```bash
+make test              # Via makefile (builds if needed)
+pytest                 # Direct pytest (does not build; run `make` and `make python-build` first)
+```
+
+### CLI Tests Only
+```bash
+make test-cli
+pytest tests/test_cli.py
+```
+
+### Python Module Tests Only
+```bash
+make test-python
+pytest tests/test_python.py
+```
+
+## Prerequisites
+
+```bash
+pip install cython pytest setuptools
+```
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..6144e6c
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python3
+"""
+End-to-end tests for IMCtermite CLI tool
+"""
+
+import pytest
+import subprocess
+from pathlib import Path
+
+PROJECT_ROOT = Path(__file__).parent.parent
+CLI = PROJECT_ROOT / "imctermite"
+SAMPLES_DIR = PROJECT_ROOT / "samples" / "datasetA"
+
+
+class TestCLIBasics:
+    """Test basic CLI functionality"""
+    
+    def test_cli_exists(self):
+        """CLI binary should exist"""
+        assert CLI.exists(), f"CLI not found at {CLI}"
+    
+    def test_help_output(self):
+        """Should display help message"""
+        result = subprocess.run([str(CLI), "--help"], capture_output=True, text=True)
+        assert result.returncode == 0
+        assert "Usage:" in result.stdout or "usage:" in result.stdout.lower()
+    
+    def test_version_output(self):
+        """Should display version"""
+        result = subprocess.run([str(CLI), "--version"], capture_output=True, text=True)
+        assert result.returncode == 0
+        assert len(result.stdout) > 0
+    
+    def test_invalid_file_handling(self):
+        """Should fail gracefully on nonexistent file"""
+        result = subprocess.run(
+            [str(CLI), "/nonexistent/file.raw"],
+            capture_output=True,
+            text=True
+        )
+        assert result.returncode != 0
+
+
+class TestChannelOperations:
+    """Test channel listing and data extraction"""
+    
+    @pytest.fixture
+    def sample_file(self):
+        """Get path to sample file"""
+        sample = SAMPLES_DIR / "datasetA_1.raw"
+        if not sample.exists():
+            pytest.skip(f"Sample file not found: {sample}")
+        return sample
+    
+    def test_list_channels(self, sample_file):
+        """Should list channels with metadata"""
+        result = subprocess.run(
+            [str(CLI), str(sample_file), "--listchannels"],
+            capture_output=True,
+            text=True
+        )
+        assert result.returncode == 0
+        assert "uuid" in result.stdout
+    
+    def test_list_blocks(self, sample_file):
+        """Should list IMC blocks"""
+        result = subprocess.run(
+            [str(CLI), str(sample_file), "--listblocks"],
+            capture_output=True,
+            text=True
+        )
+        assert result.returncode == 0
+        # Block markers like CF, CK, CC, etc.
+        assert "C" in result.stdout and ("F" in result.stdout or "K" in result.stdout)
+
+
+class TestCSVOutput:
+    """Test CSV file generation"""
+    
+    @pytest.fixture
+    def sample_file(self):
+        """Get path to sample file"""
+        sample = SAMPLES_DIR / "datasetA_1.raw"
+        if not sample.exists():
+            pytest.skip(f"Sample file not found: {sample}")
+        return sample
+    
+    def test_generate_csv_output(self, sample_file, tmp_path):
+        """Should generate CSV files"""
+        output_dir = tmp_path / "csv_output"
+        output_dir.mkdir()
+        
+        result = subprocess.run(
+            [str(CLI), str(sample_file), "--output", str(output_dir)],
+            capture_output=True,
+            text=True
+        )
+        assert result.returncode == 0
+        
+        csv_files = list(output_dir.glob("*.csv"))
+        assert len(csv_files) > 0, "Should generate at least one CSV file"
+    
+    def test_csv_format_valid(self, sample_file, tmp_path):
+        """Generated CSV should have valid format"""
+        output_dir = tmp_path / "csv_output"
+        output_dir.mkdir()
+        
+        subprocess.run(
+            [str(CLI), str(sample_file), "--output", str(output_dir)],
+            capture_output=True
+        )
+        
+        csv_files = list(output_dir.glob("*.csv"))
+        assert len(csv_files) > 0
+        
+        # Check first CSV file
+        first_csv = csv_files[0]
+        content = first_csv.read_text()
+        lines = content.strip().split('\n')
+        
+        assert len(lines) > 1, "CSV should have header and data"
+        assert ',' in lines[0], "CSV should use comma delimiter"
+    
+    def test_custom_delimiter(self, sample_file, tmp_path):
+        """Should support custom delimiter"""
+        output_dir = tmp_path / "csv_delim"
+        output_dir.mkdir()
+        
+        result = subprocess.run(
+            [str(CLI), str(sample_file), "--output", str(output_dir), "--delimiter", ";"],
+            capture_output=True,
+            text=True
+        )
+        assert result.returncode == 0
+        
+        csv_files = list(output_dir.glob("*.csv"))
+        assert len(csv_files) > 0
+        
+        # Check delimiter is applied
+        first_csv = csv_files[0]
+        content = first_csv.read_text()
+        first_line = content.split('\n')[0]
+        assert ';' in first_line, "Should use semicolon delimiter"
+
+
+class TestMultipleFiles:
+    """Test processing multiple sample files"""
+    
+    def test_process_all_sample_files(self):
+        """Should successfully process all .raw and .dat files in samples directory (list channels)"""
+        samples_root = SAMPLES_DIR.parent
+        if not samples_root.exists():
+            pytest.skip(f"Samples directory not found: {samples_root}")
+        
+        # Get all .raw and .dat files recursively
+        samples = sorted(list(samples_root.glob("*.raw")) + 
+                        list(samples_root.glob("*.dat")) +
+                        list(samples_root.glob("**/*.raw")) + 
+                        list(samples_root.glob("**/*.dat")))
+        # Remove duplicates
+        samples = sorted(set(samples))
+        
+        if len(samples) == 0:
+            pytest.skip("No .raw or .dat files in samples directory")
+        
+        failed = []
+        for sample in samples:
+            result = subprocess.run(
+                [str(CLI), str(sample), "--listchannels"],
+                capture_output=True,
+                text=True,
+                errors='replace'  # Handle non-UTF8 characters in output
+            )
+            if result.returncode != 0:
+                failed.append(f"{sample.relative_to(samples_root)}: exit code {result.returncode}")
+        
+        assert len(failed) == 0, f"Failed to process {len(failed)}/{len(samples)} files: {failed}"
+    
+    def test_extract_all_sample_files_with_data(self):
+        """Should successfully extract data from all .raw and .dat files"""
+        import tempfile
+        import shutil
+        
+        samples_root = SAMPLES_DIR.parent
+        if not samples_root.exists():
+            pytest.skip(f"Samples directory not found: {samples_root}")
+        
+        # Get all .raw and .dat files recursively
+        samples = sorted(list(samples_root.glob("*.raw")) + 
+                        list(samples_root.glob("*.dat")) +
+                        list(samples_root.glob("**/*.raw")) + 
+                        list(samples_root.glob("**/*.dat")))
+        samples = sorted(set(samples))
+        
+        if len(samples) == 0:
+            pytest.skip("No .raw or .dat files in samples directory")
+        
+        # Create temp directory for output
+        temp_dir = tempfile.mkdtemp()
+        try:
+            failed = []
+            for sample in samples:
+                result = subprocess.run(
+                    [str(CLI), str(sample), "--output", temp_dir],
+                    capture_output=True,
+                    text=True,
+                    errors='replace'
+                )
+                if result.returncode != 0:
+                    failed.append(f"{sample.relative_to(samples_root)}: exit code {result.returncode}")
+            
+            assert len(failed) == 0, f"Failed to extract data from {len(failed)}/{len(samples)} files: {failed}"
+        finally:
+            shutil.rmtree(temp_dir, ignore_errors=True)
+
+
+class TestExitCodes:
+    """Test exit code behavior"""
+    
+    def test_success_exit_code(self):
+        """Should return 0 on success"""
+        sample = SAMPLES_DIR / "datasetA_1.raw"
+        if not sample.exists():
+            pytest.skip("Sample file not found")
+        
+        result = subprocess.run(
+            [str(CLI), str(sample), "--listchannels"],
+            capture_output=True
+        )
+        assert result.returncode == 0
+    
+    def test_error_exit_code(self):
+        """Should return non-zero on error"""
+        result = subprocess.run(
+            [str(CLI), "/nonexistent/file.raw"],
+            capture_output=True
+        )
+        assert result.returncode != 0
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/test_python.py b/tests/test_python.py
new file mode 100644
index 0000000..7470709
--- /dev/null
+++ b/tests/test_python.py
@@ -0,0 +1,423 @@
+#!/usr/bin/env python3
+"""
+End-to-end tests for IMCtermite Python module
+"""
+
+import pytest
+import os
+import tempfile
+import csv
+from pathlib import Path
+
+try:
+    import imctermite
+except ImportError:
+    pytest.skip("imctermite module not built - run 'make python-build' first", allow_module_level=True)
+
+PROJECT_ROOT = Path(__file__).parent.parent
+SAMPLES_DIR = PROJECT_ROOT / "samples"
+DATASET_A = SAMPLES_DIR / "datasetA"
+DATASET_B = SAMPLES_DIR / "datasetB"
+
+
+class TestModuleImport:
+    """Test basic module functionality"""
+    
+    def test_module_imports(self):
+        """Module should import without errors"""
+        assert imctermite is not None
+    
+    def test_can_instantiate(self):
+        """Should create instance with valid file"""
+        sample_file = DATASET_A / "datasetA_1.raw"
+        if not sample_file.exists():
+            pytest.skip(f"Sample file not found: {sample_file}")
+        
+        imc = imctermite.imctermite(str(sample_file).encode())
+        assert imc is not None
+
+
+class TestChannelListing:
+    """Test channel metadata retrieval"""
+    
+    @pytest.fixture
+    def imc_instance(self):
+        """Create IMC instance with sample file"""
+        sample_file = DATASET_A / "datasetA_1.raw"
+        if not sample_file.exists():
+            pytest.skip(f"Sample file not found: {sample_file}")
+        return imctermite.imctermite(str(sample_file).encode())
+    
+    def test_get_channel_list(self, imc_instance):
+        """Should return list of channel metadata"""
+        channels = imc_instance.get_channels(include_data=False)
+        assert isinstance(channels, list)
+        assert len(channels) > 0
+    
+    def test_channel_metadata_structure(self, imc_instance):
+        """Channel metadata should have required fields"""
+        channels = imc_instance.get_channels(include_data=False)
+        first_channel = channels[0]
+        
+        # Check for expected keys
+        required_keys = ['name', 'uuid']
+        for key in required_keys:
+            assert key in first_channel, f"Missing key: {key}"
+    
+    def test_get_channel_data(self, imc_instance):
+        """Should return channel data with xdata and ydata"""
+        channels = imc_instance.get_channels(include_data=True)
+        assert isinstance(channels, list)
+        assert len(channels) > 0
+        
+        first_channel = channels[0]
+        assert 'xdata' in first_channel
+        assert 'ydata' in first_channel
+        assert isinstance(first_channel['xdata'], list)
+        assert isinstance(first_channel['ydata'], list)
+        assert len(first_channel['xdata']) == len(first_channel['ydata'])
+
+
+class TestDataIntegrity:
+    """Test data extraction and validation"""
+    
+    @pytest.fixture
+    def sample_data(self):
+        """Load sample file and extract data"""
+        sample_file = DATASET_A / "datasetA_1.raw"
+        if not sample_file.exists():
+            pytest.skip(f"Sample file not found: {sample_file}")
+        
+        imc = imctermite.imctermite(str(sample_file).encode())
+        return imc.get_channels(include_data=True)
+    
+    def test_data_arrays_not_empty(self, sample_data):
+        """Data arrays should not be empty"""
+        for channel in sample_data:
+            assert len(channel['xdata']) > 0
+            assert len(channel['ydata']) > 0
+    
+    def test_data_values_are_numeric(self, sample_data):
+        """All data values should be numeric"""
+        for channel in sample_data:
+            for x in channel['xdata'][:10]:  # Check first 10
+                assert isinstance(x, (int, float))
+            for y in channel['ydata'][:10]:
+                assert isinstance(y, (int, float))
+    
+    def test_xdata_monotonic(self, sample_data):
+        """X-data (time) should be monotonically increasing"""
+        for channel in sample_data:
+            xdata = channel['xdata']
+            if len(xdata) > 1:
+                # Check if mostly increasing (allow small floating point issues)
+                increasing_count = sum(1 for i in range(len(xdata)-1) if xdata[i] <= xdata[i+1])
+                ratio = increasing_count / (len(xdata) - 1)
+                assert ratio > 0.95, f"X-data not monotonic enough: {ratio:.2%}"
+
+
+class TestCSVOutput:
+    """Test CSV file generation"""
+    
+    @pytest.fixture
+    def imc_instance(self):
+        """Create IMC instance"""
+        sample_file = DATASET_A / "datasetA_1.raw"
+        if not sample_file.exists():
+            pytest.skip(f"Sample file not found: {sample_file}")
+        return imctermite.imctermite(str(sample_file).encode())
+    
+    def test_print_channel_to_csv(self, imc_instance, tmp_path):
+        """Should create CSV file for single channel"""
+        output_file = tmp_path / "test_channel.csv"
+        
+        channels = imc_instance.get_channels(include_data=False)
+        if len(channels) == 0:
+            pytest.skip("No channels in sample file")
+        
+        channel_uuid = channels[0]['uuid']
+        imc_instance.print_channel(channel_uuid.encode(), str(output_file).encode(), b','[0])
+        
+        assert output_file.exists()
+        assert output_file.stat().st_size > 0
+    
+    def test_csv_format_valid(self, imc_instance, tmp_path):
+        """Generated CSV should be valid"""
+        output_file = tmp_path / "test_channel.csv"
+        
+        channels = imc_instance.get_channels(include_data=False)
+        if len(channels) == 0:
+            pytest.skip("No channels in sample file")
+        
+        channel_uuid = channels[0]['uuid']
+        imc_instance.print_channel(channel_uuid.encode(), str(output_file).encode(), b','[0])
+        
+        # Read and validate CSV
+        with open(output_file, 'r') as f:
+            reader = csv.reader(f)
+            rows = list(reader)
+            
+            assert len(rows) > 1, "CSV should have header and data"
+            assert len(rows[0]) == 2, "CSV should have 2 columns"
+            
+            # Check second row is numeric (first row is header with units)
+            if len(rows) > 1:
+                data_row = rows[1]
+                try:
+                    float(data_row[0])  # Should not raise
+                    float(data_row[1])  # Should not raise
+                except ValueError:
+                    # Maybe first row is header, try second data row
+                    if len(rows) > 2:
+                        data_row = rows[2]
+                        float(data_row[0])
+                        float(data_row[1])
+    
+    def test_print_all_channels(self, imc_instance, tmp_path):
+        """Should create CSV files for all channels"""
+        output_dir = tmp_path / "all_channels"
+        output_dir.mkdir()
+        
+        imc_instance.print_channels(str(output_dir).encode(), b','[0])
+        
+        csv_files = list(output_dir.glob("*.csv"))
+        assert len(csv_files) > 0, "Should generate at least one CSV file"
+
+
+class TestMultipleFiles:
+    """Test processing multiple sample files"""
+    
+    def test_process_all_sample_files(self):
+        """Should process all .raw and .dat files in samples directory (metadata only)"""
+        if not SAMPLES_DIR.exists():
+            pytest.skip(f"Samples directory not found: {SAMPLES_DIR}")
+        
+        # Get all .raw and .dat files recursively
+        files_to_test = sorted(list(SAMPLES_DIR.glob("*.raw")) + 
+                               list(SAMPLES_DIR.glob("*.dat")) +
+                               list(SAMPLES_DIR.glob("**/*.raw")) + 
+                               list(SAMPLES_DIR.glob("**/*.dat")))
+        # Remove duplicates (files in root will be in both patterns)
+        files_to_test = sorted(set(files_to_test))
+        
+        if len(files_to_test) == 0:
+            pytest.skip("No .raw or .dat files in samples directory")
+        
+        successful = 0
+        failed = []
+        for sample_file in files_to_test:
+            try:
+                imc = imctermite.imctermite(str(sample_file).encode())
+                channels = imc.get_channels(include_data=False)
+                if len(channels) > 0:
+                    successful += 1
+            except Exception as e:
+                failed.append(f"{sample_file.relative_to(SAMPLES_DIR)}: {e}")
+        
+        assert len(failed) == 0, f"Failed to process {len(failed)}/{len(files_to_test)} files: {failed}"
+        assert successful == len(files_to_test), f"Only {successful}/{len(files_to_test)} files had channels"
+    
+    def test_extract_all_sample_files_with_data(self):
+        """Should fully extract all .raw and .dat files with data"""
+        if not SAMPLES_DIR.exists():
+            pytest.skip(f"Samples directory not found: {SAMPLES_DIR}")
+        
+        # Get all .raw and .dat files recursively
+        files_to_test = sorted(list(SAMPLES_DIR.glob("*.raw")) + 
+                               list(SAMPLES_DIR.glob("*.dat")) +
+                               list(SAMPLES_DIR.glob("**/*.raw")) + 
+                               list(SAMPLES_DIR.glob("**/*.dat")))
+        files_to_test = sorted(set(files_to_test))
+        
+        if len(files_to_test) == 0:
+            pytest.skip("No .raw or .dat files in samples directory")
+        
+        successful = 0
+        failed = []
+        for sample_file in files_to_test:
+            try:
+                imc = imctermite.imctermite(str(sample_file).encode())
+                channels = imc.get_channels(include_data=True)
+                
+                # Verify we got data
+                if len(channels) > 0:
+                    # Check that at least one channel has actual data (xdata or ydata)
+                    has_data = False
+                    for channel in channels:
+                        if ('xdata' in channel and len(channel['xdata']) > 0) or \
+                           ('ydata' in channel and len(channel['ydata']) > 0):
+                            has_data = True
+                            break
+                    
+                    if has_data:
+                        successful += 1
+                    else:
+                        failed.append(f"{sample_file.relative_to(SAMPLES_DIR)}: no data in channels")
+                else:
+                    failed.append(f"{sample_file.relative_to(SAMPLES_DIR)}: no channels found")
+            except Exception as e:
+                failed.append(f"{sample_file.relative_to(SAMPLES_DIR)}: {e}")
+        
+        assert len(failed) == 0, f"Failed to extract data from {len(failed)}/{len(files_to_test)} files: {failed}"
+        assert successful == len(files_to_test), f"Only {successful}/{len(files_to_test)} files extracted with data"
+    
+    def test_reload_different_file(self):
+        """Should be able to load different files sequentially"""
+        file1 = DATASET_A / "datasetA_1.raw"
+        file2 = DATASET_A / "datasetA_2.raw"
+        
+        if not (file1.exists() and file2.exists()):
+            pytest.skip("Need at least 2 sample files")
+        
+        # Load first file
+        imc1 = imctermite.imctermite(str(file1).encode())
+        channels1 = imc1.get_channels(include_data=False)
+        
+        # Load second file
+        imc2 = imctermite.imctermite(str(file2).encode())
+        channels2 = imc2.get_channels(include_data=False)
+        
+        # Both should work
+        assert len(channels1) > 0
+        assert len(channels2) > 0
+
+
+class TestDataRegression:
+    """Test specific known values to catch parsing regressions"""
+    
+    @pytest.mark.parametrize("file_path,expected", [
+        # datasetA_1.raw - Standard .raw format with gravity unit
+        ("datasetA/datasetA_1.raw", {
+            'num_channels': 1,
+            'data_length': 6000,
+            'yunit': 'G',
+            'xstepwidth': 0.005,
+            'ydata_first': [0.010029276, 0.015780726],
+            'ydata_last': [-0.02981583, -0.030068753],  # [-2], [-1]
+            'xdata_first': [416.01],
+        }),
+        # sampleA.raw - Pressure data with mbar units
+        ("sampleA.raw", {
+            'num_channels': 1,
+            'data_length': 2402,
+            'yunit': '"mbar"',
+            'xoffset': 2044.03,
+            'ydata_first': [956.013793945, 955.484924316, 955.487670898],
+            'ydata_last': [866.840881348, 866.91619873, 866.985290527],  # [-3], [-2], [-1]
+        }),
+        # XY_dataset_example.dat - Different .dat format with explicit X-Y data
+        ("XY_dataset_example.dat", {
+            'num_channels': 1,
+            'data_length': 13094,
+            'ydata_first': [0, 0, 0],
+            'ydata_last': [2796202, 2796202, 2982616],  # [-3], [-2], [-1]
+            'xdata_first': [67.855759, 67.880796],
+            'xdata_last': [395.158317],
+        }),
+    ])
+    def test_known_values(self, file_path, expected):
+        """Verify known values from sample files to catch parsing regressions"""
+        sample_file = SAMPLES_DIR / file_path
+        if not sample_file.exists():
+            pytest.skip(f"Sample file not found: {sample_file}")
+        
+        imc = imctermite.imctermite(str(sample_file).encode())
+        channels = imc.get_channels(include_data=True)
+        
+        # Check number of channels
+        assert len(channels) == expected['num_channels'], \
+            f"Should have {expected['num_channels']} channel(s)"
+        
+        ch = channels[0]
+        
+        # Verify data length
+        ydata = ch.get('ydata', [])
+        assert len(ydata) == expected['data_length'], \
+            f"Should have {expected['data_length']} data points"
+        
+        # Verify metadata if specified
+        if 'yunit' in expected:
+            assert ch.get('yunit') == expected['yunit'], \
+                f"Unit should be {expected['yunit']}"
+        
+        if 'xstepwidth' in expected:
+            assert abs(float(ch.get('xstepwidth')) - expected['xstepwidth']) < 1e-9, \
+                f"X step width should be {expected['xstepwidth']}"
+        
+        if 'xoffset' in expected:
+            assert abs(float(ch.get('xoffset')) - expected['xoffset']) < 1e-9, \
+                f"X offset should be {expected['xoffset']}"
+        
+        # Verify ydata first values
+        tolerance = 1e-6  # Default tolerance for floating-point comparisons
+        for i, expected_val in enumerate(expected['ydata_first']):
+            if isinstance(expected_val, float):
+                assert abs(ydata[i] - expected_val) < tolerance, \
+                    f"ydata[{i}] should be {expected_val}"
+            else:
+                assert ydata[i] == expected_val, \
+                    f"ydata[{i}] should be {expected_val}"
+        
+        # Verify ydata last values
+        for i, expected_val in enumerate(expected['ydata_last']):
+            idx = -(len(expected['ydata_last']) - i)
+            if isinstance(expected_val, float):
+                assert abs(ydata[idx] - expected_val) < tolerance, \
+                    f"ydata[{idx}] should be {expected_val}"
+            else:
+                assert ydata[idx] == expected_val, \
+                    f"ydata[{idx}] should be {expected_val}"
+        
+        # Verify xdata if specified
+        if 'xdata_first' in expected:
+            xdata = ch.get('xdata', [])
+            for i, expected_val in enumerate(expected['xdata_first']):
+                assert abs(xdata[i] - expected_val) < tolerance, \
+                    f"xdata[{i}] should be {expected_val}"
+        
+        if 'xdata_last' in expected:
+            xdata = ch.get('xdata', [])
+            for i, expected_val in enumerate(expected['xdata_last']):
+                idx = -(len(expected['xdata_last']) - i)
+                assert abs(xdata[idx] - expected_val) < tolerance, \
+                    f"xdata[{idx}] should be {expected_val}"
+
+
+class TestErrorHandling:
+    """Test error conditions"""
+    
+    def test_nonexistent_file(self):
+        """Should raise error for nonexistent file"""
+        with pytest.raises(Exception):
+            imctermite.imctermite(b"/nonexistent/file.raw")
+    
+    def test_invalid_channel_name(self):
+        """Unknown channel UUID must either raise or produce (almost) no output"""
+        sample_file = DATASET_A / "datasetA_1.raw"
+        if not sample_file.exists():
+            pytest.skip(f"Sample file not found: {sample_file}")
+        
+        imc = imctermite.imctermite(str(sample_file).encode())
+        
+        # Create the scratch file before the try block so that `output_file`
+        # is always bound when the cleanup in `finally` runs
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
+            output_file = f.name
+        
+        try:
+            try:
+                imc.print_channel(b"NONEXISTENT_CHANNEL_UUID", output_file.encode(), b','[0])
+            except Exception:
+                # Raising an exception is acceptable behavior
+                return
+            # No exception: the output must be missing or very small (just header).
+            # Asserted outside the `except` above so a failure is not swallowed.
+            if os.path.exists(output_file):
+                assert os.path.getsize(output_file) < 100
+        finally:
+            if os.path.exists(output_file):
+                os.unlink(output_file)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])

From 574393c1d0c6123954788f4e312419dcb25e323f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <janole@goedeke.de>
Date: Sun, 14 Dec 2025 22:01:20 +0100
Subject: [PATCH 02/12] Implement chunked NumPy export for large files and add
 corresponding example and tests

---
 README.md                             | 22 ++++++++
 lib/imc_raw.hpp                       | 62 ++++++++++++++++++++++
 python/examples/usage_numpy_chunks.py | 56 ++++++++++++++++++++
 python/imctermite.pxd                 | 13 +++++
 python/imctermite.pyx                 | 32 +++++++++++-
 python/setup.py                       |  2 +
 tests/test_python.py                  | 75 +++++++++++++++++++++++----
 7 files changed, 251 insertions(+), 11 deletions(-)
 create mode 100644 python/examples/usage_numpy_chunks.py

diff --git a/README.md b/README.md
index f1b175d..31db211 100644
--- a/README.md
+++ b/README.md
@@ -219,6 +219,28 @@ A more complete [example](python/examples/usage.py), including the methods for
 obtaining the channels, i.a. their data and/or directly printing them to files,
 can be found in the `python/examples` folder.
 
+### Chunked NumPy export (fast path)
+
+For large files, you can iterate over channel data in chunks as NumPy arrays. This avoids creating large Python lists and allows for streaming processing (e.g. writing to Parquet).
+
+```python
+import imctermite
+import numpy as np
+
+imcraw = imctermite.imctermite(b"samples/large_file.raw")
+channels = imcraw.get_channels(False)
+uuid = channels[0]['uuid'].encode('utf-8')
+
+# Iterate over channel data in chunks of 1 million samples
+for chunk in imcraw.iter_channel_numpy(uuid, include_x=True, chunk_rows=1_000_000):
+    y_data = chunk['y'] # NumPy array
+    x_data = chunk.get('x') # NumPy array (if include_x=True)
+    start_index = chunk['start']
+    
+    # Process chunk (e.g. write to parquet)
+    print(f"Processed chunk starting at {start_index}, size {len(y_data)}")
+```
+
 ## Testing
 
 Run end-to-end tests: `make test`
diff --git a/lib/imc_raw.hpp b/lib/imc_raw.hpp
index ee77daa..379bf7b 100644
--- a/lib/imc_raw.hpp
+++ b/lib/imc_raw.hpp
@@ -19,6 +19,14 @@
 
 namespace imc
 {
+  struct channel_chunk {
+    std::vector<double> x;
+    std::vector<double> y;
+    unsigned long int start;
+    unsigned long int count;
+    bool has_x;
+  };
+
   class raw
   {
     // (path of) raw-file and its basename
@@ -393,6 +401,60 @@ namespace imc
       return channels;
     }
 
+    // get length of a channel
+    unsigned long int get_channel_length(std::string uuid)
+    {
+      if ( channels_.count(uuid) )
+      {
+        return (unsigned long int)channels_.at(uuid).ydata_.size();
+      }
+      else
+      {
+        throw std::runtime_error(std::string("channel does not exist:") + uuid);
+      }
+    }
+
+    // read a chunk of channel data
+    channel_chunk read_channel_chunk(std::string uuid, unsigned long int start, unsigned long int count, bool include_x)
+    {
+      if ( !channels_.count(uuid) )
+      {
+        throw std::runtime_error(std::string("channel does not exist:") + uuid);
+      }
+
+      imc::channel& ch = channels_.at(uuid);
+      unsigned long int total_len = ch.ydata_.size();
+
+      if ( start >= total_len )
+      {
+         return { {}, {}, start, 0, include_x };
+      }
+
+      unsigned long int end = start + count;
+      if ( end > total_len ) end = total_len;
+      unsigned long int actual_count = end - start;
+
+      channel_chunk chunk;
+      chunk.start = start;
+      chunk.count = actual_count;
+      chunk.has_x = include_x;
+      chunk.y.reserve(actual_count);
+      if (include_x) chunk.x.reserve(actual_count);
+
+      for (unsigned long int i = 0; i < actual_count; ++i)
+      {
+        chunk.y.push_back(ch.ydata_[start + i].as_double());
+        if (include_x)
+        {
+           if (start + i < ch.xdata_.size())
+             chunk.x.push_back(ch.xdata_[start + i].as_double());
+           else
+             chunk.x.push_back(0.0);
+        }
+      }
+      return chunk;
+    }
+
     // print single specific channel
     void print_channel(std::string channeluuid, std::string outputfile, const char sep)
     {
diff --git a/python/examples/usage_numpy_chunks.py b/python/examples/usage_numpy_chunks.py
new file mode 100644
index 0000000..b111b5d
--- /dev/null
+++ b/python/examples/usage_numpy_chunks.py
@@ -0,0 +1,56 @@
+
+import imctermite
+import json
+import os
+import numpy as np
+
+# Path to a sample file
+raw_file = b"samples/datasetA/datasetA_1.raw"
+if not os.path.exists(raw_file):
+    print(f"Sample file {raw_file} not found.")
+    exit(1)
+
+print(f"Loading {raw_file}")
+
+try:
+    imcraw = imctermite.imctermite(raw_file)
+except RuntimeError as e:
+    print(f"Failed to load/parse raw-file: {e}")
+    exit(1)
+
+# Get channels metadata
+channels = imcraw.get_channels(False)
+if not channels:
+    print("No channels found.")
+    exit(0)
+
+# Pick the first channel
+first_channel_uuid = channels[0]['uuid'].encode('utf-8')
+print(f"Iterating over channel {first_channel_uuid}")
+
+# Iterate in chunks
+total_rows = 0
+chunk_size = 100
+
+for chunk in imcraw.iter_channel_numpy(first_channel_uuid, include_x=True, chunk_rows=chunk_size):
+    start = chunk['start']
+    y = chunk['y']
+    x = chunk.get('x')
+    
+    count = len(y)
+    total_rows += count
+    
+    print(f"Chunk start={start}, count={count}, y_shape={y.shape}, y_dtype={y.dtype}")
+    if x is not None:
+        print(f"  x_shape={x.shape}, x_dtype={x.dtype}")
+        
+    # Verify data (optional, just checking first few values)
+    if start == 0 and count > 0:
+        print(f"  First y value: {y[0]}")
+
+    # Here you could write the chunk to a Parquet file using pyarrow or fastparquet
+    # e.g.
+    # table = pa.Table.from_pydict({"x": x, "y": y})
+    # pq.write_table(table, output_file)
+
+print(f"Total rows read: {total_rows}")
diff --git a/python/imctermite.pxd b/python/imctermite.pxd
index f76521e..cbeae15 100644
--- a/python/imctermite.pxd
+++ b/python/imctermite.pxd
@@ -6,6 +6,13 @@ from libcpp cimport bool
 
 cdef extern from "lib/imc_raw.hpp" namespace "imc":
 
+  cdef struct channel_chunk:
+    vector[double] x
+    vector[double] y
+    unsigned long int start
+    unsigned long int count
+    bool has_x
+
   cdef cppclass cppimctermite "imc::raw":
 
     # constructor(s)
@@ -18,6 +25,12 @@ cdef extern from "lib/imc_raw.hpp" namespace "imc":
     # get JSON list of channels
     vector[string] get_channels(bool json, bool data) except +
 
+    # get length of a channel
+    unsigned long int get_channel_length(string uuid) except +
+
+    # read a chunk of channel data
+    channel_chunk read_channel_chunk(string uuid, unsigned long int start, unsigned long int count, bool include_x) except +
+
     # print single channel/all channels
     void print_channel(string channeluuid, string outputdir, char delimiter) except +
     void print_channels(string outputdir, char delimiter) except +
diff --git a/python/imctermite.pyx b/python/imctermite.pyx
index 3bbc7fa..4c27e0b 100644
--- a/python/imctermite.pyx
+++ b/python/imctermite.pyx
@@ -1,7 +1,9 @@
 # distutils: language = c++
 # cython: language_level = 3
 
-from imctermite cimport cppimctermite
+from imctermite cimport cppimctermite, channel_chunk
+cimport numpy as cnp
+import numpy as np
 
 import json as jn
 import decimal
@@ -35,6 +37,34 @@ cdef class imctermite:
     chnlstjn = [jn.loads(chn.decode(get_codepage(chn),errors="ignore")) for chn in chnlst]
     return chnlstjn
 
+  def iter_channel_numpy(self, string channeluuid, bool include_x=True, int chunk_rows=1000000):
+    cdef unsigned long int total_len = self.cppimc.get_channel_length(channeluuid)
+    cdef unsigned long int start = 0
+    cdef channel_chunk chunk
+    cdef cnp.ndarray x_arr
+    cdef cnp.ndarray y_arr
+
+    while start < total_len:
+        chunk = self.cppimc.read_channel_chunk(channeluuid, start, chunk_rows, include_x)
+        
+        # Create numpy arrays from vectors
+        y_arr = np.array(chunk.y, dtype=np.float64)
+        
+        result = {
+            "start": chunk.start,
+            "y": y_arr
+        }
+        
+        if include_x:
+            x_arr = np.array(chunk.x, dtype=np.float64)
+            result["x"] = x_arr
+            
+        yield result
+        
+        start += chunk.count
+        if chunk.count == 0:
+            break
+
   # print single channel/all channels
   def print_channel(self, string channeluuid, string outputfile, char delimiter):
     self.cppimc.print_channel(channeluuid,outputfile,delimiter)
diff --git a/python/setup.py b/python/setup.py
index 98ebef7..8d2e8a2 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -1,6 +1,7 @@
 from setuptools import Extension, setup
 from Cython.Build import cythonize
 import sys
+import numpy
 
 print("building on platform: "+sys.platform)
 
@@ -13,6 +14,7 @@
 extension = Extension(
     "imctermite",
     sources=["imctermite.pyx"],
+    include_dirs=[numpy.get_include()],
     extra_compile_args=cmpArgs[sys.platform]
 )
 
diff --git a/tests/test_python.py b/tests/test_python.py
index 7470709..ee8e654 100644
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -7,6 +7,7 @@
 import os
 import tempfile
 import csv
+import numpy as np
 from pathlib import Path
 
 try:
@@ -104,17 +105,71 @@ def test_data_values_are_numeric(self, sample_data):
                 assert isinstance(x, (int, float))
             for y in channel['ydata'][:10]:
                 assert isinstance(y, (int, float))
-    
-    def test_xdata_monotonic(self, sample_data):
-        """X-data (time) should be monotonically increasing"""
-        for channel in sample_data:
-            xdata = channel['xdata']
-            if len(xdata) > 1:
-                # Check if mostly increasing (allow small floating point issues)
-                increasing_count = sum(1 for i in range(len(xdata)-1) if xdata[i] <= xdata[i+1])
-                ratio = increasing_count / (len(xdata) - 1)
-                assert ratio > 0.95, f"X-data not monotonic enough: {ratio:.2%}"
+            for val in channel['ydata']:
+                assert isinstance(val, (int, float))
+
+class TestChunkedNumpy:
+    """Test chunked NumPy API"""
 
+    def test_chunked_iteration_all_samples(self):
+        """Verify chunked iteration against get_channels for all samples"""
+        
+        raw_files = list(DATASET_A.glob("*.raw")) + list(DATASET_B.glob("*.raw"))
+        # Sort for deterministic order
+        raw_files.sort()
+        
+        for raw_file in raw_files:
+            # print(f"Testing {raw_file.name}")
+            try:
+                imc = imctermite.imctermite(str(raw_file).encode())
+                
+                # Get reference data
+                channels_ref = imc.get_channels(include_data=True)
+                
+                for ch_ref in channels_ref:
+                    uuid = ch_ref['uuid'].encode('utf-8')
+                    
+                    # Test with include_x=True
+                    y_chunks = []
+                    x_chunks = []
+                    
+                    # Use a small chunk size to ensure we test chunking logic even on small files
+                    # Some files might be very small, so 100 is a good stress test
+                    for chunk in imc.iter_channel_numpy(uuid, include_x=True, chunk_rows=100):
+                        y_chunks.append(chunk['y'])
+                        x_chunks.append(chunk['x'])
+                    
+                    if not y_chunks:
+                        assert len(ch_ref['ydata']) == 0
+                        continue
+                        
+                    y_full = np.concatenate(y_chunks)
+                    x_full = np.concatenate(x_chunks)
+                    
+                    # Compare with reference
+                    # Note: get_channels returns lists of floats. 
+                    # We compare them with numpy arrays.
+                    
+                    # Check lengths first
+                    assert len(y_full) == len(ch_ref['ydata']), f"Length mismatch in {raw_file.name} channel {uuid}"
+                    
+                    # Check values
+                    assert np.allclose(y_full, ch_ref['ydata'], equal_nan=True), f"Y data mismatch in {raw_file.name} channel {uuid}"
+                    assert np.allclose(x_full, ch_ref['xdata'], equal_nan=True), f"X data mismatch in {raw_file.name} channel {uuid}"
+                    
+                    # Test with include_x=False
+                    y_chunks_nox = []
+                    for chunk in imc.iter_channel_numpy(uuid, include_x=False, chunk_rows=100):
+                        y_chunks_nox.append(chunk['y'])
+                        assert 'x' not in chunk
+                    
+                    if y_chunks_nox:
+                        y_full_nox = np.concatenate(y_chunks_nox)
+                        assert np.allclose(y_full_nox, ch_ref['ydata'], equal_nan=True), f"Y data mismatch (no x) in {raw_file.name} channel {uuid}"
+            
+            except Exception as e:
+                pytest.fail(f"Failed processing {raw_file.name}: {str(e)}")
+    
 
 class TestCSVOutput:
     """Test CSV file generation"""

From 393240ba5513e72a74b0e7c1f78ee8ea1925ff74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <janole@goedeke.de>
Date: Sun, 14 Dec 2025 23:09:40 +0100
Subject: [PATCH 03/12] Enhance channel data handling with raw mode support and
 update examples for chunked NumPy export

---
 README.md                             |  20 +---
 lib/imc_channel.hpp                   |   3 +
 lib/imc_raw.hpp                       | 135 +++++++++++++++++++++++---
 python/examples/usage_numpy_chunks.py |  71 ++++++++++----
 python/imctermite.pxd                 |  11 ++-
 python/imctermite.pyx                 |  44 ++++++++-
 tests/test_python.py                  |  36 ++++---
 7 files changed, 243 insertions(+), 77 deletions(-)

diff --git a/README.md b/README.md
index 31db211..0436816 100644
--- a/README.md
+++ b/README.md
@@ -221,25 +221,7 @@ can be found in the `python/examples` folder.
 
 ### Chunked NumPy export (fast path)
 
-For large files, you can iterate over channel data in chunks as NumPy arrays. This avoids creating large Python lists and allows for streaming processing (e.g. writing to Parquet).
-
-```python
-import imctermite
-import numpy as np
-
-imcraw = imctermite.imctermite(b"samples/large_file.raw")
-channels = imcraw.get_channels(False)
-uuid = channels[0]['uuid'].encode('utf-8')
-
-# Iterate over channel data in chunks of 1 million samples
-for chunk in imcraw.iter_channel_numpy(uuid, include_x=True, chunk_rows=1_000_000):
-    y_data = chunk['y'] # NumPy array
-    x_data = chunk.get('x') # NumPy array (if include_x=True)
-    start_index = chunk['start']
-    
-    # Process chunk (e.g. write to parquet)
-    print(f"Processed chunk starting at {start_index}, size {len(y_data)}")
-```
+For large files, you can iterate over channel data in chunks as NumPy arrays. This avoids creating large Python lists and allows for streaming processing (e.g. writing to Parquet). See [`python/examples/usage_numpy_chunks.py`](python/examples/usage_numpy_chunks.py) for a complete example.
 
 ## Testing
 
diff --git a/lib/imc_channel.hpp b/lib/imc_channel.hpp
index 6e19e1c..0a83f0a 100644
--- a/lib/imc_channel.hpp
+++ b/lib/imc_channel.hpp
@@ -716,12 +716,15 @@ namespace imc
              <<"\",\"codepage\":\""<<codepage_
              <<"\",\"yname\":\""<<prepjsonstr(yname_)
              <<"\",\"yunit\":\""<<prepjsonstr(yunit_)
+             <<"\",\"datatype\":\""<<ydatatp_
              <<"\",\"significantbits\":\""<<ysignbits_
              <<"\",\"buffer-size\":\""<<ybuffer_size_
              <<"\",\"xname\":\""<<prepjsonstr(xname_)
              <<"\",\"xunit\":\""<<prepjsonstr(xunit_)
              <<"\",\"xstepwidth\":\""<<xstepwidth_
              <<"\",\"xoffset\":\""<<xstart_
+             <<"\",\"factor\":\""<<yfactor_
+             <<"\",\"offset\":\""<<yoffset_
              <<"\",\"group\":{"<<"\"index\":\""<<group_index_
                                <<"\",\"name\":\""<<group_name_
                                <<"\",\"comment\":\""<<group_comment_<<"\""<<"}";
diff --git a/lib/imc_raw.hpp b/lib/imc_raw.hpp
index 379bf7b..77804db 100644
--- a/lib/imc_raw.hpp
+++ b/lib/imc_raw.hpp
@@ -20,11 +20,13 @@
 namespace imc
 {
   struct channel_chunk {
-    std::vector<double> x;
-    std::vector<double> y;
+    std::vector<unsigned char> x_bytes;
+    std::vector<unsigned char> y_bytes;
     unsigned long int start;
     unsigned long int count;
     bool has_x;
+    int x_type;
+    int y_type;
   };
 
   class raw
@@ -414,8 +416,21 @@ namespace imc
       }
     }
 
+    // get numeric type of a channel (its ydatatp_ cast to int); throws std::runtime_error if the uuid is unknown
+    int get_channel_numeric_type(std::string uuid)
+    {
+      if ( channels_.count(uuid) )
+      {
+        return (int)channels_.at(uuid).ydatatp_;
+      }
+      else
+      {
+        throw std::runtime_error(std::string("channel does not exist:") + uuid);
+      }
+    }
+
     // read a chunk of channel data
-    channel_chunk read_channel_chunk(std::string uuid, unsigned long int start, unsigned long int count, bool include_x)
+    channel_chunk read_channel_chunk(std::string uuid, unsigned long int start, unsigned long int count, bool include_x, bool raw_mode)
     {
       if ( !channels_.count(uuid) )
       {
@@ -427,7 +442,7 @@ namespace imc
 
       if ( start >= total_len )
       {
-         return { {}, {}, start, 0, include_x };
+         return { {}, {}, start, 0, include_x, 0, 0 };
       }
 
       unsigned long int end = start + count;
@@ -438,20 +453,108 @@ namespace imc
       chunk.start = start;
       chunk.count = actual_count;
       chunk.has_x = include_x;
-      chunk.y.reserve(actual_count);
-      if (include_x) chunk.x.reserve(actual_count);
 
-      for (unsigned long int i = 0; i < actual_count; ++i)
-      {
-        chunk.y.push_back(ch.ydata_[start + i].as_double());
-        if (include_x)
-        {
-           if (start + i < ch.xdata_.size())
-             chunk.x.push_back(ch.xdata_[start + i].as_double());
-           else
-             chunk.x.push_back(0.0);
-        }
+      // Handle Y data
+      if (raw_mode) {
+          // Raw mode: read bytes directly from buffer
+          int type = (int)ch.ydatatp_;
+          unsigned long int bytes_per_sample = ch.ysignbits_ / 8;
+          
+          if (mapblocks_.count(ch.chnenv_.CSuuid_) == 0) {
+              throw std::runtime_error("CS block not found for channel");
+          }
+          imc::block& cs_block = mapblocks_.at(ch.chnenv_.CSuuid_);
+          std::vector<imc::parameter> prms = cs_block.get_parameters();
+          if (prms.size() < 4) throw std::runtime_error("Invalid CS block parameters");
+          unsigned long int buffstrt = prms[3].begin();
+          
+          unsigned long int abs_start = buffstrt + ch.ybuffer_offset_ + 1 + start * bytes_per_sample;
+          unsigned long int byte_count = actual_count * bytes_per_sample;
+          
+          if (abs_start + byte_count > buffer_.size()) {
+               throw std::runtime_error("Buffer read out of bounds");
+          }
+          
+          if (type == 13) { // six_byte_unsigned_long -> promote to 8 byte (uint64)
+              chunk.y_type = 13; // Keep original type ID, but data is promoted
+              chunk.y_bytes.resize(actual_count * 8);
+              uint64_t* dest = reinterpret_cast<uint64_t*>(chunk.y_bytes.data());
+              
+              for (unsigned long int i = 0; i < actual_count; ++i) {
+                  unsigned long int src_idx = abs_start + i * 6;
+                  uint64_t val = 0;
+                  // Assuming Little Endian storage in file
+                  for (int b = 0; b < 6; ++b) {
+                      val |= (uint64_t)buffer_[src_idx + b] << (b * 8);
+                  }
+                  dest[i] = val;
+              }
+          } else {
+              chunk.y_type = type;
+              chunk.y_bytes.resize(byte_count);
+              std::copy(buffer_.begin() + abs_start, buffer_.begin() + abs_start + byte_count, chunk.y_bytes.begin());
+          }
+      } else {
+          // Scaled mode: convert to double
+          chunk.y_type = 8; // imc::numtype::ddouble
+          chunk.y_bytes.resize(actual_count * sizeof(double));
+          double* ptr = reinterpret_cast<double*>(chunk.y_bytes.data());
+          
+          for (unsigned long int i = 0; i < actual_count; ++i) {
+              ptr[i] = ch.ydata_[start + i].as_double();
+          }
+      }
+      // Handle X data; x_type defaults to 0 (as in the empty-chunk return) so it is never left uninitialized
+      chunk.x_type = 0;
+      if (include_x) {
+          if (ch.dimension_ == 2 && raw_mode) {
+              // XY channel, raw mode
+              int type = (int)ch.xdatatp_;
+              unsigned long int bytes_per_sample = ch.xsignbits_ / 8;
+              
+              imc::block& cs_block = mapblocks_.at(ch.chnenv_.CSuuid_);
+              std::vector<imc::parameter> prms = cs_block.get_parameters();
+              unsigned long int buffstrt = prms[3].begin();
+              
+              unsigned long int abs_start = buffstrt + ch.xbuffer_offset_ + 1 + start * bytes_per_sample;
+              unsigned long int byte_count = actual_count * bytes_per_sample;
+              
+              if (abs_start + byte_count > buffer_.size()) {
+                   throw std::runtime_error("Buffer read out of bounds (X)");
+              }
+              
+              if (type == 13) { // six_byte_unsigned_long -> promote to 8 byte
+                  chunk.x_type = 13; // Keep original type ID
+                  chunk.x_bytes.resize(actual_count * 8);
+                  uint64_t* dest = reinterpret_cast<uint64_t*>(chunk.x_bytes.data());
+                  for (unsigned long int i = 0; i < actual_count; ++i) {
+                      unsigned long int src_idx = abs_start + i * 6;
+                      uint64_t val = 0;
+                      for (int b = 0; b < 6; ++b) {
+                          val |= (uint64_t)buffer_[src_idx + b] << (b * 8);
+                      }
+                      dest[i] = val;
+                  }
+              } else {
+                  chunk.x_type = type;
+                  chunk.x_bytes.resize(byte_count);
+                  std::copy(buffer_.begin() + abs_start, buffer_.begin() + abs_start + byte_count, chunk.x_bytes.begin());
+              }
+          } else {
+              // Generated X or scaled X
+              chunk.x_type = 8; // imc::numtype::ddouble
+              chunk.x_bytes.resize(actual_count * sizeof(double));
+              double* ptr = reinterpret_cast<double*>(chunk.x_bytes.data());
+              
+              for (unsigned long int i = 0; i < actual_count; ++i) {
+                  if (start + i < ch.xdata_.size())
+                      ptr[i] = ch.xdata_[start + i].as_double();
+                  else
+                      ptr[i] = 0.0;
+              }
+          }
       }
+
       return chunk;
     }
 
diff --git a/python/examples/usage_numpy_chunks.py b/python/examples/usage_numpy_chunks.py
index b111b5d..6aeaf09 100644
--- a/python/examples/usage_numpy_chunks.py
+++ b/python/examples/usage_numpy_chunks.py
@@ -5,7 +5,8 @@
 import numpy as np
 
 # Path to a sample file
-raw_file = b"samples/datasetA/datasetA_1.raw"
+# Using sampleB.raw because it has integer data with scaling (factor=0.01, offset=327.68)
+raw_file = b"samples/sampleB.raw"
 if not os.path.exists(raw_file):
     print(f"Sample file {raw_file} not found.")
     exit(1)
@@ -25,14 +26,23 @@
     exit(0)
 
 # Pick the first channel
-first_channel_uuid = channels[0]['uuid'].encode('utf-8')
-print(f"Iterating over channel {first_channel_uuid}")
+# For sampleB.raw, channel 347 is the interesting one
+target_uuid = "347"
+channel_info = next((ch for ch in channels if ch['uuid'] == target_uuid), channels[0])
 
-# Iterate in chunks
+first_channel_uuid = channel_info['uuid'].encode('utf-8')
+print(f"Iterating over channel {first_channel_uuid} ({channel_info.get('name', 'unnamed')})")
+
+# Check native datatype
+if 'datatype' in channel_info:
+    print(f"Native IMC datatype ID: {channel_info['datatype']}")
+
+# Example 1: Scaled mode (default) - returns floats (physical units)
+print("\n--- Scaled Mode (Physical Units) ---")
 total_rows = 0
-chunk_size = 100
+chunk_size = 1000
 
-for chunk in imcraw.iter_channel_numpy(first_channel_uuid, include_x=True, chunk_rows=chunk_size):
+for chunk in imcraw.iter_channel_numpy(first_channel_uuid, include_x=True, chunk_rows=chunk_size, mode="scaled"):
     start = chunk['start']
     y = chunk['y']
     x = chunk.get('x')
@@ -40,17 +50,38 @@
     count = len(y)
     total_rows += count
     
-    print(f"Chunk start={start}, count={count}, y_shape={y.shape}, y_dtype={y.dtype}")
-    if x is not None:
-        print(f"  x_shape={x.shape}, x_dtype={x.dtype}")
-        
-    # Verify data (optional, just checking first few values)
-    if start == 0 and count > 0:
-        print(f"  First y value: {y[0]}")
-
-    # Here you could write the chunk to a Parquet file using pyarrow or fastparquet
-    # e.g.
-    # table = pa.Table.from_pydict({"x": x, "y": y})
-    # pq.write_table(table, output_file)
-
-print(f"Total rows read: {total_rows}")
+    if total_rows <= chunk_size * 2: # Print only first few chunks
+        print(f"Chunk start={start}, count={count}, y_shape={y.shape}, y_dtype={y.dtype}")
+        if x is not None:
+            print(f"  x_shape={x.shape}, x_dtype={x.dtype}")
+        if count > 0:
+            print(f"  First y value: {y[0]}")
+
+print(f"Total rows read (scaled): {total_rows}")
+
+# Example 2: Raw mode - returns native types (e.g. integers)
+print("\n--- Raw Mode (Native Types) ---")
+
+# Get scaling factors
+factor = float(channel_info.get('factor', 1.0))
+offset = float(channel_info.get('offset', 0.0))
+print(f"Scaling: factor={factor}, offset={offset}")
+
+total_rows = 0
+
+for chunk in imcraw.iter_channel_numpy(first_channel_uuid, include_x=True, chunk_rows=chunk_size, mode="raw"):
+    start = chunk['start']
+    y = chunk['y']
+    
+    count = len(y)
+    total_rows += count
+    
+    if total_rows <= chunk_size * 2:
+        print(f"Chunk start={start}, count={count}, y_shape={y.shape}, y_dtype={y.dtype}")
+        if count > 0:
+            raw_val = y[0]
+            scaled_val = raw_val * factor + offset
+            print(f"  First y value (raw): {raw_val}")
+            print(f"  First y value (manually scaled): {scaled_val}")
+
+print(f"Total rows read (raw): {total_rows}")
diff --git a/python/imctermite.pxd b/python/imctermite.pxd
index cbeae15..262f57a 100644
--- a/python/imctermite.pxd
+++ b/python/imctermite.pxd
@@ -7,11 +7,13 @@ from libcpp cimport bool
 cdef extern from "lib/imc_raw.hpp" namespace "imc":
 
   cdef struct channel_chunk:
-    vector[double] x
-    vector[double] y
+    vector[unsigned char] x_bytes
+    vector[unsigned char] y_bytes
     unsigned long int start
     unsigned long int count
     bool has_x
+    int x_type
+    int y_type
 
   cdef cppclass cppimctermite "imc::raw":
 
@@ -28,8 +30,11 @@ cdef extern from "lib/imc_raw.hpp" namespace "imc":
     # get length of a channel
     unsigned long int get_channel_length(string uuid) except +
 
+    # get numeric type of a channel
+    int get_channel_numeric_type(string uuid) except +
+
     # read a chunk of channel data
-    channel_chunk read_channel_chunk(string uuid, unsigned long int start, unsigned long int count, bool include_x) except +
+    channel_chunk read_channel_chunk(string uuid, unsigned long int start, unsigned long int count, bool include_x, bool raw_mode) except +
 
     # print single channel/all channels
     void print_channel(string channeluuid, string outputdir, char delimiter) except +
diff --git a/python/imctermite.pyx b/python/imctermite.pyx
index 4c27e0b..5618592 100644
--- a/python/imctermite.pyx
+++ b/python/imctermite.pyx
@@ -4,6 +4,7 @@
 from imctermite cimport cppimctermite, channel_chunk
 cimport numpy as cnp
 import numpy as np
+from libc.string cimport memcpy
 
 import json as jn
 import decimal
@@ -37,18 +38,72 @@ cdef class imctermite:
     chnlstjn = [jn.loads(chn.decode(get_codepage(chn),errors="ignore")) for chn in chnlst]
     return chnlstjn
 
-  def iter_channel_numpy(self, string channeluuid, bool include_x=True, int chunk_rows=1000000):
+  def iter_channel_numpy(self, string channeluuid, bool include_x=True, int chunk_rows=1000000, str mode="scaled"):
+    """Iterate over the samples of one channel as chunks of NumPy arrays.
+
+    Arguments:
+      channeluuid -- uuid of the channel to read (bytes)
+      include_x   -- if True, every chunk also carries the matching "x" array
+      chunk_rows  -- maximum number of samples per chunk, must be positive
+      mode        -- "scaled": float64 values as converted by the C++ library,
+                     "raw": samples in the numeric type reported by the chunk
+
+    Yields a dict per chunk with the keys "start" (index of the chunk's first
+    sample), "y" and, only if include_x is True, "x".
+
+    Raises ValueError for an unknown mode, a non-positive chunk_rows or a
+    chunk whose byte count does not fit the dtype chosen for it. Since this
+    is a generator, the arguments are only checked once iteration starts.
+    """
     cdef unsigned long int total_len = self.cppimc.get_channel_length(channeluuid)
     cdef unsigned long int start = 0
     cdef channel_chunk chunk
     cdef cnp.ndarray x_arr
     cdef cnp.ndarray y_arr
+    cdef bool raw_mode = (mode == "raw")
+    
+    # Fail loudly on typos like "Raw" instead of silently returning scaled data
+    if mode not in ("scaled", "raw"):
+        raise ValueError("mode must be 'scaled' or 'raw', got {!r}".format(mode))
+    # chunk_rows == 0 would stop after a single empty chunk, and a negative
+    # value would wrap around when converted to the unsigned C++ count
+    if chunk_rows <= 0:
+        raise ValueError("chunk_rows must be positive, got {}".format(chunk_rows))
+    
+    # Map imc::numtype to numpy dtype
+    # Types 9 (imc_devices_transitional_recording) and 10 (timestamp_ascii) 
+    # are not currently supported by the underlying C++ library.
+    dtype_map = {
+        1: np.uint8,   # unsigned_byte
+        2: np.int8,    # signed_byte
+        3: np.uint16,  # unsigned_short
+        4: np.int16,   # signed_short
+        5: np.uint32,  # unsigned_long (imc_Ulongint is unsigned int (32-bit) on x86_64 usually)
+        6: np.int32,   # signed_long (imc_Slongint is signed int)
+        7: np.float32, # ffloat
+        8: np.float64, # ddouble
+        11: np.uint16, # two_byte_word_digital
+        12: np.uint64, # eight_byte_unsigned_long
+        13: np.uint64, # six_byte_unsigned_long (promoted to 8 bytes in C++)
+        14: np.int64   # eight_byte_signed_long
+    }
 
     while start < total_len:
-        chunk = self.cppimc.read_channel_chunk(channeluuid, start, chunk_rows, include_x)
+        chunk = self.cppimc.read_channel_chunk(channeluuid, start, chunk_rows, include_x, raw_mode)
         
-        # Create numpy arrays from vectors
-        y_arr = np.array(chunk.y, dtype=np.float64)
+        # Create numpy arrays from bytes
+        y_dtype = dtype_map.get(chunk.y_type, np.float64)
+        
+        y_arr = np.empty(chunk.count, dtype=y_dtype)
+             
+        # memcpy trusts the byte count, so refuse a chunk that disagrees with y_dtype
+        if chunk.y_bytes.size() != y_arr.nbytes:
+            raise ValueError("y chunk has {} bytes but dtype {} needs {}".format(
+                chunk.y_bytes.size(), y_arr.dtype, y_arr.nbytes))
+        if chunk.y_bytes.size() > 0:
+            memcpy(<void*> cnp.PyArray_DATA(y_arr), 
+                   <void*> chunk.y_bytes.data(), 
+                   chunk.y_bytes.size())
         
         result = {
             "start": chunk.start,
@@ -56,7 +111,18 @@ cdef class imctermite:
         }
         
         if include_x:
-            x_arr = np.array(chunk.x, dtype=np.float64)
+            x_dtype = dtype_map.get(chunk.x_type, np.float64)
+            x_arr = np.empty(chunk.count, dtype=x_dtype)
+            
+            # same size guard as for y before copying raw bytes into the array
+            if chunk.x_bytes.size() != x_arr.nbytes:
+                raise ValueError("x chunk has {} bytes but dtype {} needs {}".format(
+                    chunk.x_bytes.size(), x_arr.dtype, x_arr.nbytes))
+            if chunk.x_bytes.size() > 0:
+                memcpy(<void*> cnp.PyArray_DATA(x_arr), 
+                       <void*> chunk.x_bytes.data(), 
+                       chunk.x_bytes.size())
+            
             result["x"] = x_arr
             
         yield result
diff --git a/tests/test_python.py b/tests/test_python.py
index ee8e654..dd6a5df 100644
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -114,9 +114,9 @@ class TestChunkedNumpy:
     def test_chunked_iteration_all_samples(self):
         """Verify chunked iteration against get_channels for all samples"""
         
-        raw_files = list(DATASET_A.glob("*.raw")) + list(DATASET_B.glob("*.raw"))
-        # Sort for deterministic order
-        raw_files.sort()
+        # Get all .raw and .dat files recursively
+        raw_files = sorted(list(SAMPLES_DIR.glob("**/*.raw")) + 
+                           list(SAMPLES_DIR.glob("**/*.dat")))
         
         for raw_file in raw_files:
             # print(f"Testing {raw_file.name}")
@@ -135,7 +135,7 @@ def test_chunked_iteration_all_samples(self):
                     
                     # Use a small chunk size to ensure we test chunking logic even on small files
                     # Some files might be very small, so 100 is a good stress test
-                    for chunk in imc.iter_channel_numpy(uuid, include_x=True, chunk_rows=100):
+                    for chunk in imc.iter_channel_numpy(uuid, include_x=True, chunk_rows=100, mode="scaled"):
                         y_chunks.append(chunk['y'])
                         x_chunks.append(chunk['x'])
                     
@@ -159,13 +159,28 @@ def test_chunked_iteration_all_samples(self):
                     
                     # Test with include_x=False
                     y_chunks_nox = []
-                    for chunk in imc.iter_channel_numpy(uuid, include_x=False, chunk_rows=100):
+                    for chunk in imc.iter_channel_numpy(uuid, include_x=False, chunk_rows=100, mode="scaled"):
                         y_chunks_nox.append(chunk['y'])
                         assert 'x' not in chunk
                     
                     if y_chunks_nox:
                         y_full_nox = np.concatenate(y_chunks_nox)
                         assert np.allclose(y_full_nox, ch_ref['ydata'], equal_nan=True), f"Y data mismatch (no x) in {raw_file.name} channel {uuid}"
+
+                    # Test raw mode (basic check that it runs and returns correct length)
+                    # We can't easily verify values without reimplementing the scaling logic,
+                    # but we can check that it returns something valid.
+                    y_chunks_raw = []
+                    for chunk in imc.iter_channel_numpy(uuid, include_x=False, chunk_rows=100, mode="raw"):
+                        y_chunks_raw.append(chunk['y'])
+                        # Check that dtype is not float64 unless it really is float data
+                        # Most samples are likely int16 or similar
+                        # print(f"Raw dtype: {chunk['y'].dtype}")
+                    
+                    if y_chunks_raw:
+                        y_full_raw = np.concatenate(y_chunks_raw)
+                        assert len(y_full_raw) == len(ch_ref['ydata']), f"Raw length mismatch in {raw_file.name} channel {uuid}"
+
             
             except Exception as e:
                 pytest.fail(f"Failed processing {raw_file.name}: {str(e)}")
@@ -248,12 +263,8 @@ def test_process_all_sample_files(self):
             pytest.skip(f"Samples directory not found: {SAMPLES_DIR}")
         
         # Get all .raw and .dat files recursively
-        files_to_test = sorted(list(SAMPLES_DIR.glob("*.raw")) + 
-                               list(SAMPLES_DIR.glob("*.dat")) +
-                               list(SAMPLES_DIR.glob("**/*.raw")) + 
+        files_to_test = sorted(list(SAMPLES_DIR.glob("**/*.raw")) + 
                                list(SAMPLES_DIR.glob("**/*.dat")))
-        # Remove duplicates (files in root will be in both patterns)
-        files_to_test = sorted(set(files_to_test))
         
         if len(files_to_test) == 0:
             pytest.skip("No .raw or .dat files in samples directory")
@@ -278,11 +289,8 @@ def test_extract_all_sample_files_with_data(self):
             pytest.skip(f"Samples directory not found: {SAMPLES_DIR}")
         
         # Get all .raw and .dat files recursively
-        files_to_test = sorted(list(SAMPLES_DIR.glob("*.raw")) + 
-                               list(SAMPLES_DIR.glob("*.dat")) +
-                               list(SAMPLES_DIR.glob("**/*.raw")) + 
+        files_to_test = sorted(list(SAMPLES_DIR.glob("**/*.raw")) + 
                                list(SAMPLES_DIR.glob("**/*.dat")))
-        files_to_test = sorted(set(files_to_test))
         
         if len(files_to_test) == 0:
             pytest.skip("No .raw or .dat files in samples directory")

From 4cee020a365f396f305d02dba8ef3eea88fc67a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <janole@goedeke.de>
Date: Mon, 15 Dec 2025 22:29:28 +0100
Subject: [PATCH 04/12] Refactor IMC library for improved data handling and
 performance

- Modified `component_group` and `channel` constructors to accept raw buffer pointers instead of vectors.
- Enhanced `load_all_data` and `init_metadata` methods for better data initialization and loading.
- Implemented `read_chunk` method in `channel` to facilitate chunked data reading with support for raw and scaled modes.
- Updated `convert_data_to_type` and `convert_chunk_to_double` functions to handle raw data more efficiently.
- Removed redundant `imc_result.hpp` file to streamline the codebase.
- Adjusted Python bindings in `imctermite.pyx` to manage C++ instance memory correctly.
---
 lib/imc_block.hpp      |  22 ++--
 lib/imc_buffer.hpp     | 127 +++++++++++++++++++++
 lib/imc_channel.hpp    | 246 +++++++++++++++++++++++++++++++++--------
 lib/imc_conversion.hpp |  61 +++++++++-
 lib/imc_object.hpp     |  38 +++----
 lib/imc_raw.hpp        | 194 ++++++--------------------------
 lib/imc_result.hpp     |  30 -----
 python/imctermite.pyx  |  10 +-
 8 files changed, 456 insertions(+), 272 deletions(-)
 create mode 100644 lib/imc_buffer.hpp
 delete mode 100644 lib/imc_result.hpp

diff --git a/lib/imc_block.hpp b/lib/imc_block.hpp
index 332a3b6..71b9d58 100644
--- a/lib/imc_block.hpp
+++ b/lib/imc_block.hpp
@@ -34,7 +34,8 @@ namespace imc
 
     // name and buffer of associated raw file
     std::string raw_file_;
-    const std::vector<unsigned char>* buffer_;
+    const unsigned char* buffer_;
+    size_t buffer_size_;
 
     // offset of first/last byte of parameters in block (separated by ch_sep_)
     // w.r.t. to first byte of block (=0)
@@ -44,7 +45,7 @@ namespace imc
 
     // constructor
     block(key thekey, unsigned long int begin, unsigned long int end,
-                      std::string raw_file, const std::vector<unsigned char>* buffer):
+                      std::string raw_file, const unsigned char* buffer, size_t buffer_size):
       thekey_(thekey), uuid_(std::to_string(begin))
     {
       if ( !imc::check_key(thekey) ) throw std::logic_error("unknown key");
@@ -56,14 +57,15 @@ namespace imc
       }
       raw_file_ = raw_file;
       buffer_ = buffer;
+      buffer_size_ = buffer_size;
 
       // make sure "end_" does not exceed buffer size due to invalid "length" parameter of block
-      if ( end_ > buffer_->size() )
+      if ( end_ > buffer_size_ )
       {
         std::cout<<"WARNING: invalid length parameter in "<<thekey_.name_<<"-block "
-                 <<"(block-end:"<<end_<<",buffer-size:"<<buffer_->size()<<")"
+                 <<"(block-end:"<<end_<<",buffer-size:"<<buffer_size_<<")"
                  <<" => resetting block-end to buffer-size\n";
-        end_ = (unsigned long int)(buffer_->size());
+        end_ = (unsigned long int)(buffer_size_);
       }
 
       try {
@@ -86,7 +88,7 @@ namespace imc
       for ( unsigned long int b = begin_;
         b < end_ && ( ! (thekey_.name_== "CS") || count < 4 ); b++ )
       {
-        if ( buffer_->at(b) == imc::ch_sep_ )
+        if ( buffer_[b] == imc::ch_sep_ )
         {
           // define range of parameter with first byte = ch_sep_
           parameters_.push_back(imc::parameter(b,b));
@@ -124,8 +126,8 @@ namespace imc
       {
         throw std::logic_error("inconsistent parameter offsets");
       }
-      std::vector<unsigned char> parambuff(buffer_->begin()+begin_+param.begin(),
-                                           buffer_->begin()+begin_+param.end());
+      std::vector<unsigned char> parambuff(buffer_+begin_+param.begin(),
+                                           buffer_+begin_+param.end());
       return parambuff;
     }
 
@@ -140,7 +142,7 @@ namespace imc
       std::string prm("");
       for ( unsigned long int i = param.begin()+1; i <= param.end(); i++ )
       {
-        prm.push_back( (char)((*buffer_)[i]) );
+        prm.push_back( (char)(buffer_[i]) );
       }
       return prm;
     }
@@ -163,7 +165,7 @@ namespace imc
         <<std::setw(width)<<std::left<<"begin:"<<begin_<<"\n"
         <<std::setw(width)<<std::left<<"end:"<<end_<<"\n"
         <<std::setw(width)<<std::left<<"rawfile:"<<raw_file_<<"\n"
-        <<std::setw(width)<<std::left<<"buffersize:"<<buffer_->size()<<"\n"
+        <<std::setw(width)<<std::left<<"buffersize:"<<buffer_size_<<"\n"
         <<std::setw(width)<<std::left<<"parameters:"<<prsstr<<"\n";
 
       return ss.str();
diff --git a/lib/imc_buffer.hpp b/lib/imc_buffer.hpp
new file mode 100644
index 0000000..d6708f7
--- /dev/null
+++ b/lib/imc_buffer.hpp
@@ -0,0 +1,127 @@
+#ifndef IMCBUFFER
+#define IMCBUFFER
+
+#include <string>
+#include <stdexcept>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <iostream>
+
+namespace imc
+{
+    // Read-only, move-only view of a file's contents obtained through POSIX mmap().
+    class MemoryMappedFile
+    {
+    private:
+        const unsigned char* data_;  // start of the mapping, nullptr while nothing is mapped
+        size_t size_;                // length of the mapping in bytes, 0 while nothing is mapped
+        int fd_;                     // descriptor, only open while map() is running (-1 otherwise)
+    public:
+        MemoryMappedFile() : data_(nullptr), size_(0), fd_(-1) {}
+
+        ~MemoryMappedFile()
+        {
+            close_file();
+        }
+
+        // Not copyable: a copy would unmap the same region a second time
+        MemoryMappedFile(const MemoryMappedFile&) = delete;
+        MemoryMappedFile& operator=(const MemoryMappedFile&) = delete;
+
+        // Move constructor: takes over the mapping and leaves "other" empty
+        MemoryMappedFile(MemoryMappedFile&& other) noexcept
+            : data_(other.data_), size_(other.size_), fd_(other.fd_)
+        {
+            other.data_ = nullptr;
+            other.size_ = 0;
+            other.fd_ = -1;
+        }
+
+        // Move assignment: releases the current mapping before taking over "other"
+        MemoryMappedFile& operator=(MemoryMappedFile&& other) noexcept
+        {
+            if (this != &other)
+            {
+                close_file();
+                data_ = other.data_;
+                size_ = other.size_;
+                fd_ = other.fd_;
+                other.data_ = nullptr;
+                other.size_ = 0;
+                other.fd_ = -1;
+            }
+            return *this;
+        }
+
+        // Map "filename" read-only, replacing any current mapping. An empty file gives
+        // data() == nullptr and size() == 0. Throws std::runtime_error on failure.
+        void map(const std::string& filename)
+        {
+            close_file();
+
+            fd_ = open(filename.c_str(), O_RDONLY);
+            if (fd_ == -1)
+            {
+                throw std::runtime_error("Failed to open file: " + filename);
+            }
+
+            struct stat sb;
+            if (fstat(fd_, &sb) == -1)
+            {
+                close_file();
+                throw std::runtime_error("Failed to get file size: " + filename);
+            }
+
+            // mmap() rejects a zero length, so an empty file is simply left unmapped
+            void* mapped = nullptr;
+            if (sb.st_size > 0)
+            {
+                mapped = mmap(NULL, static_cast<size_t>(sb.st_size), PROT_READ, MAP_PRIVATE, fd_, 0);
+            }
+
+            // the mapping stays valid after close(), so do not keep a descriptor per open file
+            close(fd_);
+            fd_ = -1;
+            if (mapped == MAP_FAILED)
+            {
+                throw std::runtime_error("Failed to mmap file: " + filename);
+            }
+            data_ = static_cast<const unsigned char*>(mapped);
+            size_ = mapped ? static_cast<size_t>(sb.st_size) : 0;
+        }
+
+        // Release the mapping (and a descriptor left over from a failed map()); idempotent
+        void close_file()
+        {
+            if (data_)
+            {
+                munmap(const_cast<unsigned char*>(data_), size_);
+                data_ = nullptr;
+            }
+            if (fd_ != -1)
+            {
+                close(fd_);
+                fd_ = -1;
+            }
+            size_ = 0;
+        }
+
+        const unsigned char* data() const
+        {
+            return data_;
+        }
+
+        size_t size() const
+        {
+            return size_;
+        }
+
+        // Unchecked access: "index" must be smaller than size()
+        const unsigned char& operator[](size_t index) const
+        {
+            return data_[index];
+        }
+    };
+}
+
+#endif
diff --git a/lib/imc_channel.hpp b/lib/imc_channel.hpp
index 0a83f0a..c86e394 100644
--- a/lib/imc_channel.hpp
+++ b/lib/imc_channel.hpp
@@ -11,6 +11,7 @@
 #include <chrono>
 #include <ctime>
 #include <time.h>
+#include <cstring>
 #if defined(__linux__) || defined(__APPLE__)
 #include <iconv.h>
 #elif defined(__WIN32__) || defined(_WIN32)
@@ -21,6 +22,16 @@
 
 namespace imc
 {
+  struct channel_chunk {                // one slice of channel samples, filled by channel::read_chunk()
+    std::vector<unsigned char> x_bytes; // packed x values; left empty unless x was requested
+    std::vector<unsigned char> y_bytes; // packed y values
+    unsigned long int start;            // index of the first sample in this chunk
+    unsigned long int count;            // number of samples held (may be fewer than requested)
+    bool has_x;                         // copy of the caller's include_x flag
+    int x_type;                         // numtype code of the x elements (8 = double), 0 if not filled
+    int y_type;                         // numtype code of the y elements (8 = double), 0 if not filled
+  };
+
   struct component_env
   {
     std::string uuid_;
@@ -274,7 +285,7 @@ namespace imc
     component_env compenv_;
 
     // Constructor to parse the associated blocks
-    component_group(component_env &compenv, std::map<std::string, imc::block>* blocks, std::vector<unsigned char>* buffer)
+    component_group(component_env &compenv, std::map<std::string, imc::block>* blocks, const unsigned char* buffer)
         : compenv_(compenv)
     {
         if (blocks->count(compenv.CCuuid_) == 1)
@@ -311,7 +322,7 @@ namespace imc
     // associated environment of blocks and map of blocks
     channel_env chnenv_;
     std::map<std::string,imc::block>* blocks_;
-    std::vector<unsigned char>* buffer_;
+    const unsigned char* buffer_;
 
     imc::origin_data NO_;
     imc::language NL_;
@@ -348,6 +359,8 @@ namespace imc
     // range, factor and offset
     double xfactor_, yfactor_;
     double xoffset_, yoffset_;
+    
+    unsigned long int number_of_samples_ = 0;
 
     // group reference the channel belongs to
     unsigned long int group_index_;
@@ -355,7 +368,7 @@ namespace imc
 
     // constructor takes channel's block environment
     channel(channel_env &chnenv, std::map<std::string,imc::block>* blocks,
-                                 std::vector<unsigned char>* buffer):
+                                 const unsigned char* buffer):
       chnenv_(chnenv), blocks_(blocks), buffer_(buffer),
       xfactor_(1.), yfactor_(1.), xoffset_(0.), yoffset_(0.),
       group_index_(-1)
@@ -475,15 +488,15 @@ namespace imc
       }
 
       // start converting binary buffer to imc::datatype
-      if ( !chnenv_.CSuuid_.empty() ) convert_buffer();
+      if ( !chnenv_.CSuuid_.empty() ) init_metadata();
 
       // convert any non-UTF-8 codepage to UTF-8 and cleanse any text
       convert_encoding();
       cleanse_text();
     }
 
-    // convert buffer to actual datatype
-    void convert_buffer()
+    // initialize metadata without loading data
+    void init_metadata()
     {
       std::vector<imc::parameter> prms = blocks_->at(chnenv_.CSuuid_).get_parameters();
       if ( prms.size() < 4)
@@ -492,65 +505,203 @@ namespace imc
       }
 
       // extract (channel dependent) part of buffer
-      unsigned long int buffstrt = prms[3].begin();
-      std::vector<unsigned char> yCSbuffer( buffer_->begin()+buffstrt+ybuffer_offset_+1,
-                                           buffer_->begin()+buffstrt+ybuffer_offset_+ybuffer_size_+1 );
+      size_t yCSbuffer_size = ybuffer_size_;
 
       // determine number of values in buffer
-      unsigned long int ynum_values = (unsigned long int)(yCSbuffer.size()/(ysignbits_/8));
-      if ( ynum_values*(ysignbits_/8) != yCSbuffer.size() )
+      unsigned long int ynum_values = (unsigned long int)(yCSbuffer_size/(ysignbits_/8));
+      if ( ynum_values*(ysignbits_/8) != yCSbuffer_size )
       {
         throw std::runtime_error("CSbuffer and significant bits of y datatype don't match");
       }
-
+      
+      number_of_samples_ = ynum_values;
 
       if (dimension_ ==  1)
       {
-        // process y-data
-        process_data(ydata_, ynum_values, ydatatp_, yCSbuffer);
-
         // find appropriate precision for "xdata_" by means of "xstepwidth_"
         xprec_ = (xstepwidth_ > 0 ) ? (int)ceil(fabs(log10(xstepwidth_))) : 10;
-
-        // fill xdata_
-        for ( unsigned long int i = 0; i < ynum_values; i++ )
-        {
-          xdata_.push_back(xstart_+(double)i*xstepwidth_);
-        }
       }
       else if (dimension_ == 2)
       {
-        // process x- and y-data
-        std::vector<unsigned char> xCSbuffer( buffer_->begin()+buffstrt+xbuffer_offset_+1,
-                                            buffer_->begin()+buffstrt+xbuffer_offset_+xbuffer_size_+1 );
-
-        // determine number of values in buffer
-        unsigned long int xnum_values = (unsigned long int)(xCSbuffer.size()/(xsignbits_/8));
-        if ( xnum_values*(xsignbits_/8) != xCSbuffer.size() )
-        {
-          throw std::runtime_error("CSbuffer and significant bits of x datatype don't match");
-        }
+        // const unsigned char* xCSbuffer = buffer_ + buffstrt + xbuffer_offset_ + 1;
+        size_t xCSbuffer_size = xbuffer_size_;
+        unsigned long int xnum_values = (unsigned long int)(xCSbuffer_size/(xsignbits_/8));
+        
         if ( xnum_values != ynum_values )
         {
           throw std::runtime_error("x and y data have different number of values");
         }
-
         xprec_ = 9;
-
-        process_data(xdata_, xnum_values, xdatatp_, xCSbuffer);
-        process_data(ydata_, ynum_values, ydatatp_, yCSbuffer);
       }
       else
       {
         throw std::runtime_error("unsupported dimension");
       }
+    }
+
+    // convert buffer to actual datatype (loads all data); xdata_ and ydata_ are
+    // rebuilt from scratch, so a repeated call cannot append duplicate x values
+    void load_all_data()
+    {
+      std::vector<imc::parameter> prms = blocks_->at(chnenv_.CSuuid_).get_parameters();
+      // prms[3] begins at its separator byte, so the first data byte is one further
+      const unsigned char* databuffer = buffer_ + prms[3].begin() + 1;
+
+      if (dimension_ ==  1)
+      {
+        process_data(ydata_, number_of_samples_, ydatatp_, databuffer + ybuffer_offset_, ybuffer_size_);
+        // x values are generated from xstart_ and xstepwidth_ instead of being read
+        xdata_.clear();
+        xdata_.reserve(number_of_samples_);
+        for ( unsigned long int i = 0; i < number_of_samples_; i++ )
+        {
+          xdata_.push_back(xstart_+(double)i*xstepwidth_);
+        }
+      }
+      else if (dimension_ == 2)
+      {
+        process_data(xdata_, number_of_samples_, xdatatp_, databuffer + xbuffer_offset_, xbuffer_size_);
+        process_data(ydata_, number_of_samples_, ydatatp_, databuffer + ybuffer_offset_, ybuffer_size_);
+      }
 
       transformData(xdata_, xfactor_, xoffset_);
       transformData(ydata_, yfactor_, yoffset_);
     }
 
+    // Read up to "count" samples beginning at sample index "start" as packed bytes.
+    // raw_mode=true copies the stored values unconverted (x_type/y_type = stored numtype,
+    // six-byte values widened to 8 bytes); raw_mode=false yields scaled doubles (type 8).
+    // include_x adds x values: read from the buffer if dimension_ == 2, else generated.
+    // A "start" at or beyond the last sample yields an empty chunk (count == 0).
+    channel_chunk read_chunk(unsigned long int start, unsigned long int count, bool include_x, bool raw_mode)
+    {
+        unsigned long int total_len = number_of_samples_;
+
+        if ( start >= total_len )
+        {
+            return { {}, {}, start, 0, include_x, 0, 0 };
+        }
+
+        // clamp to the samples left after "start"; computing start + count could wrap around
+        unsigned long int remaining = total_len - start;
+        unsigned long int actual_count = ( count < remaining ) ? count : remaining;
+
+        channel_chunk chunk;
+        chunk.start = start;
+        chunk.count = actual_count;
+        chunk.has_x = include_x;
+        chunk.x_type = 0;
+        chunk.y_type = 0;
+
+        std::vector<imc::parameter> prms = blocks_->at(chnenv_.CSuuid_).get_parameters();
+        unsigned long int buffstrt = prms[3].begin();
+
+        // Handle Y data
+        if (raw_mode) {
+            int type = (int)ydatatp_;
+            unsigned long int bytes_per_sample = ysignbits_ / 8;
+            unsigned long int abs_start = buffstrt + ybuffer_offset_ + 1 + start * bytes_per_sample;
+            unsigned long int byte_count = actual_count * bytes_per_sample;
+            if (type == 13) { // six_byte_unsigned_long -> promote to 8 byte (uint64)
+                chunk.y_type = 13;
+                chunk.y_bytes.resize(actual_count * 8);
+                uint64_t* dest = reinterpret_cast<uint64_t*>(chunk.y_bytes.data());
+                for (unsigned long int i = 0; i < actual_count; ++i) {
+                    unsigned long int src_idx = abs_start + i * 6;
+                    uint64_t val = 0;
+                    for (int b = 0; b < 6; ++b) val |= (uint64_t)buffer_[src_idx + b] << (b * 8);
+                    dest[i] = val;
+                }
+            } else {
+                chunk.y_type = type;
+                chunk.y_bytes.resize(byte_count);
+                std::copy(buffer_ + abs_start, buffer_ + abs_start + byte_count, chunk.y_bytes.begin());
+            }
+        } else {
+            // Scaled mode: convert to double
+            chunk.y_type = 8; // imc::numtype::ddouble
+            chunk.y_bytes.resize(actual_count * sizeof(double));
+            std::vector<double> temp_data;
+            unsigned long int abs_start = buffstrt + ybuffer_offset_ + 1; // Base start
+            switch (ydatatp_) {
+                case numtype::unsigned_byte: imc::convert_chunk_to_double<imc_Ubyte>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::signed_byte: imc::convert_chunk_to_double<imc_Sbyte>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::unsigned_short: imc::convert_chunk_to_double<imc_Ushort>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::signed_short: imc::convert_chunk_to_double<imc_Sshort>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::unsigned_long: imc::convert_chunk_to_double<imc_Ulongint>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::signed_long: imc::convert_chunk_to_double<imc_Slongint>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::ffloat: imc::convert_chunk_to_double<imc_float>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::ddouble: imc::convert_chunk_to_double<imc_double>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::two_byte_word_digital: imc::convert_chunk_to_double<imc_digital>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::eight_byte_unsigned_long: imc::convert_chunk_to_double<uint64_t>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::six_byte_unsigned_long: imc::convert_chunk_to_double<imc_sixbyte>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                case numtype::eight_byte_signed_long: imc::convert_chunk_to_double<int64_t>(buffer_ + abs_start, start, actual_count, yfactor_, yoffset_, temp_data); break;
+                default: throw std::runtime_error("Unsupported type for scaled chunk reading (Y): " + std::to_string(ydatatp_));
+            }
+            memcpy(chunk.y_bytes.data(), temp_data.data(), temp_data.size() * sizeof(double));
+        }
+
+        // Handle X data
+        if (include_x) {
+            if (dimension_ == 2 && raw_mode) {
+                int type = (int)xdatatp_;
+                unsigned long int bytes_per_sample = xsignbits_ / 8;
+                unsigned long int abs_start = buffstrt + xbuffer_offset_ + 1 + start * bytes_per_sample;
+                unsigned long int byte_count = actual_count * bytes_per_sample;
+                if (type == 13) {
+                    chunk.x_type = 13;
+                    chunk.x_bytes.resize(actual_count * 8);
+                    uint64_t* dest = reinterpret_cast<uint64_t*>(chunk.x_bytes.data());
+                    for (unsigned long int i = 0; i < actual_count; ++i) {
+                        unsigned long int src_idx = abs_start + i * 6;
+                        uint64_t val = 0;
+                        for (int b = 0; b < 6; ++b) val |= (uint64_t)buffer_[src_idx + b] << (b * 8);
+                        dest[i] = val;
+                    }
+                } else {
+                    chunk.x_type = type;
+                    chunk.x_bytes.resize(byte_count);
+                    std::copy(buffer_ + abs_start, buffer_ + abs_start + byte_count, chunk.x_bytes.begin());
+                }
+            } else {
+                // Generated X or scaled X
+                chunk.x_type = 8; // imc::numtype::ddouble
+                chunk.x_bytes.resize(actual_count * sizeof(double));
+                double* ptr = reinterpret_cast<double*>(chunk.x_bytes.data());
+
+                if (dimension_ == 2) {
+                    // Read X from file and scale
+                    std::vector<double> temp_data;
+                    unsigned long int abs_start = buffstrt + xbuffer_offset_ + 1;
+                    switch (xdatatp_) {
+                        case numtype::unsigned_byte: imc::convert_chunk_to_double<imc_Ubyte>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::signed_byte: imc::convert_chunk_to_double<imc_Sbyte>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::unsigned_short: imc::convert_chunk_to_double<imc_Ushort>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::signed_short: imc::convert_chunk_to_double<imc_Sshort>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::unsigned_long: imc::convert_chunk_to_double<imc_Ulongint>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::signed_long: imc::convert_chunk_to_double<imc_Slongint>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::ffloat: imc::convert_chunk_to_double<imc_float>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::ddouble: imc::convert_chunk_to_double<imc_double>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::two_byte_word_digital: imc::convert_chunk_to_double<imc_digital>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::eight_byte_unsigned_long: imc::convert_chunk_to_double<uint64_t>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::six_byte_unsigned_long: imc::convert_chunk_to_double<imc_sixbyte>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        case numtype::eight_byte_signed_long: imc::convert_chunk_to_double<int64_t>(buffer_ + abs_start, start, actual_count, xfactor_, xoffset_, temp_data); break;
+                        default: throw std::runtime_error("Unsupported type for scaled chunk reading (X): " + std::to_string(xdatatp_));
+                    }
+                    memcpy(ptr, temp_data.data(), temp_data.size() * sizeof(double));
+                } else {
+                    // Generated X
+                    for (unsigned long int i = 0; i < actual_count; ++i) {
+                        ptr[i] = xstart_ + (double)(start + i) * xstepwidth_;
+                    }
+                }
+            }
+        }
+        return chunk;
+    }
+
     // handle data type conversion
-    void process_data(std::vector<imc::datatype>& data_, size_t num_values, numtype datatp_, std::vector<unsigned char>& CSbuffer)
+    void process_data(std::vector<imc::datatype>& data_, size_t num_values, numtype datatp_, const unsigned char* CSbuffer, size_t CSbuffer_size)
     {
       // adjust size of data
       data_.resize(num_values);
@@ -559,34 +710,34 @@ namespace imc
       switch (datatp_)
       {
           case numtype::unsigned_byte:
-              imc::convert_data_to_type<imc_Ubyte>(CSbuffer, data_);
+              imc::convert_data_to_type<imc_Ubyte>(CSbuffer, CSbuffer_size, data_);
               break;
           case numtype::signed_byte:
-              imc::convert_data_to_type<imc_Sbyte>(CSbuffer, data_);
+              imc::convert_data_to_type<imc_Sbyte>(CSbuffer, CSbuffer_size, data_);
               break;
           case numtype::unsigned_short:
-              imc::convert_data_to_type<imc_Ushort>(CSbuffer, data_);
+              imc::convert_data_to_type<imc_Ushort>(CSbuffer, CSbuffer_size, data_);
               break;
           case numtype::signed_short:
-              imc::convert_data_to_type<imc_Sshort>(CSbuffer, data_);
+              imc::convert_data_to_type<imc_Sshort>(CSbuffer, CSbuffer_size, data_);
               break;
           case numtype::unsigned_long:
-              imc::convert_data_to_type<imc_Ulongint>(CSbuffer, data_);
+              imc::convert_data_to_type<imc_Ulongint>(CSbuffer, CSbuffer_size, data_);
               break;
           case numtype::signed_long:
-              imc::convert_data_to_type<imc_Slongint>(CSbuffer, data_);
+              imc::convert_data_to_type<imc_Slongint>(CSbuffer, CSbuffer_size, data_);
               break;
           case numtype::ffloat:
-              imc::convert_data_to_type<imc_float>(CSbuffer, data_);
+              imc::convert_data_to_type<imc_float>(CSbuffer, CSbuffer_size, data_);
               break;
           case numtype::ddouble:
-              imc::convert_data_to_type<imc_double>(CSbuffer, data_);
+              imc::convert_data_to_type<imc_double>(CSbuffer, CSbuffer_size, data_);
               break;
           case numtype::two_byte_word_digital:
-              imc::convert_data_to_type<imc_digital>(CSbuffer, data_);
+              imc::convert_data_to_type<imc_digital>(CSbuffer, CSbuffer_size, data_);
               break;
           case numtype::six_byte_unsigned_long:
-              imc::convert_data_to_type<imc_sixbyte>(CSbuffer, data_);
+              imc::convert_data_to_type<imc_sixbyte>(CSbuffer, CSbuffer_size, data_);
               break;
           default:
               throw std::runtime_error(std::string("unsupported/unknown datatype ") + std::to_string(datatp_));
@@ -699,6 +850,9 @@ namespace imc
     // provide JSON string of metadata
     std::string get_json(bool include_data = false)
     {
+      if (include_data && ydata_.empty() && number_of_samples_ > 0) {
+          load_all_data();
+      }
       // prepare printable trigger-time
       std::time_t tt = std::chrono::system_clock::to_time_t(trigger_time_);
       std::time_t att = std::chrono::system_clock::to_time_t(absolute_trigger_time_);
diff --git a/lib/imc_conversion.hpp b/lib/imc_conversion.hpp
index 9cdb71e..dac3a38 100644
--- a/lib/imc_conversion.hpp
+++ b/lib/imc_conversion.hpp
@@ -11,14 +11,14 @@ namespace imc
 {
   // convert raw data in buffer into specific datatype
   template<typename datatype>
-  void convert_data_to_type(std::vector<unsigned char>& subbuffer,
+  void convert_data_to_type(const unsigned char* subbuffer, size_t subbuffer_size,
                             std::vector<imc::datatype>& channel)
   {
     // check number of elements of type "datatype" in buffer
-    if ( subbuffer.size() != channel.size()*sizeof(datatype) )
+    if ( subbuffer_size != channel.size()*sizeof(datatype) )
     {
       throw std::runtime_error( std::string("size mismatch between subbuffer (")
-                              + std::to_string(subbuffer.size())
+                              + std::to_string(subbuffer_size)
                               + std::string(") and datatype (")
                               + std::to_string(channel.size()) + std::string("*")
                               + std::to_string(sizeof(datatype)) + std::string(")") );
@@ -44,6 +44,77 @@ namespace imc
     // for ( auto el: channel ) std::cout<<el<<"\n";
   }
 
+  // Convert a run of raw samples of type SourceType into scaled doubles.
+  //
+  //   buffer       start of the raw sample array (samples stored back to back)
+  //   start_index  index of the first sample to convert (in samples, not bytes)
+  //   count        number of samples to convert
+  //   factor       multiplicative scaling; a value of 0.0 is treated as 1.0
+  //   offset       additive scaling
+  //   out          resized to "count" and overwritten with the results
+  //
+  // Each sample is stored as "sample*factor + offset"; with factor == 1.0 and
+  // offset == 0.0 the converted value is stored as is. No buffer length is
+  // passed in, so the caller has to guarantee that buffer holds at least
+  // (start_index + count) * sizeof(SourceType) bytes.
+  template<typename SourceType>
+  void convert_chunk_to_double(const unsigned char* buffer, size_t start_index, size_t count,
+                               double factor, double offset, std::vector<double>& out)
+  {
+      const size_t sample_bytes = sizeof(SourceType);
+      const unsigned char* first = buffer + start_index * sample_bytes;
+
+      // the scaling parameters are identical for all samples => decide once
+      const bool rescale = !(factor == 1.0 && offset == 0.0);
+      const double gain = (factor == 0.0) ? 1.0 : factor;
+
+      out.resize(count);
+
+      for (size_t n = 0; n < count; ++n) {
+          const unsigned char* src = first + n * sample_bytes;
+
+          // copy the sample bytewise instead of dereferencing a (possibly
+          // misaligned) SourceType pointer
+          SourceType sample;
+          unsigned char* dst = reinterpret_cast<unsigned char*>(&sample);
+          for (size_t b = 0; b < sample_bytes; ++b) {
+              dst[b] = src[b];
+          }
+
+          const double value = static_cast<double>(sample);
+          out[n] = rescale ? value * gain + offset : value;
+      }
+  }
+
+  // Specialization for imc_sixbyte: every sample occupies 6 bytes, which are
+  // combined (least significant byte first) into an unsigned 64-bit integer
+  // and then converted and scaled exactly like in the generic version.
+  template<>
+  inline void convert_chunk_to_double<imc_sixbyte>(const unsigned char* buffer, size_t start_index, size_t count,
+                               double factor, double offset, std::vector<double>& out)
+  {
+      const size_t sample_bytes = 6;
+      const unsigned char* first = buffer + start_index * sample_bytes;
+
+      const bool rescale = !(factor == 1.0 && offset == 0.0);
+      const double gain = (factor == 0.0) ? 1.0 : factor;
+
+      out.resize(count);
+
+      for (size_t n = 0; n < count; ++n) {
+          const unsigned char* src = first + n * sample_bytes;
+
+          // byte b contributes the bits 8*b .. 8*b+7 of the value
+          uint64_t sample = 0;
+          for (int b = 0; b < 6; ++b) {
+              sample |= static_cast<uint64_t>(src[b]) << (b*8);
+          }
+
+          const double value = static_cast<double>(sample);
+          out[n] = rescale ? value * gain + offset : value;
+      }
+  }
+
 }
 
 #endif
diff --git a/lib/imc_object.hpp b/lib/imc_object.hpp
index 1fc1a44..6090a18 100644
--- a/lib/imc_object.hpp
+++ b/lib/imc_object.hpp
@@ -12,12 +12,12 @@
 namespace imc
 {
   // obtain specific parameters as string
-  std::string get_parameter(const std::vector<unsigned char>* buffer, const imc::parameter* param)
+  std::string get_parameter(const unsigned char* buffer, const imc::parameter* param)
   {
     std::string prm("");
     for ( unsigned long int i = param->begin()+1; i <= param->end(); i++ )
     {
-      prm.push_back((char)(*buffer)[i]);
+      prm.push_back((char)buffer[i]);
     }
     return prm;
   }
@@ -29,7 +29,7 @@ namespace imc
     int processor_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 3 ) throw std::runtime_error("invalid number of parameters in CF");
       fileformat_ = std::stoi(get_parameter(buffer,&parameters[0]));
@@ -56,7 +56,7 @@ namespace imc
     bool closed_;  // corresponds to true = 1 and false = 0 in file
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 2 ) throw std::runtime_error("invalid number of parameters in CK");
       version_ = std::stoi(get_parameter(buffer,&parameters[0]));
@@ -83,7 +83,7 @@ namespace imc
     std::string comment_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 7 ) throw std::runtime_error("invalid number of parameters in CB");
       group_index_ = std::stoul(get_parameter(buffer,&parameters[2]));
@@ -111,7 +111,7 @@ namespace imc
     std::string comment_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 9 ) throw std::runtime_error("invalid number of parameters in CT");
       group_index_ = std::stoul(get_parameter(buffer,&parameters[2]));
@@ -149,7 +149,7 @@ namespace imc
     int dimension_; // corresponding to fieldtype \in {1,}
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 5 ) throw std::runtime_error("invalid number of parameters in CG");
       number_components_ = std::stoul(get_parameter(buffer,&parameters[2]));
@@ -176,7 +176,7 @@ namespace imc
     std::string unit_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 6 ) throw std::runtime_error("invalid number of parameters in CD1");
       dx_ = std::stod(get_parameter(buffer,&parameters[2]));
@@ -208,7 +208,7 @@ namespace imc
     int pretriggerapp_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 11 ) throw std::runtime_error("invalid number of parameters in CD2");
       dx_ = std::stod(get_parameter(buffer,&parameters[2]));
@@ -244,7 +244,7 @@ namespace imc
     bool analog_digital_; // 1 => false (analog), 2 => true (digital)
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 4 ) throw std::runtime_error("invalid number of parameters in CC");
       component_index_ = std::stoi(get_parameter(buffer,&parameters[2]));
@@ -291,7 +291,7 @@ namespace imc
     unsigned long int distance_bytes_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 10 ) throw std::runtime_error("invalid number of parameters in CP");
       buffer_reference_ = std::stoi(get_parameter(buffer,&parameters[2]));
@@ -337,7 +337,7 @@ namespace imc
     // bool new_event_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 13 ) throw std::runtime_error("invalid number of parameters in Cb");
       number_buffers_ = std::stoul(get_parameter(buffer,&parameters[2]));
@@ -379,7 +379,7 @@ namespace imc
     std::string unit_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 8 ) throw std::runtime_error("invalid number of parameters in CR");
       transform_ = (get_parameter(buffer,&parameters[2]) == std::string("1"));
@@ -411,7 +411,7 @@ namespace imc
     std::string comment_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 9 ) throw std::runtime_error("invalid number of parameters in CN");
       group_index_ = std::stoul(get_parameter(buffer,&parameters[2]));
@@ -440,7 +440,7 @@ namespace imc
     // unsigned long int begin_buffer_, end_buffer_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 4 ) throw std::runtime_error("invalid number of parameters in CS");
       index_ = std::stoul(get_parameter(buffer,&parameters[2]));
@@ -464,7 +464,7 @@ namespace imc
     std::string language_code_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if (parameters.size() < 4) throw std::runtime_error("invalid number of parameters in NL");
       codepage_ = get_parameter(buffer, &parameters[2]);
@@ -480,7 +480,7 @@ namespace imc
     std::string comment_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 7 ) throw std::runtime_error("invalid number of parameters in NO");
       origin_ = ( get_parameter(buffer,&parameters[2]) == std::string("1") );
@@ -506,7 +506,7 @@ namespace imc
     double trigger_time_frac_secs_;
 
     // construct members by parsing particular parameters from buffer
-    void parse(const std::vector<unsigned char>* buffer, const std::vector<parameter>& parameters)
+    void parse(const unsigned char* buffer, const std::vector<parameter>& parameters)
     {
       if ( parameters.size() < 8 ) throw std::runtime_error("invalid number of parameters in NT1");
       tms_ = std::tm();
@@ -560,7 +560,7 @@ namespace imc {
 
     rawobject(): objidx_(-1) { }
 
-    void parse(imc::key key, const std::vector<unsigned char>* buffer,
+    void parse(imc::key key, const unsigned char* buffer,
                              const std::vector<parameter>& parameters)
     {
       if ( key.name_ == std::string("CF") )
diff --git a/lib/imc_raw.hpp b/lib/imc_raw.hpp
index 77804db..47fc011 100644
--- a/lib/imc_raw.hpp
+++ b/lib/imc_raw.hpp
@@ -7,35 +7,24 @@
 #include <filesystem>
 #include <iostream>
 
-// #include "hexshow.hpp"
+#include "imc_buffer.hpp"
 #include "imc_key.hpp"
 #include "imc_block.hpp"
 #include "imc_datatype.hpp"
 #include "imc_object.hpp"
-#include "imc_result.hpp"
 #include "imc_channel.hpp"
 
 //---------------------------------------------------------------------------//
 
 namespace imc
 {
-  struct channel_chunk {
-    std::vector<unsigned char> x_bytes;
-    std::vector<unsigned char> y_bytes;
-    unsigned long int start;
-    unsigned long int count;
-    bool has_x;
-    int x_type;
-    int y_type;
-  };
-
   class raw
   {
     // (path of) raw-file and its basename
     std::string raw_file_, file_name_;
 
     // buffer of raw-file
-    std::vector<unsigned char> buffer_;
+    imc::MemoryMappedFile buffer_;
 
     // list and map of imc-blocks
     std::vector<imc::block> rawblocks_;
@@ -53,6 +42,12 @@ namespace imc
     raw() { };
     raw(std::string raw_file): raw_file_(raw_file) { set_file(raw_file); };
 
+    // Delete copy and move operations because of self-referential pointers in channels_
+    raw(const raw&) = delete;
+    raw& operator=(const raw&) = delete;
+    raw(raw&&) = delete;
+    raw& operator=(raw&&) = delete;
+
     // provide new raw-file
     void set_file(std::string raw_file)
     {
@@ -68,16 +63,9 @@ namespace imc
     // open file and stream data into buffer
     void fill_buffer()
     {
-      buffer_.clear();
-
       // open file and put data in buffer
       try {
-        std::ifstream fin(raw_file_.c_str(),std::ifstream::binary);
-        if ( !fin.good() ) throw std::runtime_error("failed to open file");
-        std::vector<unsigned char> buffer((std::istreambuf_iterator<char>(fin)),
-                                          (std::istreambuf_iterator<char>()));
-        buffer_ = buffer;
-        fin.close();
+        buffer_.map(raw_file_);
       } catch ( const std::exception& e ) {
         throw std::runtime_error(
           std::string("failed to open raw-file and stream data in buffer: ") + e.what()
@@ -93,31 +81,33 @@ namespace imc
       // reset counter to identify computational complexity
       cplxcnt_ = 0;
 
+      const unsigned char* data = buffer_.data();
+      size_t size = buffer_.size();
+
       // start parsing raw-blocks in buffer
-      for ( std::vector<unsigned char>::iterator it=buffer_.begin();
-                                                 it!=buffer_.end(); ++it )
+      for ( unsigned long int i = 0; i < size; ++i )
       {
         cplxcnt_++;
 
         // check for "magic byte"
-        if ( *it == ch_bgn_ )
+        if ( data[i] == ch_bgn_ )
         {
           // check for (non)critical key
-          if ( *(it+1) == imc::key_crit_ || *(it+1) == imc::key_non_crit_ )
+          if ( data[i+1] == imc::key_crit_ || data[i+1] == imc::key_non_crit_ )
           {
             // compose (entire) key
-            std::string newkey = { (char)*(it+1), (char)*(it+2) };
-            imc::key itkey(*(it+1) == imc::key_crit_,newkey);
+            std::string newkey = { (char)data[i+1], (char)data[i+2] };
+            imc::key itkey(data[i+1] == imc::key_crit_,newkey);
 
             // expecting ch_sep_ after key
-            if ( *(it+3) == ch_sep_ )
+            if ( data[i+3] == ch_sep_ )
             {
               // extract key version
               std::string vers("");
               unsigned long int pos = 4;
-              while ( *(it+pos) != ch_sep_ )
+              while ( data[i+pos] != ch_sep_ )
               {
-                vers.push_back((char)*(it+pos));
+                vers.push_back((char)data[i+pos]);
                 pos++;
               }
               int version = std::stoi(vers);
@@ -132,9 +122,9 @@ namespace imc
                 // get block length
                 std::string leng("");
                 pos++;
-                while ( *(it+pos) != ch_sep_ )
+                while ( data[i+pos] != ch_sep_ )
                 {
-                  leng.push_back((char)*(it+pos));
+                  leng.push_back((char)data[i+pos]);
                   pos++;
                 }
                 unsigned long int length = std::stoul(leng);
@@ -142,23 +132,23 @@ namespace imc
                 // declare and initialize corresponding key and block
                 // imc::key bkey( *(it+1)==imc::key_crit_ , newkey,
                 //                imc::keys.at(newkey).description_, version );
-                imc::block blk(itkey,(unsigned long int)(it-buffer_.begin()),
-                                     (unsigned long int)(it-buffer_.begin()+pos+1+length),
-                                     raw_file_, &buffer_);
+                imc::block blk(itkey,i,
+                                     i+pos+1+length,
+                                     raw_file_, data, size);
 
                 // add block to list
                 rawblocks_.push_back(blk);
 
                 // skip the remaining block according to its length
-                if ( (unsigned long int)(it-buffer_.begin()+length) < (unsigned long int)(buffer_.size()) )
+                if ( i+length < size )
                 {
-                  std::advance(it,length);
+                  i += length;
                 }
               }
               else
               {
                 // all critical must be known !! while a noncritical may be ignored
-                if ( *(it+1) == imc::key_crit_ )
+                if ( data[i+1] == imc::key_crit_ )
                 {
                   throw std::runtime_error(
                     std::string("unknown critical key: ") + newkey + std::to_string(version)
@@ -175,7 +165,7 @@ namespace imc
             {
               throw std::runtime_error(
                   std::string("invalid block or corrupt buffer at byte: ")
-                + std::to_string(it+3-buffer_.begin())
+                + std::to_string(i+3)
               );
             }
           }
@@ -242,7 +232,7 @@ namespace imc
           // a new component group is started
           // TODO: can we avoid to parse the whole component here?
           imc::component component;
-          component.parse(&buffer_, blk.get_parameters());
+          component.parse(buffer_.data(), blk.get_parameters());
           if ( component.component_index_ == 1 ) compenv_ptr = &chnenv.compenv1_;
           else if ( component.component_index_ == 2 ) compenv_ptr = &chnenv.compenv2_;
           else throw std::runtime_error("invalid component index in CC block");
@@ -293,7 +283,7 @@ namespace imc
 
             // create channel object and add it to the map of channels
             channels_.insert( std::pair<std::string,imc::channel>
-              (chnenv.CNuuid_,imc::channel(chnenv,&mapblocks_,&buffer_))
+              (chnenv.CNuuid_,imc::channel(chnenv,&mapblocks_,buffer_.data()))
             );
 
             // reset channel uuid
@@ -408,7 +398,7 @@ namespace imc
     {
       if ( channels_.count(uuid) )
       {
-        return (unsigned long int)channels_.at(uuid).ydata_.size();
+        return channels_.at(uuid).number_of_samples_;
       }
       else
       {
@@ -437,125 +427,7 @@ namespace imc
         throw std::runtime_error(std::string("channel does not exist:") + uuid);
       }
 
-      imc::channel& ch = channels_.at(uuid);
-      unsigned long int total_len = ch.ydata_.size();
-
-      if ( start >= total_len )
-      {
-         return { {}, {}, start, 0, include_x, 0, 0 };
-      }
-
-      unsigned long int end = start + count;
-      if ( end > total_len ) end = total_len;
-      unsigned long int actual_count = end - start;
-
-      channel_chunk chunk;
-      chunk.start = start;
-      chunk.count = actual_count;
-      chunk.has_x = include_x;
-
-      // Handle Y data
-      if (raw_mode) {
-          // Raw mode: read bytes directly from buffer
-          int type = (int)ch.ydatatp_;
-          unsigned long int bytes_per_sample = ch.ysignbits_ / 8;
-          
-          if (mapblocks_.count(ch.chnenv_.CSuuid_) == 0) {
-              throw std::runtime_error("CS block not found for channel");
-          }
-          imc::block& cs_block = mapblocks_.at(ch.chnenv_.CSuuid_);
-          std::vector<imc::parameter> prms = cs_block.get_parameters();
-          if (prms.size() < 4) throw std::runtime_error("Invalid CS block parameters");
-          unsigned long int buffstrt = prms[3].begin();
-          
-          unsigned long int abs_start = buffstrt + ch.ybuffer_offset_ + 1 + start * bytes_per_sample;
-          unsigned long int byte_count = actual_count * bytes_per_sample;
-          
-          if (abs_start + byte_count > buffer_.size()) {
-               throw std::runtime_error("Buffer read out of bounds");
-          }
-          
-          if (type == 13) { // six_byte_unsigned_long -> promote to 8 byte (uint64)
-              chunk.y_type = 13; // Keep original type ID, but data is promoted
-              chunk.y_bytes.resize(actual_count * 8);
-              uint64_t* dest = reinterpret_cast<uint64_t*>(chunk.y_bytes.data());
-              
-              for (unsigned long int i = 0; i < actual_count; ++i) {
-                  unsigned long int src_idx = abs_start + i * 6;
-                  uint64_t val = 0;
-                  // Assuming Little Endian storage in file
-                  for (int b = 0; b < 6; ++b) {
-                      val |= (uint64_t)buffer_[src_idx + b] << (b * 8);
-                  }
-                  dest[i] = val;
-              }
-          } else {
-              chunk.y_type = type;
-              chunk.y_bytes.resize(byte_count);
-              std::copy(buffer_.begin() + abs_start, buffer_.begin() + abs_start + byte_count, chunk.y_bytes.begin());
-          }
-      } else {
-          // Scaled mode: convert to double
-          chunk.y_type = 8; // imc::numtype::ddouble
-          chunk.y_bytes.resize(actual_count * sizeof(double));
-          double* ptr = reinterpret_cast<double*>(chunk.y_bytes.data());
-          
-          for (unsigned long int i = 0; i < actual_count; ++i) {
-              ptr[i] = ch.ydata_[start + i].as_double();
-          }
-      }
-
-      // Handle X data
-      if (include_x) {
-          if (ch.dimension_ == 2 && raw_mode) {
-              // XY channel, raw mode
-              int type = (int)ch.xdatatp_;
-              unsigned long int bytes_per_sample = ch.xsignbits_ / 8;
-              
-              imc::block& cs_block = mapblocks_.at(ch.chnenv_.CSuuid_);
-              std::vector<imc::parameter> prms = cs_block.get_parameters();
-              unsigned long int buffstrt = prms[3].begin();
-              
-              unsigned long int abs_start = buffstrt + ch.xbuffer_offset_ + 1 + start * bytes_per_sample;
-              unsigned long int byte_count = actual_count * bytes_per_sample;
-              
-              if (abs_start + byte_count > buffer_.size()) {
-                   throw std::runtime_error("Buffer read out of bounds (X)");
-              }
-              
-              if (type == 13) { // six_byte_unsigned_long -> promote to 8 byte
-                  chunk.x_type = 13; // Keep original type ID
-                  chunk.x_bytes.resize(actual_count * 8);
-                  uint64_t* dest = reinterpret_cast<uint64_t*>(chunk.x_bytes.data());
-                  for (unsigned long int i = 0; i < actual_count; ++i) {
-                      unsigned long int src_idx = abs_start + i * 6;
-                      uint64_t val = 0;
-                      for (int b = 0; b < 6; ++b) {
-                          val |= (uint64_t)buffer_[src_idx + b] << (b * 8);
-                      }
-                      dest[i] = val;
-                  }
-              } else {
-                  chunk.x_type = type;
-                  chunk.x_bytes.resize(byte_count);
-                  std::copy(buffer_.begin() + abs_start, buffer_.begin() + abs_start + byte_count, chunk.x_bytes.begin());
-              }
-          } else {
-              // Generated X or scaled X
-              chunk.x_type = 8; // imc::numtype::ddouble
-              chunk.x_bytes.resize(actual_count * sizeof(double));
-              double* ptr = reinterpret_cast<double*>(chunk.x_bytes.data());
-              
-              for (unsigned long int i = 0; i < actual_count; ++i) {
-                  if (start + i < ch.xdata_.size())
-                      ptr[i] = ch.xdata_[start + i].as_double();
-                  else
-                      ptr[i] = 0.0;
-              }
-          }
-      }
-
-      return chunk;
+      return channels_.at(uuid).read_chunk(start, count, include_x, raw_mode);
     }
 
     // print single specific channel
diff --git a/lib/imc_result.hpp b/lib/imc_result.hpp
deleted file mode 100644
index 1961ace..0000000
--- a/lib/imc_result.hpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//---------------------------------------------------------------------------//
-
-#ifndef IMCRESULT
-#define IMCRESULT
-
-#include "imc_datatype.hpp"
-
-//---------------------------------------------------------------------------//
-
-namespace imc
-{
-  struct channel_tab
-  {
-    std::string name_;
-
-    // abscissa
-    std::vector<double> xaxis_;
-    std::string xunit_;
-
-    // ordinate
-    // std::vector<imc::datatype> yaxis_;
-    std::vector<double> yaxis_;
-    std::string yunit_;
-  };
-
-}
-
-#endif
-
-//---------------------------------------------------------------------------//
diff --git a/python/imctermite.pyx b/python/imctermite.pyx
index 5618592..3bfc319 100644
--- a/python/imctermite.pyx
+++ b/python/imctermite.pyx
@@ -21,12 +21,21 @@ def get_codepage(chn) :
 
 cdef class imctermite:
 
-  # C++ instance of class => stack allocated (requires nullary constructor!)
-  cdef cppimctermite cppimc
+  # pointer to the C++ instance; the object is created with "new" in
+  # __cinit__ and destroyed again in __dealloc__
+  cdef cppimctermite* cppimc
 
   # constructor
   def __cinit__(self, string rawfile):
-    self.cppimc = cppimctermite(rawfile)
+    # allocate the C++ object for the given raw file
+    self.cppimc = new cppimctermite(rawfile)
+
+  # destructor: release the C++ object
+  def __dealloc__(self):
+    # cppimc is NULL if the allocation in __cinit__ did not complete
+    # (Cython zero-initializes C attributes before __cinit__ runs)
+    if self.cppimc != NULL:
+        del self.cppimc
 
   # provide raw file
   def submit_file(self,string rawfile):

From fdc8ecef57e5dcb2026a1818efe532b1113ab2e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <janole@goedeke.de>
Date: Mon, 15 Dec 2025 22:48:51 +0100
Subject: [PATCH 05/12] Add Windows target to CI tests and add tests for
 streaming functionality

- Update GitHub Actions workflow to support testing on multiple OS
- Refactor memory mapping in imc_buffer.hpp for Windows compatibility
- Improve makefile to handle .pyd files for Python builds
- Add comprehensive tests for streaming and chunking functionality in test_streaming.py
---
 .github/workflows/test.yml |  33 ++++++++---
 lib/imc_buffer.hpp         | 101 +++++++++++++++++++++++++++++++--
 makefile                   |   4 +-
 python/makefile            |   4 +-
 tests/test_streaming.py    | 112 +++++++++++++++++++++++++++++++++++++
 5 files changed, 236 insertions(+), 18 deletions(-)
 create mode 100644 tests/test_streaming.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ce243c4..7fa6520 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -2,20 +2,35 @@ name: Run Tests
 
 on:
   push:
-    branches: [ master ]
+    branches: [ master, numpy-streaming ]
   pull_request:
     branches: [ master ]
 
 jobs:
   test:
-    runs-on: ubuntu-latest
-    
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest]
+        python-version: ["3.10"]
+
     steps:
     - name: Checkout code
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
-    
-    - name: Build Docker image
-      run: docker build -t imctermite .
-    
-    - name: Run tests in container
-      run: docker run --rm imctermite make test
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pytest numpy cython setuptools wheel
+
+    # Cross-platform build and test using Makefile
+    # Requires bash shell on Windows (Git Bash)
+    - name: Build and Test
+      shell: bash
+      run: |
+        make test
diff --git a/lib/imc_buffer.hpp b/lib/imc_buffer.hpp
index d6708f7..80f68f6 100644
--- a/lib/imc_buffer.hpp
+++ b/lib/imc_buffer.hpp
@@ -3,12 +3,18 @@
 
 #include <string>
 #include <stdexcept>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
 #include <iostream>
 
+#if defined(_WIN32) || defined(_WIN64)
+    #define WIN32_LEAN_AND_MEAN
+    #include <windows.h>
+#else
+    #include <sys/mman.h>
+    #include <sys/stat.h>
+    #include <fcntl.h>
+    #include <unistd.h>
+#endif
+
 namespace imc
 {
     class MemoryMappedFile
@@ -16,10 +22,19 @@ namespace imc
     private:
         const unsigned char* data_;
         size_t size_;
+#if defined(_WIN32) || defined(_WIN64)
+        HANDLE hFile_;
+        HANDLE hMap_;
+#else
         int fd_;
+#endif
 
     public:
+#if defined(_WIN32) || defined(_WIN64)
+        MemoryMappedFile() : data_(nullptr), size_(0), hFile_(INVALID_HANDLE_VALUE), hMap_(NULL) {}
+#else
         MemoryMappedFile() : data_(nullptr), size_(0), fd_(-1) {}
+#endif
 
         ~MemoryMappedFile()
         {
@@ -32,12 +47,22 @@ namespace imc
 
         // Implement move constructor
         MemoryMappedFile(MemoryMappedFile&& other) noexcept
+#if defined(_WIN32) || defined(_WIN64)
+            : data_(other.data_), size_(other.size_), hFile_(other.hFile_), hMap_(other.hMap_)
+        {
+            other.data_ = nullptr;
+            other.size_ = 0;
+            other.hFile_ = INVALID_HANDLE_VALUE;
+            other.hMap_ = NULL;
+        }
+#else
             : data_(other.data_), size_(other.size_), fd_(other.fd_)
         {
             other.data_ = nullptr;
             other.size_ = 0;
             other.fd_ = -1;
         }
+#endif
 
         // Implement move assignment operator
         MemoryMappedFile& operator=(MemoryMappedFile&& other) noexcept
@@ -47,10 +72,17 @@ namespace imc
                 close_file();
                 data_ = other.data_;
                 size_ = other.size_;
+#if defined(_WIN32) || defined(_WIN64)
+                hFile_ = other.hFile_;
+                hMap_ = other.hMap_;
+                other.hFile_ = INVALID_HANDLE_VALUE;
+                other.hMap_ = NULL;
+#else
                 fd_ = other.fd_;
+                other.fd_ = -1;
+#endif
                 other.data_ = nullptr;
                 other.size_ = 0;
-                other.fd_ = -1;
             }
             return *this;
         }
@@ -59,6 +91,46 @@ namespace imc
         {
             close_file();
 
+#if defined(_WIN32) || defined(_WIN64)
+            hFile_ = CreateFileA(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+            if (hFile_ == INVALID_HANDLE_VALUE)
+            {
+                throw std::runtime_error("Failed to open file: " + filename);
+            }
+
+            LARGE_INTEGER fileSize;
+            if (!GetFileSizeEx(hFile_, &fileSize))
+            {
+                CloseHandle(hFile_);
+                hFile_ = INVALID_HANDLE_VALUE;
+                throw std::runtime_error("Failed to get file size: " + filename);
+            }
+            size_ = (size_t)fileSize.QuadPart;
+
+            if (size_ == 0)
+            {
+                data_ = nullptr;
+                return;
+            }
+
+            hMap_ = CreateFileMappingA(hFile_, NULL, PAGE_READONLY, 0, 0, NULL);
+            if (hMap_ == NULL)
+            {
+                CloseHandle(hFile_);
+                hFile_ = INVALID_HANDLE_VALUE;
+                throw std::runtime_error("Failed to create file mapping: " + filename);
+            }
+
+            data_ = static_cast<const unsigned char*>(MapViewOfFile(hMap_, FILE_MAP_READ, 0, 0, 0));
+            if (data_ == NULL)
+            {
+                CloseHandle(hMap_);
+                hMap_ = NULL;
+                CloseHandle(hFile_);
+                hFile_ = INVALID_HANDLE_VALUE;
+                throw std::runtime_error("Failed to map view of file: " + filename);
+            }
+#else
             fd_ = open(filename.c_str(), O_RDONLY);
             if (fd_ == -1)
             {
@@ -90,20 +162,39 @@ namespace imc
             }
 
             data_ = static_cast<const unsigned char*>(mapped);
+#endif
         }
 
         void close_file()
         {
             if (data_)
             {
+#if defined(_WIN32) || defined(_WIN64)
+                UnmapViewOfFile(data_);
+#else
                 munmap(const_cast<unsigned char*>(data_), size_);
+#endif
                 data_ = nullptr;
             }
+            // Close the OS handles still held for the file and reset them to their "closed" sentinels.
+#if defined(_WIN32) || defined(_WIN64)
+            if (hMap_)
+            {
+                CloseHandle(hMap_);
+                hMap_ = NULL;
+            }
+            if (hFile_ != INVALID_HANDLE_VALUE)
+            {
+                CloseHandle(hFile_);
+                hFile_ = INVALID_HANDLE_VALUE;
+            }
+#else
             if (fd_ != -1)
             {
                 close(fd_);
                 fd_ = -1;
             }
+#endif
             size_ = 0;
         }
 
diff --git a/makefile b/makefile
index d5e7b01..59dd5a9 100644
--- a/makefile
+++ b/makefile
@@ -88,11 +88,11 @@ docker-run:
 
 python-build: check-tags
 	make -C python/ build-inplace
-	cp python/imctermite*.so ./ -v
+	cp -v python/imctermite*.so ./ 2>/dev/null || cp -v python/imctermite*.pyd ./
 
 python-clean:
 	make -C python/ clean
-	rm -vf imctermite*.so
+	rm -vf imctermite*.so imctermite*.pyd
 
 python-test:
 	PYTHONPATH=./ python python/examples/usage.py
diff --git a/python/makefile b/python/makefile
index 6bb6ecd..43dd6ea 100644
--- a/python/makefile
+++ b/python/makefile
@@ -26,8 +26,8 @@ build-bdist: setup
 
 build-clean:
 	python setup.py clean --all
-	rm -vf imctermite*.so imctermite*.cpp
-	rm -vf IMCtermite*.so IMCtermite*.cpp
+	rm -vf imctermite*.so imctermite*.pyd imctermite*.cpp
+	rm -vf IMCtermite*.so IMCtermite*.pyd IMCtermite*.cpp
 	rm -rvf dist/ IMCtermite.egg-info/
 	rm -rvf dist/ imctermite.egg-info/
 
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
new file mode 100644
index 0000000..ad95710
--- /dev/null
+++ b/tests/test_streaming.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+"""
+Tests for the new streaming/chunking functionality in IMCtermite
+"""
+
+import pytest
+import numpy as np
+from pathlib import Path
+
+try:
+    import imctermite
+except ImportError:
+    pytest.skip("imctermite module not built - run 'make python-build' first", allow_module_level=True)
+
+PROJECT_ROOT = Path(__file__).parent.parent
+SAMPLES_DIR = PROJECT_ROOT / "samples"
+DATASET_A = SAMPLES_DIR / "datasetA"
+
+class TestStreaming:
+    """Test iter_channel_numpy functionality"""
+
+    @pytest.fixture
+    def imc_instance(self):
+        """Create IMC instance with sample file"""
+        sample_file = DATASET_A / "datasetA_1.raw"
+        if not sample_file.exists():
+            pytest.skip(f"Sample file not found: {sample_file}")
+        return imctermite.imctermite(str(sample_file).encode())
+
+    @pytest.fixture
+    def first_channel_uuid(self, imc_instance):
+        """Get UUID of the first channel"""
+        channels = imc_instance.get_channels(include_data=False)
+        assert len(channels) > 0
+        return channels[0]['uuid']
+
+    def test_iter_channel_numpy_scaled(self, imc_instance, first_channel_uuid):
+        """Test default scaled streaming"""
+        # Get ground truth via old method
+        full_channels = imc_instance.get_channels(include_data=True)
+        target_channel = next(ch for ch in full_channels if ch['uuid'] == first_channel_uuid)
+        expected_y = np.array(target_channel['ydata'])
+
+        # Stream data
+        streamed_y = []
+        # Encode UUID to bytes for C++ std::string
+        uuid_bytes = first_channel_uuid.encode('utf-8')
+        for chunk in imc_instance.iter_channel_numpy(uuid_bytes, chunk_rows=100):
+            assert 'y' in chunk
+            assert isinstance(chunk['y'], np.ndarray)
+            assert chunk['y'].dtype == np.float64 # Scaled should be float64
+            streamed_y.append(chunk['y'])
+
+        full_streamed_y = np.concatenate(streamed_y)
+
+        # Compare
+        np.testing.assert_allclose(full_streamed_y, expected_y, rtol=1e-4)
+
+    def test_iter_channel_numpy_raw(self, imc_instance, first_channel_uuid):
+        """Test raw streaming"""
+        # We can't easily compare raw values to scaled values without knowing the factor/offset
+        # But we can check types and consistency
+
+        streamed_y_raw = []
+        uuid_bytes = first_channel_uuid.encode('utf-8')
+        for chunk in imc_instance.iter_channel_numpy(uuid_bytes, chunk_rows=100, mode="raw"):
+            assert 'y' in chunk
+            assert isinstance(chunk['y'], np.ndarray)
+            # Raw type depends on file, but shouldn't necessarily be float64 unless the raw data is float
+            streamed_y_raw.append(chunk['y'])
+
+        full_streamed_y_raw = np.concatenate(streamed_y_raw)
+
+        # Ensure we got data
+        assert len(full_streamed_y_raw) > 0
+
+    def test_chunking_behavior(self, imc_instance, first_channel_uuid):
+        """Test that small chunks work correctly"""
+        chunk_size = 10
+        uuid_bytes = first_channel_uuid.encode('utf-8')
+        chunks = list(imc_instance.iter_channel_numpy(uuid_bytes, chunk_rows=chunk_size))
+
+        # An empty stream would make every check below pass vacuously
+        assert len(chunks) > 0
+
+        # All but the last chunk must be full ...
+        for chunk in chunks[:-1]:
+            assert len(chunk['y']) == chunk_size
+        # ... and the last one may be shorter, but never larger than requested
+        assert len(chunks[-1]['y']) <= chunk_size
+
+        # Check continuity of 'start' index
+        expected_start = 0
+        for chunk in chunks:
+            assert chunk['start'] == expected_start
+            expected_start += len(chunk['y'])
+
+    def test_include_x_parameter(self, imc_instance, first_channel_uuid):
+        """Test include_x=False"""
+        uuid_bytes = first_channel_uuid.encode('utf-8')
+        for chunk in imc_instance.iter_channel_numpy(uuid_bytes, include_x=False, chunk_rows=100):
+            assert 'y' in chunk
+            assert 'x' not in chunk
+
+    def test_invalid_channel_uuid(self, imc_instance):
+        """Test behavior with invalid UUID"""
+        # Depending on implementation, this might raise an error or return empty generator
+        # Based on C++ code: throw std::runtime_error("channel does not exist:" + uuid);
+        # Cython should propagate this as RuntimeError
+
+        with pytest.raises(RuntimeError):
+            list(imc_instance.iter_channel_numpy(b"non-existent-uuid"))

From 0d93e03f4d92488ea6ce10812f46389ff1dcf554 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <janole@goedeke.de>
Date: Mon, 15 Dec 2025 22:56:24 +0100
Subject: [PATCH 06/12] Fix platform detection in codepage conversion and
 update CLI path for Windows

---
 .github/workflows/test.yml |  4 ++--
 makefile                   |  2 +-
 python/imctermite.pyx      | 16 ++++++++++++----
 python/setup.cfg           |  4 ++++
 python/setup.py            |  3 ++-
 src/main.cpp               |  1 +
 tests/test_cli.py          |  3 +++
 7 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 7fa6520..e3b64e7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -2,7 +2,7 @@ name: Run Tests
 
 on:
   push:
-    branches: [ master, numpy-streaming ]
+    branches: [ master ]
   pull_request:
     branches: [ master ]
 
@@ -12,7 +12,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest]
-        python-version: ["3.10"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
 
     steps:
     - name: Checkout code
diff --git a/makefile b/makefile
index 59dd5a9..f44495b 100644
--- a/makefile
+++ b/makefile
@@ -18,7 +18,7 @@ MIB = $(foreach dir,$(KIB),-I $(dir))
 
 # choose compiler and its options
 CC = g++ -std=c++17
-OPT = -O3 -Wall -Wconversion -Wpedantic -Werror -Wunused-variable -Wsign-compare
+OPT = -O3 -Wall -Wconversion -Wpedantic -Werror -Wunused-variable -Wsign-compare -static
 
 # determine git version/commit and release tag
 GTAG := $(shell git tag -l --sort=version:refname | tail -n1 | sed "s/$^v//g")
diff --git a/python/imctermite.pyx b/python/imctermite.pyx
index 3bfc319..a6c6c20 100644
--- a/python/imctermite.pyx
+++ b/python/imctermite.pyx
@@ -12,10 +12,18 @@ import platform
 
 # auxiliary function for codepage conversion
 def get_codepage(chn) :
-    if platform == 'Windows' :
-        chndec = jn.loads(chn.decode(errors="ignore"))
-        chncdp = chndec["codepage"]
-        return 'utf-8' if chncdp is None else chncdp
+    if platform.system() == 'Windows' :
+        # Best effort: malformed or unexpected channel JSON falls back to UTF-8
+        try:
+            chncdp = jn.loads(chn.decode(errors="ignore")).get("codepage")
+            if not chncdp:
+                return 'utf-8'
+            # If it's a number like "1252", Python expects "cp1252"
+            if str(chncdp).isdigit():
+                return 'cp' + str(chncdp)
+            return str(chncdp)
+        except Exception:
+            return 'utf-8'
     else :
         return 'utf-8'
 
diff --git a/python/setup.cfg b/python/setup.cfg
index 1308c6e..86528dc 100644
--- a/python/setup.cfg
+++ b/python/setup.cfg
@@ -15,6 +15,10 @@ license_files = LICENSE
 keywords = IMC, raw, imcFAMOS, imcSTUDIO, imcCRONOS
 classifiers =
   Programming Language :: Python :: 3
+  Programming Language :: Python :: 3.10
+  Programming Language :: Python :: 3.11
+  Programming Language :: Python :: 3.12
+  Programming Language :: Python :: 3.13
   License :: OSI Approved :: MIT License
   Operating System :: OS Independent
   Topic :: Scientific/Engineering
diff --git a/python/setup.py b/python/setup.py
index 8d2e8a2..1d03fc6 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -15,7 +15,8 @@
     "imctermite",
     sources=["imctermite.pyx"],
     include_dirs=[numpy.get_include()],
-    extra_compile_args=cmpArgs[sys.platform]
+    extra_compile_args=cmpArgs[sys.platform],
+    define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")]
 )
 
 setup(
diff --git a/src/main.cpp b/src/main.cpp
index b19e0c7..989d30a 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -3,6 +3,7 @@
 #include <iostream>
 #include <vector>
 #include <filesystem>
+#include <map>
 
 // #include "imc_key.hpp"
 // #include "imc_block.hpp"
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 6144e6c..e906689 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -5,10 +5,13 @@
 
 import pytest
 import subprocess
+import sys
 from pathlib import Path
 
 PROJECT_ROOT = Path(__file__).parent.parent
 CLI = PROJECT_ROOT / "imctermite"
+if sys.platform == "win32":
+    CLI = CLI.with_suffix(".exe")
 SAMPLES_DIR = PROJECT_ROOT / "samples" / "datasetA"
 
 

From 27d8215c85f71a72bef69fdee0f27364a3b3f73b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <jan.goedeke@pilatus-aircraft.com>
Date: Tue, 16 Dec 2025 19:32:33 +0100
Subject: [PATCH 07/12] Implement chunked streaming for channel printing to
 improve memory efficiency (#9)

Reduces memory usage by 90% for large datasets while maintaining comparable processing speed.
---
 lib/imc_channel.hpp   | 42 +++++++++++++++++++++++++++++-------------
 lib/imc_raw.hpp       | 10 +++++-----
 python/imctermite.pxd |  4 ++--
 python/imctermite.pyx |  8 ++++----
 4 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/lib/imc_channel.hpp b/lib/imc_channel.hpp
index c86e394..fe61b34 100644
--- a/lib/imc_channel.hpp
+++ b/lib/imc_channel.hpp
@@ -913,7 +913,7 @@ namespace imc
     }
 
     // print channel
-    void print(std::string filename, const char sep = ',', int width = 25, int yprec = 9)
+    void print(std::string filename, const char sep = ',', int width = 25, int yprec = 9, unsigned long int chunk_size = 100000)
     {
       std::ofstream fou(filename);
 
@@ -930,21 +930,37 @@ namespace imc
         fou<<xname_<<sep<<yname_<<"\n"<<xunit_<<sep<<yunit_<<"\n";
       }
 
-      for ( unsigned long int i = 0; i < xdata_.size(); i++ )
+      // Stream data in chunks
+      unsigned long int start = 0;
+      while (start < number_of_samples_)
       {
-        if ( sep == ' ' )
-        {
-          fou<<std::setprecision(xprec_)<<std::fixed
-             <<std::setw(width)<<std::left<<xdata_[i]
-             <<std::setprecision(yprec)<<std::fixed
-             <<std::setw(width)<<std::left<<ydata_[i]<<"\n";
-        }
-        else
+        channel_chunk chunk = read_chunk(start, chunk_size, true, false); // include_x=true, raw_mode=false (scaled)
+        
+        if (chunk.count == 0) break;
+        
+        // Extract x and y data from chunk
+        const double* x_ptr = reinterpret_cast<const double*>(chunk.x_bytes.data());
+        const double* y_ptr = reinterpret_cast<const double*>(chunk.y_bytes.data());
+        
+        // Write chunk data
+        for (unsigned long int i = 0; i < chunk.count; i++)
         {
-          fou<<std::setprecision(xprec_)<<std::fixed<<xdata_[i]
-             <<sep
-             <<std::setprecision(yprec)<<std::fixed<<ydata_[i]<<"\n";
+          if ( sep == ' ' )
+          {
+            fou<<std::setprecision(xprec_)<<std::fixed
+               <<std::setw(width)<<std::left<<x_ptr[i]
+               <<std::setprecision(yprec)<<std::fixed
+               <<std::setw(width)<<std::left<<y_ptr[i]<<"\n";
+          }
+          else
+          {
+            fou<<std::setprecision(xprec_)<<std::fixed<<x_ptr[i]
+               <<sep
+               <<std::setprecision(yprec)<<std::fixed<<y_ptr[i]<<"\n";
+          }
         }
+        
+        start += chunk.count;
       }
 
       fou.close();
diff --git a/lib/imc_raw.hpp b/lib/imc_raw.hpp
index 47fc011..1ab74b2 100644
--- a/lib/imc_raw.hpp
+++ b/lib/imc_raw.hpp
@@ -431,7 +431,7 @@ namespace imc
     }
 
     // print single specific channel
-    void print_channel(std::string channeluuid, std::string outputfile, const char sep)
+    void print_channel(std::string channeluuid, std::string outputfile, const char sep, unsigned long int chunk_size = 100000)
     {
       // check for given parent directory of output file
       std::filesystem::path pdf = outputfile;
@@ -444,7 +444,7 @@ namespace imc
       // find channel with given name
       if ( channels_.count(channeluuid) == 1 )
       {
-        channels_.at(channeluuid).print(outputfile,sep);
+        channels_.at(channeluuid).print(outputfile,sep,25,9,chunk_size);
       }
       else
       {
@@ -454,7 +454,7 @@ namespace imc
     }
 
     // print all channels into given directory
-    void print_channels(std::string output, const char sep)
+    void print_channels(std::string output, const char sep, unsigned long int chunk_size = 100000)
     {
       // check for given directory
       std::filesystem::path pd = output;
@@ -473,8 +473,8 @@ namespace imc
                                            : it->second.name_ + std::string(".csv");
         std::filesystem::path pf = pd / filenam;
 
-        // and print the channel
-        it->second.print(pf.u8string(),sep);
+        // and print the channel using streaming
+        it->second.print(pf.u8string(),sep,25,9,chunk_size);
       }
     }
 
diff --git a/python/imctermite.pxd b/python/imctermite.pxd
index 262f57a..682946e 100644
--- a/python/imctermite.pxd
+++ b/python/imctermite.pxd
@@ -37,6 +37,6 @@ cdef extern from "lib/imc_raw.hpp" namespace "imc":
     channel_chunk read_channel_chunk(string uuid, unsigned long int start, unsigned long int count, bool include_x, bool raw_mode) except +
 
     # print single channel/all channels
-    void print_channel(string channeluuid, string outputdir, char delimiter) except +
-    void print_channels(string outputdir, char delimiter) except +
+    void print_channel(string channeluuid, string outputdir, char delimiter, unsigned long int chunk_size) except +
+    void print_channels(string outputdir, char delimiter, unsigned long int chunk_size) except +
     void print_table(string outputfile) except +
diff --git a/python/imctermite.pyx b/python/imctermite.pyx
index a6c6c20..b85ced9 100644
--- a/python/imctermite.pyx
+++ b/python/imctermite.pyx
@@ -112,10 +112,10 @@ cdef class imctermite:
             break
 
   # print single channel/all channels
-  def print_channel(self, string channeluuid, string outputfile, char delimiter):
-    self.cppimc.print_channel(channeluuid,outputfile,delimiter)
-  def print_channels(self, string outputdir, char delimiter):
-    self.cppimc.print_channels(outputdir,delimiter)
+  def print_channel(self, string channeluuid, string outputfile, char delimiter, unsigned long int chunk_size=100000):
+    self.cppimc.print_channel(channeluuid,outputfile,delimiter,chunk_size)
+  def print_channels(self, string outputdir, char delimiter, unsigned long int chunk_size=100000):
+    self.cppimc.print_channels(outputdir,delimiter,chunk_size)
 
   # print table including channels
   def print_table(self, string outputfile):

From dffefa8b04f51a042126b96a27e8e8954506fe2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <janole@goedeke.de>
Date: Tue, 16 Dec 2025 22:12:02 +0100
Subject: [PATCH 08/12] Refactor raw file handling to use string instead of
 bytes across examples and core functionality

---
 README.md                             |  6 ++---
 python/examples/multichannel.py       |  4 ++--
 python/examples/usage.py              |  8 +++----
 python/examples/usage_adv.py          |  6 ++---
 python/examples/usage_ext.py          |  2 +-
 python/examples/usage_files.py        |  6 ++---
 python/examples/usage_numpy_chunks.py |  4 ++--
 python/imctermite.pyx                 | 32 +++++++++++++++++----------
 8 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/README.md b/README.md
index 0436816..c239a09 100644
--- a/README.md
+++ b/README.md
@@ -196,17 +196,17 @@ of it by passing a _raw_ file to the constructor:
 ```Python
 import imctermite
 
-imcraw = imctermite.imctermite(b"sample/sampleA.raw")
+imcraw = imctermite.imctermite("samples/sampleA.raw")
 ```
 
 An example of how to create an instance and obtain the list of channels is:
 
 ```Python
-import IMCtermite
+import imctermite
 
 # declare and initialize instance of "imctermite" by passing a raw-file
 try :
-    imcraw = IMCtermite.imctermite(b"samples/sampleA.raw")
+    imcraw = imctermite.imctermite("samples/sampleA.raw")
 except RuntimeError as e :
     print("failed to load/parse raw-file: " + str(e))
 
diff --git a/python/examples/multichannel.py b/python/examples/multichannel.py
index 67b6b41..d2e2e1b 100644
--- a/python/examples/multichannel.py
+++ b/python/examples/multichannel.py
@@ -11,7 +11,7 @@ def add_trigger_time(trigger_time, add_time) :
 if __name__ == "__main__" :
 
     # read file and extract data
-    imctm = imctermite.imctermite(b"Measurement.raw")
+    imctm = imctermite.imctermite("samples/exampleB.raw")
     chns = imctm.get_channels(True)
     
     # prepare abscissa
@@ -39,5 +39,5 @@ def add_trigger_time(trigger_time, add_time) :
 
     # show entire dataframe and write file
     print(df)
-    df.to_csv("Measurement.csv",header=True,sep='\t',index=False)
+    df.to_csv("exampleB.csv",header=True,sep='\t',index=False)
 
diff --git a/python/examples/usage.py b/python/examples/usage.py
index 06cc3ed..2b48e22 100644
--- a/python/examples/usage.py
+++ b/python/examples/usage.py
@@ -5,7 +5,7 @@
 
 # declare and initialize instance of "imctermite" by passing a raw-file
 try :
-    imcraw = imctermite.imctermite(b"samples/exampleB.raw")
+    imcraw = imctermite.imctermite("samples/exampleB.raw")
 except RuntimeError as e :
     raise Exception("failed to load/parse raw-file: " + str(e))
 
@@ -24,15 +24,15 @@
 print()
 
 # print the channels into a specific directory
-imcraw.print_channels(b"/tmp/",ord(','))
+imcraw.print_channels("/tmp/",ord(','))
 
 # print all channels separately
 for i,chn in enumerate(channels) :
     print(str(i)+" : "+chn['name']+" : "+chn['uuid'])
     filname = os.path.join("/tmp/",str(i) + "_" + chn['name']+".csv")
     print(filname)
-    imcraw.print_channel(chn['uuid'].encode(),filname.encode(),ord(','))
+    imcraw.print_channel(chn['uuid'],filname,ord(','))
 
 # print all channels in single file
-imcraw.print_table(b"/tmp/allchannels.csv")
+imcraw.print_table("/tmp/allchannels.csv")
 
diff --git a/python/examples/usage_adv.py b/python/examples/usage_adv.py
index 36000a6..0c844d8 100644
--- a/python/examples/usage_adv.py
+++ b/python/examples/usage_adv.py
@@ -15,7 +15,7 @@
 
     # declare and initialize instance of "imctermite" by passing a raw-file
     try :
-        imcraw = imctermite.imctermite(fl.encode())
+        imcraw = imctermite.imctermite(fl)
     except RuntimeError as e :
         raise Exception("failed to load/parse raw-file: " + str(e))
 
@@ -24,7 +24,7 @@
     print(json.dumps(channels,indent=4, sort_keys=False))
 
     # print the channels into a specific directory
-    imcraw.print_channels(b"./",ord(','))
+    imcraw.print_channels("./",ord(','))
 
     # print all channels in single file
-    imcraw.print_table(("./"+str(os.path.basename(fl).split('.')[0])+"_allchannels.csv").encode())
+    imcraw.print_table(("./"+str(os.path.basename(fl).split('.')[0])+"_allchannels.csv"))
diff --git a/python/examples/usage_ext.py b/python/examples/usage_ext.py
index b6536e2..e7dd8e5 100644
--- a/python/examples/usage_ext.py
+++ b/python/examples/usage_ext.py
@@ -6,7 +6,7 @@
 
 # declare and initialize instance of "imctermite" by passing a raw-file
 try :
-    imcraw = imctermite.imctermite(b"samples/sampleB.raw")
+    imcraw = imctermite.imctermite("samples/sampleB.raw")
 except RuntimeError as e :
     raise Exception("failed to load/parse raw-file: " + str(e))
 
diff --git a/python/examples/usage_files.py b/python/examples/usage_files.py
index 3dcebd3..b6532d6 100644
--- a/python/examples/usage_files.py
+++ b/python/examples/usage_files.py
@@ -1,5 +1,5 @@
 
-import imctermite import imctermite
+import imctermite
 
 def show_results(imcraw) :
 
@@ -19,11 +19,11 @@ def show_results(imcraw) :
     print("")
 
 # create instance of 'imctermite'
-imcraw = imctermite(b'samples/sampleA.raw')
+imcraw = imctermite.imctermite("samples/sampleA.raw")
 
 show_results(imcraw)
 
 # use previous instance of 'imctermite' to provide new file
-imcraw.submit_file(b'samples/sampleB.raw')
+imcraw.submit_file("samples/sampleB.raw")
 
 show_results(imcraw)
diff --git a/python/examples/usage_numpy_chunks.py b/python/examples/usage_numpy_chunks.py
index 6aeaf09..8e63ee2 100644
--- a/python/examples/usage_numpy_chunks.py
+++ b/python/examples/usage_numpy_chunks.py
@@ -6,7 +6,7 @@
 
 # Path to a sample file
 # Using sampleB.raw because it has integer data with scaling (factor=0.01, offset=327.68)
-raw_file = b"samples/sampleB.raw"
+raw_file = "samples/sampleB.raw"
 if not os.path.exists(raw_file):
     print(f"Sample file {raw_file} not found.")
     exit(1)
@@ -30,7 +30,7 @@
 target_uuid = "347"
 channel_info = next((ch for ch in channels if ch['uuid'] == target_uuid), channels[0])
 
-first_channel_uuid = channel_info['uuid'].encode('utf-8')
+first_channel_uuid = channel_info['uuid']
 print(f"Iterating over channel {first_channel_uuid} ({channel_info.get('name', 'unnamed')})")
 
 # Check native datatype
diff --git a/python/imctermite.pyx b/python/imctermite.pyx
index b85ced9..f8ecfbf 100644
--- a/python/imctermite.pyx
+++ b/python/imctermite.pyx
@@ -27,22 +27,30 @@ def get_codepage(chn) :
     else :
         return 'utf-8'
 
+cdef bytes _as_bytes(obj):
+    # Normalise a path/UUID argument to the bytes handed to C++ std::string.
+    # bytes-like input passes through as bytes; str() of a bytearray would be
+    # its repr, so it must not take the generic branch below.
+    if isinstance(obj, (bytes, bytearray)):
+        return bytes(obj)
+    return str(obj).encode('utf-8')
+
 cdef class imctermite:
 
   # C++ instance of class
   cdef cppimctermite* cppimc
 
   # constructor
-  def __cinit__(self, string rawfile):
-    self.cppimc = new cppimctermite(rawfile)
+  def __cinit__(self, rawfile):
+    self.cppimc = new cppimctermite(_as_bytes(rawfile))
 
   def __dealloc__(self):
     if self.cppimc != NULL:
         del self.cppimc
 
   # provide raw file
-  def submit_file(self,string rawfile):
-    self.cppimc.set_file(rawfile)
+  def submit_file(self, rawfile):
+    self.cppimc.set_file(_as_bytes(rawfile))
 
   # get JSON list of channels
   def get_channels(self, bool include_data):
@@ -51,7 +59,7 @@ cdef class imctermite:
     return chnlstjn
 
   def iter_channel_numpy(self, string channeluuid, bool include_x=True, int chunk_rows=1000000, str mode="scaled"):
-    cdef unsigned long int total_len = self.cppimc.get_channel_length(channeluuid)
+    cdef unsigned long int total_len = self.cppimc.get_channel_length(_as_bytes(channeluuid))
     cdef unsigned long int start = 0
     cdef channel_chunk chunk
     cdef cnp.ndarray x_arr
@@ -77,7 +85,7 @@ cdef class imctermite:
     }
 
     while start < total_len:
-        chunk = self.cppimc.read_channel_chunk(channeluuid, start, chunk_rows, include_x, raw_mode)
+        chunk = self.cppimc.read_channel_chunk(_as_bytes(channeluuid), start, chunk_rows, include_x, raw_mode)
         
         # Create numpy arrays from bytes
         y_dtype = dtype_map.get(chunk.y_type, np.float64)
@@ -112,16 +120,16 @@ cdef class imctermite:
             break
 
   # print single channel/all channels
-  def print_channel(self, string channeluuid, string outputfile, char delimiter, unsigned long int chunk_size=100000):
-    self.cppimc.print_channel(channeluuid,outputfile,delimiter,chunk_size)
-  def print_channels(self, string outputdir, char delimiter, unsigned long int chunk_size=100000):
-    self.cppimc.print_channels(outputdir,delimiter,chunk_size)
+  def print_channel(self, channeluuid, outputfile, char delimiter, unsigned long int chunk_size=100000):
+    self.cppimc.print_channel(_as_bytes(channeluuid),_as_bytes(outputfile),delimiter,chunk_size)
+  def print_channels(self, outputdir, char delimiter, unsigned long int chunk_size=100000):
+    self.cppimc.print_channels(_as_bytes(outputdir),delimiter,chunk_size)
 
   # print table including channels
-  def print_table(self, string outputfile):
+  def print_table(self, outputfile):
     chnlst = self.cppimc.get_channels(True,True)
     chnlstjn = [jn.loads(chn.decode(errors="ignore")) for chn in chnlst]
-    with open(outputfile.decode(),'w') as fout:
+    with open(outputfile,'w') as fout:
       for chn in chnlstjn:
         fout.write('#' +str(chn['xname']).rjust(19)+str(chn['yname']).rjust(20)+'\n')
         fout.write('#'+str(chn['xunit']).rjust(19)+str(chn['yunit']).rjust(20)+'\n')

From 62da4de05d2d992c869ea678bf37980d3ca7ad36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <janole@goedeke.de>
Date: Tue, 16 Dec 2025 22:30:27 +0100
Subject: [PATCH 09/12] Enhance multichannel data handling and add numpy
 support for channel data retrieval (#33)

---
 python/examples/multichannel.py | 53 ++++++++++++++++++++++-----------
 python/imctermite.pyx           | 14 +++++++--
 2 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/python/examples/multichannel.py b/python/examples/multichannel.py
index d2e2e1b..4dd7b4a 100644
--- a/python/examples/multichannel.py
+++ b/python/examples/multichannel.py
@@ -2,6 +2,7 @@
 import imctermite
 import pandas
 import datetime
+import numpy as np
 
 def add_trigger_time(trigger_time, add_time) :
     trgts = datetime.datetime.strptime(trigger_time,'%Y-%m-%dT%H:%M:%S')
@@ -10,32 +11,48 @@ def add_trigger_time(trigger_time, add_time) :
 
 if __name__ == "__main__" :
 
-    # read file and extract data
+    # read file
     imctm = imctermite.imctermite("samples/exampleB.raw")
-    chns = imctm.get_channels(True)
     
-    # prepare abscissa
-    xcol = "time ["+chns[0]['xunit']+"]"
-    #xcol = "timestamp"
-    xsts = [add_trigger_time(chns[0]['trigger-time'],tm) for tm in chns[0]['xdata']]
+    # Get metadata only
+    chns = imctm.get_channels(False)
+    
+    if not chns:
+        print("No channels found")
+        exit()
+    
+    # Prepare DataFrame
+    df = pandas.DataFrame()
 
-    # sort channels
+    # Get X-axis from the first channel
+    first_chn = chns[0]
+    
+    data = imctm.get_channel_data(first_chn['uuid'], include_x=True)
+    x_data = data['x']
+    
+    xcol = "time ["+first_chn['xunit']+"]"
+    df[xcol] = x_data
+
+    # sort channels by name
     chnnms = sorted([chn['name'] for chn in chns], reverse=False)
-    chnsdict = {}
-    for chn in chns :
-        chnsdict[chn['name']] = chn
+    chnsdict = {chn['name']: chn for chn in chns}
 
-    # construct dataframe
-    df = pandas.DataFrame()
-    df[xcol] = pandas.Series(chns[0]['xdata'])
-    #df[xcol] = pandas.Series(xsts)
-    #for idx,chn in enumerate(chns) :
     for chnnm in chnnms :
         chn = chnsdict[chnnm]
-        #xcol = (chn['xname'] if chn['xname'] != '' else "x_"+str(idx))+" ["+chn['xunit']+"]"
-        #df[xcol] = pandas.Series(chn['xdata'])
+        uuid = chn['uuid']
+        
+        # Fetch Y data only
+        data = imctm.get_channel_data(uuid, include_x=False)
+        y_data = data['y']
+        
         ycol = chn['yname']+" ["+chn['yunit']+"]"
-        df[ycol] = pandas.Series(chn['ydata'])
+        
+        # Assign to DataFrame
+        if len(y_data) == len(df):
+            df[ycol] = y_data
+        else:
+            # Fallback to Series for alignment/filling
+            df[ycol] = pandas.Series(y_data)
 
     # show entire dataframe and write file
     print(df)
diff --git a/python/imctermite.pyx b/python/imctermite.pyx
index f8ecfbf..df00846 100644
--- a/python/imctermite.pyx
+++ b/python/imctermite.pyx
@@ -58,9 +58,9 @@ cdef class imctermite:
     chnlstjn = [jn.loads(chn.decode(get_codepage(chn),errors="ignore")) for chn in chnlst]
     return chnlstjn
 
-  def iter_channel_numpy(self, string channeluuid, bool include_x=True, int chunk_rows=1000000, str mode="scaled"):
+  def iter_channel_numpy(self, channeluuid, bool include_x=True, unsigned long int chunk_rows=1000000, str mode="scaled", unsigned long int start_index=0):
     cdef unsigned long int total_len = self.cppimc.get_channel_length(_as_bytes(channeluuid))
-    cdef unsigned long int start = 0
+    cdef unsigned long int start = start_index
     cdef channel_chunk chunk
     cdef cnp.ndarray x_arr
     cdef cnp.ndarray y_arr
@@ -119,6 +119,16 @@ cdef class imctermite:
         if chunk.count == 0:
             break
 
+  def get_channel_data(self, channeluuid, bool include_x=True, str mode="scaled"):
+    cdef unsigned long int total_len = self.cppimc.get_channel_length(_as_bytes(channeluuid))
+    if total_len == 0:
+        res = {'y': np.array([])}
+        if include_x:
+            res['x'] = np.array([])
+        return res
+    
+    return next(self.iter_channel_numpy(channeluuid, include_x, total_len, mode, 0))
+
   # print single channel/all channels
   def print_channel(self, channeluuid, outputfile, char delimiter, unsigned long int chunk_size=100000):
     self.cppimc.print_channel(_as_bytes(channeluuid),_as_bytes(outputfile),delimiter,chunk_size)

From b644d3382b89a8b73a363dfde22d01ed3bbc3538 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <janole@goedeke.de>
Date: Tue, 16 Dec 2025 22:33:28 +0100
Subject: [PATCH 10/12] Add print_timerange example and implement channel
 length retrieval in imctermite

---
 python/examples/usage_timerange.py | 65 ++++++++++++++++++++++++++++++
 python/imctermite.pyx              |  4 ++
 2 files changed, 69 insertions(+)
 create mode 100644 python/examples/usage_timerange.py

diff --git a/python/examples/usage_timerange.py b/python/examples/usage_timerange.py
new file mode 100644
index 0000000..eb36d9e
--- /dev/null
+++ b/python/examples/usage_timerange.py
@@ -0,0 +1,65 @@
+
+import imctermite
+import sys
+import os
+
+def print_timerange(filename):
+    """
+    Demonstrates how to efficiently get the time range (first and last X values)
+    of channels without reading the entire file.
+    """
+    
+    try:
+        imc = imctermite.imctermite(filename)
+    except RuntimeError as e:
+        print(f"Error loading file: {e}")
+        return
+
+    # Get list of channels (metadata only, no data loaded yet)
+    channels = imc.get_channels(False)
+    
+    if not channels:
+        print("No channels found in file.")
+        return
+
+    print(f"File: {filename}")
+    print("-" * 80)
+    print(f"{'Channel Name':<25} | {'Start (X)':<15} | {'End (X)':<15} | {'Samples':<10}")
+    print("-" * 80)
+
+    for chn in channels:
+        uuid = chn['uuid']
+        name = chn.get('yname', 'Unknown')
+        
+        length = imc.get_channel_length(uuid)
+        
+        if length == 0:
+            print(f"{name:<25} | {'Empty':<15} | {'Empty':<15} | {0:<10}")
+            continue
+
+        # Get first sample (efficiently, reading only 1 row)
+        # We request X data to get the time/index
+        # chunk_rows=1 ensures we only read/convert the absolute minimum data
+        gen_first = imc.iter_channel_numpy(uuid, start_index=0, chunk_rows=1, include_x=True)
+        try:
+            first_chunk = next(gen_first)
+            first_x = first_chunk['x'][0]
+        except (StopIteration, IndexError):
+            first_x = float('nan')
+
+        # Get last sample
+        gen_last = imc.iter_channel_numpy(uuid, start_index=length-1, chunk_rows=1, include_x=True)
+        try:
+            last_chunk = next(gen_last)
+            last_x = last_chunk['x'][0]
+        except (StopIteration, IndexError):
+            last_x = float('nan')
+
+        print(f"{name:<25} | {first_x:<15.5f} | {last_x:<15.5f} | {length:<10}")
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python usage_timerange.py <path_to_raw_file>")
+        print("Example: python usage_timerange.py ../../samples/datasetA/datasetA_1.raw")
+    else:
+        print_timerange(sys.argv[1])
diff --git a/python/imctermite.pyx b/python/imctermite.pyx
index df00846..10e1991 100644
--- a/python/imctermite.pyx
+++ b/python/imctermite.pyx
@@ -58,6 +58,10 @@ cdef class imctermite:
     chnlstjn = [jn.loads(chn.decode(get_codepage(chn),errors="ignore")) for chn in chnlst]
     return chnlstjn
 
+  # get length of a channel
+  def get_channel_length(self, channeluuid):
+    return self.cppimc.get_channel_length(_as_bytes(channeluuid))
+
   def iter_channel_numpy(self, channeluuid, bool include_x=True, unsigned long int chunk_rows=1000000, str mode="scaled", unsigned long int start_index=0):
     cdef unsigned long int total_len = self.cppimc.get_channel_length(_as_bytes(channeluuid))
     cdef unsigned long int start = start_index

From fbf752f5457e7a398baacde62e5998237dba15a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <janole@goedeke.de>
Date: Tue, 16 Dec 2025 22:33:53 +0100
Subject: [PATCH 11/12] Add type stubs and package data for improved type
 checking and IDE support

---
 python/MANIFEST.in    |   2 +
 python/imctermite.pyi | 185 ++++++++++++++++++++++++++++++++++++++++++
 python/py.typed       |   0
 python/setup.py       |   6 +-
 4 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 python/imctermite.pyi
 create mode 100644 python/py.typed

diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index dbe052e..eb044a9 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -2,4 +2,6 @@ include lib/*.hpp
 include *.cpp
 include *.pyx
 include *.pxd
+include *.pyi
+include py.typed
 include VERSION
diff --git a/python/imctermite.pyi b/python/imctermite.pyi
new file mode 100644
index 0000000..07f1dde
--- /dev/null
+++ b/python/imctermite.pyi
@@ -0,0 +1,185 @@
+"""
+Type stub file for IMCtermite Cython extension.
+This provides IDE support, type checking, and autocomplete for the imctermite module.
+"""
+
+from typing import Any, Dict, Iterator, List, Literal, Optional, Union
+import numpy as np
+import numpy.typing as npt
+
+def get_codepage(chn: bytes) -> str:
+    """Get the codepage for decoding channel data."""
+    ...
+
+class imctermite:
+    """
+    IMCtermite parser for .raw (IMC2 Data Format) files.
+    
+    This class provides methods to read and parse IMC measurement data files,
+    extracting channel metadata and data.
+    """
+    
+    def __init__(self, rawfile: Union[str, bytes]) -> None:
+        """
+        Initialize parser with a .raw file.
+        
+        Args:
+            rawfile: Path to the .raw file to parse
+        """
+        ...
+    
+    def submit_file(self, rawfile: Union[str, bytes]) -> None:
+        """
+        Set or change the raw file to parse.
+        
+        Args:
+            rawfile: Path to the .raw file to parse
+        """
+        ...
+    
+    def get_channels(self, include_data: bool = True) -> List[Dict[str, Any]]:
+        """
+        Get list of all channels in the file with their metadata.
+        
+        Args:
+            include_data: If True, includes the actual measurement data in the result.
+                         If False, only returns metadata (faster for inspection).
+        
+        Returns:
+            List of dictionaries containing channel information:
+            - uuid: Unique identifier for the channel
+            - xname: X-axis name (typically "time")
+            - yname: Y-axis name (measurement name)
+            - xunit: X-axis unit
+            - yunit: Y-axis unit
+            - length: Number of data points
+            - xdata: X-axis data (if include_data=True)
+            - ydata: Y-axis data (if include_data=True)
+            - buffer_type: Data type identifier
+            - codepage: Text encoding information
+        """
+        ...
+    
+    def get_channel_length(self, channeluuid: Union[str, bytes]) -> int:
+        """
+        Get the number of data points in a channel.
+        
+        Args:
+            channeluuid: UUID of the channel to query
+        
+        Returns:
+            Number of data points in the channel
+        """
+        ...
+    
+    def iter_channel_numpy(
+        self,
+        channeluuid: Union[str, bytes],
+        include_x: bool = True,
+        chunk_rows: int = 1000000,
+        mode: Literal["scaled", "raw"] = "scaled",
+        start_index: int = 0
+    ) -> Iterator[Dict[str, Union[int, npt.NDArray[Any]]]]:
+        """
+        Iterate over channel data in chunks as numpy arrays.
+        
+        This is memory-efficient for large datasets as it yields data in chunks
+        rather than loading everything into memory at once.
+        
+        Args:
+            channeluuid: UUID of the channel to read
+            include_x: If True, includes x-axis data in results
+            chunk_rows: Number of rows per chunk (default: 1,000,000)
+            mode: "scaled" for calibrated values or "raw" for uncalibrated ADC values
+            start_index: Starting row index (for partial reads)
+        
+        Yields:
+            Dictionary containing:
+            - start: Starting index of this chunk
+            - y: numpy array of Y-axis values
+            - x: numpy array of X-axis values (if include_x=True)
+        
+        Example:
+            >>> imc = imctermite("measurement.raw")
+            >>> channels = imc.get_channels(include_data=False)
+            >>> uuid = channels[0]['uuid']
+            >>> for chunk in imc.iter_channel_numpy(uuid, chunk_rows=100000):
+            ...     print(f"Processing {len(chunk['y'])} samples starting at {chunk['start']}")
+            ...     # Process chunk['x'] and chunk['y'] arrays
+        """
+        ...
+    
+    def get_channel_data(
+        self,
+        channeluuid: Union[str, bytes],
+        include_x: bool = True,
+        mode: Literal["scaled", "raw"] = "scaled"
+    ) -> Dict[str, npt.NDArray[Any]]:
+        """
+        Get all data for a channel as numpy arrays.
+        
+        Args:
+            channeluuid: UUID of the channel to read
+            include_x: If True, includes x-axis data in result
+            mode: "scaled" for calibrated values or "raw" for uncalibrated ADC values
+        
+        Returns:
+            Dictionary containing:
+            - y: numpy array of Y-axis values
+            - x: numpy array of X-axis values (if include_x=True)
+        
+        Note:
+            This loads the entire channel into memory. For large datasets,
+            consider using iter_channel_numpy() instead.
+        
+        Example:
+            >>> imc = imctermite("measurement.raw")
+            >>> channels = imc.get_channels(include_data=False)
+            >>> uuid = channels[0]['uuid']
+            >>> data = imc.get_channel_data(uuid)
+            >>> print(f"X shape: {data['x'].shape}, Y shape: {data['y'].shape}")
+        """
+        ...
+    
+    def print_channel(
+        self,
+        channeluuid: Union[str, bytes],
+        outputfile: Union[str, bytes],
+        delimiter: int,
+        chunk_size: int = 100000
+    ) -> None:
+        """
+        Export a single channel to a CSV file.
+        
+        Args:
+            channeluuid: UUID of the channel to export
+            outputfile: Path to output file
+            delimiter: Column delimiter as an integer character code, e.g. ord(',') (required)
+            chunk_size: Number of rows to process at once
+        """
+        ...
+    
+    def print_channels(
+        self,
+        outputdir: Union[str, bytes],
+        delimiter: Union[str, bytes] = b',',
+        chunk_size: int = 100000
+    ) -> None:
+        """
+        Export all channels to separate CSV files in a directory.
+        
+        Args:
+            outputdir: Directory path for output files
+            delimiter: Column delimiter character (default: comma)
+            chunk_size: Number of rows to process at once
+        """
+        ...
+    
+    def print_table(self, outputfile: Union[str, bytes]) -> None:
+        """
+        Export all channels with headers to a single formatted text file.
+        
+        Args:
+            outputfile: Path to output file
+        """
+        ...
diff --git a/python/py.typed b/python/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/python/setup.py b/python/setup.py
index 1d03fc6..badf881 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -20,5 +20,9 @@
 )
 
 setup(
-    ext_modules=cythonize(extension,language_level=3)
+    ext_modules=cythonize(extension,language_level=3),
+    package_data={
+        "": ["py.typed", "*.pyi"]
+    },
+    zip_safe=False
 )

From c2c9109761bd6231e55358c15abd94637406a47f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20G=C3=B6deke?= <jan.goedeke@pilatus-aircraft.com>
Date: Wed, 17 Dec 2025 09:52:07 +0100
Subject: [PATCH 12/12] chore: modernize Python packaging and CI workflows

- Migrate from setup.cfg to pyproject.toml with PEP 517/621 compliance
- Update to Python build tools (replace setup.py commands with python -m build)
- Upgrade all GitHub Actions to latest versions (@v4, ubuntu-latest)
- Remove outdated cibuildwheel version pinning
- Add numpy as explicit build and runtime dependency
- Bump package version to 3.0.0
- Improve test documentation with development install guidance
- Add Python version badge to README
- Standardize python3 usage across makefiles
---
 .github/workflows/pypi-deploy.yml | 35 ++++++++++----------
 .github/workflows/test.yml        |  7 ++--
 README.md                         | 12 ++++---
 makefile                          |  4 +--
 python/VERSION                    |  2 +-
 python/makefile                   | 21 +++++-------
 python/pyproject.toml             | 55 +++++++++++++++++++++++++++++--
 python/setup.cfg                  | 27 ---------------
 python/setup.py                   |  5 +--
 tests/README.md                   | 21 +++++++++++-
 10 files changed, 113 insertions(+), 76 deletions(-)
 delete mode 100644 python/setup.cfg

diff --git a/.github/workflows/pypi-deploy.yml b/.github/workflows/pypi-deploy.yml
index aa89ef9..db89645 100644
--- a/.github/workflows/pypi-deploy.yml
+++ b/.github/workflows/pypi-deploy.yml
@@ -13,14 +13,14 @@ jobs:
 
   build_setup:
     name: Prepare environment for wheel builds
-    runs-on: ubuntu-24.04
+    runs-on: ubuntu-latest
     needs: [test]
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Prepare wheel build
         run: make -C python/ setup
       - name: Store wheel configuration files
-        uses: actions/upload-artifact@v4.6.0
+        uses: actions/upload-artifact@v4
         with:
           name: wheel-config
           path: python/
@@ -36,12 +36,11 @@ jobs:
         os: [ubuntu-latest, windows-latest]
 
     steps:
-      - uses: actions/checkout@v2
-      - uses: actions/setup-python@v2
+      - uses: actions/checkout@v4
       - name: Install cibuildwheel
-        run: python -m pip install cibuildwheel==2.1.2
+        run: python -m pip install cibuildwheel
       - name: Get wheel configuration files
-        uses: actions/download-artifact@v4.1.7
+        uses: actions/download-artifact@v4
         with:
           name: wheel-config
           path: python/
@@ -49,29 +48,29 @@ jobs:
         run: python -m cibuildwheel --output-dir wheelhouse
         working-directory: python/
       - name: Store binary wheels
-        uses: actions/upload-artifact@v4.6.0
+        uses: actions/upload-artifact@v4
         with:
           name: binary-wheels-${{matrix.os}}-${{ strategy.job-index }}
           path: python/wheelhouse/*.whl
 
   build_sdist:
     name: Build source distribution
-    runs-on: ubuntu-24.04
+    runs-on: ubuntu-latest
     needs: [build_setup]
     steps:
-      - uses: actions/checkout@v2
-      - name: Install cython
-        run: python -m pip install cython==0.29.24
+      - uses: actions/checkout@v4
+      - name: Install build tools
+        run: python -m pip install build
       - name: Get wheel configuration files
-        uses: actions/download-artifact@v4.1.7
+        uses: actions/download-artifact@v4
         with:
           name: wheel-config
           path: python/
       - name: Build sdist
-        run: python setup.py sdist
+        run: python -m build --sdist
         working-directory: python/
       - name: Store source wheels
-        uses: actions/upload-artifact@v4.6.0
+        uses: actions/upload-artifact@v4
         with:
           name: source-wheels
           path: python/dist/*.tar.gz
@@ -81,17 +80,17 @@ jobs:
 
   upload_pypi:
     name: Upload wheels to PyPI
-    runs-on: ubuntu-24.04
+    runs-on: ubuntu-latest
     needs: [build_wheels, build_sdist]
 
     steps:
       - name: Get source wheels
-        uses: actions/download-artifact@v4.1.7
+        uses: actions/download-artifact@v4
         with:
           name: source-wheels
           path: dist/
       - name: Get binary wheels
-        uses: actions/download-artifact@v4.1.7
+        uses: actions/download-artifact@v4
         with:
           path: dist/
           pattern: binary-wheels-*
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e3b64e7..861d309 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -5,6 +5,7 @@ on:
     branches: [ master ]
   pull_request:
     branches: [ master ]
+  workflow_call:
 
 jobs:
   test:
@@ -23,13 +24,11 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
 
-    - name: Install dependencies
+    - name: Install test dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install pytest numpy cython setuptools wheel
+        pip install pytest
 
-    # Cross-platform build and test using Makefile
-    # Requires bash shell on Windows (Git Bash)
     - name: Build and Test
       shell: bash
       run: |
diff --git a/README.md b/README.md
index c239a09..883d57d 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@
 ![Tests](https://github.com/RecordEvolution/IMCtermite/actions/workflows/test.yml/badge.svg)
 ![CI Build Wheel](https://github.com/RecordEvolution/IMCtermite/actions/workflows/pypi-deploy.yml/badge.svg?branch=&event=push)
 [![PYPI](https://img.shields.io/pypi/v/IMCtermite.svg)](https://pypi.org/project/imctermite/)
+[![Python Version](https://img.shields.io/pypi/pyversions/imctermite)](https://pypi.org/project/imctermite/)
 
 # IMCtermite
 
@@ -151,10 +152,13 @@ python3 -m pip install imctermite
 ```
 
 which provides binary wheels for multiple architectures on _Windows_ and _Linux_
-and most _Python 3.x_ distributions. However, if your platform/architecture is
-not supported you can still compile the source distribution yourself, which
-requires _python3_setuptools_ and an up-to-date compiler supporting C++11
-standard (e.g. _gcc version >= 10.2.0_).
+and most _Python 3.x_ distributions. **Note:** Starting from version 3.0.0, 
+imctermite requires numpy as a dependency, which will be automatically 
+installed if not already present.
+
+However, if your platform/architecture is not supported, you can still compile
+the source distribution yourself, which requires _python3_setuptools_, _Cython_,
+_numpy_, and an up-to-date compiler supporting the C++11 standard (e.g. _gcc version >= 10.2.0_).
 
 ## Usage
 
diff --git a/makefile b/makefile
index f44495b..41580c5 100644
--- a/makefile
+++ b/makefile
@@ -87,7 +87,7 @@ docker-run:
 # python
 
 python-build: check-tags
-	make -C python/ build-inplace
+	$(MAKE) -C python/ build
 	cp python/imctermite*.so ./ -v 2>/dev/null || cp python/imctermite*.pyd ./ -v 2>/dev/null || true
 
 python-clean:
@@ -95,7 +95,7 @@ python-clean:
 	rm -vf imctermite*.so imctermite*.pyd
 
 python-test:
-	PYTHONPATH=./ python python/examples/usage.py
+	PYTHONPATH=./ python3 python/examples/usage.py
 
 #-----------------------------------------------------------------------------#
 # tests
diff --git a/python/VERSION b/python/VERSION
index d302656..4a36342 100644
--- a/python/VERSION
+++ b/python/VERSION
@@ -1 +1 @@
-2.1.18
+3.0.0
diff --git a/python/makefile b/python/makefile
index 43dd6ea..878a7b3 100644
--- a/python/makefile
+++ b/python/makefile
@@ -11,25 +11,20 @@ setup-clean:
 	rm -rf lib/
 
 build: setup
-	python setup.py build
-
-build-inplace: setup
-	python setup.py build_ext --inplace
+	python3 -m pip install -e .
 
 build-sdist: setup
-	python setup.py sdist
-	python -m twine check dist/*
+	python3 -m build --sdist
+	python3 -m twine check dist/*
 
 build-bdist: setup
-	python setup.py bdist
-	python -m twine check dist/*
+	python3 -m build --wheel
+	python3 -m twine check dist/*
 
 build-clean:
-	python setup.py clean --all
 	rm -vf imctermite*.so imctermite*.pyd imctermite*.cpp
-	rm -vf IMCtermite*.so IMCtermite*.pyd IMCtermite*.cpp
-	rm -rvf dist/ IMCtermite.egg-info/
 	rm -rvf dist/ imctermite.egg-info/
+	rm -rvf build/
 
 cibuildwheel-build: setup
 	cibuildwheel --platform linux
@@ -38,9 +33,9 @@ cibuildwheel-clean:
 	rm -rvf wheelhouse/
 
 pypi-upload:
-	python -m twine upload dist/$(shell ls -t dist/ | head -n1)
+	python3 -m twine upload dist/$(shell ls -t dist/ | head -n1)
 
 clean: setup build-clean cibuildwheel-clean setup-clean
 
 run-example:
-	PYTHONPATH=$(pwd) python examples/usage_files.py
+	PYTHONPATH=$(CURDIR) python3 examples/usage_files.py
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 0e657f5..39b64ac 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,6 +1,57 @@
 [build-system]
-requires = ["setuptools", "wheel","Cython"]
+requires = ["setuptools>=77.0.0", "wheel", "Cython", "numpy"]
 build-backend = "setuptools.build_meta"
 
+[project]
+name = "imctermite"
+description = "Enables extraction of measurement data from binary files with extension 'raw' used by proprietary software imcFAMOS and imcSTUDIO and facilitates its storage in open source file formats"
+readme = "README.md"
+requires-python = ">=3.10"
+license = "MIT"
+authors = [
+  {name = "Record Evolution GmbH", email = "mario.fink@record-evolution.de"}
+]
+maintainers = [
+  {name = "Record Evolution GmbH"}
+]
+keywords = ["IMC", "raw", "imcFAMOS", "imcSTUDIO", "imcCRONOS"]
+classifiers = [
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+  "Operating System :: OS Independent",
+  "Topic :: Scientific/Engineering",
+  "Topic :: Software Development :: Libraries :: Python Modules"
+]
+dependencies = [
+  "numpy>=1.26.0"
+]
+dynamic = ["version"]
+
+[project.optional-dependencies]
+test = ["pytest>=7.0.0"]
+
+[project.urls]
+Homepage = "https://github.com/RecordEvolution/IMCtermite.git"
+
+[tool.setuptools]
+# This is a single extension module build, not a package with subdirectories
+py-modules = []
+# Explicitly set packages to empty to prevent auto-discovery
+packages = []
+
+[tool.setuptools.dynamic]
+version = {file = "VERSION"}
+
+[tool.setuptools.package-data]
+"*" = ["py.typed", "*.pyi"]
+
 [tool.cibuildwheel]
-before-all = ""
+# Build for Python 3.10-3.13
+build = "cp310-* cp311-* cp312-* cp313-*"
+# Skip 32-bit builds and musllinux
+skip = "*-win32 *-manylinux_i686 *-musllinux_*"
+# Tests are already run in test.yml workflow before wheel building
+test-skip = "*"
diff --git a/python/setup.cfg b/python/setup.cfg
deleted file mode 100644
index 86528dc..0000000
--- a/python/setup.cfg
+++ /dev/null
@@ -1,27 +0,0 @@
-
-[metadata]
-name = imctermite
-description = Enables extraction of measurement data from binary files with extension 'raw' used by proprietary software imcFAMOS and imcSTUDIO and facilitates its storage in open source file formats
-long_description = file: README.md
-# long_description_content_type = text/x-rst
-long_description_content_type = text/markdown
-version = file: VERSION
-author = Record Evolution GmbH
-author_email = mario.fink@record-evolution.de
-maintainer = Record Evolution GmbH
-url= https://github.com/RecordEvolution/IMCtermite.git
-license = MIT License
-license_files = LICENSE
-keywords = IMC, raw, imcFAMOS, imcSTUDIO, imcCRONOS
-classifiers =
-  Programming Language :: Python :: 3
-  Programming Language :: Python :: 3.10
-  Programming Language :: Python :: 3.11
-  Programming Language :: Python :: 3.12
-  Programming Language :: Python :: 3.13
-  License :: OSI Approved :: MIT License
-  Operating System :: OS Independent
-  Topic :: Scientific/Engineering
-  Topic :: Software Development :: Libraries :: Python Modules
-
-[options]
diff --git a/python/setup.py b/python/setup.py
index badf881..afb021d 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -20,9 +20,6 @@
 )
 
 setup(
-    ext_modules=cythonize(extension,language_level=3),
-    package_data={
-        "": ["py.typed", "*.pyi"]
-    },
+    ext_modules=cythonize(extension, language_level=3),
     zip_safe=False
 )
diff --git a/tests/README.md b/tests/README.md
index aa8343c..715ea11 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -25,6 +25,25 @@ pytest tests/test_python.py
 
 ## Prerequisites
 
+### Recommended: Development install
+
+Install the package in editable mode with test dependencies (handles all requirements automatically):
+
+```bash
+pip install -e "python[test]"
+```
+
+Then run tests with pytest:
 ```bash
-pip install cython pytest setuptools
+pytest
 ```
+
+### Alternative: Using makefile
+
+If you prefer `make test`, just install pytest first:
+
+```bash
+pip install pytest
+make test
+```
+