From 9baaa044078d7929237f3111e8055ef85f31c949 Mon Sep 17 00:00:00 2001 From: Peter Somhorst Date: Mon, 13 Apr 2026 12:49:24 +0200 Subject: [PATCH] Set path in loaded data to PurePath When data objects are pickled and then unpickled on a different system, Paths can lead to errors, because PosixPaths can't be created on Windows machines, and WindowsPaths can't be created on Posix machines. This commit replaces Paths in EITData with PurePaths. These are either PurePosixPaths or PureWindowsPaths, both of which can be handled on all machines. --- eitprocessing/datahandling/eitdata.py | 16 ++++++++-------- eitprocessing/datahandling/loading/__init__.py | 6 +++--- eitprocessing/datahandling/loading/draeger.py | 9 ++++----- eitprocessing/datahandling/loading/sentec.py | 7 ++++--- eitprocessing/datahandling/loading/timpel.py | 7 ++++--- tests/eitdata/test_loading_draeger.py | 8 ++++++-- tests/eitdata/test_loading_sentec.py | 12 +++++++++++- tests/eitdata/test_loading_timpel.py | 5 +++++ 8 files changed, 45 insertions(+), 25 deletions(-) diff --git a/eitprocessing/datahandling/eitdata.py b/eitprocessing/datahandling/eitdata.py index ea50d9bf0..c0e6c4dfc 100644 --- a/eitprocessing/datahandling/eitdata.py +++ b/eitprocessing/datahandling/eitdata.py @@ -3,7 +3,7 @@ import warnings from dataclasses import InitVar, dataclass, field from enum import Enum -from pathlib import Path +from pathlib import PurePath from typing import TYPE_CHECKING, Any, TypeVar import numpy as np @@ -40,7 +40,7 @@ class is meant to hold data from (part of) a singular continuous measurement. pixel_impedance: Impedance values for each pixel at each frame. 
""" # TODO: fix docstring - path: str | Path | list[Path | str] = field(compare=False, repr=False) + path: str | PurePath | list[PurePath | str] = field(compare=False, repr=False) nframes: int = field(repr=False) time: np.ndarray = field(repr=False) sample_frequency: float = field(metadata={"check_equivalence": True}, repr=False) @@ -94,16 +94,16 @@ def framerate(self) -> float: @staticmethod def ensure_path_list( - path: str | Path | list[str | Path], - ) -> list[Path]: + path: str | PurePath | list[str | PurePath], + ) -> list[PurePath]: """Return the path or paths as a list. - The path of any EITData object can be a single str/Path or a list of str/Path objects. This method returns a - list of Path objects given either a str/Path or list of str/Paths. + The path of any EITData object can be a single str/PurePath or a list of str/PurePath objects. This method + returns a list of PurePath objects given either a str/PurePath or list of str/PurePaths. """ if isinstance(path, list): - return [Path(p) for p in path] - return [Path(path)] + return [PurePath(p) for p in path] + return [PurePath(path)] def __add__(self: Self, other: Self) -> Self: return self.concatenate(other) diff --git a/eitprocessing/datahandling/loading/__init__.py b/eitprocessing/datahandling/loading/__init__.py index b69180f54..be5b93224 100644 --- a/eitprocessing/datahandling/loading/__init__.py +++ b/eitprocessing/datahandling/loading/__init__.py @@ -1,5 +1,5 @@ from functools import reduce -from pathlib import Path +from pathlib import Path, PurePath from eitprocessing.datahandling.datacollection import DataCollection from eitprocessing.datahandling.eitdata import EITData, Vendor @@ -7,7 +7,7 @@ def load_eit_data( - path: str | Path | list[str | Path], + path: str | PurePath | list[str | PurePath], vendor: Vendor | str, label: str | None = None, name: str | None = None, @@ -78,7 +78,7 @@ def load_eit_data( interval_datasets: list[DataCollection] = [] for single_path in paths: - 
single_path.resolve(strict=True) # raise error if any file does not exist + Path(single_path).resolve(strict=True) # raise error if any file does not exist for single_path in paths: loaded_data = load_from_single_path( diff --git a/eitprocessing/datahandling/loading/draeger.py b/eitprocessing/datahandling/loading/draeger.py index 5fb048fd8..93b0a7d51 100644 --- a/eitprocessing/datahandling/loading/draeger.py +++ b/eitprocessing/datahandling/loading/draeger.py @@ -5,6 +5,7 @@ import sys import warnings from functools import partial +from pathlib import Path, PurePath from typing import TYPE_CHECKING, NamedTuple from warnings import catch_warnings @@ -21,8 +22,6 @@ from eitprocessing.datahandling.sparsedata import SparseData if TYPE_CHECKING: - from pathlib import Path - from numpy.typing import NDArray load_draeger_data = partial(load_eit_data, vendor=Vendor.DRAEGER) @@ -31,13 +30,13 @@ def load_from_single_path( # noqa: PLR0915 - path: Path, + path: PurePath, sample_frequency: float | None = None, first_frame: int = 0, max_frames: int | None = None, ) -> dict[str, DataCollection]: """Load Dräger EIT data from path.""" - file_size = path.stat().st_size + file_size = Path(path).stat().st_size frame_size: int medibus_fields: list @@ -88,7 +87,7 @@ def load_from_single_path( # noqa: PLR0915 phases: list[tuple[float, int]] = [] medibus_data = np.zeros((len(medibus_fields), n_frames), dtype=np.float32) - with path.open("br") as fo, mmap.mmap(fo.fileno(), length=0, access=mmap.ACCESS_READ) as fh: + with Path(path).open("br") as fo, mmap.mmap(fo.fileno(), length=0, access=mmap.ACCESS_READ) as fh: fh.seek(first_frame_to_load * frame_size) reader = BinReader(fh) previous_marker = None diff --git a/eitprocessing/datahandling/loading/sentec.py b/eitprocessing/datahandling/loading/sentec.py index 5a252eda2..bedb5f373 100644 --- a/eitprocessing/datahandling/loading/sentec.py +++ b/eitprocessing/datahandling/loading/sentec.py @@ -5,6 +5,7 @@ import warnings from enum import 
IntEnum from functools import partial +from pathlib import Path from typing import TYPE_CHECKING import numpy as np @@ -18,7 +19,7 @@ from eitprocessing.datahandling.sparsedata import SparseData if TYPE_CHECKING: - from pathlib import Path + from pathlib import PurePath from numpy.typing import NDArray @@ -28,7 +29,7 @@ def load_from_single_path( - path: Path, + path: PurePath, sample_frequency: float | None = None, first_frame: int = 0, max_frames: int | None = None, @@ -36,7 +37,7 @@ def load_from_single_path( """Load Sentec EIT data from path.""" time: list[float] = [] index = 0 - with path.open("br") as fo, mmap.mmap(fo.fileno(), length=0, access=mmap.ACCESS_READ) as fh: + with Path(path).open("br") as fo, mmap.mmap(fo.fileno(), length=0, access=mmap.ACCESS_READ) as fh: file_length = os.fstat(fo.fileno()).st_size reader = BinReader(fh, endian="little") version = reader.uint8() diff --git a/eitprocessing/datahandling/loading/timpel.py b/eitprocessing/datahandling/loading/timpel.py index 2a93107f7..ea70e585a 100644 --- a/eitprocessing/datahandling/loading/timpel.py +++ b/eitprocessing/datahandling/loading/timpel.py @@ -2,6 +2,7 @@ import warnings from functools import partial +from pathlib import Path from typing import TYPE_CHECKING import numpy as np @@ -15,7 +16,7 @@ from eitprocessing.datahandling.sparsedata import SparseData if TYPE_CHECKING: - from pathlib import Path + from pathlib import PurePath from numpy.typing import NDArray @@ -30,7 +31,7 @@ def load_from_single_path( - path: Path, + path: PurePath, sample_frequency: float | None = TIMPEL_SAMPLE_FREQUENCY, first_frame: int = 0, max_frames: int | None = None, @@ -41,7 +42,7 @@ def load_from_single_path( try: data: NDArray = np.loadtxt( - str(path), + str(Path(path)), dtype=float, delimiter=",", skiprows=first_frame, diff --git a/tests/eitdata/test_loading_draeger.py b/tests/eitdata/test_loading_draeger.py index 1bea8d9e7..420debd69 100644 --- a/tests/eitdata/test_loading_draeger.py +++ 
b/tests/eitdata/test_loading_draeger.py @@ -1,6 +1,6 @@ import sys import tempfile -from pathlib import Path +from pathlib import Path, PurePath import numpy as np import pytest @@ -80,6 +80,10 @@ def test_load_draeger( "Loading without sample frequency should yield the same data" ) + path = sequence.eit_data["raw"].path + assert isinstance(path, PurePath), "EITData path should be a PurePath object" + assert not isinstance(path, Path), "EITData path should be a PurePath object" + def test_draeger_20hz_healthy_volunteer_2_differ( draeger_20hz_healthy_volunteer: Sequence, draeger_20hz_healthy_volunteer_fixed_rr: Sequence @@ -187,7 +191,7 @@ def test_event_on_first_frame(draeger_20hz_healthy_volunteer: Sequence): with tempfile.NamedTemporaryFile(**kwargs) as temporary_file: # Create a temporary file, that is removed after the context manager is closed tempfile_path = Path(temporary_file.name) - with draeger_20hz_healthy_volunteer.eit_data["raw"].path.open("rb") as original_file: + with Path(draeger_20hz_healthy_volunteer.eit_data["raw"].path).open("rb") as original_file: original_file.seek(ignore_bytes) # skip frames before the event temporary_file.write(original_file.read()) # write remaining data to temp file diff --git a/tests/eitdata/test_loading_sentec.py b/tests/eitdata/test_loading_sentec.py index 73f8bb244..301ebfcc6 100644 --- a/tests/eitdata/test_loading_sentec.py +++ b/tests/eitdata/test_loading_sentec.py @@ -1,4 +1,4 @@ -from pathlib import Path +from pathlib import Path, PurePath import numpy as np import pytest @@ -55,6 +55,9 @@ def test_load_sentec_single_file( label="something else", ), "Loading with a different label should yield same data" + assert isinstance(sequence.eit_data["raw"].path, PurePath), "EITData path should be a PurePath object" + assert not isinstance(sequence.eit_data["raw"].path, Path), "EITData path should be a PurePath object" + @pytest.mark.parametrize( ("sequence_a_fixture_name", "sequence_b_fixture_name", 
"sequence_merge_fixture_name"), @@ -95,6 +98,13 @@ def test_load_sentec_multiple_files( "Second part of merged sequence should match second individual sequence" ) + assert all(isinstance(path_, PurePath) for path_ in sequence_merged.eit_data["raw"].path), ( + "EITData path should be a PurePath object" + ) + assert not any(isinstance(path_, Path) for path_ in sequence_merged.eit_data["raw"].path), ( + "EITData path should not be a Path object" + ) + def test_load_sentec_skip_frames(sentec_healthy_volunteer_1a: Sequence, sentec_healthy_volunteer_1a_path: Path): n_frames = len(sentec_healthy_volunteer_1a) diff --git a/tests/eitdata/test_loading_timpel.py b/tests/eitdata/test_loading_timpel.py index 08da4d1bb..7b797edaa 100644 --- a/tests/eitdata/test_loading_timpel.py +++ b/tests/eitdata/test_loading_timpel.py @@ -1,3 +1,5 @@ +from pathlib import Path, PurePath + import pytest from eitprocessing.datahandling.eitdata import EITData, Vendor @@ -25,6 +27,9 @@ def test_loading_timpel( loaded_using_enum_vendor = load_eit_data(sequence.eit_data["raw"].path, vendor=Vendor.TIMPEL, label="timpel") assert sequence == loaded_using_enum_vendor + assert isinstance(sequence.eit_data["raw"].path, PurePath), "EITData path should be a PurePath object" + assert not isinstance(sequence.eit_data["raw"].path, Path), "EITData path should be a PurePath object" + def test_loading_timpel_multiple_files(): # TODO: find out whether it is possible to have a single measurements split into multiple files