diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 03031190..508b02e4 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -35,7 +35,7 @@ NullFinder, install, ) -from ._functools import method_cache, pass_none +from ._functools import method_cache, noop, pass_none, passthrough from ._itertools import always_iterable, bucket, unique_everseen from ._meta import PackageMetadata, SimplePath from ._typing import md_none @@ -787,6 +787,20 @@ def find_distributions(self, context=Context()) -> Iterable[Distribution]: """ +@passthrough +def _clear_after_fork(cached): + """Ensure ``func`` clears cached state after ``fork`` when supported. + + ``FastPath`` caches zip-backed ``pathlib.Path`` objects that retain a + reference to the parent's open ``ZipFile`` handle. Re-using a cached + instance in a forked child can therefore resurrect invalid file pointers + and trigger ``BadZipFile``/``OSError`` failures (python/importlib_metadata#520). + Registering ``cache_clear`` with ``os.register_at_fork`` keeps each process + on its own cache. + """ + getattr(os, 'register_at_fork', noop)(after_in_child=cached.cache_clear) + + class FastPath: """ Micro-optimized class for searching a root for children. @@ -803,7 +817,8 @@ class FastPath: True """ - @functools.lru_cache() # type: ignore[misc] + @_clear_after_fork # type: ignore[misc] + @functools.lru_cache() def __new__(cls, root): return super().__new__(cls) diff --git a/importlib_metadata/_functools.py b/importlib_metadata/_functools.py index 5dda6a21..b1fd04a8 100644 --- a/importlib_metadata/_functools.py +++ b/importlib_metadata/_functools.py @@ -1,5 +1,6 @@ import functools import types +from typing import Callable, TypeVar # from jaraco.functools 3.3 @@ -102,3 +103,33 @@ def wrapper(param, *args, **kwargs): return func(param, *args, **kwargs) return wrapper + + +# From jaraco.functools 4.4 +def noop(*args, **kwargs): + """ + A no-operation function that does nothing. + + >>> noop(1, 2, three=3) + """ + + +_T = TypeVar('_T') + + +# From jaraco.functools 4.4 +def passthrough(func: Callable[..., object]) -> Callable[[_T], _T]: + """ + Wrap the function to always return the first parameter. + + >>> passthrough(print)('3') + 3 + '3' + """ + + @functools.wraps(func) + def wrapper(first: _T, *args, **kwargs) -> _T: + func(first, *args, **kwargs) + return first + + return wrapper # type: ignore[return-value] diff --git a/newsfragments/520.bugfix.rst b/newsfragments/520.bugfix.rst new file mode 100644 index 00000000..1fbe7cec --- /dev/null +++ b/newsfragments/520.bugfix.rst @@ -0,0 +1 @@ +Fixed errors in FastPath under fork-multiprocessing. diff --git a/tests/test_zip.py b/tests/test_zip.py index d4f8e2f0..165aa6dd 100644 --- a/tests/test_zip.py +++ b/tests/test_zip.py @@ -1,7 +1,10 @@ +import multiprocessing +import os import sys import unittest from importlib_metadata import ( + FastPath, PackageNotFoundError, distribution, distributions, @@ -47,6 +50,37 @@ def test_one_distribution(self): dists = list(distributions(path=sys.path[:1])) assert len(dists) == 1 + @unittest.skipUnless( + hasattr(os, 'register_at_fork') + and 'fork' in multiprocessing.get_all_start_methods(), + 'requires fork-based multiprocessing support', + ) + def test_fastpath_cache_cleared_in_forked_child(self): + zip_path = sys.path[0] + + FastPath(zip_path) + assert FastPath.__new__.cache_info().currsize >= 1 + + ctx = multiprocessing.get_context('fork') + parent_conn, child_conn = ctx.Pipe() + + def child(conn, root): + try: + before = FastPath.__new__.cache_info().currsize + FastPath(root) + after = FastPath.__new__.cache_info().currsize + conn.send((before, after)) + finally: + conn.close() + + proc = ctx.Process(target=child, args=(child_conn, zip_path)) + proc.start() + child_conn.close() + cache_sizes = parent_conn.recv() + proc.join() + + self.assertEqual(cache_sizes, (0, 1)) + class TestEgg(TestZip): def setUp(self):