def normalize_perf():
    # Performance exercise for Prepared.normalize (python/cpython#143658).
    import importlib_metadata  # end warmup

    # A single call completes in < 1ms, so repeat it to make the cost
    # visible in the measurement.
    # https://github.com/jaraco/pytest-perf/issues/12
    for _ in range(1000):
        importlib_metadata.Prepared.normalize('sample')


def normalize(name):
    """
    Apply PEP 503 normalization, spelling dashes as underscores.

    Lowercases ``name`` and collapses every run of ``-``, ``_`` and
    ``.`` into a single ``_``.

    Built from plain ``str`` methods instead of the ``re.sub`` call
    that PEP 503 prescribes, for performance benefits
    (see python/cpython#143658).
    """
    normalized = name.lower()
    for separator in ("-", "."):
        normalized = normalized.replace(separator, "_")
    # Condense repeats: keep merging doubled underscores until no
    # adjacent pair is left.
    while "__" in normalized:
        normalized = normalized.replace("__", "_")
    return normalized
class PreparedTests(unittest.TestCase):
    """Checks for ``Prepared.normalize``."""

    @fixtures.parameterize(
        *(
            dict(input=raw, expected=normalized)
            for raw, normalized in (
                # Simple
                ('sample', 'sample'),
                # Mixed case
                ('Sample', 'sample'),
                ('SAMPLE', 'sample'),
                ('SaMpLe', 'sample'),
                # Separator conversions
                ('sample-pkg', 'sample_pkg'),
                ('sample.pkg', 'sample_pkg'),
                ('sample_pkg', 'sample_pkg'),
                # Multiple separators
                ('sample---pkg', 'sample_pkg'),
                ('sample___pkg', 'sample_pkg'),
                ('sample...pkg', 'sample_pkg'),
                # Mixed separators
                ('sample-._pkg', 'sample_pkg'),
                ('sample_.-pkg', 'sample_pkg'),
                # Complex
                ('Sample__Pkg-name.foo', 'sample_pkg_name_foo'),
                ('Sample__Pkg.name__foo', 'sample_pkg_name_foo'),
                # Uppercase with separators
                ('SAMPLE-PKG', 'sample_pkg'),
                ('Sample.Pkg', 'sample_pkg'),
                ('SAMPLE_PKG', 'sample_pkg'),
            )
        )
    )
    def test_normalize(self, input, expected):
        # Each (input, expected) pair from the table above is supplied
        # as keyword arguments by the parameterize decorator.
        actual = Prepared.normalize(input)
        self.assertEqual(actual, expected)