diff --git a/.clang-format b/.clang-format index fbbb227e..911ba34d 100644 --- a/.clang-format +++ b/.clang-format @@ -26,3 +26,4 @@ BraceWrapping: AllowAllConstructorInitializersOnNextLine: true ConstructorInitializerAllOnOneLineOrOnePerLine: true AllowShortCaseLabelsOnASingleLine: true +SortIncludes: false diff --git a/.github/workflows/branchbuild.yml b/.github/workflows/branchbuild.yml index 41bc9b13..fe738102 100644 --- a/.github/workflows/branchbuild.yml +++ b/.github/workflows/branchbuild.yml @@ -27,8 +27,8 @@ jobs: - name: Generate cython run: | - chmod +x ./src/rapidfuzz/generate.sh - ./src/rapidfuzz/generate.sh + chmod +x ./tools/generate_cython.sh + ./tools/generate_cython.sh - name: build run: | @@ -60,7 +60,7 @@ jobs: run: | git clone https://github.com/rapidfuzz/rapidfuzz-cpp.git cd rapidfuzz-cpp - git checkout v3.1.1 + git checkout v3.2.0 mkdir build && cd build cmake .. -DCMAKE_BUILD_TYPE=Release cmake --build . @@ -104,7 +104,7 @@ jobs: run: | git clone https://github.com/rapidfuzz/rapidfuzz-cpp.git cd rapidfuzz-cpp - git checkout v3.1.1 + git checkout v3.2.0 mkdir build && cd build cmake .. -DCMAKE_BUILD_TYPE=Release cmake --build . @@ -180,6 +180,16 @@ jobs: run: | python -m PyInstaller.utils.run_tests --include_only rapidfuzz. 
+ - name: test cx_freeze packaging + if: matrix.python-version != '3.13' + run: | + pip install cx_freeze + cd tests/freezeTools + cxfreeze --script script.py --target-dir cxfreezeDist + ls + ./cxfreezeDist/script + + # validate sse2 code on gcc sde_tests: name: "Test sse2 code on Linux (gcc)" @@ -205,7 +215,7 @@ jobs: - name: Install testing dependencies run: | python -m pip install --upgrade pip - pip install pytest hypothesis pandas mypy pyinstaller[hook_testing] + pip install pytest hypothesis pandas mypy - name: Test on nehalem which does not support avx2 run: ./sde/sde -nhm -- pytest tests diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index ac76504c..abf6951a 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -24,8 +24,8 @@ jobs: - name: Generate cython run: | - chmod +x ./src/rapidfuzz/generate.sh - ./src/rapidfuzz/generate.sh + chmod +x ./tools/generate_cython.sh + ./tools/generate_cython.sh # for cython tests inplace installation is required - name: build diff --git a/.github/workflows/releasebuild.yml b/.github/workflows/releasebuild.yml index 72973817..081e76c5 100644 --- a/.github/workflows/releasebuild.yml +++ b/.github/workflows/releasebuild.yml @@ -27,8 +27,8 @@ jobs: # The cythonized files allow installation from the sdist without cython - name: Generate cython run: | - chmod +x ./src/rapidfuzz/generate.sh - ./src/rapidfuzz/generate.sh + chmod +x ./tools/generate_cython.sh + ./tools/generate_cython.sh - name: Build sdist run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 180efedb..f0b1f29e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,11 @@ # # See https://github.com/pre-commit/pre-commit +exclude: | + (?x)( + tools/sdist.patch + ) + repos: # Standard hooks - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c5401e05..5c17ecb4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ 
-27,7 +27,7 @@ Added Fixed ~~~~~ - drop support for Python 3.8 -- switch build system to `scikit-build-core` +- switch build system to ``scikit-build-core`` [3.9.7] - 2024-09-02 ^^^^^^^^^^^^^^^^^^^^ @@ -47,7 +47,7 @@ Changed Fixed ~~~~~ * include simd binaries in pyinstaller builds -* fix builds with setuptools 72 by upgrading `scikit-build` +* fix builds with setuptools 72 by upgrading ``scikit-build`` [3.9.4] - 2024-07-02 ^^^^^^^^^^^^^^^^^^^^ diff --git a/CMakeLists.txt b/CMakeLists.txt index 11f53f73..a24fbfcb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,8 +30,8 @@ endif() if(MSVC) add_compile_options(/W4 /bigobj /wd4127) - # NOTE: _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR is temporary. When building on - # VS 2022 17.10 or newer, but using an older runtime, mutexes can crash + # NOTE: _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR is temporary. When building on VS + # 2022 17.10 or newer, but using an older runtime, mutexes can crash add_compile_options(/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR) else() add_compile_options(-Wall -Wextra -pedantic -Wno-psabi) diff --git a/pyproject.toml b/pyproject.toml index be970482..3ffbf330 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,8 +54,7 @@ wheel.exclude = [ "**.cpp", "**.hpp", "**.h", - "CMakeLists.txt", - "generate.sh" + "CMakeLists.txt" ] wheel.packages = ["src/rapidfuzz"] wheel.cmake = false @@ -162,7 +161,6 @@ select = [ extend-ignore = [ "PLR", # Design related pylint codes "E501", # Line too long - "PT004", # Use underscore for non-returning fixture (use usefixture instead) "PTH123", # use pathlib instead of builtin open ] unfixable = [ @@ -171,6 +169,7 @@ unfixable = [ ] flake8-unused-arguments.ignore-variadic-names = true isort.required-imports = ["from __future__ import annotations"] +isort.combine-as-imports = true [tool.ruff.lint.per-file-ignores] "tests/**" = ["T20"] diff --git a/src/rapidfuzz/CMakeLists.txt b/src/rapidfuzz/CMakeLists.txt index a4bfbba3..c8cdfd69 100644 --- a/src/rapidfuzz/CMakeLists.txt 
+++ b/src/rapidfuzz/CMakeLists.txt @@ -10,8 +10,8 @@ function(create_cython_target _name) VERBATIM COMMAND Python::Interpreter -m cython "${CMAKE_CURRENT_LIST_DIR}/${_name}.pyx" - --cplus -I "${CMAKE_CURRENT_LIST_DIR}" - --output-file "${CMAKE_CURRENT_BINARY_DIR}/${_name}.cxx") + --cplus -I "${CMAKE_CURRENT_LIST_DIR}" --output-file + "${CMAKE_CURRENT_BINARY_DIR}/${_name}.cxx") set(${_name} ${CMAKE_CURRENT_BINARY_DIR}/${_name}.cxx diff --git a/src/rapidfuzz/__init__.pyi b/src/rapidfuzz/__init__.pyi index f201a976..ffd9870f 100644 --- a/src/rapidfuzz/__init__.pyi +++ b/src/rapidfuzz/__init__.pyi @@ -4,7 +4,9 @@ __author__: str __license__: str __version__: str -from rapidfuzz import distance as distance -from rapidfuzz import fuzz as fuzz -from rapidfuzz import process as process -from rapidfuzz import utils as utils +from rapidfuzz import ( + distance as distance, + fuzz as fuzz, + process as process, + utils as utils, +) diff --git a/src/rapidfuzz/__pyinstaller/hook-rapidfuzz.py b/src/rapidfuzz/__pyinstaller/hook-rapidfuzz.py index 422a78d3..0a54e673 100644 --- a/src/rapidfuzz/__pyinstaller/hook-rapidfuzz.py +++ b/src/rapidfuzz/__pyinstaller/hook-rapidfuzz.py @@ -5,6 +5,7 @@ def filterUnneededImports(name): + return False if "__pyinstaller" in name: return False diff --git a/src/rapidfuzz/_common_py.py b/src/rapidfuzz/_common_py.py index 72db5562..5e0d68a3 100644 --- a/src/rapidfuzz/_common_py.py +++ b/src/rapidfuzz/_common_py.py @@ -4,7 +4,7 @@ from __future__ import annotations from array import array -from typing import Hashable, Sequence +from collections.abc import Hashable, Sequence def conv_sequence(s: Sequence[Hashable]) -> Sequence[Hashable]: diff --git a/src/rapidfuzz/_utils.py b/src/rapidfuzz/_utils.py index 22acf7e9..d9ced40a 100644 --- a/src/rapidfuzz/_utils.py +++ b/src/rapidfuzz/_utils.py @@ -3,14 +3,10 @@ from __future__ import annotations -import importlib -import os import sys from math import isnan from typing import Any, Callable -from 
rapidfuzz._feature_detector import AVX2, SSE2, supports - pandas_NA = None @@ -80,66 +76,6 @@ def add_scorer_attrs(func: Any, cached_scorer_call: dict[str, Callable[..., dict func._RF_OriginalScorer = func -def optional_import_module(module: str) -> Any: - """ - try to import module. Return None on failure - """ - try: - return importlib.import_module(module) - except Exception: - return None - - -def vectorized_import(name: str) -> tuple[Any, list[Any]]: - """ - import module best fitting for current CPU - """ - if supports(AVX2): - module = optional_import_module(name + "_avx2") - if module is not None: - return module - if supports(SSE2): - module = optional_import_module(name + "_sse2") - if module is not None: - return module - - return importlib.import_module(name) - - -def fallback_import( - module: str, - name: str, -) -> Any: - """ - import library function and possibly fall back to a pure Python version - when no C++ implementation is available - """ - impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") - - py_mod = importlib.import_module(module + "_py") - py_func = getattr(py_mod, name) - if not py_func: - msg = f"cannot import name {name!r} from {py_mod.__name!r} ({py_mod.__file__})" - raise ImportError(msg) - - if impl == "cpp": - cpp_mod = vectorized_import(module + "_cpp") - elif impl == "python": - return py_func - else: - try: - cpp_mod = vectorized_import(module + "_cpp") - except Exception: - return py_func - - cpp_func = getattr(cpp_mod, name) - if not cpp_func: - msg = f"cannot import name {name!r} from {cpp_mod.__name!r} ({cpp_mod.__file__})" - raise ImportError(msg) - - return cpp_func - - default_distance_attribute: dict[str, Callable[..., dict[str, Any]]] = {"get_scorer_flags": _get_scorer_flags_distance} default_similarity_attribute: dict[str, Callable[..., dict[str, Any]]] = { "get_scorer_flags": _get_scorer_flags_similarity diff --git a/src/rapidfuzz/distance/CMakeLists.txt b/src/rapidfuzz/distance/CMakeLists.txt index 59f7cbc7..1281cd63 
100644 --- a/src/rapidfuzz/distance/CMakeLists.txt +++ b/src/rapidfuzz/distance/CMakeLists.txt @@ -10,8 +10,8 @@ function(create_cython_target _name) VERBATIM COMMAND Python::Interpreter -m cython "${CMAKE_CURRENT_LIST_DIR}/${_name}.pyx" - --cplus -I "${CMAKE_CURRENT_LIST_DIR}/.." - --output-file "${CMAKE_CURRENT_BINARY_DIR}/${_name}.cxx") + --cplus -I "${CMAKE_CURRENT_LIST_DIR}/.." --output-file + "${CMAKE_CURRENT_BINARY_DIR}/${_name}.cxx") set(${_name} ${CMAKE_CURRENT_BINARY_DIR}/${_name}.cxx diff --git a/src/rapidfuzz/distance/DamerauLevenshtein.py b/src/rapidfuzz/distance/DamerauLevenshtein.py index a0fdd874..e093fddd 100644 --- a/src/rapidfuzz/distance/DamerauLevenshtein.py +++ b/src/rapidfuzz/distance/DamerauLevenshtein.py @@ -1,11 +1,92 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os -_mod = "rapidfuzz.distance.metrics" -distance = _fallback_import(_mod, "damerau_levenshtein_distance") -similarity = _fallback_import(_mod, "damerau_levenshtein_similarity") -normalized_distance = _fallback_import(_mod, "damerau_levenshtein_normalized_distance") -normalized_similarity = _fallback_import(_mod, "damerau_levenshtein_normalized_similarity") +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = ["distance", "normalized_distance", "normalized_similarity", "similarity"] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + damerau_levenshtein_distance as distance, + damerau_levenshtein_normalized_distance as normalized_distance, + damerau_levenshtein_normalized_similarity as normalized_similarity, + damerau_levenshtein_similarity as similarity, + ) + + imported = True + + 
if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + damerau_levenshtein_distance as distance, + damerau_levenshtein_normalized_distance as normalized_distance, + damerau_levenshtein_normalized_similarity as normalized_similarity, + damerau_levenshtein_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + damerau_levenshtein_distance as distance, + damerau_levenshtein_normalized_distance as normalized_distance, + damerau_levenshtein_normalized_similarity as normalized_similarity, + damerau_levenshtein_similarity as similarity, + ) +elif _impl == "python": + from rapidfuzz.distance.metrics_py import ( + damerau_levenshtein_distance as distance, + damerau_levenshtein_normalized_distance as normalized_distance, + damerau_levenshtein_normalized_similarity as normalized_similarity, + damerau_levenshtein_similarity as similarity, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + damerau_levenshtein_distance as distance, + damerau_levenshtein_normalized_distance as normalized_distance, + damerau_levenshtein_normalized_similarity as normalized_similarity, + damerau_levenshtein_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + damerau_levenshtein_distance as distance, + damerau_levenshtein_normalized_distance as normalized_distance, + damerau_levenshtein_normalized_similarity as normalized_similarity, + damerau_levenshtein_similarity as similarity, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from 
rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + damerau_levenshtein_distance as distance, + damerau_levenshtein_normalized_distance as normalized_distance, + damerau_levenshtein_normalized_similarity as normalized_similarity, + damerau_levenshtein_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_py import ( + damerau_levenshtein_distance as distance, + damerau_levenshtein_normalized_distance as normalized_distance, + damerau_levenshtein_normalized_similarity as normalized_similarity, + damerau_levenshtein_similarity as similarity, + ) diff --git a/src/rapidfuzz/distance/DamerauLevenshtein.pyi b/src/rapidfuzz/distance/DamerauLevenshtein.pyi index 6748c19b..db0de625 100644 --- a/src/rapidfuzz/distance/DamerauLevenshtein.pyi +++ b/src/rapidfuzz/distance/DamerauLevenshtein.pyi @@ -3,7 +3,8 @@ from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload _UnprocessedType1 = TypeVar("_UnprocessedType1") _UnprocessedType2 = TypeVar("_UnprocessedType2") diff --git a/src/rapidfuzz/distance/Hamming.py b/src/rapidfuzz/distance/Hamming.py index 912847d9..b999b0b8 100644 --- a/src/rapidfuzz/distance/Hamming.py +++ b/src/rapidfuzz/distance/Hamming.py @@ -1,13 +1,115 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os -_mod = "rapidfuzz.distance.metrics" -distance = _fallback_import(_mod, "hamming_distance") -similarity = _fallback_import(_mod, "hamming_similarity") -normalized_similarity = _fallback_import(_mod, "hamming_normalized_similarity") -normalized_distance = _fallback_import(_mod, "hamming_normalized_distance") -editops = _fallback_import(_mod, "hamming_editops") 
-opcodes = _fallback_import(_mod, "hamming_opcodes") +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = [ + "distance", + "editops", + "normalized_distance", + "normalized_similarity", + "opcodes", + "similarity", +] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + hamming_distance as distance, + hamming_editops as editops, + hamming_normalized_distance as normalized_distance, + hamming_normalized_similarity as normalized_similarity, + hamming_opcodes as opcodes, + hamming_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + hamming_distance as distance, + hamming_editops as editops, + hamming_normalized_distance as normalized_distance, + hamming_normalized_similarity as normalized_similarity, + hamming_opcodes as opcodes, + hamming_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + hamming_distance as distance, + hamming_editops as editops, + hamming_normalized_distance as normalized_distance, + hamming_normalized_similarity as normalized_similarity, + hamming_opcodes as opcodes, + hamming_similarity as similarity, + ) +elif _impl == "python": + from rapidfuzz.distance.metrics_py import ( + hamming_distance as distance, + hamming_editops as editops, + hamming_normalized_distance as normalized_distance, + hamming_normalized_similarity as normalized_similarity, + hamming_opcodes as opcodes, + hamming_similarity as similarity, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # 
pyright: ignore[reportMissingImports] + hamming_distance as distance, + hamming_editops as editops, + hamming_normalized_distance as normalized_distance, + hamming_normalized_similarity as normalized_similarity, + hamming_opcodes as opcodes, + hamming_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + hamming_distance as distance, + hamming_editops as editops, + hamming_normalized_distance as normalized_distance, + hamming_normalized_similarity as normalized_similarity, + hamming_opcodes as opcodes, + hamming_similarity as similarity, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + hamming_distance as distance, + hamming_editops as editops, + hamming_normalized_distance as normalized_distance, + hamming_normalized_similarity as normalized_similarity, + hamming_opcodes as opcodes, + hamming_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_py import ( + hamming_distance as distance, + hamming_editops as editops, + hamming_normalized_distance as normalized_distance, + hamming_normalized_similarity as normalized_similarity, + hamming_opcodes as opcodes, + hamming_similarity as similarity, + ) diff --git a/src/rapidfuzz/distance/Hamming.pyi b/src/rapidfuzz/distance/Hamming.pyi index 733003fe..4a06b371 100644 --- a/src/rapidfuzz/distance/Hamming.pyi +++ b/src/rapidfuzz/distance/Hamming.pyi @@ -3,7 +3,8 @@ from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload from rapidfuzz.distance import Editops, Opcodes diff --git a/src/rapidfuzz/distance/Indel.py 
b/src/rapidfuzz/distance/Indel.py index 3c104d9d..81b0df68 100644 --- a/src/rapidfuzz/distance/Indel.py +++ b/src/rapidfuzz/distance/Indel.py @@ -1,13 +1,115 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os -_mod = "rapidfuzz.distance.metrics" -distance = _fallback_import(_mod, "indel_distance") -similarity = _fallback_import(_mod, "indel_similarity") -normalized_distance = _fallback_import(_mod, "indel_normalized_distance") -normalized_similarity = _fallback_import(_mod, "indel_normalized_similarity") -editops = _fallback_import(_mod, "indel_editops") -opcodes = _fallback_import(_mod, "indel_opcodes") +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = [ + "distance", + "editops", + "normalized_distance", + "normalized_similarity", + "opcodes", + "similarity", +] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + indel_distance as distance, + indel_editops as editops, + indel_normalized_distance as normalized_distance, + indel_normalized_similarity as normalized_similarity, + indel_opcodes as opcodes, + indel_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + indel_distance as distance, + indel_editops as editops, + indel_normalized_distance as normalized_distance, + indel_normalized_similarity as normalized_similarity, + indel_opcodes as opcodes, + indel_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_cpp import ( # pyright: 
ignore[reportMissingImports] + indel_distance as distance, + indel_editops as editops, + indel_normalized_distance as normalized_distance, + indel_normalized_similarity as normalized_similarity, + indel_opcodes as opcodes, + indel_similarity as similarity, + ) +elif _impl == "python": + from rapidfuzz.distance.metrics_py import ( + indel_distance as distance, + indel_editops as editops, + indel_normalized_distance as normalized_distance, + indel_normalized_similarity as normalized_similarity, + indel_opcodes as opcodes, + indel_similarity as similarity, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + indel_distance as distance, + indel_editops as editops, + indel_normalized_distance as normalized_distance, + indel_normalized_similarity as normalized_similarity, + indel_opcodes as opcodes, + indel_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + indel_distance as distance, + indel_editops as editops, + indel_normalized_distance as normalized_distance, + indel_normalized_similarity as normalized_similarity, + indel_opcodes as opcodes, + indel_similarity as similarity, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + indel_distance as distance, + indel_editops as editops, + indel_normalized_distance as normalized_distance, + indel_normalized_similarity as normalized_similarity, + indel_opcodes as opcodes, + indel_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_py import ( + indel_distance as distance, + indel_editops as editops, + indel_normalized_distance as normalized_distance, 
+ indel_normalized_similarity as normalized_similarity, + indel_opcodes as opcodes, + indel_similarity as similarity, + ) diff --git a/src/rapidfuzz/distance/Indel.pyi b/src/rapidfuzz/distance/Indel.pyi index d470bf79..c662f99b 100644 --- a/src/rapidfuzz/distance/Indel.pyi +++ b/src/rapidfuzz/distance/Indel.pyi @@ -3,7 +3,8 @@ from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload from rapidfuzz.distance import Editops, Opcodes diff --git a/src/rapidfuzz/distance/Indel_py.py b/src/rapidfuzz/distance/Indel_py.py index 00d71447..c8f29a93 100644 --- a/src/rapidfuzz/distance/Indel_py.py +++ b/src/rapidfuzz/distance/Indel_py.py @@ -4,10 +4,12 @@ from rapidfuzz._common_py import conv_sequences from rapidfuzz._utils import is_none, setupPandas -from rapidfuzz.distance.LCSseq_py import _block_similarity as lcs_seq_block_similarity -from rapidfuzz.distance.LCSseq_py import editops as lcs_seq_editops -from rapidfuzz.distance.LCSseq_py import opcodes as lcs_seq_opcodes -from rapidfuzz.distance.LCSseq_py import similarity as lcs_seq_similarity +from rapidfuzz.distance.LCSseq_py import ( + _block_similarity as lcs_seq_block_similarity, + editops as lcs_seq_editops, + opcodes as lcs_seq_opcodes, + similarity as lcs_seq_similarity, +) def distance( diff --git a/src/rapidfuzz/distance/Jaro.py b/src/rapidfuzz/distance/Jaro.py index b1f0e4d0..23dbf8ea 100644 --- a/src/rapidfuzz/distance/Jaro.py +++ b/src/rapidfuzz/distance/Jaro.py @@ -1,11 +1,92 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os -_mod = "rapidfuzz.distance.metrics" -distance = _fallback_import(_mod, "jaro_distance") -similarity = _fallback_import(_mod, "jaro_similarity") -normalized_distance = 
_fallback_import(_mod, "jaro_normalized_distance") -normalized_similarity = _fallback_import(_mod, "jaro_normalized_similarity") +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = ["distance", "normalized_distance", "normalized_similarity", "similarity"] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + jaro_distance as distance, + jaro_normalized_distance as normalized_distance, + jaro_normalized_similarity as normalized_similarity, + jaro_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + jaro_distance as distance, + jaro_normalized_distance as normalized_distance, + jaro_normalized_similarity as normalized_similarity, + jaro_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + jaro_distance as distance, + jaro_normalized_distance as normalized_distance, + jaro_normalized_similarity as normalized_similarity, + jaro_similarity as similarity, + ) +elif _impl == "python": + from rapidfuzz.distance.metrics_py import ( + jaro_distance as distance, + jaro_normalized_distance as normalized_distance, + jaro_normalized_similarity as normalized_similarity, + jaro_similarity as similarity, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + jaro_distance as distance, + jaro_normalized_distance as normalized_distance, + jaro_normalized_similarity as normalized_similarity, + jaro_similarity as similarity, + ) + + imported = True + + if not imported 
and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + jaro_distance as distance, + jaro_normalized_distance as normalized_distance, + jaro_normalized_similarity as normalized_similarity, + jaro_similarity as similarity, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + jaro_distance as distance, + jaro_normalized_distance as normalized_distance, + jaro_normalized_similarity as normalized_similarity, + jaro_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_py import ( + jaro_distance as distance, + jaro_normalized_distance as normalized_distance, + jaro_normalized_similarity as normalized_similarity, + jaro_similarity as similarity, + ) diff --git a/src/rapidfuzz/distance/Jaro.pyi b/src/rapidfuzz/distance/Jaro.pyi index 40d86203..d8ec0b6c 100644 --- a/src/rapidfuzz/distance/Jaro.pyi +++ b/src/rapidfuzz/distance/Jaro.pyi @@ -3,7 +3,8 @@ from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload _UnprocessedType1 = TypeVar("_UnprocessedType1") _UnprocessedType2 = TypeVar("_UnprocessedType2") diff --git a/src/rapidfuzz/distance/JaroWinkler.py b/src/rapidfuzz/distance/JaroWinkler.py index 1f9b4908..8b16c47e 100644 --- a/src/rapidfuzz/distance/JaroWinkler.py +++ b/src/rapidfuzz/distance/JaroWinkler.py @@ -1,11 +1,92 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os -_mod = "rapidfuzz.distance.metrics" -distance = _fallback_import(_mod, "jaro_winkler_distance") -similarity = 
_fallback_import(_mod, "jaro_winkler_similarity") -normalized_distance = _fallback_import(_mod, "jaro_winkler_normalized_distance") -normalized_similarity = _fallback_import(_mod, "jaro_winkler_normalized_similarity") +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = ["distance", "normalized_distance", "normalized_similarity", "similarity"] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + jaro_winkler_distance as distance, + jaro_winkler_normalized_distance as normalized_distance, + jaro_winkler_normalized_similarity as normalized_similarity, + jaro_winkler_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + jaro_winkler_distance as distance, + jaro_winkler_normalized_distance as normalized_distance, + jaro_winkler_normalized_similarity as normalized_similarity, + jaro_winkler_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + jaro_winkler_distance as distance, + jaro_winkler_normalized_distance as normalized_distance, + jaro_winkler_normalized_similarity as normalized_similarity, + jaro_winkler_similarity as similarity, + ) +elif _impl == "python": + from rapidfuzz.distance.metrics_py import ( + jaro_winkler_distance as distance, + jaro_winkler_normalized_distance as normalized_distance, + jaro_winkler_normalized_similarity as normalized_similarity, + jaro_winkler_similarity as similarity, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: 
ignore[reportMissingImports] + jaro_winkler_distance as distance, + jaro_winkler_normalized_distance as normalized_distance, + jaro_winkler_normalized_similarity as normalized_similarity, + jaro_winkler_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + jaro_winkler_distance as distance, + jaro_winkler_normalized_distance as normalized_distance, + jaro_winkler_normalized_similarity as normalized_similarity, + jaro_winkler_similarity as similarity, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + jaro_winkler_distance as distance, + jaro_winkler_normalized_distance as normalized_distance, + jaro_winkler_normalized_similarity as normalized_similarity, + jaro_winkler_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_py import ( + jaro_winkler_distance as distance, + jaro_winkler_normalized_distance as normalized_distance, + jaro_winkler_normalized_similarity as normalized_similarity, + jaro_winkler_similarity as similarity, + ) diff --git a/src/rapidfuzz/distance/JaroWinkler.pyi b/src/rapidfuzz/distance/JaroWinkler.pyi index 59361e65..72b309d7 100644 --- a/src/rapidfuzz/distance/JaroWinkler.pyi +++ b/src/rapidfuzz/distance/JaroWinkler.pyi @@ -3,7 +3,8 @@ from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload _UnprocessedType1 = TypeVar("_UnprocessedType1") _UnprocessedType2 = TypeVar("_UnprocessedType2") diff --git a/src/rapidfuzz/distance/LCSseq.py b/src/rapidfuzz/distance/LCSseq.py index a3a765cc..b53d39fc 100644 --- a/src/rapidfuzz/distance/LCSseq.py +++ 
b/src/rapidfuzz/distance/LCSseq.py @@ -1,13 +1,115 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os -_mod = "rapidfuzz.distance.metrics" -distance = _fallback_import(_mod, "lcs_seq_distance") -similarity = _fallback_import(_mod, "lcs_seq_similarity") -normalized_distance = _fallback_import(_mod, "lcs_seq_normalized_distance") -normalized_similarity = _fallback_import(_mod, "lcs_seq_normalized_similarity") -editops = _fallback_import(_mod, "lcs_seq_editops") -opcodes = _fallback_import(_mod, "lcs_seq_opcodes") +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = [ + "distance", + "editops", + "normalized_distance", + "normalized_similarity", + "opcodes", + "similarity", +] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + lcs_seq_distance as distance, + lcs_seq_editops as editops, + lcs_seq_normalized_distance as normalized_distance, + lcs_seq_normalized_similarity as normalized_similarity, + lcs_seq_opcodes as opcodes, + lcs_seq_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + lcs_seq_distance as distance, + lcs_seq_editops as editops, + lcs_seq_normalized_distance as normalized_distance, + lcs_seq_normalized_similarity as normalized_similarity, + lcs_seq_opcodes as opcodes, + lcs_seq_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + lcs_seq_distance as distance, + lcs_seq_editops as 
editops, + lcs_seq_normalized_distance as normalized_distance, + lcs_seq_normalized_similarity as normalized_similarity, + lcs_seq_opcodes as opcodes, + lcs_seq_similarity as similarity, + ) +elif _impl == "python": + from rapidfuzz.distance.metrics_py import ( + lcs_seq_distance as distance, + lcs_seq_editops as editops, + lcs_seq_normalized_distance as normalized_distance, + lcs_seq_normalized_similarity as normalized_similarity, + lcs_seq_opcodes as opcodes, + lcs_seq_similarity as similarity, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + lcs_seq_distance as distance, + lcs_seq_editops as editops, + lcs_seq_normalized_distance as normalized_distance, + lcs_seq_normalized_similarity as normalized_similarity, + lcs_seq_opcodes as opcodes, + lcs_seq_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + lcs_seq_distance as distance, + lcs_seq_editops as editops, + lcs_seq_normalized_distance as normalized_distance, + lcs_seq_normalized_similarity as normalized_similarity, + lcs_seq_opcodes as opcodes, + lcs_seq_similarity as similarity, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + lcs_seq_distance as distance, + lcs_seq_editops as editops, + lcs_seq_normalized_distance as normalized_distance, + lcs_seq_normalized_similarity as normalized_similarity, + lcs_seq_opcodes as opcodes, + lcs_seq_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_py import ( + lcs_seq_distance as distance, + lcs_seq_editops as editops, + lcs_seq_normalized_distance as normalized_distance, + 
lcs_seq_normalized_similarity as normalized_similarity, + lcs_seq_opcodes as opcodes, + lcs_seq_similarity as similarity, + ) diff --git a/src/rapidfuzz/distance/LCSseq.pyi b/src/rapidfuzz/distance/LCSseq.pyi index d470bf79..c662f99b 100644 --- a/src/rapidfuzz/distance/LCSseq.pyi +++ b/src/rapidfuzz/distance/LCSseq.pyi @@ -3,7 +3,8 @@ from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload from rapidfuzz.distance import Editops, Opcodes diff --git a/src/rapidfuzz/distance/Levenshtein.py b/src/rapidfuzz/distance/Levenshtein.py index 93a3b00d..9fac5dd3 100644 --- a/src/rapidfuzz/distance/Levenshtein.py +++ b/src/rapidfuzz/distance/Levenshtein.py @@ -1,19 +1,115 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann -""" -The Levenshtein (edit) distance is a string metric to measure the -difference between two strings/sequences s1 and s2. -It's defined as the minimum number of insertions, deletions or -substitutions required to transform s1 into s2. 
-""" +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os -_mod = "rapidfuzz.distance.metrics" -distance = _fallback_import(_mod, "levenshtein_distance") -similarity = _fallback_import(_mod, "levenshtein_similarity") -normalized_distance = _fallback_import(_mod, "levenshtein_normalized_distance") -normalized_similarity = _fallback_import(_mod, "levenshtein_normalized_similarity") -editops = _fallback_import(_mod, "levenshtein_editops") -opcodes = _fallback_import(_mod, "levenshtein_opcodes") +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = [ + "distance", + "editops", + "normalized_distance", + "normalized_similarity", + "opcodes", + "similarity", +] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + levenshtein_distance as distance, + levenshtein_editops as editops, + levenshtein_normalized_distance as normalized_distance, + levenshtein_normalized_similarity as normalized_similarity, + levenshtein_opcodes as opcodes, + levenshtein_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + levenshtein_distance as distance, + levenshtein_editops as editops, + levenshtein_normalized_distance as normalized_distance, + levenshtein_normalized_similarity as normalized_similarity, + levenshtein_opcodes as opcodes, + levenshtein_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + levenshtein_distance as distance, + levenshtein_editops as editops, + 
levenshtein_normalized_distance as normalized_distance, + levenshtein_normalized_similarity as normalized_similarity, + levenshtein_opcodes as opcodes, + levenshtein_similarity as similarity, + ) +elif _impl == "python": + from rapidfuzz.distance.metrics_py import ( + levenshtein_distance as distance, + levenshtein_editops as editops, + levenshtein_normalized_distance as normalized_distance, + levenshtein_normalized_similarity as normalized_similarity, + levenshtein_opcodes as opcodes, + levenshtein_similarity as similarity, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + levenshtein_distance as distance, + levenshtein_editops as editops, + levenshtein_normalized_distance as normalized_distance, + levenshtein_normalized_similarity as normalized_similarity, + levenshtein_opcodes as opcodes, + levenshtein_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + levenshtein_distance as distance, + levenshtein_editops as editops, + levenshtein_normalized_distance as normalized_distance, + levenshtein_normalized_similarity as normalized_similarity, + levenshtein_opcodes as opcodes, + levenshtein_similarity as similarity, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + levenshtein_distance as distance, + levenshtein_editops as editops, + levenshtein_normalized_distance as normalized_distance, + levenshtein_normalized_similarity as normalized_similarity, + levenshtein_opcodes as opcodes, + levenshtein_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_py import ( + levenshtein_distance as 
distance, + levenshtein_editops as editops, + levenshtein_normalized_distance as normalized_distance, + levenshtein_normalized_similarity as normalized_similarity, + levenshtein_opcodes as opcodes, + levenshtein_similarity as similarity, + ) diff --git a/src/rapidfuzz/distance/Levenshtein.pyi b/src/rapidfuzz/distance/Levenshtein.pyi index d31df2b0..8bd26720 100644 --- a/src/rapidfuzz/distance/Levenshtein.pyi +++ b/src/rapidfuzz/distance/Levenshtein.pyi @@ -9,7 +9,8 @@ substitutions required to transform s1 into s2. from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload from rapidfuzz.distance import Editops, Opcodes diff --git a/src/rapidfuzz/distance/OSA.py b/src/rapidfuzz/distance/OSA.py index 8007c705..3d4b21d9 100644 --- a/src/rapidfuzz/distance/OSA.py +++ b/src/rapidfuzz/distance/OSA.py @@ -1,11 +1,92 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os -_mod = "rapidfuzz.distance.metrics" -distance = _fallback_import(_mod, "osa_distance") -similarity = _fallback_import(_mod, "osa_similarity") -normalized_distance = _fallback_import(_mod, "osa_normalized_distance") -normalized_similarity = _fallback_import(_mod, "osa_normalized_similarity") +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = ["distance", "normalized_distance", "normalized_similarity", "similarity"] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + osa_distance as distance, + osa_normalized_distance as normalized_distance, + osa_normalized_similarity as 
normalized_similarity, + osa_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + osa_distance as distance, + osa_normalized_distance as normalized_distance, + osa_normalized_similarity as normalized_similarity, + osa_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + osa_distance as distance, + osa_normalized_distance as normalized_distance, + osa_normalized_similarity as normalized_similarity, + osa_similarity as similarity, + ) +elif _impl == "python": + from rapidfuzz.distance.metrics_py import ( + osa_distance as distance, + osa_normalized_distance as normalized_distance, + osa_normalized_similarity as normalized_similarity, + osa_similarity as similarity, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + osa_distance as distance, + osa_normalized_distance as normalized_distance, + osa_normalized_similarity as normalized_similarity, + osa_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + osa_distance as distance, + osa_normalized_distance as normalized_distance, + osa_normalized_similarity as normalized_similarity, + osa_similarity as similarity, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + osa_distance as distance, + osa_normalized_distance as normalized_distance, + osa_normalized_similarity as normalized_similarity, + osa_similarity as similarity, + 
) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_py import ( + osa_distance as distance, + osa_normalized_distance as normalized_distance, + osa_normalized_similarity as normalized_similarity, + osa_similarity as similarity, + ) diff --git a/src/rapidfuzz/distance/OSA.pyi b/src/rapidfuzz/distance/OSA.pyi index 6748c19b..db0de625 100644 --- a/src/rapidfuzz/distance/OSA.pyi +++ b/src/rapidfuzz/distance/OSA.pyi @@ -3,7 +3,8 @@ from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload _UnprocessedType1 = TypeVar("_UnprocessedType1") _UnprocessedType2 = TypeVar("_UnprocessedType2") diff --git a/src/rapidfuzz/distance/Postfix.py b/src/rapidfuzz/distance/Postfix.py index 8f7fa724..3645f39c 100644 --- a/src/rapidfuzz/distance/Postfix.py +++ b/src/rapidfuzz/distance/Postfix.py @@ -1,11 +1,92 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os -_mod = "rapidfuzz.distance.metrics" -distance = _fallback_import(_mod, "postfix_distance") -similarity = _fallback_import(_mod, "postfix_similarity") -normalized_distance = _fallback_import(_mod, "postfix_normalized_distance") -normalized_similarity = _fallback_import(_mod, "postfix_normalized_similarity") +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = ["distance", "normalized_distance", "normalized_similarity", "similarity"] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + postfix_distance as distance, + postfix_normalized_distance as normalized_distance, + 
postfix_normalized_similarity as normalized_similarity, + postfix_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + postfix_distance as distance, + postfix_normalized_distance as normalized_distance, + postfix_normalized_similarity as normalized_similarity, + postfix_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + postfix_distance as distance, + postfix_normalized_distance as normalized_distance, + postfix_normalized_similarity as normalized_similarity, + postfix_similarity as similarity, + ) +elif _impl == "python": + from rapidfuzz.distance.metrics_py import ( + postfix_distance as distance, + postfix_normalized_distance as normalized_distance, + postfix_normalized_similarity as normalized_similarity, + postfix_similarity as similarity, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + postfix_distance as distance, + postfix_normalized_distance as normalized_distance, + postfix_normalized_similarity as normalized_similarity, + postfix_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + postfix_distance as distance, + postfix_normalized_distance as normalized_distance, + postfix_normalized_similarity as normalized_similarity, + postfix_similarity as similarity, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + postfix_distance as distance, + 
postfix_normalized_distance as normalized_distance, + postfix_normalized_similarity as normalized_similarity, + postfix_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_py import ( + postfix_distance as distance, + postfix_normalized_distance as normalized_distance, + postfix_normalized_similarity as normalized_similarity, + postfix_similarity as similarity, + ) diff --git a/src/rapidfuzz/distance/Postfix.pyi b/src/rapidfuzz/distance/Postfix.pyi index 6748c19b..db0de625 100644 --- a/src/rapidfuzz/distance/Postfix.pyi +++ b/src/rapidfuzz/distance/Postfix.pyi @@ -3,7 +3,8 @@ from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload _UnprocessedType1 = TypeVar("_UnprocessedType1") _UnprocessedType2 = TypeVar("_UnprocessedType2") diff --git a/src/rapidfuzz/distance/Prefix.py b/src/rapidfuzz/distance/Prefix.py index a4aba101..f46586bd 100644 --- a/src/rapidfuzz/distance/Prefix.py +++ b/src/rapidfuzz/distance/Prefix.py @@ -1,11 +1,92 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os -_mod = "rapidfuzz.distance.metrics" -distance = _fallback_import(_mod, "prefix_distance") -similarity = _fallback_import(_mod, "prefix_similarity") -normalized_distance = _fallback_import(_mod, "prefix_normalized_distance") -normalized_similarity = _fallback_import(_mod, "prefix_normalized_similarity") +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = ["distance", "normalized_distance", "normalized_similarity", "similarity"] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from 
rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + prefix_distance as distance, + prefix_normalized_distance as normalized_distance, + prefix_normalized_similarity as normalized_similarity, + prefix_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + prefix_distance as distance, + prefix_normalized_distance as normalized_distance, + prefix_normalized_similarity as normalized_similarity, + prefix_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + prefix_distance as distance, + prefix_normalized_distance as normalized_distance, + prefix_normalized_similarity as normalized_similarity, + prefix_similarity as similarity, + ) +elif _impl == "python": + from rapidfuzz.distance.metrics_py import ( + prefix_distance as distance, + prefix_normalized_distance as normalized_distance, + prefix_normalized_similarity as normalized_similarity, + prefix_similarity as similarity, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + prefix_distance as distance, + prefix_normalized_distance as normalized_distance, + prefix_normalized_similarity as normalized_similarity, + prefix_similarity as similarity, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + prefix_distance as distance, + prefix_normalized_distance as normalized_distance, + prefix_normalized_similarity as normalized_similarity, + prefix_similarity as similarity, + ) + + imported = True + + if not imported: + with 
contextlib.suppress(ImportError): + from rapidfuzz.distance.metrics_cpp import ( # pyright: ignore[reportMissingImports] + prefix_distance as distance, + prefix_normalized_distance as normalized_distance, + prefix_normalized_similarity as normalized_similarity, + prefix_similarity as similarity, + ) + + imported = True + + if not imported: + from rapidfuzz.distance.metrics_py import ( + prefix_distance as distance, + prefix_normalized_distance as normalized_distance, + prefix_normalized_similarity as normalized_similarity, + prefix_similarity as similarity, + ) diff --git a/src/rapidfuzz/distance/Prefix.pyi b/src/rapidfuzz/distance/Prefix.pyi index 6748c19b..db0de625 100644 --- a/src/rapidfuzz/distance/Prefix.pyi +++ b/src/rapidfuzz/distance/Prefix.pyi @@ -3,7 +3,8 @@ from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload _UnprocessedType1 = TypeVar("_UnprocessedType1") _UnprocessedType2 = TypeVar("_UnprocessedType2") diff --git a/src/rapidfuzz/distance/__init__.pyi b/src/rapidfuzz/distance/__init__.pyi index 0649b260..219b4473 100644 --- a/src/rapidfuzz/distance/__init__.pyi +++ b/src/rapidfuzz/distance/__init__.pyi @@ -3,19 +3,23 @@ from __future__ import annotations -from . import OSA as OSA -from . import DamerauLevenshtein as DamerauLevenshtein -from . import Hamming as Hamming -from . import Indel as Indel -from . import Jaro as Jaro -from . import JaroWinkler as JaroWinkler -from . import LCSseq as LCSseq -from . import Levenshtein as Levenshtein -from . import Postfix as Postfix -from . import Prefix as Prefix -from ._initialize import Editop as Editop -from ._initialize import Editops as Editops -from ._initialize import MatchingBlock as MatchingBlock -from ._initialize import Opcode as Opcode -from ._initialize import Opcodes as Opcodes -from ._initialize import ScoreAlignment as ScoreAlignment +from . 
import ( + OSA as OSA, + DamerauLevenshtein as DamerauLevenshtein, + Hamming as Hamming, + Indel as Indel, + Jaro as Jaro, + JaroWinkler as JaroWinkler, + LCSseq as LCSseq, + Levenshtein as Levenshtein, + Postfix as Postfix, + Prefix as Prefix, +) +from ._initialize import ( + Editop as Editop, + Editops as Editops, + MatchingBlock as MatchingBlock, + Opcode as Opcode, + Opcodes as Opcodes, + ScoreAlignment as ScoreAlignment, +) diff --git a/src/rapidfuzz/distance/_initialize.py b/src/rapidfuzz/distance/_initialize.py index e5c8a7f5..71817782 100644 --- a/src/rapidfuzz/distance/_initialize.py +++ b/src/rapidfuzz/distance/_initialize.py @@ -1,20 +1,108 @@ +# SPDX-License-Identifier: MIT +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import - -_mod = "rapidfuzz.distance._initialize" -Editop = _fallback_import(_mod, "Editop") -Editops = _fallback_import(_mod, "Editops") -Opcode = _fallback_import(_mod, "Opcode") -Opcodes = _fallback_import(_mod, "Opcodes") -ScoreAlignment = _fallback_import(_mod, "ScoreAlignment") -MatchingBlock = _fallback_import(_mod, "MatchingBlock") - -__all__ = [ - "Editop", - "Editops", - "Opcode", - "Opcodes", - "ScoreAlignment", - "MatchingBlock", -] +import contextlib +import os + +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = ["Editop", "Editops", "MatchingBlock", "Opcode", "Opcodes", "ScoreAlignment"] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance._initialize_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + Editop, + Editops, + MatchingBlock, + Opcode, + Opcodes, + ScoreAlignment, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance._initialize_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + Editop, + 
Editops, + MatchingBlock, + Opcode, + Opcodes, + ScoreAlignment, + ) + + imported = True + + if not imported: + from rapidfuzz.distance._initialize_cpp import ( # pyright: ignore[reportMissingImports] + Editop, + Editops, + MatchingBlock, + Opcode, + Opcodes, + ScoreAlignment, + ) +elif _impl == "python": + from rapidfuzz.distance._initialize_py import ( + Editop, + Editops, + MatchingBlock, + Opcode, + Opcodes, + ScoreAlignment, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance._initialize_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + Editop, + Editops, + MatchingBlock, + Opcode, + Opcodes, + ScoreAlignment, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.distance._initialize_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + Editop, + Editops, + MatchingBlock, + Opcode, + Opcodes, + ScoreAlignment, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.distance._initialize_cpp import ( # pyright: ignore[reportMissingImports] + Editop, + Editops, + MatchingBlock, + Opcode, + Opcodes, + ScoreAlignment, + ) + + imported = True + + if not imported: + from rapidfuzz.distance._initialize_py import ( + Editop, + Editops, + MatchingBlock, + Opcode, + Opcodes, + ScoreAlignment, + ) diff --git a/src/rapidfuzz/distance/_initialize.pyi b/src/rapidfuzz/distance/_initialize.pyi index 1c3db428..3d6eaea8 100644 --- a/src/rapidfuzz/distance/_initialize.pyi +++ b/src/rapidfuzz/distance/_initialize.pyi @@ -3,7 +3,7 @@ from __future__ import annotations -from typing import Iterator +from collections.abc import Iterator _AnyOpList = list[Editop | tuple[str, int, int]] | list[Opcode | tuple[str, int, int, int, int]] diff --git a/src/rapidfuzz/distance/metrics_py.py b/src/rapidfuzz/distance/metrics_py.py index 8370c8a9..e0ad2c3f 100644 --- 
a/src/rapidfuzz/distance/metrics_py.py +++ b/src/rapidfuzz/distance/metrics_py.py @@ -5,115 +5,102 @@ from typing import Any, Callable -from rapidfuzz._utils import ScorerFlag, add_scorer_attrs -from rapidfuzz._utils import default_distance_attribute as dist_attr -from rapidfuzz._utils import default_normalized_distance_attribute as norm_dist_attr -from rapidfuzz._utils import default_normalized_similarity_attribute as norm_sim_attr -from rapidfuzz._utils import default_similarity_attribute as sim_attr +from rapidfuzz._utils import ( + ScorerFlag, + add_scorer_attrs, + default_distance_attribute as dist_attr, + default_normalized_distance_attribute as norm_dist_attr, + default_normalized_similarity_attribute as norm_sim_attr, + default_similarity_attribute as sim_attr, +) # DamerauLevenshtein from rapidfuzz.distance.DamerauLevenshtein_py import ( distance as damerau_levenshtein_distance, -) -from rapidfuzz.distance.DamerauLevenshtein_py import ( normalized_distance as damerau_levenshtein_normalized_distance, -) -from rapidfuzz.distance.DamerauLevenshtein_py import ( normalized_similarity as damerau_levenshtein_normalized_similarity, -) -from rapidfuzz.distance.DamerauLevenshtein_py import ( similarity as damerau_levenshtein_similarity, ) # Hamming -from rapidfuzz.distance.Hamming_py import distance as hamming_distance -from rapidfuzz.distance.Hamming_py import editops as hamming_editops from rapidfuzz.distance.Hamming_py import ( + distance as hamming_distance, + editops as hamming_editops, normalized_distance as hamming_normalized_distance, -) -from rapidfuzz.distance.Hamming_py import ( normalized_similarity as hamming_normalized_similarity, + opcodes as hamming_opcodes, + similarity as hamming_similarity, ) -from rapidfuzz.distance.Hamming_py import opcodes as hamming_opcodes -from rapidfuzz.distance.Hamming_py import similarity as hamming_similarity # Indel -from rapidfuzz.distance.Indel_py import distance as indel_distance -from rapidfuzz.distance.Indel_py 
import editops as indel_editops -from rapidfuzz.distance.Indel_py import normalized_distance as indel_normalized_distance from rapidfuzz.distance.Indel_py import ( + distance as indel_distance, + editops as indel_editops, + normalized_distance as indel_normalized_distance, normalized_similarity as indel_normalized_similarity, + opcodes as indel_opcodes, + similarity as indel_similarity, ) -from rapidfuzz.distance.Indel_py import opcodes as indel_opcodes -from rapidfuzz.distance.Indel_py import similarity as indel_similarity # Jaro -from rapidfuzz.distance.Jaro_py import distance as jaro_distance -from rapidfuzz.distance.Jaro_py import normalized_distance as jaro_normalized_distance from rapidfuzz.distance.Jaro_py import ( + distance as jaro_distance, + normalized_distance as jaro_normalized_distance, normalized_similarity as jaro_normalized_similarity, + similarity as jaro_similarity, ) -from rapidfuzz.distance.Jaro_py import similarity as jaro_similarity # JaroWinkler -from rapidfuzz.distance.JaroWinkler_py import distance as jaro_winkler_distance from rapidfuzz.distance.JaroWinkler_py import ( + distance as jaro_winkler_distance, normalized_distance as jaro_winkler_normalized_distance, -) -from rapidfuzz.distance.JaroWinkler_py import ( normalized_similarity as jaro_winkler_normalized_similarity, + similarity as jaro_winkler_similarity, ) -from rapidfuzz.distance.JaroWinkler_py import similarity as jaro_winkler_similarity # LCSseq -from rapidfuzz.distance.LCSseq_py import distance as lcs_seq_distance -from rapidfuzz.distance.LCSseq_py import editops as lcs_seq_editops from rapidfuzz.distance.LCSseq_py import ( + distance as lcs_seq_distance, + editops as lcs_seq_editops, normalized_distance as lcs_seq_normalized_distance, -) -from rapidfuzz.distance.LCSseq_py import ( normalized_similarity as lcs_seq_normalized_similarity, + opcodes as lcs_seq_opcodes, + similarity as lcs_seq_similarity, ) -from rapidfuzz.distance.LCSseq_py import opcodes as lcs_seq_opcodes -from 
rapidfuzz.distance.LCSseq_py import similarity as lcs_seq_similarity # Levenshtein -from rapidfuzz.distance.Levenshtein_py import distance as levenshtein_distance -from rapidfuzz.distance.Levenshtein_py import editops as levenshtein_editops from rapidfuzz.distance.Levenshtein_py import ( + distance as levenshtein_distance, + editops as levenshtein_editops, normalized_distance as levenshtein_normalized_distance, -) -from rapidfuzz.distance.Levenshtein_py import ( normalized_similarity as levenshtein_normalized_similarity, + opcodes as levenshtein_opcodes, + similarity as levenshtein_similarity, ) -from rapidfuzz.distance.Levenshtein_py import opcodes as levenshtein_opcodes -from rapidfuzz.distance.Levenshtein_py import similarity as levenshtein_similarity # OSA -from rapidfuzz.distance.OSA_py import distance as osa_distance -from rapidfuzz.distance.OSA_py import normalized_distance as osa_normalized_distance -from rapidfuzz.distance.OSA_py import normalized_similarity as osa_normalized_similarity -from rapidfuzz.distance.OSA_py import similarity as osa_similarity +from rapidfuzz.distance.OSA_py import ( + distance as osa_distance, + normalized_distance as osa_normalized_distance, + normalized_similarity as osa_normalized_similarity, + similarity as osa_similarity, +) # Postfix -from rapidfuzz.distance.Postfix_py import distance as postfix_distance from rapidfuzz.distance.Postfix_py import ( + distance as postfix_distance, normalized_distance as postfix_normalized_distance, -) -from rapidfuzz.distance.Postfix_py import ( normalized_similarity as postfix_normalized_similarity, + similarity as postfix_similarity, ) -from rapidfuzz.distance.Postfix_py import similarity as postfix_similarity # Prefix -from rapidfuzz.distance.Prefix_py import distance as prefix_distance from rapidfuzz.distance.Prefix_py import ( + distance as prefix_distance, normalized_distance as prefix_normalized_distance, -) -from rapidfuzz.distance.Prefix_py import ( normalized_similarity as 
prefix_normalized_similarity, + similarity as prefix_similarity, ) -from rapidfuzz.distance.Prefix_py import similarity as prefix_similarity __all__ = [] diff --git a/src/rapidfuzz/fuzz.py b/src/rapidfuzz/fuzz.py index afc1baec..6874b320 100644 --- a/src/rapidfuzz/fuzz.py +++ b/src/rapidfuzz/fuzz.py @@ -1,32 +1,160 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2021 Max Bachmann +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os + +from rapidfuzz._feature_detector import AVX2, SSE2, supports __all__ = [ - "ratio", + "QRatio", + "WRatio", "partial_ratio", "partial_ratio_alignment", - "token_sort_ratio", - "token_set_ratio", - "token_ratio", - "partial_token_sort_ratio", - "partial_token_set_ratio", "partial_token_ratio", - "WRatio", - "QRatio", + "partial_token_set_ratio", + "partial_token_sort_ratio", + "ratio", + "token_ratio", + "token_set_ratio", + "token_sort_ratio", ] -_mod = "rapidfuzz.fuzz" -ratio = _fallback_import(_mod, "ratio") -partial_ratio = _fallback_import(_mod, "partial_ratio") -partial_ratio_alignment = _fallback_import(_mod, "partial_ratio_alignment") -token_sort_ratio = _fallback_import(_mod, "token_sort_ratio") -token_set_ratio = _fallback_import(_mod, "token_set_ratio") -token_ratio = _fallback_import(_mod, "token_ratio") -partial_token_sort_ratio = _fallback_import(_mod, "partial_token_sort_ratio") -partial_token_set_ratio = _fallback_import(_mod, "partial_token_set_ratio") -partial_token_ratio = _fallback_import(_mod, "partial_token_ratio") -WRatio = _fallback_import(_mod, "WRatio") -QRatio = _fallback_import(_mod, "QRatio") +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.fuzz_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + QRatio, + WRatio, + partial_ratio, + partial_ratio_alignment, + 
partial_token_ratio, + partial_token_set_ratio, + partial_token_sort_ratio, + ratio, + token_ratio, + token_set_ratio, + token_sort_ratio, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.fuzz_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + QRatio, + WRatio, + partial_ratio, + partial_ratio_alignment, + partial_token_ratio, + partial_token_set_ratio, + partial_token_sort_ratio, + ratio, + token_ratio, + token_set_ratio, + token_sort_ratio, + ) + + imported = True + + if not imported: + from rapidfuzz.fuzz_cpp import ( # pyright: ignore[reportMissingImports] + QRatio, + WRatio, + partial_ratio, + partial_ratio_alignment, + partial_token_ratio, + partial_token_set_ratio, + partial_token_sort_ratio, + ratio, + token_ratio, + token_set_ratio, + token_sort_ratio, + ) +elif _impl == "python": + from rapidfuzz.fuzz_py import ( + QRatio, + WRatio, + partial_ratio, + partial_ratio_alignment, + partial_token_ratio, + partial_token_set_ratio, + partial_token_sort_ratio, + ratio, + token_ratio, + token_set_ratio, + token_sort_ratio, + ) +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.fuzz_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + QRatio, + WRatio, + partial_ratio, + partial_ratio_alignment, + partial_token_ratio, + partial_token_set_ratio, + partial_token_sort_ratio, + ratio, + token_ratio, + token_set_ratio, + token_sort_ratio, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.fuzz_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + QRatio, + WRatio, + partial_ratio, + partial_ratio_alignment, + partial_token_ratio, + partial_token_set_ratio, + partial_token_sort_ratio, + ratio, + token_ratio, + token_set_ratio, + token_sort_ratio, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from 
rapidfuzz.fuzz_cpp import ( # pyright: ignore[reportMissingImports] + QRatio, + WRatio, + partial_ratio, + partial_ratio_alignment, + partial_token_ratio, + partial_token_set_ratio, + partial_token_sort_ratio, + ratio, + token_ratio, + token_set_ratio, + token_sort_ratio, + ) + + imported = True + + if not imported: + from rapidfuzz.fuzz_py import ( + QRatio, + WRatio, + partial_ratio, + partial_ratio_alignment, + partial_token_ratio, + partial_token_set_ratio, + partial_token_sort_ratio, + ratio, + token_ratio, + token_set_ratio, + token_sort_ratio, + ) diff --git a/src/rapidfuzz/fuzz.pyi b/src/rapidfuzz/fuzz.pyi index 693551c0..0eb38bb8 100644 --- a/src/rapidfuzz/fuzz.pyi +++ b/src/rapidfuzz/fuzz.pyi @@ -3,7 +3,8 @@ from __future__ import annotations -from typing import Callable, Hashable, Sequence, TypeVar, overload +from collections.abc import Hashable, Sequence +from typing import Callable, TypeVar, overload from rapidfuzz.distance import ScoreAlignment diff --git a/src/rapidfuzz/fuzz_py.py b/src/rapidfuzz/fuzz_py.py index a91fde3d..3792d1a7 100644 --- a/src/rapidfuzz/fuzz_py.py +++ b/src/rapidfuzz/fuzz_py.py @@ -9,9 +9,7 @@ from rapidfuzz.distance import ScoreAlignment from rapidfuzz.distance.Indel_py import ( _block_normalized_similarity as indel_block_normalized_similarity, -) -from rapidfuzz.distance.Indel_py import distance as indel_distance -from rapidfuzz.distance.Indel_py import ( + distance as indel_distance, normalized_similarity as indel_normalized_similarity, ) diff --git a/src/rapidfuzz/process.py b/src/rapidfuzz/process.py index dc5259b7..6b298a0a 100644 --- a/src/rapidfuzz/process.py +++ b/src/rapidfuzz/process.py @@ -1,13 +1,94 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann - +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os + +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = 
["cdist", "cpdist", "extract", "extractOne", "extract_iter"] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.process_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + cdist, + cpdist, + extract, + extract_iter, + extractOne, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.process_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + cdist, + cpdist, + extract, + extract_iter, + extractOne, + ) + + imported = True + + if not imported: + from rapidfuzz.process_cpp import ( # pyright: ignore[reportMissingImports] + cdist, + cpdist, + extract, + extract_iter, + extractOne, + ) +elif _impl == "python": + from rapidfuzz.process_py import cdist, cpdist, extract, extract_iter, extractOne +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.process_cpp_avx2 import ( # pyright: ignore[reportMissingImports] + cdist, + cpdist, + extract, + extract_iter, + extractOne, + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.process_cpp_sse2 import ( # pyright: ignore[reportMissingImports] + cdist, + cpdist, + extract, + extract_iter, + extractOne, + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.process_cpp import ( # pyright: ignore[reportMissingImports] + cdist, + cpdist, + extract, + extract_iter, + extractOne, + ) + + imported = True -_mod = "rapidfuzz.process" -extract = _fallback_import(_mod, "extract") -extractOne = _fallback_import(_mod, "extractOne") -extract_iter = _fallback_import(_mod, "extract_iter") -cdist = _fallback_import(_mod, "cdist") -cpdist = _fallback_import(_mod, "cpdist") + if not imported: + from rapidfuzz.process_py import ( + cdist, + cpdist, + extract, + extract_iter, + 
extractOne, + ) diff --git a/src/rapidfuzz/process.pyi b/src/rapidfuzz/process.pyi index 42d02632..92008029 100644 --- a/src/rapidfuzz/process.pyi +++ b/src/rapidfuzz/process.pyi @@ -1,15 +1,10 @@ from __future__ import annotations +from collections.abc import Collection, Generator, Hashable, Iterable, Mapping, Sequence from typing import ( Any, Callable, - Collection, - Generator, - Hashable, - Iterable, - Mapping, Protocol, - Sequence, TypeVar, overload, ) diff --git a/src/rapidfuzz/process_cpp.py b/src/rapidfuzz/process_cpp.py index 226d087f..b46bbef2 100644 --- a/src/rapidfuzz/process_cpp.py +++ b/src/rapidfuzz/process_cpp.py @@ -3,19 +3,23 @@ from __future__ import annotations from rapidfuzz.fuzz import ratio -from rapidfuzz.process_cpp_impl import FLOAT32 as _FLOAT32 -from rapidfuzz.process_cpp_impl import FLOAT64 as _FLOAT64 -from rapidfuzz.process_cpp_impl import INT8 as _INT8 -from rapidfuzz.process_cpp_impl import INT16 as _INT16 -from rapidfuzz.process_cpp_impl import INT32 as _INT32 -from rapidfuzz.process_cpp_impl import INT64 as _INT64 -from rapidfuzz.process_cpp_impl import UINT8 as _UINT8 -from rapidfuzz.process_cpp_impl import UINT16 as _UINT16 -from rapidfuzz.process_cpp_impl import UINT32 as _UINT32 -from rapidfuzz.process_cpp_impl import UINT64 as _UINT64 -from rapidfuzz.process_cpp_impl import cdist as _cdist -from rapidfuzz.process_cpp_impl import cpdist as _cpdist -from rapidfuzz.process_cpp_impl import extract, extract_iter, extractOne +from rapidfuzz.process_cpp_impl import ( + FLOAT32 as _FLOAT32, + FLOAT64 as _FLOAT64, + INT8 as _INT8, + INT16 as _INT16, + INT32 as _INT32, + INT64 as _INT64, + UINT8 as _UINT8, + UINT16 as _UINT16, + UINT32 as _UINT32, + UINT64 as _UINT64, + cdist as _cdist, + cpdist as _cpdist, + extract, + extract_iter, + extractOne, +) __all__ = ["extract", "extract_iter", "extractOne", "cdist", "cpdist"] diff --git a/src/rapidfuzz/utils.py b/src/rapidfuzz/utils.py index 586dba5b..c8601190 100644 --- 
a/src/rapidfuzz/utils.py +++ b/src/rapidfuzz/utils.py @@ -1,10 +1,64 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2022 Max Bachmann - +# Copyright (C) 2025 Max Bachmann from __future__ import annotations -from rapidfuzz._utils import fallback_import as _fallback_import +import contextlib +import os + +from rapidfuzz._feature_detector import AVX2, SSE2, supports __all__ = ["default_process"] -default_process = _fallback_import("rapidfuzz.utils", "default_process") +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.utils_cpp_avx2 import ( + default_process, # pyright: ignore[reportMissingImports] + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.utils_cpp_sse2 import ( + default_process, # pyright: ignore[reportMissingImports] + ) + + imported = True + + if not imported: + from rapidfuzz.utils_cpp import ( + default_process, # pyright: ignore[reportMissingImports] + ) +elif _impl == "python": + from rapidfuzz.utils_py import default_process +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from rapidfuzz.utils_cpp_avx2 import ( + default_process, # pyright: ignore[reportMissingImports] + ) + + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from rapidfuzz.utils_cpp_sse2 import ( + default_process, # pyright: ignore[reportMissingImports] + ) + + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from rapidfuzz.utils_cpp import ( + default_process, # pyright: ignore[reportMissingImports] + ) + + imported = True + + if not imported: + from rapidfuzz.utils_py import default_process diff --git a/src/rapidfuzz/utils.pyi b/src/rapidfuzz/utils.pyi index 9c802366..94b0062b 100644 --- a/src/rapidfuzz/utils.pyi +++ b/src/rapidfuzz/utils.pyi @@ -3,7 +3,8 @@ from 
__future__ import annotations -from typing import Hashable, Sequence, TypeVar +from collections.abc import Hashable, Sequence +from typing import TypeVar _StringType = TypeVar("_StringType", bound=Sequence[Hashable]) diff --git a/tests/freezeTools/script.py b/tests/freezeTools/script.py new file mode 100644 index 00000000..d54f6d39 --- /dev/null +++ b/tests/freezeTools/script.py @@ -0,0 +1,7 @@ +import rapidfuzz +from rapidfuzz.distance import metrics_py +from rapidfuzz.distance import metrics_cpp +rapidfuzz.distance.Levenshtein.distance('test', 'teste') +metrics_py.levenshtein_distance('test', 'teste') +metrics_cpp.levenshtein_distance('test', 'teste') +1/0 \ No newline at end of file diff --git a/src/rapidfuzz/generate.sh b/tools/generate_cython.sh similarity index 93% rename from src/rapidfuzz/generate.sh rename to tools/generate_cython.sh index 43a9bbaa..c8501a99 100755 --- a/src/rapidfuzz/generate.sh +++ b/tools/generate_cython.sh @@ -1,5 +1,5 @@ #!/bin/sh -curdir="${0%/*}" +curdir="${0%/*}/../src/rapidfuzz" generate_cython() { diff --git a/tools/generate_python.py b/tools/generate_python.py new file mode 100755 index 00000000..9a40d996 --- /dev/null +++ b/tools/generate_python.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (C) 2025 Max Bachmann +from __future__ import annotations + +import subprocess +from pathlib import Path + +import isort + +format = """ +# SPDX-License-Identifier: MIT +# Copyright (C) 2025 Max Bachmann +from __future__ import annotations + +import contextlib +import os + +from rapidfuzz._feature_detector import AVX2, SSE2, supports + +__all__ = [ + {exports} +] + +_impl = os.environ.get("RAPIDFUZZ_IMPLEMENTATION") +if _impl == "cpp": + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from {module}_cpp_avx2 import {includes} # pyright: ignore[reportMissingImports] + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + 
from {module}_cpp_sse2 import {includes} # pyright: ignore[reportMissingImports] + imported = True + + if not imported: + from {module}_cpp import {includes} # pyright: ignore[reportMissingImports] +elif _impl == "python": + from {module}_py import {includes} +else: + imported = False + if supports(AVX2): + with contextlib.suppress(ImportError): + from {module}_cpp_avx2 import {includes} # pyright: ignore[reportMissingImports] + imported = True + + if not imported and supports(SSE2): + with contextlib.suppress(ImportError): + from {module}_cpp_sse2 import {includes} # pyright: ignore[reportMissingImports] + imported = True + + if not imported: + with contextlib.suppress(ImportError): + from {module}_cpp import {includes} # pyright: ignore[reportMissingImports] + imported = True + + if not imported: + from {module}_py import {includes} +""" + + +def generate(module, importModule, includes): + segments = module.split(".") + segments[-1] = segments[-1] + ".py" + path = Path(__file__).parent.parent / "src" + for segment in segments: + path = path.joinpath(segment) + + print(f"generating {path}") + if isinstance(includes, list): + includesStr = ", ".join(includes) + exportStr = ", ".join(f'"{x}"' for x in includes) + else: + includesStr = ", ".join(f"{k} as {v}" for k, v in includes.items()) + exportStr = ", ".join(f'"{x}"' for x in includes.values()) + + formatted = format.format(module=importModule, includes=includesStr, exports=exportStr) + + config = isort.settings.Config(combine_as_imports=True) + formatted = isort.code(formatted, config=config) + + with path.open("w", encoding="utf-8") as f: + f.write(formatted) + + print(f"formatting {path} using ruff") + subprocess.run(["ruff", "format", path], check=False) + subprocess.run(["ruff", "check", path, "--fix"], check=False) + + +generate( + "rapidfuzz.fuzz", + "rapidfuzz.fuzz", + [ + "ratio", + "partial_ratio", + "partial_ratio_alignment", + "token_sort_ratio", + "token_set_ratio", + "token_ratio", + 
"partial_token_sort_ratio", + "partial_token_set_ratio", + "partial_token_ratio", + "WRatio", + "QRatio", + ], +) + + +generate( + "rapidfuzz.process", + "rapidfuzz.process", + [ + "extract", + "extractOne", + "extract_iter", + "cdist", + "cpdist", + ], +) + +generate( + "rapidfuzz.utils", + "rapidfuzz.utils", + [ + "default_process", + ], +) + +generate( + "rapidfuzz.distance._initialize", + "rapidfuzz.distance._initialize", + [ + "Editop", + "Editops", + "Opcode", + "Opcodes", + "ScoreAlignment", + "MatchingBlock", + ], +) + +generate( + "rapidfuzz.distance.DamerauLevenshtein", + "rapidfuzz.distance.metrics", + { + "damerau_levenshtein_distance": "distance", + "damerau_levenshtein_similarity": "similarity", + "damerau_levenshtein_normalized_distance": "normalized_distance", + "damerau_levenshtein_normalized_similarity": "normalized_similarity", + }, +) + +generate( + "rapidfuzz.distance.Hamming", + "rapidfuzz.distance.metrics", + { + "hamming_distance": "distance", + "hamming_similarity": "similarity", + "hamming_normalized_distance": "normalized_distance", + "hamming_normalized_similarity": "normalized_similarity", + "hamming_editops": "editops", + "hamming_opcodes": "opcodes", + }, +) + +generate( + "rapidfuzz.distance.Indel", + "rapidfuzz.distance.metrics", + { + "indel_distance": "distance", + "indel_similarity": "similarity", + "indel_normalized_distance": "normalized_distance", + "indel_normalized_similarity": "normalized_similarity", + "indel_editops": "editops", + "indel_opcodes": "opcodes", + }, +) + +generate( + "rapidfuzz.distance.Jaro", + "rapidfuzz.distance.metrics", + { + "jaro_distance": "distance", + "jaro_similarity": "similarity", + "jaro_normalized_distance": "normalized_distance", + "jaro_normalized_similarity": "normalized_similarity", + }, +) + +generate( + "rapidfuzz.distance.JaroWinkler", + "rapidfuzz.distance.metrics", + { + "jaro_winkler_distance": "distance", + "jaro_winkler_similarity": "similarity", + 
"jaro_winkler_normalized_distance": "normalized_distance", + "jaro_winkler_normalized_similarity": "normalized_similarity", + }, +) + +generate( + "rapidfuzz.distance.LCSseq", + "rapidfuzz.distance.metrics", + { + "lcs_seq_distance": "distance", + "lcs_seq_similarity": "similarity", + "lcs_seq_normalized_distance": "normalized_distance", + "lcs_seq_normalized_similarity": "normalized_similarity", + "lcs_seq_editops": "editops", + "lcs_seq_opcodes": "opcodes", + }, +) + +generate( + "rapidfuzz.distance.Levenshtein", + "rapidfuzz.distance.metrics", + { + "levenshtein_distance": "distance", + "levenshtein_similarity": "similarity", + "levenshtein_normalized_distance": "normalized_distance", + "levenshtein_normalized_similarity": "normalized_similarity", + "levenshtein_editops": "editops", + "levenshtein_opcodes": "opcodes", + }, +) + +generate( + "rapidfuzz.distance.OSA", + "rapidfuzz.distance.metrics", + { + "osa_distance": "distance", + "osa_similarity": "similarity", + "osa_normalized_distance": "normalized_distance", + "osa_normalized_similarity": "normalized_similarity", + }, +) + +generate( + "rapidfuzz.distance.Postfix", + "rapidfuzz.distance.metrics", + { + "postfix_distance": "distance", + "postfix_similarity": "similarity", + "postfix_normalized_distance": "normalized_distance", + "postfix_normalized_similarity": "normalized_similarity", + }, +) + +generate( + "rapidfuzz.distance.Prefix", + "rapidfuzz.distance.metrics", + { + "prefix_distance": "distance", + "prefix_similarity": "similarity", + "prefix_normalized_distance": "normalized_distance", + "prefix_normalized_similarity": "normalized_similarity", + }, +) diff --git a/tools/test_process_typing.py b/tools/test_process_typing.py index 756c16a6..0fb7dad9 100644 --- a/tools/test_process_typing.py +++ b/tools/test_process_typing.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Generator, Hashable, Sequence +from collections.abc import Generator, Hashable, Sequence import numpy as 
np import numpy.typing as npt