"""
Utility functions
=================
.. moduleauthor:: Benjamin Ye <GitHub: bbye98>
This module contains a collection of utility functions.
"""
from difflib import SequenceMatcher
from importlib.util import find_spec
from typing import Any, Union
if FOUND_LEVENSHTEIN := find_spec("Levenshtein") is not None:
import Levenshtein
if FOUND_NUMPY := find_spec("numpy") is not None:
import numpy as np
__all__ = ["format_multivalue", "gestalt_ratio", "levenshtein_ratio"]
[docs]
def gestalt_ratio(
reference: str, strings: Union[str, list[str]]
) -> Union[float, list[float], "np.ndarray[float]"]:
"""
Compute the Gestalt or Ratcliff–Obershelp ratios, a measure of
similarity, for strings with respect to a reference string.
Parameters
----------
reference : `str`
Reference string.
strings : `str` or `list`
Strings to compare with `reference`.
Returns
-------
ratios : `float`, `list`, or `numpy.ndarray`
Gestalt or Ratcliff–Obershelp ratios. If `strings` is a `str`, a
`float` is returned. If `strings` is a `list`, a `numpy.ndarray`
is returned if NumPy is installed; otherwise, a `list` is
returned.
"""
if isinstance(strings, str):
return SequenceMatcher(None, reference, strings).ratio()
gen = (SequenceMatcher(None, reference, s).ratio() for s in strings)
if FOUND_NUMPY:
return np.fromiter(gen, dtype=float, count=len(strings))
return list(gen)
[docs]
def levenshtein_ratio(
reference: str, strings: Union[str, list[str]]
) -> Union[float, list[float], "np.ndarray[float]"]:
"""
Compute the Levenshtein ratios, a measure of similarity, for
strings with respect to a reference string.
Parameters
----------
reference : `str`
Reference string.
strings : `str` or `list`
Strings to compare with `reference`.
Returns
-------
ratios : `float`, `list`, or `numpy.ndarray`
Levenshtein ratios. If `strings` is a `str`, a `float` is
returned. If `strings` is a `list`, a `numpy.ndarray` is
returned if NumPy is installed; otherwise, a `list` is returned.
"""
if not FOUND_LEVENSHTEIN:
emsg = (
"The Levenshtein module was not found, so "
"minim.utility.levenshtein_ratio() is unavailable."
)
raise ImportError(emsg)
if isinstance(strings, str):
return Levenshtein.ratio(reference, strings)
gen = (Levenshtein.ratio(reference, s) for s in strings)
if FOUND_NUMPY:
return np.fromiter(gen, dtype=float, count=len(strings))
return list(gen)