diff --git a/CHANGELOG.md b/CHANGELOG.md index 744f477..40d3ac7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ CHANGELOG
============== +## 6.7.4 (2021-11-29) +--------------------- +- Update `editdistpy` dependency version +- Update `LevenshteinFast` and `DamerauOsaFast` to match the functionality of the `editdistpy` library + ## 6.7.3 (2021-11-27) --------------------- - Update `editdistpy` dependency version diff --git a/symspellpy/__init__.py b/symspellpy/__init__.py index 657d17d..6ca3af1 100644 --- a/symspellpy/__init__.py +++ b/symspellpy/__init__.py @@ -19,7 +19,7 @@ .. moduleauthor:: Wolf Garbe """ -__version__ = "6.7.3" +__version__ = "6.7.4" import logging import os diff --git a/symspellpy/editdistance.py b/symspellpy/editdistance.py index 859eab3..38ad297 100644 --- a/symspellpy/editdistance.py +++ b/symspellpy/editdistance.py @@ -445,21 +445,6 @@ def distance(self, string_1: str, string_2: str, max_distance: int) -> int: are equivalent, otherwise a positive number whose magnitude increases as difference between the strings increases. """ - if string_1 is None or string_2 is None: - return helpers.null_distance_results(string_1, string_2, max_distance) - if max_distance <= 0: - return 0 if string_1 == string_2 else -1 - max_distance = int(min(2 ** 31 - 1, max_distance)) - # if strings of different lengths, ensure shorter string is in string_1. - # This can result in a little faster speed by spending more time spinning - # just the inner loop during the main processing. - len_1 = len(string_1) - len_2 = len(string_2) - if len_1 > len_2: - string_2, string_1 = string_1, string_2 - len_2, len_1 = len_1, len_2 - if len_2 - len_1 > max_distance: - return -1 return levenshtein.distance(string_1, string_2, max_distance) @@ -483,19 +468,4 @@ def distance(self, string_1: str, string_2: str, max_distance: int) -> int: are equivalent, otherwise a positive number whose magnitude increases as difference between the strings increases. """ - if string_1 is None or string_2 is None: - return helpers.null_distance_results(string_1, string_2, max_distance) - if max_distance <= 0: - return 0 if string_1 == string_2 else -1 - max_distance = int(min(2 ** 31 - 1, max_distance)) - # if strings of different lengths, ensure shorter string is in string_1. - # This can result in a little faster speed by spending more time spinning - # just the inner loop during the main processing. - len_1 = len(string_1) - len_2 = len(string_2) - if len_1 > len_2: - string_2, string_1 = string_1, string_2 - len_2, len_1 = len_1, len_2 - if len_2 - len_1 > max_distance: - return -1 return damerau_osa.distance(string_1, string_2, max_distance) diff --git a/tests/benchmarks.ipynb b/tests/benchmarks.ipynb index 5576dc8..6a491e3 100644 --- a/tests/benchmarks.ipynb +++ b/tests/benchmarks.ipynb @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -106,17 +106,17 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "214 µs ± 770 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", - "130 µs ± 538 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", - "192 µs ± 346 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", - "130 µs ± 369 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" + "219 µs ± 1.35 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", + "130 µs ± 692 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", + "195 µs ± 775 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", + "130 µs ± 925 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" ] } ], @@ -129,17 +129,17 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "20.5 ms ± 175 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", - "10.9 ms ± 217 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", - "18 ms ± 67 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", - "10.5 ms ± 125 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "21.8 ms ± 207 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", + "11 ms ± 223 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", + "19.1 ms ± 64.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", + "10.3 ms ± 49.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -152,17 +152,17 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "1.62 ms ± 2.94 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", - "1.62 ms ± 7.84 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", - "1.75 ms ± 90.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", - "1.65 ms ± 12.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + "1.64 ms ± 23.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", + "1.65 ms ± 19.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", + "1.63 ms ± 4.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", + "1.63 ms ± 3.91 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" ] } ],