diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..66fd13c --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..1cdabc6 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,46 @@ +run: + concurrency: 4 + tests: true + +linters-settings: + errcheck: + check-type-assertions: false + check-blank: true + govet: + check-shadowing: true + golint: + min-confidence: 0.8 + goimports: + local-prefixes: github.com/rohitsubedi/levenshtein + gocyclo: + min-complexity: 10 + maligned: + suggest-new: true + dupl: + threshold: 75 + goconst: + min-len: 3 + min-occurrences: 3 + depguard: + list-type: blacklist + include-go-root: false + misspell: + locale: US + ignore-words: + - someword + lll: + line-length: 120 + tab-width: 1 + unused: + check-exported: false + unparam: + check-exported: false + nakedret: + max-func-lines: 20 + prealloc: + simple: true + range-loops: true # Report preallocation suggestions on range loops, true by default + for-loops: false # Report preallocation suggestions on for loops, false by default +linters: + enable-all: true + fast: false diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..65c2f36 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,19 @@ +language: go + +go_import_path: github.com/rohitsubedi/levenshtein + +before_install: + - export GO111MODULE=on + +go: + - 1.13.x + - 1.14.x + - 1.15.x + - tip + +install: + - go get github.com/stretchr/testify@v1.6.1 + - go get github.com/golangci/golangci-lint/cmd/golangci-lint + +script: + - make check diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9f88fb8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT 
License + +Copyright (c) 2020 Rohit Subedi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..d7100b3 --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +check: test lint + +test: ## Runs the unit tests. + @go test $(PKG_NAME) + +lint: ## Runs the linter. 
// increment is the cost charged for a single edit operation (insertion,
// deletion or substitution).
const increment = 1

// GetLevenshteinDistance returns the Levenshtein distance between str1 and
// str2: the minimum number of single-rune insertions, deletions and
// substitutions needed to turn one into the other.
//
// When caseSensitive is false, two runes that belong to the same Unicode
// simple case-folding orbit (for example 'a' and 'A') are treated as equal.
//
// The function runs in O(len(str1)*len(str2)) time and keeps a single scratch
// row of min(len(str1), len(str2))+1 ints. Distances are accumulated as int,
// so inputs longer than 65535 runes are handled without wrap-around.
func GetLevenshteinDistance(str1, str2 []rune, caseSensitive bool) int {
	rows, cols := len(str2), len(str1)

	// Turning an empty string into the other one costs one insertion per rune.
	if rows == 0 || cols == 0 {
		return max(rows, cols)
	}

	// The distance is symmetric, so keep the shorter input on the column axis
	// to make the scratch row as small as possible.
	if cols > rows {
		cols = rows
		str1, str2 = str2, str1
	}

	// row[j] holds the distance between the first i runes of str2 and the
	// first j runes of str1; it starts as row i == 0 (j insertions).
	row := make([]int, cols+1)
	for j := range row {
		row[j] = j * increment
	}

	for i, r2 := range str2 {
		// diag carries the previous row's value one column to the left, which
		// is about to be overwritten in row.
		diag := row[0]
		row[0] = (i + 1) * increment

		for j, r1 := range str1 {
			above := row[j+1]
			cost := diag

			if !isSameChar(r2, r1, caseSensitive) {
				// row[j] was already updated for this row: it is the left neighbor.
				cost = min(row[j], min(diag, above)) + increment
			}

			diag = above
			row[j+1] = cost
		}
	}

	return row[cols]
}

// isSameChar reports whether c1 and c2 count as the same rune. With
// caseSensitive set only identical runes match; otherwise runes that differ
// solely by case under Unicode simple case folding match as well.
func isSameChar(c1, c2 rune, caseSensitive bool) bool {
	if c1 == c2 {
		return true
	}

	if caseSensitive {
		return false
	}

	// Walk c1's case-folding orbit. SimpleFold returns its argument unchanged
	// for runes without case variants, so the loop ends immediately for them.
	for r := unicode.SimpleFold(c1); r != c1; r = unicode.SimpleFold(r) {
		if r == c2 {
			return true
		}
	}

	return false
}

// min returns the smaller of a and b.
func min(a, b int) int {
	if a <= b {
		return a
	}

	return b
}

// max returns the larger of a and b.
func max(a, b int) int {
	if a >= b {
		return a
	}

	return b
}
minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE-2341-0001"), []rune(destString), false) + assert.Equal(t, 1, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE-234100001"), []rune(destString), false) + assert.Equal(t, 1, minOperation) + // For string with 1 extra char + minOperation = levenshtein.GetLevenshteinDistance([]rune("DEE-2341-00001"), []rune(destString), false) + assert.Equal(t, 1, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE-23441-00001"), []rune(destString), false) + assert.Equal(t, 1, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE-2341-000001"), []rune(destString), false) + assert.Equal(t, 1, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE-2341--00001"), []rune(destString), false) + assert.Equal(t, 1, minOperation) + // For string with 2 char missing + minOperation = levenshtein.GetLevenshteinDistance([]rune("-2341-00001"), []rune(destString), false) + assert.Equal(t, 2, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE-21-00001"), []rune(destString), false) + assert.Equal(t, 2, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE-2341-000"), []rune(destString), false) + assert.Equal(t, 2, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE234100001"), []rune(destString), false) + assert.Equal(t, 2, minOperation) +} + +func TestMinOperationToConvertOneStringToAnother2(t *testing.T) { + destString := "DE-2341-00001" + // For string with 2 extra char + minOperation := levenshtein.GetLevenshteinDistance([]rune("DEE--2341-00001"), []rune(destString), false) + assert.Equal(t, 2, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE-2344-1-00001"), []rune(destString), false) + assert.Equal(t, 2, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE-2341-0000199"), []rune(destString), false) + 
assert.Equal(t, 2, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE--2341--00001"), []rune(destString), false) + assert.Equal(t, 2, minOperation) + // For string with more than 2 char missing + minOperation = levenshtein.GetLevenshteinDistance([]rune("2341-00001"), []rune(destString), false) + assert.Equal(t, 3, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE-34-001"), []rune(destString), false) + assert.Equal(t, 4, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DE34-001"), []rune(destString), false) + assert.Equal(t, 5, minOperation) + // For string with more than 2 extra char + minOperation = levenshtein.GetLevenshteinDistance([]rune("DEE--23341-00001"), []rune(destString), false) + assert.Equal(t, 3, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DEE--23341-000001"), []rune(destString), false) + assert.Equal(t, 4, minOperation) + minOperation = levenshtein.GetLevenshteinDistance([]rune("DEE--233414-000001"), []rune(destString), false) + assert.Equal(t, 5, minOperation) + // Test case sensitive + minOperation = levenshtein.GetLevenshteinDistance([]rune("ROHIT"), []rune("rohit"), true) + assert.Equal(t, 5, minOperation) + // Test case in sensitive + minOperation = levenshtein.GetLevenshteinDistance([]rune("ROHIT"), []rune("rohit"), false) + assert.Equal(t, 0, minOperation) + // UTF-8 characters + minOperation = levenshtein.GetLevenshteinDistance([]rune("rohit"), []rune("röhit"), false) + assert.Equal(t, 1, minOperation) + // Test empty string + minOperation = levenshtein.GetLevenshteinDistance([]rune("ROHIT"), []rune(""), true) + assert.Equal(t, 5, minOperation) + // Test empty string + minOperation = levenshtein.GetLevenshteinDistance([]rune("ö"), []rune(""), true) + assert.Equal(t, 1, minOperation) + // Test case in sensitive + minOperation = levenshtein.GetLevenshteinDistance([]rune("ROHIT"), []rune("tihor"), false) + assert.Equal(t, 4, minOperation) + 
// UTF-8 characters same + minOperation = levenshtein.GetLevenshteinDistance([]rune("röhit"), []rune("röhit"), false) + assert.Equal(t, 0, minOperation) +}