Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Invalidate gem name using levenshtein distance for gems with ten million downloads #2037

Merged
merged 3 commits into from
Jul 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions app/models/gem_typo.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
require "rubygems/text"

class GemTypo
attr_reader :protected_gem

include Gem::Text

DOWNLOADS_THRESHOLD = 10_000_000
SIZE_THRESHOLD = 4

def initialize(rubygem_name)
@rubygem_name = rubygem_name.downcase
@distance_threshold = distance_threshold
end

def protected_typo?
return false if @rubygem_name.size < GemTypo::SIZE_THRESHOLD

protected_gems.each do |protected_gem|
distance = levenshtein_distance(@rubygem_name, protected_gem)
if distance <= @distance_threshold
@protected_gem = protected_gem
return true
end
end

false
end

private

def distance_threshold
@rubygem_name.size == GemTypo::SIZE_THRESHOLD ? 1 : 2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about to "memoize" this to not execute same call on every iteration?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved it to the initializer.

end

def protected_gems
Rubygem.joins(:gem_download)
.where("gem_downloads.count > ?", GemTypo::DOWNLOADS_THRESHOLD)
.where.not(name: @rubygem_name)
.pluck(:name)
end
end
8 changes: 8 additions & 0 deletions app/models/rubygem.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class Rubygem < ApplicationRecord
uniqueness: { case_sensitive: false },
if: :needs_name_validation?
validate :blacklist_names_exclusion
validate :protected_gem_typo, on: :create

after_create :update_unresolved
before_destroy :mark_unresolved
Expand Down Expand Up @@ -308,6 +309,13 @@ def blacklist_names_exclusion
errors.add :name, "'#{name}' is a reserved gem name."
end

def protected_gem_typo
gem_typo = GemTypo.new(name)

return unless gem_typo.protected_typo?
errors.add :name, "'#{name}' is too close to typo-protected gem: #{gem_typo.protected_gem}"
end

def update_unresolved
Dependency.where(unresolved_name: name).find_each do |dependency|
dependency.update_resolved(self)
Expand Down
14 changes: 14 additions & 0 deletions test/functional/api/v1/rubygems_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,20 @@ def self.should_respond_to(format)
end
end

context "On POST to create with a protected gem name" do
setup do
above_downloads_thres = GemTypo::DOWNLOADS_THRESHOLD + 1
create(:rubygem, name: "best", downloads: above_downloads_thres)
post :create, body: gem_file("test-1.0.0.gem").read
end

should respond_with :forbidden
should "not register new gem" do
assert_equal 1, Rubygem.count
assert_equal "There was a problem saving your gem: Name 'test' is too close to typo-protected gem: best", @response.body
end
end

context "On POST to create for someone else's gem" do
setup do
@other_user = create(:user)
Expand Down
44 changes: 44 additions & 0 deletions test/unit/gem_typo_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
require "test_helper"

class GemTypoTest < ActiveSupport::TestCase
context "with above downloads threshold gem" do
setup do
above_downloads_thres = GemTypo::DOWNLOADS_THRESHOLD + 1
create(:rubygem, name: "four", downloads: above_downloads_thres)
end

should "return false for exact match" do
gem_typo = GemTypo.new("four")
assert_equal false, gem_typo.protected_typo?
end

should "return false for gem name size below protected threshold" do
gem_typo = GemTypo.new("fou")
assert_equal false, gem_typo.protected_typo?
end

context "size equals protected threshold" do
should "return true for one character distance" do
gem_typo = GemTypo.new("fous")
assert_equal true, gem_typo.protected_typo?
end

should "return false for two character distance" do
gem_typo = GemTypo.new("foss")
assert_equal false, gem_typo.protected_typo?
end
end

context "size above protected threshold" do
should "return true for two character distance" do
gem_typo = GemTypo.new("fourss")
assert_equal true, gem_typo.protected_typo?
end

should "return false for three characher distance" do
gem_typo = GemTypo.new("foursss")
assert_equal false, gem_typo.protected_typo?
end
end
end
end