From 9563103d59daf2b6ecc262a27f44c2ca89736cce Mon Sep 17 00:00:00 2001 From: Jonathan Claudius Date: Fri, 7 Sep 2018 17:22:46 -0400 Subject: [PATCH 1/3] Add prototype model for GemTypo --- app/models/gem_typo.rb | 46 ++++++++++++++++++++++ test/unit/gem_typo_test.rb | 78 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 app/models/gem_typo.rb create mode 100644 test/unit/gem_typo_test.rb diff --git a/app/models/gem_typo.rb b/app/models/gem_typo.rb new file mode 100644 index 00000000000..0b266056003 --- /dev/null +++ b/app/models/gem_typo.rb @@ -0,0 +1,46 @@ +require 'rubygems/text' + +class GemTypo + PROTECTED_GEMS = [ + 'rspec-core', + 'diff-lcs', + 'rspec-expectations', + 'rspec-mocks', + 'rspec', + 'bundler', + 'rspec-support', + 'multi_json', + 'rack', + 'rake' + ].freeze + + DISTANCE_THRESHOLD = 1 + + GEM_EXCEPTIONS = [ + 'rspec-coreZ' + # Add exceptions here to manage gems which share a close distance, + # but are manually reviewed and accepted by rubygems team + ].freeze + + include Gem::Text + + def initialize(rubygem_name, opts = {}) + @rubygem_name = rubygem_name + @protected_gems = opts[:protected_gems] || GemTypo::PROTECTED_GEMS + @distance_threshold = opts[:distance_threshold] || GemTypo::DISTANCE_THRESHOLD + @gem_exceptions = opts[:gem_exceptions] || GemTypo::GEM_EXCEPTIONS + end + + def protected_typo? + @protected_gems.each do |protected_gem| + return false if @rubygem_name == protected_gem + distance = levenshtein_distance(@rubygem_name, protected_gem) + if distance <= @distance_threshold && + !@gem_exceptions.include?(@rubygem_name) + return true + end + end + + false + end +end diff --git a/test/unit/gem_typo_test.rb b/test/unit/gem_typo_test.rb new file mode 100644 index 00000000000..90ebbc41f9c --- /dev/null +++ b/test/unit/gem_typo_test.rb @@ -0,0 +1,78 @@ +require 'test_helper' +require 'gem_typo' + +class GemTypoTest < ActiveSupport::TestCase + teardown do + Rails.cache.clear + end + + should 'return false for exact match' do + gem_typo = GemTypo.new('rspec-core') + assert_equal false, gem_typo.protected_typo? + end + + should 'return true for 1 char distance match' do + gem_typo = GemTypo.new('rspec-core2') + assert_equal true, gem_typo.protected_typo? + end + + should 'return false for 2 char distance match' do + gem_typo = GemTypo.new('rspec-core12') + assert_equal false, gem_typo.protected_typo? + end + + should 'return false for 3 char distance match' do + gem_typo = GemTypo.new('rspec-core123') + assert_equal false, gem_typo.protected_typo? + end + + should 'return false for 1 char distance match on the exception list' do + gem_typo = GemTypo.new('rspec-coreZ') + assert_equal false, gem_typo.protected_typo? + end + + should 'allow customized protected_gems' do + opts = { + protected_gems: ["hello"] + } + + gem_typo = GemTypo.new('hello', opts) + assert_equal false, gem_typo.protected_typo? + + gem_typo = GemTypo.new('hello1', opts) + assert_equal true, gem_typo.protected_typo? + end + + should 'allow customized distance_threshold' do + opts = { + distance_threshold: 3 + } + + gem_typo = GemTypo.new('rack', opts) + assert_equal false, gem_typo.protected_typo? + + gem_typo = GemTypo.new('rack1', opts) + assert_equal true, gem_typo.protected_typo? + + gem_typo = GemTypo.new('rack12', opts) + assert_equal true, gem_typo.protected_typo? + + gem_typo = GemTypo.new('rack123', opts) + assert_equal true, gem_typo.protected_typo? + + gem_typo = GemTypo.new('rack1234', opts) + assert_equal false, gem_typo.protected_typo? + end + + should 'allow customized protected_gem_exceptions' do + opts = { + gem_exceptions: ["rake1"] + } + + gem_typo = GemTypo.new('rake', opts) + assert_equal false, gem_typo.protected_typo? + + gem_typo = GemTypo.new('rake1', opts) + assert_equal false, gem_typo.protected_typo? + end +end From f667870698b0d082311701e168c129bf2b8d4834 Mon Sep 17 00:00:00 2001 From: Jonathan Claudius Date: Fri, 7 Sep 2018 18:40:21 -0400 Subject: [PATCH 2/3] Add protected_gem_typo_protection to rubygem validation --- app/models/rubygem.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app/models/rubygem.rb b/app/models/rubygem.rb index 9388d0fe95e..21ddee42a6e 100644 --- a/app/models/rubygem.rb +++ b/app/models/rubygem.rb @@ -18,6 +18,7 @@ class Rubygem < ApplicationRecord uniqueness: { case_sensitive: false }, if: :needs_name_validation? validate :blacklist_names_exclusion + validate :protected_gem_typo_protection after_create :update_unresolved before_destroy :mark_unresolved @@ -308,6 +309,12 @@ def blacklist_names_exclusion errors.add :name, "'#{name}' is a reserved gem name." end + def protected_gem_typo_protection + gem_typo = GemTypo.new(name) + return unless gem_typo.protected_typo? + errors.add :name, "'#{name}' is too close to a typo-protected gem." + end + def update_unresolved Dependency.where(unresolved_name: name).find_each do |dependency| dependency.update_resolved(self) From 5cd2749fbe5fd53193aaa474a1444760252962b1 Mon Sep 17 00:00:00 2001 From: Aditya Prakash Date: Sun, 23 Jun 2019 16:29:38 +0530 Subject: [PATCH 3/3] Update gem typo logic and tests Use downloads count as threshold instead of static list. Limit validation to new gems. Update distance threshold as mentioned in segiddins PR. Existing gem we may consider blocking: https://gist.github.com/sonalkr132/af05b030af793ce17a69245152d5aa5f total: 4859 (2.98%) --- app/models/gem_typo.rb | 56 ++++----- app/models/rubygem.rb | 7 +- .../api/v1/rubygems_controller_test.rb | 14 +++ test/unit/gem_typo_test.rb | 114 ++++++------------ 4 files changed, 84 insertions(+), 107 deletions(-) diff --git a/app/models/gem_typo.rb b/app/models/gem_typo.rb index 0b266056003..d0585e286cc 100644 --- a/app/models/gem_typo.rb +++ b/app/models/gem_typo.rb @@ -1,46 +1,42 @@ -require 'rubygems/text' +require "rubygems/text" class GemTypo - PROTECTED_GEMS = [ - 'rspec-core', - 'diff-lcs', - 'rspec-expectations', - 'rspec-mocks', - 'rspec', - 'bundler', - 'rspec-support', - 'multi_json', - 'rack', - 'rake' - ].freeze - - DISTANCE_THRESHOLD = 1 - - GEM_EXCEPTIONS = [ - 'rspec-coreZ' - # Add exceptions here to manage gems which share a close distance, - # but are manually reviewed and accepted by rubygems team - ].freeze + attr_reader :protected_gem include Gem::Text - def initialize(rubygem_name, opts = {}) - @rubygem_name = rubygem_name - @protected_gems = opts[:protected_gems] || GemTypo::PROTECTED_GEMS - @distance_threshold = opts[:distance_threshold] || GemTypo::DISTANCE_THRESHOLD - @gem_exceptions = opts[:gem_exceptions] || GemTypo::GEM_EXCEPTIONS + DOWNLOADS_THRESHOLD = 10_000_000 + SIZE_THRESHOLD = 4 + + def initialize(rubygem_name) + @rubygem_name = rubygem_name.downcase + @distance_threshold = distance_threshold end def protected_typo? - @protected_gems.each do |protected_gem| - return false if @rubygem_name == protected_gem + return false if @rubygem_name.size < GemTypo::SIZE_THRESHOLD + + protected_gems.each do |protected_gem| distance = levenshtein_distance(@rubygem_name, protected_gem) - if distance <= @distance_threshold && - !@gem_exceptions.include?(@rubygem_name) + if distance <= @distance_threshold + @protected_gem = protected_gem return true end end false end + + private + + def distance_threshold + @rubygem_name.size == GemTypo::SIZE_THRESHOLD ? 1 : 2 + end + + def protected_gems + Rubygem.joins(:gem_download) + .where("gem_downloads.count > ?", GemTypo::DOWNLOADS_THRESHOLD) + .where.not(name: @rubygem_name) + .pluck(:name) + end end diff --git a/app/models/rubygem.rb b/app/models/rubygem.rb index 21ddee42a6e..49f6b9ddce5 100644 --- a/app/models/rubygem.rb +++ b/app/models/rubygem.rb @@ -18,7 +18,7 @@ class Rubygem < ApplicationRecord uniqueness: { case_sensitive: false }, if: :needs_name_validation? validate :blacklist_names_exclusion - validate :protected_gem_typo_protection + validate :protected_gem_typo, on: :create after_create :update_unresolved before_destroy :mark_unresolved @@ -309,10 +309,11 @@ def blacklist_names_exclusion errors.add :name, "'#{name}' is a reserved gem name." end - def protected_gem_typo_protection + def protected_gem_typo gem_typo = GemTypo.new(name) + return unless gem_typo.protected_typo? - errors.add :name, "'#{name}' is too close to a typo-protected gem." + errors.add :name, "'#{name}' is too close to typo-protected gem: #{gem_typo.protected_gem}" end def update_unresolved diff --git a/test/functional/api/v1/rubygems_controller_test.rb b/test/functional/api/v1/rubygems_controller_test.rb index 0a9f3076502..2a2108fb394 100644 --- a/test/functional/api/v1/rubygems_controller_test.rb +++ b/test/functional/api/v1/rubygems_controller_test.rb @@ -308,6 +308,20 @@ def self.should_respond_to(format) end end + context "On POST to create with a protected gem name" do + setup do + above_downloads_thres = GemTypo::DOWNLOADS_THRESHOLD + 1 + create(:rubygem, name: "best", downloads: above_downloads_thres) + post :create, body: gem_file("test-1.0.0.gem").read + end + + should respond_with :forbidden + should "not register new gem" do + assert_equal 1, Rubygem.count + assert_equal "There was a problem saving your gem: Name 'test' is too close to typo-protected gem: best", @response.body + end + end + context "On POST to create for someone else's gem" do setup do @other_user = create(:user) diff --git a/test/unit/gem_typo_test.rb b/test/unit/gem_typo_test.rb index 90ebbc41f9c..7fa51811d95 100644 --- a/test/unit/gem_typo_test.rb +++ b/test/unit/gem_typo_test.rb @@ -1,78 +1,44 @@ -require 'test_helper' -require 'gem_typo' +require "test_helper" class GemTypoTest < ActiveSupport::TestCase - teardown do - Rails.cache.clear - end - - should 'return false for exact match' do - gem_typo = GemTypo.new('rspec-core') - assert_equal false, gem_typo.protected_typo? - end - - should 'return true for 1 char distance match' do - gem_typo = GemTypo.new('rspec-core2') - assert_equal true, gem_typo.protected_typo? - end - - should 'return false for 2 char distance match' do - gem_typo = GemTypo.new('rspec-core12') - assert_equal false, gem_typo.protected_typo? - end - - should 'return false for 3 char distance match' do - gem_typo = GemTypo.new('rspec-core123') - assert_equal false, gem_typo.protected_typo? - end - - should 'return false for 1 char distance match on the exception list' do - gem_typo = GemTypo.new('rspec-coreZ') - assert_equal false, gem_typo.protected_typo? - end - - should 'allow customized protected_gems' do - opts = { - protected_gems: ["hello"] - } - - gem_typo = GemTypo.new('hello', opts) - assert_equal false, gem_typo.protected_typo? - - gem_typo = GemTypo.new('hello1', opts) - assert_equal true, gem_typo.protected_typo? - end - - should 'allow customized distance_threshold' do - opts = { - distance_threshold: 3 - } - - gem_typo = GemTypo.new('rack', opts) - assert_equal false, gem_typo.protected_typo? - - gem_typo = GemTypo.new('rack1', opts) - assert_equal true, gem_typo.protected_typo? - - gem_typo = GemTypo.new('rack12', opts) - assert_equal true, gem_typo.protected_typo? - - gem_typo = GemTypo.new('rack123', opts) - assert_equal true, gem_typo.protected_typo? - - gem_typo = GemTypo.new('rack1234', opts) - assert_equal false, gem_typo.protected_typo? - end - - should 'allow customized protected_gem_exceptions' do - opts = { - gem_exceptions: ["rake1"] - } - - gem_typo = GemTypo.new('rake', opts) - assert_equal false, gem_typo.protected_typo? - - gem_typo = GemTypo.new('rake1', opts) - assert_equal false, gem_typo.protected_typo? + context "with above downloads threshold gem" do + setup do + above_downloads_thres = GemTypo::DOWNLOADS_THRESHOLD + 1 + create(:rubygem, name: "four", downloads: above_downloads_thres) + end + + should "return false for exact match" do + gem_typo = GemTypo.new("four") + assert_equal false, gem_typo.protected_typo? + end + + should "return false for gem name size below protected threshold" do + gem_typo = GemTypo.new("fou") + assert_equal false, gem_typo.protected_typo? + end + + context "size equals protected threshold" do + should "return true for one character distance" do + gem_typo = GemTypo.new("fous") + assert_equal true, gem_typo.protected_typo? + end + + should "return false for two character distance" do + gem_typo = GemTypo.new("foss") + assert_equal false, gem_typo.protected_typo? + end + end + + context "size above protected threshold" do + should "return true for two character distance" do + gem_typo = GemTypo.new("fourss") + assert_equal true, gem_typo.protected_typo? + end + + should "return false for three characher distance" do + gem_typo = GemTypo.new("foursss") + assert_equal false, gem_typo.protected_typo? + end + end end end