Skip to content

Commit

Permalink
Improve performance (#437)
Browse files Browse the repository at this point in the history
* Don't use treemap for finding pages and file
* Reimplement global find
* Ensure File.canonical_pat prevents path traversal. Add test.
* GitAccess: improve #tree! performance
* Expect file mode to be an Integer
* Refactor Page and File to use Pathname
* file.rb: remove unneeded version.to_s call (see gollum/gollum#1972)
* Provide compatibility with latest minitest.
  • Loading branch information
dometto authored Aug 1, 2023
1 parent 0db2ff5 commit ef19cba
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 45 deletions.
2 changes: 1 addition & 1 deletion gemspec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def specification(version, default_adapter, platform = nil)
s.add_development_dependency 'kramdown', '~> 2.3'
s.add_development_dependency 'kramdown-parser-gfm', '~> 1.1.0'
s.add_development_dependency 'RedCloth', '~> 4.2.9'
s.add_development_dependency 'mocha', '~> 1.11'
s.add_development_dependency 'mocha', '~> 2.0'
s.add_development_dependency 'shoulda', '~> 4.0'
s.add_development_dependency 'wikicloth', '~> 0.8.3'
s.add_development_dependency 'bibtex-ruby', '~> 6.0'
Expand Down
4 changes: 2 additions & 2 deletions gollum-lib.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ require File.join(File.dirname(__FILE__), 'lib', 'gollum-lib', 'version.rb')
# This file needs to conditionally define the default adapter for MRI and Java, because this is the file that is included from the Gemfile.
# In addition, the default Java adapter needs to be defined in gollum-lib_java.gemspec beause that file is used to *build* the Java gem.
if RUBY_PLATFORM == 'java' then
default_adapter = ['gollum-rjgit_adapter', '~> 1.0']
default_adapter = ['gollum-rjgit_adapter', '~> 2.0']
else
default_adapter = ['gollum-rugged_adapter', '~> 2.0']
default_adapter = ['gollum-rugged_adapter', '~> 3.0']
end
Gem::Specification.new &specification(Gollum::Lib::VERSION, default_adapter)
2 changes: 1 addition & 1 deletion gollum-lib_java.gemspec
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
require File.join(File.dirname(__FILE__), 'gemspec.rb')
require File.join(File.dirname(__FILE__), 'lib', 'gollum-lib', 'version.rb')
default_adapter = ['gollum-rjgit_adapter', '~> 1.0']
default_adapter = ['gollum-rjgit_adapter', '~> 2.0']
Gem::Specification.new &specification(Gollum::Lib::VERSION, default_adapter, "java")
56 changes: 33 additions & 23 deletions lib/gollum-lib/file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,21 @@ class File

class << self

# For use with self.find: returns true if the given query corresponds to the in-repo path of the BlobEntry.
#
# query - The String path to match.
# entry - The BlobEntry to check against.
# global_match - (Not implemented for File, see Page.path_match)
# hyphened_tags - If true, replace spaces in match_path with hyphens.
# case_insensitive - If true, compare query and match_path case-insensitively
def path_match(query, entry, global_match = false, hyphened_tags = false, case_insensitive = false)
path_compare(query, ::File.join('/', entry.path), hyphened_tags, case_insensitive)
end
# Get a canonical path to a file.
# Ensures that the result is always under page_file_dir (prevents path traversal), if set.
# Removes leading slashes.
#
# path - One or more String path elements to join together. `nil` values are ignored.
# page_file_dir - kwarg String, default: nil
def canonical_path(*path, page_file_dir: nil)
prefix = Pathname.new('/') + page_file_dir.to_s
rest = Pathname.new('/').join(*path.compact).cleanpath.to_s[1..-1]
result = (prefix + rest).cleanpath.to_s[1..-1]
result.sub!(/^\/+/, '') if Gem.win_platform? # On Windows, Pathname#cleanpath will leave double slashes at the start of a path, so replace all (not just the first) leading slashes
result
end

# For use with self.path_match: returns true if 'query' and 'match_path' match, strictly or taking account of the following parameters:
# For use with self.find: returns true if 'query' and 'match_path' match, strictly or taking account of the following parameters:
# hyphened_tags - If true, replace spaces in match_path with hyphens.
# case_insensitive - If true, compare query and match_path case-insensitively
def path_compare(query, match_path, hyphened_tags, case_insensitive)
Expand All @@ -41,24 +44,31 @@ def path_compare(query, match_path, hyphened_tags, case_insensitive)
# version - The String version ID to find.
# try_on_disk - If true, try to return just a reference to a file
# that exists on the disk.
# global_match - If true, find a File matching path's filename, but not it's directory (so anywhere in the repo)
# global_match - If true, find a File matching path's filename, but not its directory (so anywhere in the repo)
#
# Returns a Gollum::File or nil if the file could not be found. Note
# that if you specify try_on_disk=true, you may or may not get a file
# for which on_disk? is actually true.
def self.find(wiki, path, version, try_on_disk = false, global_match = false)
map = wiki.tree_map_for(version.to_s)

query_path = Pathname.new(::File.join(['/', wiki.page_file_dir, path].compact)).cleanpath.to_s
query_path.sub!(/^\/\//, '/') if Gem.win_platform? # On Windows, Pathname#cleanpath will leave double slashes at the start of a path intact, so sub them out.
query_path = self.canonical_path(path, page_file_dir: wiki.page_file_dir)
dir, filename = Pathname.new(query_path).split
dir = dir.to_s

begin
entry = map.detect do |entry|
path_match(query_path, entry, global_match, wiki.hyphened_tag_lookup, wiki.case_insensitive_tag_lookup)
if global_match && self.respond_to?(:global_find) # Only implemented for Gollum::Page
return self.global_find(wiki, version, query_path, try_on_disk)
else
begin
root = wiki.commit_for(version)
return nil unless root
tree = dir == '.' ? root.tree : root.tree / dir
return nil unless tree
entry = tree.find_blob do |blob_name|
path_compare(filename.to_s, blob_name, wiki.hyphened_tag_lookup, wiki.case_insensitive_tag_lookup)
end
entry ? self.new(wiki, entry, dir, version, try_on_disk) : nil
rescue Gollum::Git::NoSuchShaFound
nil
end
entry ? self.new(wiki, entry.blob(wiki.repo), entry.dir, version, try_on_disk) : nil
rescue Gollum::Git::NoSuchShaFound
nil
end
end

Expand All @@ -74,7 +84,7 @@ def self.find(wiki, path, version, try_on_disk = false, global_match = false)
def initialize(wiki, blob, path, version, try_on_disk = false)
@wiki = wiki
@blob = blob
@path = "#{path}/#{blob.name}"[1..-1]
@path = self.class.canonical_path(path, blob.name)
@version = version.is_a?(Gollum::Git::Commit) ? version : @wiki.commit_for(version)
get_disk_reference if try_on_disk
end
Expand Down
10 changes: 3 additions & 7 deletions lib/gollum-lib/git_access.rb
Original file line number Diff line number Diff line change
Expand Up @@ -161,15 +161,11 @@ def tree!(sha)
items = []
tree.each do |entry|
if entry[:type] == 'blob'
items << BlobEntry.new(entry[:sha], entry[:path], entry[:size], entry[:mode].to_i(8))
next if @page_file_dir && !entry[:path].start_with?("#{@page_file_dir}/")
items << BlobEntry.new(entry[:sha], entry[:path], entry[:size], entry[:mode])
end
end
if (dir = @page_file_dir)
regex = /^#{dir}\//
items.select { |i| i.path =~ regex }
else
items
end
items
end

# Reads the content from the Git db at the given SHA.
Expand Down
35 changes: 26 additions & 9 deletions lib/gollum-lib/page.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,39 @@ class Page < Gollum::File
SUBPAGENAMES = [:header, :footer, :sidebar]

class << self
# For use with self.find: returns true if the given query corresponds to the in-repo path of the BlobEntry.
# For use with self.global_find: returns true if the given query corresponds to the in-repo path of the BlobEntry.
#
# query - The String path to match.
# entry - The BlobEntry to check against.
# global_match - If true, find a File matching path's filename, but not its directory (so anywhere in the repo)
def path_match(query, entry, global_match = false, hyphened_tags = false, case_insensitive = false)
def global_path_match(query, entry, hyphened_tags = false, case_insensitive = false)
return false if "#{entry.name}".empty?
return false unless valid_extension?(entry.name)
entry_name = valid_extension?(query) ? entry.name : strip_filename(entry.name)
match_path = ::File.join([
'/',
global_match ? nil : entry.dir,
entry_name
].compact)
match_path = Pathname.new('/').join(*[
entry.dir,
entry.name
].compact
).to_s
path_compare(query, match_path, hyphened_tags, case_insensitive)
end

def global_find(wiki, version, query, try_on_disk)
map = wiki.tree_map_for(version.to_s)
begin
entry = map.detect do |entry|
global_path_match(query, entry, wiki.hyphened_tag_lookup, wiki.case_insensitive_tag_lookup)
end
entry ? self.new(wiki, entry.blob(wiki.repo), entry.dir, version, try_on_disk) : nil
rescue Gollum::Git::NoSuchShaFound
nil
end
end

def path_compare(query, match_path, hyphened_tags, case_insensitive)
return false unless valid_extension?(match_path)
cmp = valid_extension?(query) ? match_path : strip_filename(match_path)
super(query, cmp, hyphened_tags, case_insensitive)
end

end

# Checks if a filename has a valid, registered extension
Expand Down
4 changes: 2 additions & 2 deletions test/helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
# external
require 'rubygems'
require 'shoulda'
require 'mocha/test_unit'
require 'minitest/reporters'
require 'mocha/test_unit'
require 'twitter_cldr'
require 'tempfile'

Expand All @@ -24,7 +24,7 @@
require 'i18n'
I18n.enforce_available_locales = false

MiniTest::Reporters.use!
Minitest::Reporters.use!

dir = File.dirname(File.expand_path(__FILE__))
$LOAD_PATH.unshift(File.join(dir, '..', 'lib'))
Expand Down
11 changes: 11 additions & 0 deletions test/test_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@
@wiki = Gollum::Wiki.new(testpath("examples/lotr.git"))
end

test 'canonical_path method does not allow path traversal exploit' do
page_file_dir = nil
dir = 'pages'
file = '../../'
assert_equal Gollum::File.canonical_path(dir, file, page_file_dir: page_file_dir), ''
page_file_dir = 'pages'
assert_equal Gollum::File.canonical_path(dir, file, page_file_dir: page_file_dir), 'pages'
# also removes leading slashes
assert_equal Gollum::File.canonical_path('/foo/bar', page_file_dir: page_file_dir), 'pages/foo/bar'
end

test "existing file" do
commit = @wiki.repo.commits.first
file = @wiki.file("Mordor/todo.txt")
Expand Down

0 comments on commit ef19cba

Please sign in to comment.