Skip to content

Commit

Permalink
Implement Hash.new(capacity:)
Browse files Browse the repository at this point in the history
[Feature #19236]

When building a large hash, pre-allocating it with enough
capacity can save many re-hashes and significantly improve
performance.

```
/opt/rubies/3.3.0/bin/ruby --disable=gems -rrubygems -I./benchmark/lib ./benchmark/benchmark-driver/exe/benchmark-driver \
	            --executables="compare-ruby::../miniruby-master -I.ext/common --disable-gem" \
	            --executables="built-ruby::./miniruby --disable-gem" \
	            --output=markdown --output-compare -v $(find ./benchmark -maxdepth 1 -name 'hash_new' -o -name '*hash_new*.yml' -o -name '*hash_new*.rb' | sort)
compare-ruby: ruby 3.4.0dev (2024-03-25T11:48:11Z master f53209f) +YJIT dev [arm64-darwin23]
last_commit=[ruby/irb] Cache RDoc::RI::Driver.new (ruby/irb#911)
built-ruby: ruby 3.4.0dev (2024-03-25T15:29:40Z hash-new-rb 77652b08a2) +YJIT dev [arm64-darwin23]
warming up...

|                    |compare-ruby|built-ruby|
|:-------------------|-----------:|---------:|
|new                 |      7.614M|    5.976M|
|                    |       1.27x|         -|
|new_with_capa_1k    |     13.931k|   15.698k|
|                    |           -|     1.13x|
|new_with_capa_100k  |     124.746|   148.283|
|                    |           -|     1.19x|
```
  • Loading branch information
byroot committed Mar 25, 2024
1 parent a31ca35 commit a74b891
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 47 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ lcov*.info
/config.status
/config.status.lineno
/configure
/confdefs.h
/conftest.c
/conftest.err
/coverage/simplecov
/coverage/simplecov-html
/coverage/doclie
Expand Down
16 changes: 16 additions & 0 deletions benchmark/hash_new.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
prelude: |
has_hash_with_capa = Hash.instance_method(:initialize).parameters.include?([:key, :capacity])
strings_1k = 1_000.times.map { |i| i.to_s.freeze }
strings_100k = 100_000.times.map { |i| i.to_s.freeze }
benchmark:
new: Hash.new
new_with_capa_1k: |
h = has_hash_with_capa ? Hash.new(capacity: strings_1k.size) : {}
strings_1k.each do |x|
h[x] = true
end
new_with_capa_100k: |
h = has_hash_with_capa ? Hash.new(capacity: strings_100k.size) : {}
strings_100k.each do |x|
h[x] = true
end
2 changes: 2 additions & 0 deletions common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -1200,6 +1200,7 @@ BUILTIN_RB_SRCS = \
$(srcdir)/trace_point.rb \
$(srcdir)/warning.rb \
$(srcdir)/array.rb \
$(srcdir)/hash.rb \
$(srcdir)/kernel.rb \
$(srcdir)/ractor.rb \
$(srcdir)/symbol.rb \
Expand Down Expand Up @@ -7781,6 +7782,7 @@ hash.$(OBJEXT): {$(VPATH)}debug_counter.h
hash.$(OBJEXT): {$(VPATH)}defines.h
hash.$(OBJEXT): {$(VPATH)}encoding.h
hash.$(OBJEXT): {$(VPATH)}hash.c
hash.$(OBJEXT): {$(VPATH)}hash.rbinc
hash.$(OBJEXT): {$(VPATH)}id.h
hash.$(OBJEXT): {$(VPATH)}id_table.h
hash.$(OBJEXT): {$(VPATH)}intern.h
Expand Down
64 changes: 18 additions & 46 deletions hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include "ruby/thread_native.h"
#include "ruby/ractor.h"
#include "vm_sync.h"
#include "builtin.h"

/* Flags of RHash
*
Expand Down Expand Up @@ -1762,59 +1763,29 @@ set_proc_default(VALUE hash, VALUE proc)
RHASH_SET_IFNONE(hash, proc);
}

/*
* call-seq:
* Hash.new(default_value = nil) -> new_hash
* Hash.new {|hash, key| ... } -> new_hash
*
* Returns a new empty +Hash+ object.
*
* The initial default value and initial default proc for the new hash
* depend on which form above was used. See {Default Values}[rdoc-ref:Hash@Default+Values].
*
* If neither an argument nor a block given,
* initializes both the default value and the default proc to <tt>nil</tt>:
* h = Hash.new
* h.default # => nil
* h.default_proc # => nil
*
* If argument <tt>default_value</tt> given but no block given,
* initializes the default value to the given <tt>default_value</tt>
* and the default proc to <tt>nil</tt>:
* h = Hash.new(false)
* h.default # => false
* h.default_proc # => nil
*
* If a block given but no argument, stores the block as the default proc
* and sets the default value to <tt>nil</tt>:
* h = Hash.new {|hash, key| "Default value for #{key}" }
* h.default # => nil
* h.default_proc.class # => Proc
* h[:nosuch] # => "Default value for nosuch"
*/

static VALUE
rb_hash_initialize(int argc, VALUE *argv, VALUE hash)
static void
rb_hash_init(VALUE hash, long capa, VALUE ifnone_unset, VALUE ifnone, VALUE block)
{
rb_hash_modify(hash);
RUBY_ASSERT(RHASH_SIZE(hash) == 0);
RUBY_ASSERT(RHASH_AR_TABLE_P(hash));

if (rb_block_given_p()) {
rb_check_arity(argc, 0, 0);
SET_PROC_DEFAULT(hash, rb_block_proc());
if (capa > 0) {
hash_st_table_init(hash, &objhash, capa);
}
else {
rb_check_arity(argc, 0, 1);

VALUE options, ifnone;
rb_scan_args(argc, argv, "01:", &ifnone, &options);
if (NIL_P(ifnone) && !NIL_P(options)) {
ifnone = options;
rb_warn_deprecated_to_remove("3.4", "Calling Hash.new with keyword arguments", "Hash.new({ key: value })");
if (!NIL_P(block)) {
if (ifnone_unset != Qtrue) {
rb_check_arity(1, 0, 0);
}
else {
SET_PROC_DEFAULT(hash, block);
}
}
else if (ifnone_unset != Qtrue) {
RHASH_SET_IFNONE(hash, ifnone);
}

return hash;
hash_verify(hash);
}

static VALUE rb_hash_to_a(VALUE hash);
Expand Down Expand Up @@ -7150,7 +7121,6 @@ Init_Hash(void)
rb_define_alloc_func(rb_cHash, empty_hash_alloc);
rb_define_singleton_method(rb_cHash, "[]", rb_hash_s_create, -1);
rb_define_singleton_method(rb_cHash, "try_convert", rb_hash_s_try_convert, 1);
rb_define_method(rb_cHash, "initialize", rb_hash_initialize, -1);
rb_define_method(rb_cHash, "initialize_copy", rb_hash_replace, 1);
rb_define_method(rb_cHash, "rehash", rb_hash_rehash, 0);

Expand Down Expand Up @@ -7477,3 +7447,5 @@ Init_Hash(void)

HASH_ASSERT(sizeof(ar_hint_t) * RHASH_AR_TABLE_MAX_SIZE == sizeof(VALUE));
}

#include "hash.rbinc"
48 changes: 48 additions & 0 deletions hash.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
class Hash
# call-seq:
# Hash.new(default_value = nil) -> new_hash
# Hash.new(default_value = nil, capacity: size) -> new_hash
# Hash.new {|hash, key| ... } -> new_hash
# Hash.new(capacity: size) {|hash, key| ... } -> new_hash
#
# Returns a new empty +Hash+ object.
#
# The initial default value and initial default proc for the new hash
# depend on which form above was used. See {Default Values}[rdoc-ref:Hash@Default+Values].
#
# If neither an argument nor a block is given,
# initializes both the default value and the default proc to <tt>nil</tt>:
# h = Hash.new
# h.default # => nil
# h.default_proc # => nil
#
# If argument <tt>default_value</tt> is given but no block is given,
# initializes the default value to the given <tt>default_value</tt>
# and the default proc to <tt>nil</tt>:
# h = Hash.new(false)
# h.default # => false
# h.default_proc # => nil
#
# If a block is given but no <tt>default_value</tt>, stores the block as the default proc
# and sets the default value to <tt>nil</tt>:
# h = Hash.new {|hash, key| "Default value for #{key}" }
# h.default # => nil
# h.default_proc.class # => Proc
# h[:nosuch] # => "Default value for nosuch"
#
# If both a block and a <tt>default_value</tt> are given, raises an +ArgumentError+
#
# If the optional keyword argument +capacity+ is given, the hash will be allocated
# with enough capacity to accomodate this many keys without having to be resized.
def initialize(ifnone = (ifnone_unset = true), capacity: 0, &block)
if Primitive.mandatory_only?
Primitive.attr! :leaf
else
b = block # TODO: builtin bug
Primitive.cstmt!(%{
rb_hash_init(self, NUM2LONG(capacity), ifnone_unset, ifnone, b);
return Qnil;
})
end
end
end
1 change: 1 addition & 0 deletions inits.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ rb_call_builtin_inits(void)
BUILTIN(pack);
BUILTIN(warning);
BUILTIN(array);
BUILTIN(hash);
BUILTIN(kernel);
BUILTIN(symbol);
BUILTIN(timev);
Expand Down
16 changes: 15 additions & 1 deletion spec/ruby/core/hash/new_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
-> { Hash.new(nil) { 0 } }.should raise_error(ArgumentError)
end

ruby_version_is "3.3" do
ruby_version_is "3.3"..."3.4" do
it "emits a deprecation warning if keyword arguments are passed" do
-> { Hash.new(unknown: true) }.should complain(
Regexp.new(Regexp.escape("Calling Hash.new with keyword arguments is deprecated and will be removed in Ruby 3.4; use Hash.new({ key: value }) instead"))
Expand All @@ -46,4 +46,18 @@
Hash.new({ unknown: true }).default.should == { unknown: true }
end
end

ruby_version_is "3.4" do
it "accepts a capacity: argument" do
Hash.new(5, capacity: 42).default.should == 5
Hash.new(capacity: 42).default.should == nil
(Hash.new(capacity: 42) { 1 }).default_proc.should_not == nil
end

it "raises an error if unknown keyword arguments are passed" do
-> { Hash.new(unknown: true) }.should raise_error(ArgumentError)
-> { Hash.new(1, unknown: true) }.should raise_error(ArgumentError)
-> { Hash.new(unknown: true) { 0 } }.should raise_error(ArgumentError)
end
end
end

0 comments on commit a74b891

Please sign in to comment.