查找最常见的哈希值

时间:2013-08-29 06:32:27

标签: ruby hash

我有以下哈希: h = Hash["a","foo", "b","bar", "c","foo"] 我想返回最常见的值,在本例中为foo。最有效的方法是什么?

这个问题与 this question 类似,但针对的是哈希而不是数组。

6 个答案:

答案 0 :(得分:4)

您可以先用 values 把哈希的值取成数组,然后套用您所链接问题中的解决方案。

# Bucket equal values together, pick the biggest bucket, and read its value.
buckets = h.values.group_by { |value| value }.values
buckets.max_by(&:length).first
#=> "foo"

答案 1 :(得分:3)

我们可以这样做:

# Group the hash values, keep the [value, occurrences] pair with the most
# occurrences, and print the value of that pair.
h = { "a" => "foo", "b" => "bar", "c" => "foo", "d" => "bar", "e" => "foobar" }
grouped = h.values.group_by { |value| value }
winner = grouped.max_by { |_value, occurrences| occurrences.length }
p winner.first
# >> "foo"

更新(比我的第一个解决方案慢)

# Group the [key, value] pairs of the hash directly by value (no intermediate
# values array) and take the value owning the largest group.
h = { "a" => "foo", "b" => "bar", "c" => "foo" }
pairs_by_value = h.group_by { |_key, value| value }
pairs_by_value.max_by { |_value, pairs| pairs.length }.first
# >> "foo"

**基准测试**

require 'benchmark'

# Returns the most frequent value of +h+.
# Groups the values into buckets of equal elements and returns the first
# element of the largest bucket. Raises NoMethodError when +h+ is empty.
def seanny123(h)
  buckets = h.values.group_by { |value| value }.values
  largest = buckets.max_by(&:length)
  largest.first
end

# Returns the most frequent value of +h+, or nil when +h+ is empty.
#
# Builds a value => occurrence-count table in a single pass and then picks
# the entry with the highest count.
def stefan(h)
  # The accumulator has its own name so it no longer shadows the method
  # parameter +h+; the unused key is marked with a leading underscore.
  frequencies = h.each_with_object(Hash.new(0)) do |(_key, value), counts|
    counts[value] += 1
  end
  # max_by yields [value, count] pairs; only the value is of interest.
  value, _count = frequencies.max_by { |_value, count| count }
  value
end

# Returns the most frequent value of +h+.
# Groups the [key, value] pairs by value, sorts the groups by size in
# ascending order and reads the value of the last (largest) group.
# Raises NoMethodError when +h+ is empty.
def yevgeniy_anfilofyev(h)
  pairs_by_value = h.group_by { |_key, value| value }
  ascending = pairs_by_value.sort_by { |_value, pairs| pairs.length }
  ascending.last.first
end

# Returns an array with every value of +h+ that shares the highest
# occurrence count (so ties yield several elements; an empty hash yields []).
# Array#count is invoked once per element in both passes.
def acts_as_geek(h)
  values = h.values
  occurrences = values.map { |candidate| values.count(candidate) }
  top = occurrences.max
  winners = values.select { |candidate| values.count(candidate) == top }
  winners.uniq
end

# Returns one most frequent value of +h+ (nil for an empty hash).
# Folds over the values, keeping the current leader only while it occurs
# strictly more often than the next value; on equal counts the later value wins.
def squiguy(h)
  values = h.values
  values.inject do |leader, challenger|
    if values.count(leader) > values.count(challenger)
      leader
    else
      challenger
    end
  end
end

# Returns the most frequent value of +h+.
# Groups the values array by element and takes the key of the group with the
# most members. Raises NoMethodError when +h+ is empty.
def babai1(h)
  grouped = h.values.group_by { |value| value }
  winner = grouped.max_by { |_value, occurrences| occurrences.length }
  winner.first
end

# Returns the most frequent value of +h+.
# Groups the hash's [key, value] pairs by value (without building a values
# array first) and takes the value of the largest group.
# Raises NoMethodError when +h+ is empty.
def babai2(h)
  pairs_by_value = h.group_by { |_key, value| value }
  winner = pairs_by_value.max_by { |_value, pairs| pairs.length }
  winner.first
end


# Times every candidate implementation against the same hash.
#
# h - the hash handed to each candidate.
# n - how many times each candidate is invoked inside its report.
#
# Labels are printed in a 20-character wide column.
def benchmark(h, n)
  Benchmark.bm(20) do |bm|
    bm.report("Seanny123") do
      n.times { seanny123(h) }
    end
    bm.report("Stefan") do
      n.times { stefan(h) }
    end
    bm.report("Yevgeniy Anfilofyev") do
      n.times { yevgeniy_anfilofyev(h) }
    end
    bm.report("acts_as_geek") do
      n.times { acts_as_geek(h) }
    end
    bm.report("squiguy") do
      n.times { squiguy(h) }
    end
    bm.report("Babai1") do
      n.times { babai1(h) }
    end
    bm.report("Babai2") do
      n.times { babai2(h) }
    end
  end
end

# Build a hash with 3000 entries (per index: two "foo" values and one "bar")
# and run every candidate 10 times against it.
n = 10
h = {}
(0...1000).each do |i|
  h.store("a#{i}", "foo")
  h.store("b#{i}", "bar")
  h.store("c#{i}", "foo")
end
benchmark(h, n)

**结果:**

                           user     system      total        real
Seanny123              0.020000   0.000000   0.020000 (  0.015550)
Stefan                 0.040000   0.000000   0.040000 (  0.044666)
Yevgeniy Anfilofyev    0.020000   0.000000   0.020000 (  0.023162)
acts_as_geek          16.160000   0.000000  16.160000 ( 16.223582)
squiguy               15.740000   0.000000  15.740000 ( 15.768917)
Babai1                 0.020000   0.000000   0.020000 (  0.015430)
Babai2                 0.020000   0.000000   0.020000 (  0.025711)

答案 2 :(得分:1)

您可以使用Enumerable#inject计算频率:

# Count how often each value occurs; inject needs the accumulator returned
# from the block on every iteration.
frequencies = h.inject(Hash.new(0)) do |counts, (_key, value)|
  counts[value] += 1
  counts
end
#=> {"foo"=>2, "bar"=>1}

或者使用Enumerable#each_with_object:

# Same counting pass; each_with_object hands back the accumulator itself,
# so the block does not need to return it.
frequencies = h.each_with_object(Hash.new(0)) do |(_key, value), counts|
  counts[value] += 1
end
#=> {"foo"=>2, "bar"=>1}

然后使用Enumerable#max_by取出出现次数最多的一项:

# Pick the [value, count] pair with the highest count and destructure it.
value, count = frequencies.max_by { |_value, occurrences| occurrences }
#=> ["foo", 2]

value
#=> "foo"

答案 3 :(得分:1)

基准

使用小哈希:

# Three-entry hash, each candidate invoked 100,000 times.
n = 100_000
h = { "a" => "foo", "b" => "bar", "c" => "foo" }
benchmark(h, n)

结果:

                           user     system      total        real
Seanny123              0.220000   0.000000   0.220000 (  0.222342)
Stefan                 0.260000   0.000000   0.260000 (  0.263583)
Yevgeniy Anfilofyev    0.350000   0.000000   0.350000 (  0.341685)
acts_as_geek           0.300000   0.000000   0.300000 (  0.306601)
squiguy                0.140000   0.000000   0.140000 (  0.139141)
Babai                  0.220000   0.000000   0.220000 (  0.218616)

使用大哈希:

# Build a hash with 3000 entries (per index: two "foo" values and one "bar")
# and run every candidate 10 times against it.
n = 10
h = {}
(0...1000).each do |i|
  h.store("a#{i}", "foo")
  h.store("b#{i}", "bar")
  h.store("c#{i}", "foo")
end
benchmark(h, n)

结果:

                           user     system      total        real
Seanny123              0.060000   0.000000   0.060000 (  0.059068)
Stefan                 0.100000   0.000000   0.100000 (  0.100760)
Yevgeniy Anfilofyev    0.080000   0.000000   0.080000 (  0.080988)
acts_as_geek          97.020000   0.020000  97.040000 ( 97.072220)
squiguy               97.480000   0.020000  97.500000 ( 97.535130)
Babai                  0.050000   0.000000   0.050000 (  0.058653)

基准代码:

require 'benchmark'

# Returns the most frequent value of +h+.
# Groups the values into buckets of equal elements and returns the first
# element of the largest bucket. Raises NoMethodError when +h+ is empty.
def seanny123(h)
  buckets = h.values.group_by { |value| value }.values
  largest = buckets.max_by(&:length)
  largest.first
end

# Returns the most frequent value of +h+, or nil when +h+ is empty.
#
# Builds a value => occurrence-count table in a single pass and then picks
# the entry with the highest count.
def stefan(h)
  # The accumulator has its own name so it no longer shadows the method
  # parameter +h+; the unused key is marked with a leading underscore.
  frequencies = h.each_with_object(Hash.new(0)) do |(_key, value), counts|
    counts[value] += 1
  end
  # max_by yields [value, count] pairs; only the value is of interest.
  value, _count = frequencies.max_by { |_value, count| count }
  value
end

# Returns the most frequent value of +h+.
# Groups the [key, value] pairs by value, sorts the groups by size in
# ascending order and reads the value of the last (largest) group.
# Raises NoMethodError when +h+ is empty.
def yevgeniy_anfilofyev(h)
  pairs_by_value = h.group_by { |_key, value| value }
  ascending = pairs_by_value.sort_by { |_value, pairs| pairs.length }
  ascending.last.first
end

# Returns an array with every value of +h+ that shares the highest
# occurrence count (so ties yield several elements; an empty hash yields []).
# Array#count is invoked once per element in both passes.
def acts_as_geek(h)
  values = h.values
  occurrences = values.map { |candidate| values.count(candidate) }
  top = occurrences.max
  winners = values.select { |candidate| values.count(candidate) == top }
  winners.uniq
end

# Returns one most frequent value of +h+ (nil for an empty hash).
# Folds over the values, keeping the current leader only while it occurs
# strictly more often than the next value; on equal counts the later value wins.
def squiguy(h)
  values = h.values
  values.inject do |leader, challenger|
    if values.count(leader) > values.count(challenger)
      leader
    else
      challenger
    end
  end
end

# Returns the most frequent value of +h+.
# Groups the values array by element and takes the key of the group with the
# most members. Raises NoMethodError when +h+ is empty.
def babai(h)
  grouped = h.values.group_by { |value| value }
  winner = grouped.max_by { |_value, occurrences| occurrences.length }
  winner.first
end


# Times every candidate implementation against the same hash.
#
# h - the hash handed to each candidate.
# n - how many times each candidate is invoked inside its report.
#
# Labels are printed in a 20-character wide column.
def benchmark(h, n)
  Benchmark.bm(20) do |bm|
    bm.report("Seanny123") do
      n.times { seanny123(h) }
    end
    bm.report("Stefan") do
      n.times { stefan(h) }
    end
    bm.report("Yevgeniy Anfilofyev") do
      n.times { yevgeniy_anfilofyev(h) }
    end
    bm.report("acts_as_geek") do
      n.times { acts_as_geek(h) }
    end
    bm.report("squiguy") do
      n.times { squiguy(h) }
    end
    bm.report("Babai") do
      n.times { babai(h) }
    end
  end
end

答案 4 :(得分:0)

更新:请勿使用我的解决方案。我的电脑要耗费大量时钟周期才能算出结果。没想到函数式的写法在这里会这么慢。

使用Enumerable#reduce怎么样?

# Fold over the values, keeping whichever of the two compared values occurs
# more often (the later one on equal counts), then print the survivor.
h = { "a" => "foo", "b" => "bar", "c" => "foo" }
values = h.values
most = values.inject do |leader, challenger|
  if values.count(leader) > values.count(challenger)
    leader
  else
    challenger
  end
end
p most

需要注意的是:如果哈希中有两个或更多个值的出现次数并列最多,这种方法只会返回其中一个。如果你需要所有并列最多的值,就不应使用这个方案。

这是一个基准。

#!/usr/bin/env ruby

require 'benchmark'

# Number of times each candidate is repeated inside its report.
MULT = 10

# 10,000 random lowercase letters that become the hash values.
arr = []
letters = ("a".."z").to_a
10000.times do
  arr << letters.sample
end

# The hash must exist before it is filled; without this initialisation the
# assignment below raises NameError because `h` is undefined.
h = {}
10000.times do |i|
  h[i] = arr[i]
end

# Time each proposed solution inline. Every report repeats its snippet MULT
# times against the top-level hash `h`; the snippets are the code under
# measurement, so they are kept exactly as their authors wrote them.
Benchmark.bm do |rep|
  # Group the values, take the largest group's first element.
  rep.report("Seanny123") {
    MULT.times do
      h.values.group_by { |e| e }.values.max_by(&:size).first
    end
  }

  # Pairwise fold; Array#count is called twice per step.
  rep.report("squiguy") {
    MULT.times do
      v = h.values
      most = v.reduce do |memo, val|
        v.count(memo) > v.count(val) ? memo : val
      end
    end
  }

  # Array#count is called for every element, once in map and once in select.
  rep.report("acts_as_geek") {
    MULT.times do
      v = h.values
      max = v.map {|i| v.count(i)}.max
      v.select {|i| v.count(i) == max}.uniq
    end
  }

  # Group the [key, value] pairs by value, sort groups by size, take the last.
  rep.report("Yevgeniy Anfilofyev") {
    MULT.times do
      h.group_by{|(_,v)| v }
       .sort_by{|(_,v)| v.size }[-1][0]
    end
  }

  # Build a value => count table with inject, then pick the highest count.
  # Note: the block parameter `h` shadows the outer `h` inside the inject block.
  rep.report("Stefan") {
    MULT.times do
      frequencies = h.inject(Hash.new(0)) { |h, (k,v)| h[v] += 1 ; h }
      value, count = frequencies.max_by { |k, v| v }
    end
  }

  # Like Seanny123, but max_by runs on the grouped hash instead of its values.
  rep.report("Babai") {
    MULT.times do
      h.values.group_by { |e| e }.max_by{|_,v| v.size}.first
    end
  }

end

答案 5 :(得分:0)

使用group_by(不调用values),再配合sort_by:

# Group the [key, value] pairs by value, order the groups by size, and read
# the value of the last (largest) group.
pairs_by_value = h.group_by { |_key, value| value }
ascending = pairs_by_value.sort_by { |_value, pairs| pairs.length }
ascending.last.first