Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite merge helpers to work with frozen objects #558

Merged
merged 10 commits into from
Mar 5, 2017
2 changes: 1 addition & 1 deletion lib/simplecov.rb
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def usable?
require "simplecov/filter"
require "simplecov/formatter"
require "simplecov/last_run"
require "simplecov/merge_helpers"
require "simplecov/raw_coverage"
require "simplecov/result_merger"
require "simplecov/command_guesser"
require "simplecov/version"
Expand Down
37 changes: 0 additions & 37 deletions lib/simplecov/merge_helpers.rb

This file was deleted.

38 changes: 38 additions & 0 deletions lib/simplecov/raw_coverage.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
module SimpleCov
module RawCoverage
module_function

# Merges multiple Coverage.result hashes
def merge_results(*results)
results.reduce({}) do |result, merged|
merge_resultsets(result, merged)
end
end

# Merges two Coverage.result hashes
def merge_resultsets(result1, result2)
(result1.keys | result2.keys).each_with_object({}) do |filename, merged|
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the usage of | here

file1 = result1[filename]
file2 = result2[filename]
merged[filename] = merge_file_coverage(file1, file2)
end
end

def merge_file_coverage(file1, file2)
return (file1 || file2).dup unless file1 && file2

file1.zip(file2).map do |count1, count2|
Copy link
Collaborator

@bf4 bf4 Feb 17, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder about zip vs. map.with_index

GC.disable
a = Array.new(100) { 1 }.freeze; b = Array.new(100) { 2 }.freeze

count = GC.stat[:total_allocated_objects]; a.zip(b).map {|l,r| l + r }.tap { puts GC.stat[:total_allocated_objects] - count }
# 104
# => [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]

count = GC.stat[:total_allocated_objects]; a.map.with_index {|i,index| i + b[index] }.tap { puts GC.stat[:total_allocated_objects] - count }
# 5
# => [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
require 'benchmark/ips'
Benchmark.ips do |x| x.report('zip.map') { a.zip(b).map {|l,r| l + r } }; x.report('map.with_index') { a.map.with_index {|i,index| i + b[index] } }; x.compare! end

GC disabled

Warming up --------------------------------------
             zip.map     5.039k i/100ms
      map.with_index     7.615k i/100ms
Calculating -------------------------------------
             zip.map     40.456k (±10.8%) i/s -    201.560k in   5.040494s
      map.with_index     76.949k (± 2.9%) i/s -    388.365k in   5.051459s

Comparison:
      map.with_index:    76949.5 i/s
             zip.map:    40455.6 i/s - 1.90x  slower


GC enabled

Warming up --------------------------------------
             zip.map     5.860k i/100ms
      map.with_index     7.899k i/100ms
Calculating -------------------------------------
             zip.map     59.204k (± 3.4%) i/s -    298.860k in   5.053878s
      map.with_index     80.775k (± 3.0%) i/s -    410.748k in   5.089637s

Comparison:
      map.with_index:    80774.6 i/s
             zip.map:    59204.5 i/s - 1.36x  slower


Copy link
Collaborator

@bf4 bf4 Feb 17, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like previous behavior was each_with_index

-    def merge_resultset(array)
 -      new_array = dup
 -      array.each_with_index do |element, i|
 -        pair = [element, new_array[i]]
 -        new_array[i] = if pair.any?(&:nil?) && pair.map(&:to_i).all?(&:zero?)
 -                         nil
 -                       else
 -                         element.to_i + new_array[i].to_i
 -                       end
 -      end
 -      new_array
 -    end
 -  end
 Benchmark.ips do |x| x.report('zip.map') { a.zip(b).map {|l,r| l + r } }; x.report('map.with_index') { a.map.with_index {|i,index| i + b[index] } }; x.report('each_with_index') do |times| new_array = []; i = 0; while i < times; a.each_with_index {|i,index| new_array << i + b[index] }; i = i + 1; end; end;  x.compare! end


Warming up --------------------------------------
             zip.map          5.909k i/100ms
      map.with_index     7.878k i/100ms
     each_with_index     7.434k i/100ms
Calculating -------------------------------------
             zip.map     60.240k (± 3.2%) i/s -    301.359k in   5.007900s
      map.with_index     80.260k (± 5.7%) i/s -    401.778k in   5.025614s
     each_with_index     84.993k (± 5.0%) i/s -    423.738k in   5.000672s

Comparison:
     each_with_index:    84993.4 i/s
      map.with_index:    80260.1 i/s - same-ish: difference falls within error
             zip.map:    60239.5 i/s - 1.41x  slower

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good digging into that, thanks! :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we can have the best of both worlds by using lazy.zip.map:

bench.rb
require "benchmark/ips"

a = Array.new(100) { 1 }.freeze
b = Array.new(100) { 2 }.freeze

Benchmark.ips do |x|
  x.report('lazy.zip.map') { a.lazy.zip(b).map {|l,r| l + r } }
  x.report('zip.map') { a.zip(b).map {|l,r| l + r } }
  x.report('map.with_index') { a.map.with_index {|i,index| i + b[index] } }
  x.report('each_with_index') do |times|
    new_array = []
    i = 0
    while i < times
      a.each_with_index {|i,index| new_array << i + b[index] }
      i = i + 1
    end
  end
  x.compare!
end
Warming up --------------------------------------
        lazy.zip.map    17.413k i/100ms
             zip.map     7.960k i/100ms
      map.with_index    10.779k i/100ms
     each_with_index     9.897k i/100ms
Calculating -------------------------------------
        lazy.zip.map    186.025k (± 3.4%) i/s -    940.302k in   5.060879s
             zip.map     81.511k (± 4.0%) i/s -    413.920k in   5.087448s
      map.with_index    110.948k (± 3.4%) i/s -    560.508k in   5.058655s
     each_with_index    115.360k (± 2.2%) i/s -    583.923k in   5.064205s

Comparison:
        lazy.zip.map:   186024.9 i/s
     each_with_index:   115360.3 i/s - 1.61x  slower
      map.with_index:   110947.8 i/s - 1.68x  slower
             zip.map:    81510.6 i/s - 2.28x  slower

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops, that was so lazy that it didn't even create a result array. A correct implementation (with a .to_a on the end) is slower:

Warming up --------------------------------------
   lazy.zip.map.to_a     1.450k i/100ms
             zip.map     7.869k i/100ms
      map.with_index    10.748k i/100ms
     each_with_index     9.976k i/100ms
Calculating -------------------------------------
   lazy.zip.map.to_a     13.688k (±14.1%) i/s -     68.150k in   5.097768s
             zip.map     79.369k (± 8.6%) i/s -    393.450k in   5.005312s
      map.with_index    110.505k (± 4.0%) i/s -    558.896k in   5.067126s
     each_with_index    114.300k (± 4.2%) i/s -    578.608k in   5.071720s

Comparison:
     each_with_index:   114300.5 i/s
      map.with_index:   110505.2 i/s - same-ish: difference falls within error
             zip.map:    79368.7 i/s - 1.44x  slower
   lazy.zip.map.to_a:    13688.2 i/s - 8.35x  slower

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've reworked this to use map.with_index. Thanks for the attention to performance!

merge_line_coverage(count1, count2)
end
end

def merge_line_coverage(count1, count2)
sum = count1.to_i + count2.to_i
if sum.zero? && (count1.nil? || count2.nil?)
nil
else
sum
end
end
end
end
1 change: 0 additions & 1 deletion lib/simplecov/result.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ class Result
# Initialize a new SimpleCov::Result from given Coverage.result (a Hash of filenames each containing an array of
# coverage data)
def initialize(original_result)
original_result = original_result.dup.extend(SimpleCov::HashMergeHelper) unless original_result.is_a? SimpleCov::HashMergeHelper
@original_result = original_result.freeze
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

@files = SimpleCov::FileList.new(original_result.map do |filename, coverage|
Copy link
Collaborator

@bf4 bf4 Feb 27, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good. In my own testing I found that the files list wasn't set correctly if I was trying to load and merge the results from another machine. Probably good for a future pr... but doesn't work with html formatter because it tries to read each file to overlay coverage in report source_file.rb

 def src
      # We intentionally read source code lazily to
      # suppress reading unused source code.
      @src ||= File.open(filename, "rb", &:readlines)
    end
require 'simplecov'
require 'json'
SimpleCov.coverage_dir 'coveragetest'
SimpleCov.formatters = [SimpleCov::Formatter::SimpleFormatter]
sourcefiles = Dir.glob('./resultset*') 
# work around  `if File.file?(filename)` when running on other filesystem
add_files = ->(sr) { files = sr.original_result.map {|filename, coverage| SimpleCov::SourceFile.new(filename, coverage) }.compact.sort_by(&:filename); sr.instance_variable_set(:@files, SimpleCov::FileList.new(files)); sr }

results = sourcefiles.map { |file| SimpleCov::Result.from_hash(JSON.parse(File.read(file))) }.each do |result|
  add_files.(result)
end; nil
result = SimpleCov::ResultMerger.merge_results(*results)
add_files.(result)
result.format!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so erm, I don't fully understand. Is this good to merge now or is this a bigger problem? (first you said it's probably good for another PR but then you said it doesn't work with the html formatter, so I'm a bit confused)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@PragTob good to merge. The 'good for another pr', would be a way to merge files not represented on the current file system, since that's out of scope. This works fine when running on the same filesystem (or if you were to munge the paths).

SimpleCov::SourceFile.new(filename, coverage) if File.file?(filename)
Expand Down
20 changes: 12 additions & 8 deletions lib/simplecov/result_merger.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,24 @@ def results
results
end

# Merge two or more SimpleCov::Results into a new one with merged
# coverage data and the command_name for the result consisting of a join
# on all source result's names
def merge_results(*results)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since there's already a results method and it is passed to this method as the argument, perhaps we can avoid possible which-results-are-we-using by changing the argument name? *resultsets? maybe?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right that there could be some confusion. I'm a little wary of calling this resultsets because this is an array of SimpleCov::Results, not an array of .resultset.json-type data. That actually seems like a problem with RawCoverage.merge_resultsets too. But I'm not super familiar with SimpleCov's terminology, so maybe resultsets is an OK name here?

Without any concern for backward compatibility, I'd be tempted to rename ResultMerger.results to ResultMerger.stored_results, and leave the argument here as *results.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bf4 Let me know how you'd like to proceed here. ❤️

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think I'd like to break backwards compatibility. I mean I wouldn't really consider it a public API of this gem, but from what I've seen I think some people have fiddled with this to merge results from multiple sources themselves so I wouldn't wanna break it for them.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@PragTob I listed a few in #558 (review)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@PragTob @aroben how about

def merge_results(*results = results())

this is something that I know has been done in Rails at Matz's recommendation when the default value for a param is a method by the same name

merged = SimpleCov::RawCoverage.merge_results(*results.map(&:original_result))
result = SimpleCov::Result.new(merged)
# Specify the command name
result.command_name = results.map(&:command_name).sort.join(", ")
result
end

#
# Gets all SimpleCov::Results from cache, merges them and produces a new
# SimpleCov::Result with merged coverage data and the command_name
# for the result consisting of a join on all source result's names
#
def merged_result
merged = {}
results.each do |result|
merged = result.original_result.merge_resultset(merged)
end
result = SimpleCov::Result.new(merged)
# Specify the command name
result.command_name = results.map(&:command_name).sort.join(", ")
result
merge_results(*results)
end

# Saves the given SimpleCov::Result in the resultset cache
Expand Down
93 changes: 93 additions & 0 deletions spec/raw_coverage_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
require "helper"

if SimpleCov.usable?
describe SimpleCov::RawCoverage do
describe "with two faked coverage resultsets" do
before do
@resultset1 = {
source_fixture("sample.rb") => [nil, 1, 1, 1, nil, nil, 1, 1, nil, nil],
source_fixture("app/models/user.rb") => [nil, 1, 1, 1, nil, nil, 1, 0, nil, nil],
source_fixture("app/controllers/sample_controller.rb") => [nil, 1, 1, 1, nil, nil, 1, 0, nil, nil],
source_fixture("resultset1.rb") => [1, 1, 1, 1],
source_fixture("parallel_tests.rb") => [nil, 0, nil, 0],
source_fixture("conditionally_loaded_1.rb") => [nil, 0, 1], # loaded only in the first resultset
source_fixture("three.rb") => [nil, 1, 1],
}

@resultset2 = {
source_fixture("sample.rb") => [1, nil, 1, 1, nil, nil, 1, 1, nil, nil],
source_fixture("app/models/user.rb") => [nil, 1, 5, 1, nil, nil, 1, 0, nil, nil],
source_fixture("app/controllers/sample_controller.rb") => [nil, 3, 1, nil, nil, nil, 1, 0, nil, nil],
source_fixture("resultset2.rb") => [nil, 1, 1, nil],
source_fixture("parallel_tests.rb") => [nil, nil, 0, 0],
source_fixture("conditionally_loaded_2.rb") => [nil, 0, 1], # loaded only in the second resultset
source_fixture("three.rb") => [nil, 1, 4],
}

@resultset3 = {
source_fixture("three.rb") => [nil, 1, 2],
}
end

context "a merge" do
subject do
SimpleCov::RawCoverage.merge_results(@resultset1, @resultset2, @resultset3)
end

it "has proper results for sample.rb" do
expect(subject[source_fixture("sample.rb")]).to eq([1, 1, 2, 2, nil, nil, 2, 2, nil, nil])
end

it "has proper results for user.rb" do
expect(subject[source_fixture("app/models/user.rb")]).to eq([nil, 2, 6, 2, nil, nil, 2, 0, nil, nil])
end

it "has proper results for sample_controller.rb" do
expect(subject[source_fixture("app/controllers/sample_controller.rb")]).to eq([nil, 4, 2, 1, nil, nil, 2, 0, nil, nil])
end

it "has proper results for resultset1.rb" do
expect(subject[source_fixture("resultset1.rb")]).to eq([1, 1, 1, 1])
end

it "has proper results for resultset2.rb" do
expect(subject[source_fixture("resultset2.rb")]).to eq([nil, 1, 1, nil])
end

it "has proper results for parallel_tests.rb" do
expect(subject[source_fixture("parallel_tests.rb")]).to eq([nil, nil, nil, 0])
end

it "has proper results for conditionally_loaded_1.rb" do
expect(subject[source_fixture("conditionally_loaded_1.rb")]).to eq([nil, 0, 1])
end

it "has proper results for conditionally_loaded_2.rb" do
expect(subject[source_fixture("conditionally_loaded_2.rb")]).to eq([nil, 0, 1])
end

it "has proper results for three.rb" do
expect(subject[source_fixture("three.rb")]).to eq([nil, 3, 7])
end
end
end

describe "with frozen resultsets" do
before do
@resultset1 = {
source_fixture("sample.rb").freeze => [nil, 1, 1, 1, nil, nil, 1, 1, nil, nil].freeze,
source_fixture("app/models/user.rb").freeze => [nil, 1, 1, 1, nil, nil, 1, 0, nil, nil].freeze,
}.freeze

@resultset2 = {
source_fixture("sample.rb").freeze => [1, nil, 1, 1, nil, nil, 1, 1, nil, nil].freeze,
source_fixture("app/models/user.rb").freeze => [nil, 1, 5, 1, nil, nil, 1, 0, nil, nil].freeze,
}.freeze
end

it "can merge without exceptions" do
SimpleCov::RawCoverage.merge_results(@resultset1, @resultset2)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

To clarify intent we could do expect {}.not_to raise_error or something, not strictly needed though

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I would normally do something at the top like

example do
  expect {
     SimpleCov::RawCoverage.merge_results(@resultset1, @resultset2)
  }.not_to raise_error
end

I like example for the 'this is a valid usage', though I know some don't like it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added expect {}.not_to raise_error to this test.

end
end
end
end
42 changes: 2 additions & 40 deletions spec/merge_helpers_spec.rb → spec/result_merger_spec.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
require "helper"

if SimpleCov.usable?
describe "merge helpers" do
describe SimpleCov::ResultMerger do
describe "with two faked coverage resultsets" do
before do
SimpleCov.use_merging true
Expand All @@ -12,7 +12,7 @@
source_fixture("resultset1.rb") => [1, 1, 1, 1],
source_fixture("parallel_tests.rb") => [nil, 0, nil, 0],
source_fixture("conditionally_loaded_1.rb") => [nil, 0, 1], # loaded only in the first resultset
}.extend(SimpleCov::HashMergeHelper)
}

@resultset2 = {
source_fixture("sample.rb") => [1, nil, 1, 1, nil, nil, 1, 1, nil, nil],
Expand All @@ -24,44 +24,6 @@
}
end

context "a merge" do
subject do
@resultset1.merge_resultset(@resultset2)
end

it "has proper results for sample.rb" do
expect(subject[source_fixture("sample.rb")]).to eq([1, 1, 2, 2, nil, nil, 2, 2, nil, nil])
end

it "has proper results for user.rb" do
expect(subject[source_fixture("app/models/user.rb")]).to eq([nil, 2, 6, 2, nil, nil, 2, 0, nil, nil])
end

it "has proper results for sample_controller.rb" do
expect(subject[source_fixture("app/controllers/sample_controller.rb")]).to eq([nil, 4, 2, 1, nil, nil, 2, 0, nil, nil])
end

it "has proper results for resultset1.rb" do
expect(subject[source_fixture("resultset1.rb")]).to eq([1, 1, 1, 1])
end

it "has proper results for resultset2.rb" do
expect(subject[source_fixture("resultset2.rb")]).to eq([nil, 1, 1, nil])
end

it "has proper results for parallel_tests.rb" do
expect(subject[source_fixture("parallel_tests.rb")]).to eq([nil, nil, nil, 0])
end

it "has proper results for conditionally_loaded_1.rb" do
expect(subject[source_fixture("conditionally_loaded_1.rb")]).to eq([nil, 0, 1])
end

it "has proper results for conditionally_loaded_2.rb" do
expect(subject[source_fixture("conditionally_loaded_2.rb")]).to eq([nil, 0, 1])
end
end

# See Github issue #6
it "returns an empty hash when the resultset cache file is empty" do
File.open(SimpleCov::ResultMerger.resultset_path, "w+") { |f| f.puts "" }
Expand Down