Skip to content

Commit

Permalink
Merge pull request #1532 from eregon/serialized-size-stats
Browse files Browse the repository at this point in the history
Add a task to show statistics about serialized size/source size for the top 100 gems
  • Loading branch information
kddnewton authored Sep 22, 2023
2 parents c7231f1 + 291177e commit c9620ec
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ jobs:
run: bundle exec rake lex:topgems
- name: Parse Top 100 Gems
run: bundle exec rake parse:topgems
- name: Serialized size stats with all fields
run: bundle exec rake serialized_size:topgems
- name: Recompile with only semantic fields
run: YARP_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 bundle exec rake clobber compile
- name: Serialized size stats with only semantic fields
run: bundle exec rake serialized_size:topgems

memcheck:
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions rakelib/check_manifest.rake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ task :check_manifest => [:templates] do
rust
templates
test
top-100-gems
tmp
vendor
]
Expand Down
33 changes: 33 additions & 0 deletions rakelib/lex.rake
Original file line number Diff line number Diff line change
Expand Up @@ -356,3 +356,36 @@ task "lex:topgems": ["download:topgems", :compile] do
exit(1)
end
end

# Reports how large the output of YARP.dump_file is relative to the Ruby source
# it was produced from, for every .rb file found under TOP_100_GEMS_DIR.
#
# Prints the total source and serialized byte counts, their overall ratio, and
# per-file statistics of the serialized/source ratio (average, median,
# quartiles, min and max). Zero-byte source files are skipped because their
# ratio is undefined.
#
# Aborts with a non-zero exit status when no non-empty Ruby file is found,
# since none of the statistics can be computed in that case.
task "serialized_size:topgems": ["download:topgems"] do
  $:.unshift(File.expand_path("../lib", __dir__))
  require "yarp"

  files = Dir["#{TOP_100_GEMS_DIR}/**/*.rb"]
  total_source_size = 0
  total_serialized_size = 0
  ratios = []
  files.each do |file|
    source_size = File.size(file)
    next if source_size == 0
    total_source_size += source_size

    serialized_size = YARP.dump_file(file).bytesize
    total_serialized_size += serialized_size

    # Rational keeps the per-file ratio exact until it is formatted below.
    ratios << Rational(serialized_size, source_size)
  end

  # With no samples the average would divide by zero and the percentile
  # lookups would return nil, so fail early with an explicit message.
  abort "No non-empty Ruby files found in #{TOP_100_GEMS_DIR}" if ratios.empty?

  # Sort once and reuse the result for every order statistic.
  sorted = ratios.sort
  count = sorted.size

  f = '%.3f'
  puts "Total sizes for top 100 gems:"
  puts "total source size: #{'%9d' % total_source_size}"
  puts "total serialized size: #{'%9d' % total_serialized_size}"
  puts "total serialized/total source: #{f % (total_serialized_size.to_f / total_source_size)}"
  puts
  puts "Stats of ratio serialized/source per file:"
  puts "average: #{f % (ratios.sum / count)}"
  # Order statistics are read directly at integer indices of the sorted list
  # (no interpolation between neighbouring elements).
  puts "median: #{f % sorted[count / 2]}"
  puts "1st quartile: #{f % sorted[count / 4]}"
  puts "3rd quartile: #{f % sorted[count * 3 / 4]}"
  puts "min - max: #{"#{f} - #{f}" % [sorted.first, sorted.last]}"
end

0 comments on commit c9620ec

Please sign in to comment.