Merge pull request #3188 from hlindberg/PUP-3345-memory-leak

(PUP-3345) Fix memory leak (and add some tooling to find leaks)
author: Andrew Parker <andy@puppetlabs.com> 2014-10-14 11:33:28 -0700
committer: Andrew Parker <andy@puppetlabs.com> 2014-10-14 11:33:28 -0700
commit: 17d0d3fafdb7798253393d7845eba370a4e1349b (patch)
tree: 13c64b8e13ce4c4f10d45f080cfc178f0edcac8f
parent: c5a78fe149fe093e6bc659fb1279576581381afe (diff)
parent: 74089c6b88462dbbf6604f80b50fe90254bb62b8 (diff)
download: puppet-17d0d3fafdb7798253393d7845eba370a4e1349b.tar.gz
6 files changed, 322 insertions, 8 deletions
diff --git a/benchmarks/catalog_memory/benchmarker.rb b/benchmarks/catalog_memory/benchmarker.rb
new file mode 100644
index 000000000..955695666
--- /dev/null
+++ b/benchmarks/catalog_memory/benchmarker.rb
@@ -0,0 +1,115 @@
+require 'erb'
+require 'ostruct'
+require 'fileutils'
+require 'json'
+
+# For memory debugging - if the core_ext is not loaded, things break inside mass
+# require 'mass'
+require 'objspace'
+
+# Only runs for Ruby >= 2.1.0, and must do this early since ObjectSpace.trace_object_allocations_start must be called
+# as early as possible.
+#
+RUBYVER_ARRAY = RUBY_VERSION.split(".").collect {|s| s.to_i }
+RUBYVER = (RUBYVER_ARRAY[0] << 16 | RUBYVER_ARRAY[1] << 8 | RUBYVER_ARRAY[2])
+if RUBYVER < (2 << 16 | 1 << 8 | 0)
+  puts "catalog_memory requires Ruby version >= 2.1.0 to run. Skipping"
+  exit(0)
+end
+
+ObjectSpace.trace_object_allocations_start
+
+class Benchmarker
+  include FileUtils
+
+
+  def initialize(target, size)
+    @target = target
+    @size = size
+    @@first_counts = nil
+    @@first_refs = nil
+    @@count = 0
+  end
+
+  def setup
+  end
+
+  def run(args=nil)
+    unless @initialized
+      require 'puppet'
+      config = File.join(@target, 'puppet.conf')
+      Puppet.initialize_settings(['--config', config])
+      @initialized = true
+    end
+    @@count += 1
+    env = Puppet.lookup(:environments).get('benchmarking')
+    node = Puppet::Node.new("testing", :environment => env)
+    # Mimic what apply does (or the benchmark will in part run for the *root* environment)
+    Puppet.push_context({:current_environment => env},'current env for benchmark')
+    Puppet::Resource::Catalog.indirection.find("testing", :use_node => node)
+    Puppet.pop_context
+    GC.start
+    sleep(2)
+    counted = ObjectSpace.count_objects({})
+    if @@first_counts && @@count == 10
+      diff = @@first_counts.merge(counted) {|k, base_v, new_v| new_v - base_v }
+      puts "Count of objects TOTAL = #{diff[:TOTAL]}, FREE = #{diff[:FREE]}, T_OBJECT = #{diff[:T_OBJECT]}, T_CLASS = #{diff[:T_CLASS]}"
+      changed = diff.reject {|k,v| v == 0}
+      puts "Number of changed classes = #{changed}"
+      GC.start
+      # Find references to leaked Objects
+      leaked_instances = ObjectSpace.each_object.reduce([]) {|x, o| x << o.object_id; x } - @@first_refs
+      File.open("diff.json", "w") do |f|
+        leaked_instances.each do |id|
+          o = ObjectSpace._id2ref(id)
+          f.write(ObjectSpace.dump(o)) if !o.nil?
+        end
+      end
+      # Output information about where the bound objects were instantiated
+      map_of_allocations = leaked_instances.reduce(Hash.new(0)) do |memo, x|
+        o = ObjectSpace._id2ref(x)
+        class_path = ObjectSpace.allocation_class_path(o)
+        class_path = class_path.nil? ? ObjectSpace.allocation_sourcefile(o) : class_path
+        if !class_path.nil?
+          method = ObjectSpace.allocation_method_id(o)
+          source_line = ObjectSpace.allocation_sourceline(o)
+          memo["#{class_path}##{method}-#{source_line}"] += 1
+        end
+        memo
+      end
+      map_of_allocations.sort_by {|k, v| v}.reverse.each {|k,v| puts "#{v} #{k}" }
+      # Dump the heap for further analysis
+      GC.start
+      ObjectSpace.dump_all(output: File.open('heap.json','w'))
+    elsif @@count == 1
+      # Set up baseline and output info for first run
+      @@first_counts = counted
+      @@first_refs = ObjectSpace.each_object.reduce([]) {|x, o| x << o.object_id; x }
+      diff = @@first_counts
+      puts "Count of objects TOTAL = #{diff[:TOTAL]}, FREE = #{diff[:FREE]}, T_OBJECT = #{diff[:T_OBJECT]}, T_CLASS = #{diff[:T_CLASS]}"
+    end
+
+  end
+
+  def generate
+    environment = File.join(@target, 'environments', 'benchmarking')
+    templates = File.join('benchmarks', 'empty_catalog')
+
+    mkdir_p(File.join(environment, 'modules'))
+    mkdir_p(File.join(environment, 'manifests'))
+
+    render(File.join(templates, 'site.pp.erb'),
+    File.join(environment, 'manifests', 'site.pp'),{})
+
+    render(File.join(templates, 'puppet.conf.erb'),
+           File.join(@target, 'puppet.conf'),
+           :location => @target)
+  end
+
+  def render(erb_file, output_file, bindings)
+    site = ERB.new(File.read(erb_file))
+    File.open(output_file, 'w') do |fh|
+      fh.write(site.result(OpenStruct.new(bindings).instance_eval { binding }))
+    end
+  end
+end
diff --git a/benchmarks/catalog_memory/description b/benchmarks/catalog_memory/description
new file mode 100644
index 000000000..4d1f2ec6d
--- /dev/null
+++ b/benchmarks/catalog_memory/description
@@ -0,0 +1,5 @@
+Benchmark scenario: Runs an empty catalog and dumps the state of the memory after all runs and a diff between first and last run
+Benchmark target: catalog compilation memory consumption / leak
+Parser: Future
+Requires: Ruby 2.1.0
+
diff --git a/benchmarks/catalog_memory/puppet.conf.erb b/benchmarks/catalog_memory/puppet.conf.erb
new file mode 100644
index 000000000..00e2986bf
--- /dev/null
+++ b/benchmarks/catalog_memory/puppet.conf.erb
@@ -0,0 +1,5 @@
+confdir = <%= location %>
+vardir = <%= location %>
+environmentpath = <%= File.join(location, 'environments') %>
+environment_timeout = '0'
+parser = future
diff --git a/benchmarks/catalog_memory/site.pp.erb b/benchmarks/catalog_memory/site.pp.erb
new file mode 100644
index 000000000..054628183
--- /dev/null
+++ b/benchmarks/catalog_memory/site.pp.erb
@@ -0,0 +1 @@
+notice('hello world')
+\ No newline at end of file
diff --git a/lib/puppet/resource.rb b/lib/puppet/resource.rb
index a5419512b..82dec4bb0 100644
--- a/lib/puppet/resource.rb
+++ b/lib/puppet/resource.rb
@@ -186,9 +186,6 @@ class Puppet::Resource
     @is_stage ||= @type.to_s.downcase == "stage"
   end
 
-  # Cache to reduce respond_to? lookups
-  @@nondeprecating_type = {}
-
   # Construct a resource from data.
   #
   # Constructs a resource instance with the given `type` and `title`. Multiple
@@ -242,12 +239,8 @@ class Puppet::Resource
       extract_parameters(params)
     end
 
-    if resource_type and ! @@nondeprecating_type[resource_type]
-      if resource_type.respond_to?(:deprecate_params)
+    if resource_type && resource_type.respond_to?(:deprecate_params)
         resource_type.deprecate_params(title, attributes[:parameters])
-      else
-        @@nondeprecating_type[resource_type] = true
-      end
     end
 
     tag(self.type)
diff --git a/tasks/memwalk.rake b/tasks/memwalk.rake
new file mode 100644
index 000000000..49077d6c8
--- /dev/null
+++ b/tasks/memwalk.rake
@@ -0,0 +1,195 @@
+# Walks the memory dumped into heap.json, and produces a graph of the memory dumped in diff.json
+# If a single argument (a hex address to one object) is given, the graph is limited to this object and what references it
+# The heap dumps should be in the format produced by Ruby ObjectSpace in Ruby version 2.1.0 or later.
+#
+# The command produces a .dot file that can be rendered with Graphviz dot into SVG. If a memwalk is performed for all
+# objects in the diff.json, the output file name is memwalk.dot. If it is produced for a single address, the name of the
+# output file is memwalk-<address>.dot
+#
+# The dot file can be rendered with something like: dot -Tsvg -omemwalk.svg memwalk.dot
+#
+desc "Process a diff.json of object ids, and a heap.json of a Ruby 2.1.0 ObjectSpace dump and produce a graph"
+task :memwalk, [:id] do |t, args|
+  puts "Memwalk"
+  puts "Computing for #{args[:id] ? args[:id] : 'all'}"
+  @single_id = args[:id] ? args[:id].to_i(16) : nil
+
+  require 'json'
+  #require 'debug'
+
+  TYPE = "type".freeze
+  ROOT = "root".freeze
+  ROOT_UC = "ROOT".freeze
+  ADDR = "address".freeze
+  NODE = "NODE".freeze
+  STRING = "STRING".freeze
+  DATA = "DATA".freeze
+  HASH = "HASH".freeze
+  ARRAY = "ARRAY".freeze
+  OBJECT = "OBJECT".freeze
+  CLASS = "CLASS".freeze
+
+  allocations = {}
+  # A hash keyed by the integer addresses of the objects to trace bindings for (values are the parsed diff entries)
+  diff_index = {}
+  puts "Reading data"
+  begin
+    puts "Reading diff"
+    lines = 0;
+    File.readlines("diff.json").each do | line |
+      lines += 1
+      diff = JSON.parse(line)
+      case diff[ TYPE ]
+      when STRING, DATA, HASH, ARRAY
+        # skip strings, data, hashes and arrays
+      else
+        diff_index[ diff[ ADDR ].to_i(16) ] = diff
+      end
+    end
+    puts "Read #{lines} number of diffs"
+  rescue => e
+    raise "ERROR READING DIFF at line #{lines} #{e.message[0, 200]}"
+  end
+
+  begin
+    puts "Reading heap"
+    lines = 0
+    allocation = nil
+    File.readlines("heap.json").each do | line |
+      lines += 1
+      allocation = JSON.parse(line)
+      case allocation[ TYPE ]
+      when ROOT_UC
+        # Graph for single id must include roots, as it may be a root that holds on to the reference
+        # a global variable, thread, etc.
+        #
+        if @single_id
+          allocations[ allocation[ ROOT ] ] = allocation
+        end
+      when NODE
+        # skip the NODE objects - they represent the loaded ruby code
+      when STRING
+        # skip all strings - they are everywhere
+      else
+        allocations[ allocation[ ADDR ].to_i(16) ] = allocation
+      end
+    end
+    puts "Read #{lines} number of entries"
+  rescue => e
+    require 'debug'
+    puts "ERROR READING HEAP #{e.message[0, 200]}"
+    raise e
+  end
+  @heap = allocations
+
+  puts "Building reference index"
+  # References is an index from a referenced object to an array with the addresses of the objects that reference it
+  @references = Hash.new { |h, k| h[k] = [] }
+  REFERENCES = "references".freeze
+  allocations.each do |k,v|
+    refs = v[ REFERENCES ]
+    if refs.is_a?(Array)
+      refs.each {|addr| @references[ addr.to_i(16) ] << k }
+    end
+  end
+
+  @printed = Set.new()
+
+  def print_object(addr, entry)
+    # only print each node once
+    return unless @printed.add?(addr)
+    begin
+    if addr.is_a?(String)
+      @output.write( "x#{node_name(addr)} [label=\"#{node_label(addr, entry)}\\n#{addr}\"];\n")
+    else
+      @output.write( "x#{node_name(addr)} [label=\"#{node_label(addr, entry)}\\n#{addr.to_s(16)}\"];\n")
+    end
+    rescue => e
+      require 'debug'
+      raise e
+    end
+  end
+
+  def node_label(addr, entry)
+    if entry[ TYPE ] == OBJECT
+      class_ref = entry[ "class" ].to_i(16)
+      @heap[ class_ref ][ "name" ]
+    elsif entry[ TYPE ] == CLASS
+      "CLASS #{entry[ "name"]}"
+    else
+      entry[TYPE]
+    end
+  end
+
+  def node_name(addr)
+    return addr if addr.is_a? String
+    addr.to_s(16)
+  end
+
+  def print_edge(from_addr, to_addr)
+    @output.write("x#{node_name(from_addr)}->x#{node_name(to_addr)};\n")
+  end
+
+  def closure_and_edges(diff)
+    edges = Set.new()
+    walked = Set.new()
+    puts "Number of diffs referenced = #{diff.count {|k,_| @references[k].is_a?(Array) && @references[k].size() > 0 }}"
+    diff.each {|k,_| walk(k, edges, walked) }
+    edges.each {|e| print_edge(*e) }
+  end
+
+  def walk(addr, edges, walked)
+    if !@heap[ addr ].nil?
+      print_object(addr, @heap[addr])
+
+      @references [ addr ].each do |r|
+        walk_to_object(addr, r, edges, walked)
+      end
+    end
+  end
+
+  def walk_to_object(to_addr, cursor, edges, walked)
+    return unless walked
+    # if walked to an object, or everything if a single_id is the target
+    if @heap[ cursor ][ TYPE ] == OBJECT || (@single_id && @heap[ cursor ][ TYPE ] == ROOT_UC || @heap[ cursor ][ TYPE ] == CLASS )
+      # and the edge is unique
+      if edges.add?( [ cursor, to_addr ] )
+        # then we may not have visited the objects this object is being referred from
+        print_object(cursor, @heap[ cursor ])
+        # Do not follow what binds a class
+        if @heap[ cursor ][ TYPE ] != CLASS
+          @references[ cursor ].each do |r|
+            walk_to_object(cursor, r, edges, walked.add?(r))
+            walked.delete(r)
+          end
+        end
+      end
+    else
+      # continue search until Object
+      @references[cursor].each do |r|
+        walk_to_object(to_addr, r, edges, walked.add?(r))
+      end
+    end
+  end
+
+  def single_closure_and_edges(the_target)
+    edges = Set.new()
+    walked = Set.new()
+    walk(the_target, edges, walked)
+    edges.each {|e| print_edge(*e) }
+  end
+
+  puts "creating graph"
+  if @single_id
+    @output = File.open("memwalk-#{@single_id.to_s(16)}.dot", "w")
+    @output.write("digraph root {\n")
+    single_closure_and_edges(@single_id)
+  else
+    @output = File.open("memwalk.dot", "w")
+    @output.write("digraph root {\n")
+    closure_and_edges(diff_index)
+  end
+  @output.write("}\n")
+  @output.close
+  puts "done"
+end
author	Andrew Parker <andy@puppetlabs.com>	2014-10-14 11:33:28 -0700
committer	Andrew Parker <andy@puppetlabs.com>	2014-10-14 11:33:28 -0700
commit	17d0d3fafdb7798253393d7845eba370a4e1349b (patch)
tree	13c64b8e13ce4c4f10d45f080cfc178f0edcac8f
parent	c5a78fe149fe093e6bc659fb1279576581381afe (diff)
parent	74089c6b88462dbbf6604f80b50fe90254bb62b8 (diff)
download	puppet-17d0d3fafdb7798253393d7845eba370a4e1349b.tar.gz