summaryrefslogtreecommitdiff
path: root/devel/bmake/files/mk/meta2deps.py
diff options
context:
space:
mode:
Diffstat (limited to 'devel/bmake/files/mk/meta2deps.py')
-rwxr-xr-xdevel/bmake/files/mk/meta2deps.py755
1 files changed, 755 insertions, 0 deletions
diff --git a/devel/bmake/files/mk/meta2deps.py b/devel/bmake/files/mk/meta2deps.py
new file mode 100755
index 00000000000..5a5a2315abb
--- /dev/null
+++ b/devel/bmake/files/mk/meta2deps.py
@@ -0,0 +1,755 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+"""
+This script parses each "meta" file and extracts the
+information needed to deduce build and src dependencies.
+
+It works much the same as the original shell script, but is
+*much* more efficient.
+
+The parsing work is handled by the class MetaFile.
+We only pay attention to a subset of the information in the
+"meta" files. Specifically:
+
+'CWD' to initialize our notion.
+
+'C' to track chdir(2) on a per process basis
+
+'R' files read are what we really care about.
+ directories read, provide a clue to resolving
+ subsequent relative paths. That is if we cannot find
+ them relative to 'cwd', we check relative to the last
+ dir read.
+
+'W' files opened for write or read-write,
+ for filemon V3 and earlier.
+
+'E' files executed.
+
+'L' files linked
+
+'V' the filemon version, this record is used as a clue
+ that we have reached the interesting bit.
+
+"""
+
+"""
+RCSid:
+ $Id: meta2deps.py,v 1.1.1.1 2020/05/24 05:35:53 nia Exp $
+
+ Copyright (c) 2011-2019, Simon J. Gerraty
+ Copyright (c) 2011-2017, Juniper Networks, Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+
+import os, re, sys
+
+def getv(dict, key, d=None):
+ """Lookup key in dict and return value or the supplied default."""
+ if key in dict:
+ return dict[key]
+ return d
+
+def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
+ """
+ Return an absolute path, resolving via cwd or last_dir if needed.
+ """
+ if path.endswith('/.'):
+ path = path[0:-2]
+ if len(path) > 0 and path[0] == '/':
+ return path
+ if path == '.':
+ return cwd
+ if path.startswith('./'):
+ return cwd + path[1:]
+ if last_dir == cwd:
+ last_dir = None
+ for d in [last_dir, cwd]:
+ if not d:
+ continue
+ if path == '..':
+ dw = d.split('/')
+ p = '/'.join(dw[:-1])
+ if not p:
+ p = '/'
+ return p
+ p = '/'.join([d,path])
+ if debug > 2:
+ print("looking for:", p, end=' ', file=debug_out)
+ if not os.path.exists(p):
+ if debug > 2:
+ print("nope", file=debug_out)
+ p = None
+ continue
+ if debug > 2:
+ print("found:", p, file=debug_out)
+ return p
+ return None
+
+def cleanpath(path):
+ """cleanup path without using realpath(3)"""
+ if path.startswith('/'):
+ r = '/'
+ else:
+ r = ''
+ p = []
+ w = path.split('/')
+ for d in w:
+ if not d or d == '.':
+ continue
+ if d == '..':
+ try:
+ p.pop()
+ continue
+ except:
+ break
+ p.append(d)
+
+ return r + '/'.join(p)
+
+def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
+ """
+ Return an absolute path, resolving via cwd or last_dir if needed.
+ this gets called a lot, so we try to avoid calling realpath.
+ """
+ rpath = resolve(path, cwd, last_dir, debug, debug_out)
+ if rpath:
+ path = rpath
+ if (path.find('/') < 0 or
+ path.find('./') > 0 or
+ path.endswith('/..')):
+ path = cleanpath(path)
+ return path
+
+def sort_unique(list, cmp=None, key=None, reverse=False):
+ list.sort(cmp, key, reverse)
+ nl = []
+ le = None
+ for e in list:
+ if e == le:
+ continue
+ le = e
+ nl.append(e)
+ return nl
+
+def add_trims(x):
+ return ['/' + x + '/',
+ '/' + x,
+ x + '/',
+ x]
+
+class MetaFile:
+ """class to parse meta files generated by bmake."""
+
+ conf = None
+ dirdep_re = None
+ host_target = None
+ srctops = []
+ objroots = []
+ excludes = []
+ seen = {}
+ obj_deps = []
+ src_deps = []
+ file_deps = []
+
+ def __init__(self, name, conf={}):
+ """if name is set we will parse it now.
+ conf can have the follwing keys:
+
+ SRCTOPS list of tops of the src tree(s).
+
+ CURDIR the src directory 'bmake' was run from.
+
+ RELDIR the relative path from SRCTOP to CURDIR
+
+ MACHINE the machine we built for.
+ set to 'none' if we are not cross-building.
+ More specifically if machine cannot be deduced from objdirs.
+
+ TARGET_SPEC
+ Sometimes MACHINE isn't enough.
+
+ HOST_TARGET
+ when we build for the pseudo machine 'host'
+ the object tree uses HOST_TARGET rather than MACHINE.
+
+ OBJROOTS a list of the common prefix for all obj dirs it might
+ end in '/' or '-'.
+
+ DPDEPS names an optional file to which per file dependencies
+ will be appended.
+ For example if 'some/path/foo.h' is read from SRCTOP
+ then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
+ This can allow 'bmake' to learn all the dirs within
+ the tree that depend on 'foo.h'
+
+ EXCLUDES
+ A list of paths to ignore.
+ ccache(1) can otherwise be trouble.
+
+ debug desired debug level
+
+ debug_out open file to send debug output to (sys.stderr)
+
+ """
+
+ self.name = name
+ self.debug = getv(conf, 'debug', 0)
+ self.debug_out = getv(conf, 'debug_out', sys.stderr)
+
+ self.machine = getv(conf, 'MACHINE', '')
+ self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
+ self.target_spec = getv(conf, 'TARGET_SPEC', '')
+ self.curdir = getv(conf, 'CURDIR')
+ self.reldir = getv(conf, 'RELDIR')
+ self.dpdeps = getv(conf, 'DPDEPS')
+ self.line = 0
+
+ if not self.conf:
+ # some of the steps below we want to do only once
+ self.conf = conf
+ self.host_target = getv(conf, 'HOST_TARGET')
+ for srctop in getv(conf, 'SRCTOPS', []):
+ if srctop[-1] != '/':
+ srctop += '/'
+ if not srctop in self.srctops:
+ self.srctops.append(srctop)
+ _srctop = os.path.realpath(srctop)
+ if _srctop[-1] != '/':
+ _srctop += '/'
+ if not _srctop in self.srctops:
+ self.srctops.append(_srctop)
+
+ trim_list = add_trims(self.machine)
+ if self.machine == 'host':
+ trim_list += add_trims(self.host_target)
+ if self.target_spec:
+ trim_list += add_trims(self.target_spec)
+
+ for objroot in getv(conf, 'OBJROOTS', []):
+ for e in trim_list:
+ if objroot.endswith(e):
+ # this is not what we want - fix it
+ objroot = objroot[0:-len(e)]
+
+ if objroot[-1] != '/':
+ objroot += '/'
+ if not objroot in self.objroots:
+ self.objroots.append(objroot)
+ _objroot = os.path.realpath(objroot)
+ if objroot[-1] == '/':
+ _objroot += '/'
+ if not _objroot in self.objroots:
+ self.objroots.append(_objroot)
+
+ # we want the longest match
+ self.srctops.sort(reverse=True)
+ self.objroots.sort(reverse=True)
+
+ self.excludes = getv(conf, 'EXCLUDES', [])
+
+ if self.debug:
+ print("host_target=", self.host_target, file=self.debug_out)
+ print("srctops=", self.srctops, file=self.debug_out)
+ print("objroots=", self.objroots, file=self.debug_out)
+ print("excludes=", self.excludes, file=self.debug_out)
+
+ self.dirdep_re = re.compile(r'([^/]+)/(.+)')
+
+ if self.dpdeps and not self.reldir:
+ if self.debug:
+ print("need reldir:", end=' ', file=self.debug_out)
+ if self.curdir:
+ srctop = self.find_top(self.curdir, self.srctops)
+ if srctop:
+ self.reldir = self.curdir.replace(srctop,'')
+ if self.debug:
+ print(self.reldir, file=self.debug_out)
+ if not self.reldir:
+ self.dpdeps = None # we cannot do it?
+
+ self.cwd = os.getcwd() # make sure this is initialized
+ self.last_dir = self.cwd
+
+ if name:
+ self.try_parse()
+
+ def reset(self):
+ """reset state if we are being passed meta files from multiple directories."""
+ self.seen = {}
+ self.obj_deps = []
+ self.src_deps = []
+ self.file_deps = []
+
+ def dirdeps(self, sep='\n'):
+ """return DIRDEPS"""
+ return sep.strip() + sep.join(self.obj_deps)
+
+ def src_dirdeps(self, sep='\n'):
+ """return SRC_DIRDEPS"""
+ return sep.strip() + sep.join(self.src_deps)
+
+ def file_depends(self, out=None):
+ """Append DPDEPS_${file} += ${RELDIR}
+ for each file we saw, to the output file."""
+ if not self.reldir:
+ return None
+ for f in sort_unique(self.file_deps):
+ print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
+ # these entries provide for reverse DIRDEPS lookup
+ for f in self.obj_deps:
+ print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
+
+ def seenit(self, dir):
+ """rememer that we have seen dir."""
+ self.seen[dir] = 1
+
+ def add(self, list, data, clue=''):
+ """add data to list if it isn't already there."""
+ if data not in list:
+ list.append(data)
+ if self.debug:
+ print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
+
+ def find_top(self, path, list):
+ """the logical tree may be split across multiple trees"""
+ for top in list:
+ if path.startswith(top):
+ if self.debug > 2:
+ print("found in", top, file=self.debug_out)
+ return top
+ return None
+
+ def find_obj(self, objroot, dir, path, input):
+ """return path within objroot, taking care of .dirdep files"""
+ ddep = None
+ for ddepf in [path + '.dirdep', dir + '/.dirdep']:
+ if not ddep and os.path.exists(ddepf):
+ ddep = open(ddepf, 'r').readline().strip('# \n')
+ if self.debug > 1:
+ print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
+ if ddep.endswith(self.machine):
+ ddep = ddep[0:-(1+len(self.machine))]
+ elif self.target_spec and ddep.endswith(self.target_spec):
+ ddep = ddep[0:-(1+len(self.target_spec))]
+
+ if not ddep:
+ # no .dirdeps, so remember that we've seen the raw input
+ self.seenit(input)
+ self.seenit(dir)
+ if self.machine == 'none':
+ if dir.startswith(objroot):
+ return dir.replace(objroot,'')
+ return None
+ m = self.dirdep_re.match(dir.replace(objroot,''))
+ if m:
+ ddep = m.group(2)
+ dmachine = m.group(1)
+ if dmachine != self.machine:
+ if not (self.machine == 'host' and
+ dmachine == self.host_target):
+ if self.debug > 2:
+ print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
+ ddep += '.' + dmachine
+
+ return ddep
+
+ def try_parse(self, name=None, file=None):
+ """give file and line number causing exception"""
+ try:
+ self.parse(name, file)
+ except:
+ # give a useful clue
+ print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
+ raise
+
+ def parse(self, name=None, file=None):
+ """A meta file looks like:
+
+ # Meta data file "path"
+ CMD "command-line"
+ CWD "cwd"
+ TARGET "target"
+ -- command output --
+ -- filemon acquired metadata --
+ # buildmon version 3
+ V 3
+ C "pid" "cwd"
+ E "pid" "path"
+ F "pid" "child"
+ R "pid" "path"
+ W "pid" "path"
+ X "pid" "status"
+ D "pid" "path"
+ L "pid" "src" "target"
+ M "pid" "old" "new"
+ S "pid" "path"
+ # Bye bye
+
+ We go to some effort to avoid processing a dependency more than once.
+ Of the above record types only C,E,F,L,R,V and W are of interest.
+ """
+
+ version = 0 # unknown
+ if name:
+ self.name = name;
+ if file:
+ f = file
+ cwd = self.last_dir = self.cwd
+ else:
+ f = open(self.name, 'r')
+ skip = True
+ pid_cwd = {}
+ pid_last_dir = {}
+ last_pid = 0
+
+ self.line = 0
+ if self.curdir:
+ self.seenit(self.curdir) # we ignore this
+
+ interesting = 'CEFLRV'
+ for line in f:
+ self.line += 1
+ # ignore anything we don't care about
+ if not line[0] in interesting:
+ continue
+ if self.debug > 2:
+ print("input:", line, end=' ', file=self.debug_out)
+ w = line.split()
+
+ if skip:
+ if w[0] == 'V':
+ skip = False
+ version = int(w[1])
+ """
+ if version < 4:
+ # we cannot ignore 'W' records
+ # as they may be 'rw'
+ interesting += 'W'
+ """
+ elif w[0] == 'CWD':
+ self.cwd = cwd = self.last_dir = w[1]
+ self.seenit(cwd) # ignore this
+ if self.debug:
+ print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
+ continue
+
+ pid = int(w[1])
+ if pid != last_pid:
+ if last_pid:
+ pid_last_dir[last_pid] = self.last_dir
+ cwd = getv(pid_cwd, pid, self.cwd)
+ self.last_dir = getv(pid_last_dir, pid, self.cwd)
+ last_pid = pid
+
+ # process operations
+ if w[0] == 'F':
+ npid = int(w[2])
+ pid_cwd[npid] = cwd
+ pid_last_dir[npid] = cwd
+ last_pid = npid
+ continue
+ elif w[0] == 'C':
+ cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
+ if cwd.endswith('/.'):
+ cwd = cwd[0:-2]
+ self.last_dir = pid_last_dir[pid] = cwd
+ pid_cwd[pid] = cwd
+ if self.debug > 1:
+ print("cwd=", cwd, file=self.debug_out)
+ continue
+
+ if w[2] in self.seen:
+ if self.debug > 2:
+ print("seen:", w[2], file=self.debug_out)
+ continue
+ # file operations
+ if w[0] in 'ML':
+ # these are special, tread src as read and
+ # target as write
+ self.parse_path(w[1].strip("'"), cwd, 'R', w)
+ self.parse_path(w[2].strip("'"), cwd, 'W', w)
+ continue
+ elif w[0] in 'ERWS':
+ path = w[2]
+ self.parse_path(path, cwd, w[0], w)
+
+ if not file:
+ f.close()
+
+ def is_src(self, base, dir, rdir):
+ """is base in srctop"""
+ for dir in [dir,rdir]:
+ if not dir:
+ continue
+ path = '/'.join([dir,base])
+ srctop = self.find_top(path, self.srctops)
+ if srctop:
+ if self.dpdeps:
+ self.add(self.file_deps, path.replace(srctop,''), 'file')
+ self.add(self.src_deps, dir.replace(srctop,''), 'src')
+ self.seenit(dir)
+ return True
+ return False
+
+ def parse_path(self, path, cwd, op=None, w=[]):
+ """look at a path for the op specified"""
+
+ if not op:
+ op = w[0]
+
+ # we are never interested in .dirdep files as dependencies
+ if path.endswith('.dirdep'):
+ return
+ for p in self.excludes:
+ if p and path.startswith(p):
+ if self.debug > 2:
+ print("exclude:", p, path, file=self.debug_out)
+ return
+ # we don't want to resolve the last component if it is
+ # a symlink
+ path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
+ if not path:
+ return
+ dir,base = os.path.split(path)
+ if dir in self.seen:
+ if self.debug > 2:
+ print("seen:", dir, file=self.debug_out)
+ return
+ # we can have a path in an objdir which is a link
+ # to the src dir, we may need to add dependencies for each
+ rdir = dir
+ dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
+ rdir = os.path.realpath(dir)
+ if rdir == dir:
+ rdir = None
+ # now put path back together
+ path = '/'.join([dir,base])
+ if self.debug > 1:
+ print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
+ if op in 'RWS':
+ if path in [self.last_dir, cwd, self.cwd, self.curdir]:
+ if self.debug > 1:
+ print("skipping:", path, file=self.debug_out)
+ return
+ if os.path.isdir(path):
+ if op in 'RW':
+ self.last_dir = path;
+ if self.debug > 1:
+ print("ldir=", self.last_dir, file=self.debug_out)
+ return
+
+ if op in 'ERW':
+ # finally, we get down to it
+ if dir == self.cwd or dir == self.curdir:
+ return
+ if self.is_src(base, dir, rdir):
+ self.seenit(w[2])
+ if not rdir:
+ return
+
+ objroot = None
+ for dir in [dir,rdir]:
+ if not dir:
+ continue
+ objroot = self.find_top(dir, self.objroots)
+ if objroot:
+ break
+ if objroot:
+ ddep = self.find_obj(objroot, dir, path, w[2])
+ if ddep:
+ self.add(self.obj_deps, ddep, 'obj')
+ if self.dpdeps and objroot.endswith('/stage/'):
+ sp = '/'.join(path.replace(objroot,'').split('/')[1:])
+ self.add(self.file_deps, sp, 'file')
+ else:
+ # don't waste time looking again
+ self.seenit(w[2])
+ self.seenit(dir)
+
+
+def main(argv, klass=MetaFile, xopts='', xoptf=None):
+ """Simple driver for class MetaFile.
+
+ Usage:
+ script [options] [key=value ...] "meta" ...
+
+ Options and key=value pairs contribute to the
+ dictionary passed to MetaFile.
+
+ -S "SRCTOP"
+ add "SRCTOP" to the "SRCTOPS" list.
+
+ -C "CURDIR"
+
+ -O "OBJROOT"
+ add "OBJROOT" to the "OBJROOTS" list.
+
+ -m "MACHINE"
+
+ -a "MACHINE_ARCH"
+
+ -H "HOST_TARGET"
+
+ -D "DPDEPS"
+
+ -d bumps debug level
+
+ """
+ import getopt
+
+ # import Psyco if we can
+ # it can speed things up quite a bit
+ have_psyco = 0
+ try:
+ import psyco
+ psyco.full()
+ have_psyco = 1
+ except:
+ pass
+
+ conf = {
+ 'SRCTOPS': [],
+ 'OBJROOTS': [],
+ 'EXCLUDES': [],
+ }
+
+ try:
+ machine = os.environ['MACHINE']
+ if machine:
+ conf['MACHINE'] = machine
+ machine_arch = os.environ['MACHINE_ARCH']
+ if machine_arch:
+ conf['MACHINE_ARCH'] = machine_arch
+ srctop = os.environ['SB_SRC']
+ if srctop:
+ conf['SRCTOPS'].append(srctop)
+ objroot = os.environ['SB_OBJROOT']
+ if objroot:
+ conf['OBJROOTS'].append(objroot)
+ except:
+ pass
+
+ debug = 0
+ output = True
+
+ opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
+ for o, a in opts:
+ if o == '-a':
+ conf['MACHINE_ARCH'] = a
+ elif o == '-d':
+ debug += 1
+ elif o == '-q':
+ output = False
+ elif o == '-H':
+ conf['HOST_TARGET'] = a
+ elif o == '-S':
+ if a not in conf['SRCTOPS']:
+ conf['SRCTOPS'].append(a)
+ elif o == '-C':
+ conf['CURDIR'] = a
+ elif o == '-O':
+ if a not in conf['OBJROOTS']:
+ conf['OBJROOTS'].append(a)
+ elif o == '-R':
+ conf['RELDIR'] = a
+ elif o == '-D':
+ conf['DPDEPS'] = a
+ elif o == '-m':
+ conf['MACHINE'] = a
+ elif o == '-T':
+ conf['TARGET_SPEC'] = a
+ elif o == '-X':
+ if a not in conf['EXCLUDES']:
+ conf['EXCLUDES'].append(a)
+ elif xoptf:
+ xoptf(o, a, conf)
+
+ conf['debug'] = debug
+
+ # get any var=val assignments
+ eaten = []
+ for a in args:
+ if a.find('=') > 0:
+ k,v = a.split('=')
+ if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
+ if k == 'SRCTOP':
+ k = 'SRCTOPS'
+ elif k == 'OBJROOT':
+ k = 'OBJROOTS'
+ if v not in conf[k]:
+ conf[k].append(v)
+ else:
+ conf[k] = v
+ eaten.append(a)
+ continue
+ break
+
+ for a in eaten:
+ args.remove(a)
+
+ debug_out = getv(conf, 'debug_out', sys.stderr)
+
+ if debug:
+ print("config:", file=debug_out)
+ print("psyco=", have_psyco, file=debug_out)
+ for k,v in list(conf.items()):
+ print("%s=%s" % (k,v), file=debug_out)
+
+ m = None
+ for a in args:
+ if a.endswith('.meta'):
+ if not os.path.exists(a):
+ continue
+ m = klass(a, conf)
+ elif a.startswith('@'):
+ # there can actually multiple files per line
+ for line in open(a[1:]):
+ for f in line.strip().split():
+ if not os.path.exists(f):
+ continue
+ m = klass(f, conf)
+
+ if output and m:
+ print(m.dirdeps())
+
+ print(m.src_dirdeps('\nsrc:'))
+
+ dpdeps = getv(conf, 'DPDEPS')
+ if dpdeps:
+ m.file_depends(open(dpdeps, 'wb'))
+
+ return m
+
+if __name__ == '__main__':
+ try:
+ main(sys.argv)
+ except:
+ # yes, this goes to stdout
+ print("ERROR: ", sys.exc_info()[1])
+ raise
+