Source code for pySPACE.tools.gprof2dot

#!/usr/bin/env python
""" Generate a dot graph from the output of several profilers.

    Original script by Jose Fonseca at 
    http://code.google.com/p/jrfonseca/wiki/Gprof2Dot
    
    Usage: python gprof2dot.py -f pstats profiling_file -r pySPACE | dot -Tpng -o output.png
    
    where profiling_file is the file that is generated by the cProfile module.
    
    The option '-r', '--restrict' eliminates functions from the profile
    whose pathname does not contain the given string [default: None].
    This option was added by Hendrik Woehrle and Mario Michael Krell.
    Furthermore, some minor documentation changes were applied.

    For profiling pySPACE, see the pySPACE documentation, especially launch.py,
    and use the `--profile` option.

    Original LGPL 3+ licensing text::

        Copyright 2008-2009 Jose Fonseca

        This program is free software: you can redistribute it and/or modify it
        under the terms of the GNU Lesser General Public License as published
        by the Free Software Foundation, either version 3 of the License, or
        (at your option) any later version.

        This program is distributed in the hope that it will be useful,
        but WITHOUT ANY WARRANTY; without even the implied warranty of
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        GNU Lesser General Public License for more details.

        You should have received a copy of the GNU Lesser General Public License
        along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""

__version__ = "1.0"


import sys
import math
import os.path
import re
import textwrap
import optparse
import xml.parsers.expat


try:
    # Debugging helper module
    import debug
except ImportError:
    pass


def times(x):
    return u"%u\xd7" % (x,)

def percentage(p):
    return "%.02f%%" % (p*100.0,)

def add(a, b):
    return a + b

def equal(a, b):
    if a == b:
        return a
    else:
        return None

def fail(a, b):
    assert False


tol = 2 ** -23

def ratio(numerator, denominator):
    try:
        ratio = float(numerator)/float(denominator)
    except ZeroDivisionError:
        # 0/0 is undefined, but 1.0 yields more useful results
        return 1.0
    if ratio < 0.0:
        if ratio < -tol:
            sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator))
        return 0.0
    if ratio > 1.0:
        if ratio > 1.0 + tol:
            sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator))
        return 1.0
    return ratio
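# Illustrative behaviour of ratio() (added comment, not in the original):
#   ratio(1, 4) -> 0.25
#   ratio(0, 0) -> 1.0    (0/0 is treated as 1.0 for more useful output)
#   ratio(5, 4) -> 1.0    (clamped; a warning is emitted when clearly above 1 + tol)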
class UndefinedEvent(Exception):
    """Raised when attempting to get an event which is undefined."""

    def __init__(self, event):
        Exception.__init__(self)
        self.event = event

    def __str__(self):
        return 'unspecified event %s' % self.event.name


class Event(object):
    """Describe a kind of event, and its basic operations."""

    def __init__(self, name, null, aggregator, formatter=str):
        self.name = name
        self._null = null
        self._aggregator = aggregator
        self._formatter = formatter

    def __eq__(self, other):
        return self is other

    def __hash__(self):
        return id(self)

    def null(self):
        return self._null

    def aggregate(self, val1, val2):
        """Aggregate two event values."""
        assert val1 is not None
        assert val2 is not None
        return self._aggregator(val1, val2)

    def format(self, val):
        """Format an event value."""
        assert val is not None
        return self._formatter(val)


CALLS = Event("Calls", 0, add, times)
SAMPLES = Event("Samples", 0, add)
SAMPLES2 = Event("Samples", 0, add)
TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')')
TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')')
TOTAL_TIME = Event("Total time", 0.0, fail)
TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage)
class Object(object):
    """Base class for all objects in profile which can store events."""

    def __init__(self, events=None):
        if events is None:
            self.events = {}
        else:
            self.events = events

    def __hash__(self):
        return id(self)

    def __eq__(self, other):
        return self is other

    def __contains__(self, event):
        return event in self.events

    def __getitem__(self, event):
        try:
            return self.events[event]
        except KeyError:
            raise UndefinedEvent(event)

    def __setitem__(self, event, value):
        if value is None:
            if event in self.events:
                del self.events[event]
        else:
            self.events[event] = value
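# Illustrative use of the event machinery (added comment, not in the original):
# profile objects store per-event values keyed by Event instances, e.g.
#
#   f = Function(1, 'main')    # Function is defined below
#   f[TIME] = 1.5              # Object.__setitem__ stores the value
#   TIME.format(f[TIME])       # -> '(1.5)'
#   f[CALLS]                   # raises UndefinedEvent, since CALLS was never set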
class Call(Object):
    """A call between functions.

    There should be at most one call object for every pair of functions.
    """

    def __init__(self, callee_id):
        Object.__init__(self)
        self.callee_id = callee_id
        self.ratio = None
        self.weight = None


class Function(Object):
    """A function."""

    def __init__(self, id, name, path=None):
        Object.__init__(self)
        self.id = id
        self.name = name
        self.module = None
        self.process = None
        self.calls = {}
        self.called = None
        self.weight = None
        self.cycle = None
        self.path = path

    def add_call(self, call):
        if call.callee_id in self.calls:
            sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id)))
        self.calls[call.callee_id] = call

    def get_call(self, callee_id):
        if not callee_id in self.calls:
            call = Call(callee_id)
            call[SAMPLES] = 0
            call[SAMPLES2] = 0
            call[CALLS] = 0
            self.calls[callee_id] = call
        return self.calls[callee_id]

    # TODO: write utility functions

    def __repr__(self):
        return self.name
class Cycle(Object):
    """A cycle made from recursive function calls."""

    def __init__(self):
        Object.__init__(self)
        # XXX: Do cycles need an id?
        self.functions = set()

    def add_function(self, function):
        assert function not in self.functions
        self.functions.add(function)
        # XXX: Aggregate events?
        if function.cycle is not None:
            for other in function.cycle.functions:
                if function not in self.functions:
                    self.add_function(other)
        function.cycle = self
class Profile(Object):
    """The whole profile."""

    def __init__(self):
        Object.__init__(self)
        self.functions = {}
        self.cycles = []

    def add_function(self, function):
        if function.id in self.functions:
            sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id)))
        self.functions[function.id] = function

    def add_cycle(self, cycle):
        self.cycles.append(cycle)

    def validate(self):
        """Validate the edges."""
        for function in self.functions.itervalues():
            for callee_id in function.calls.keys():
                assert function.calls[callee_id].callee_id == callee_id
                if callee_id not in self.functions:
                    sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name))
                    del function.calls[callee_id]
    def find_cycles(self):
        """Find cycles using Tarjan's strongly connected components algorithm."""
        # Apply Tarjan's algorithm successively until all functions are visited
        visited = set()
        for function in self.functions.itervalues():
            if function not in visited:
                self._tarjan(function, 0, [], {}, {}, visited)
        cycles = []
        for function in self.functions.itervalues():
            if function.cycle is not None and function.cycle not in cycles:
                cycles.append(function.cycle)
        self.cycles = cycles
        if 0:
            for cycle in cycles:
                sys.stderr.write("Cycle:\n")
                for member in cycle.functions:
                    sys.stderr.write("\tFunction %s\n" % member.name)

    def _tarjan(self, function, order, stack, orders, lowlinks, visited):
        """Tarjan's strongly connected components algorithm.

        See also:
        - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm
        """
        visited.add(function)
        orders[function] = order
        lowlinks[function] = order
        order += 1
        pos = len(stack)
        stack.append(function)
        for call in function.calls.itervalues():
            callee = self.functions[call.callee_id]
            # TODO: use a set to optimize lookup
            if callee not in orders:
                order = self._tarjan(callee, order, stack, orders, lowlinks, visited)
                lowlinks[function] = min(lowlinks[function], lowlinks[callee])
            elif callee in stack:
                lowlinks[function] = min(lowlinks[function], orders[callee])
        if lowlinks[function] == orders[function]:
            # Strongly connected component found
            members = stack[pos:]
            del stack[pos:]
            if len(members) > 1:
                cycle = Cycle()
                for member in members:
                    cycle.add_function(member)
        return order
[docs] def call_ratios(self, event): # Aggregate for incoming calls cycle_totals = {} for cycle in self.cycles: cycle_totals[cycle] = 0.0 function_totals = {} for function in self.functions.itervalues(): function_totals[function] = 0.0 for function in self.functions.itervalues(): for call in function.calls.itervalues(): if call.callee_id != function.id: callee = self.functions[call.callee_id] function_totals[callee] += call[event] if callee.cycle is not None and callee.cycle is not function.cycle: cycle_totals[callee.cycle] += call[event] # Compute the ratios for function in self.functions.itervalues(): for call in function.calls.itervalues(): assert call.ratio is None if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is not None and callee.cycle is not function.cycle: total = cycle_totals[callee.cycle] else: total = function_totals[callee] call.ratio = ratio(call[event], total)
[docs] def integrate(self, outevent, inevent): """Propagate function time ratio allong the function calls. Must be called after finding the cycles. See also: - http://citeseer.ist.psu.edu/graham82gprof.html """ # Sanity checking assert outevent not in self for function in self.functions.itervalues(): assert outevent not in function assert inevent in function for call in function.calls.itervalues(): assert outevent not in call if call.callee_id != function.id: assert call.ratio is not None # Aggregate the input for each cycle for cycle in self.cycles: total = inevent.null() for function in self.functions.itervalues(): total = inevent.aggregate(total, function[inevent]) self[inevent] = total # Integrate along the edges total = inevent.null() for function in self.functions.itervalues(): total = inevent.aggregate(total, function[inevent]) self._integrate_function(function, outevent, inevent) self[outevent] = total
[docs] def _integrate_function(self, function, outevent, inevent): if function.cycle is not None: return self._integrate_cycle(function.cycle, outevent, inevent) else: if outevent not in function: total = function[inevent] for call in function.calls.itervalues(): if call.callee_id != function.id: total += self._integrate_call(call, outevent, inevent) function[outevent] = total return function[outevent]
[docs] def _integrate_call(self, call, outevent, inevent): assert outevent not in call assert call.ratio is not None callee = self.functions[call.callee_id] subtotal = call.ratio *self._integrate_function(callee, outevent, inevent) call[outevent] = subtotal return subtotal
[docs] def _integrate_cycle(self, cycle, outevent, inevent): if outevent not in cycle: # Compute the outevent for the whole cycle total = inevent.null() for member in cycle.functions: subtotal = member[inevent] for call in member.calls.itervalues(): callee = self.functions[call.callee_id] if callee.cycle is not cycle: subtotal += self._integrate_call(call, outevent, inevent) total += subtotal cycle[outevent] = total # Compute the time propagated to callers of this cycle callees = {} for function in self.functions.itervalues(): if function.cycle is not cycle: for call in function.calls.itervalues(): callee = self.functions[call.callee_id] if callee.cycle is cycle: try: callees[callee] += call.ratio except KeyError: callees[callee] = call.ratio for member in cycle.functions: member[outevent] = outevent.null() for callee, call_ratio in callees.iteritems(): ranks = {} call_ratios = {} partials = {} self._rank_cycle_function(cycle, callee, 0, ranks) self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set()) partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent) assert partial == max(partials.values()) assert not total or abs(1.0 - partial/(call_ratio*total)) <= 0.001 return cycle[outevent]
[docs] def _rank_cycle_function(self, cycle, function, rank, ranks): if function not in ranks or ranks[function] > rank: ranks[function] = rank for call in function.calls.itervalues(): if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is cycle: self._rank_cycle_function(cycle, callee, rank + 1, ranks)
[docs] def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited): if function not in visited: visited.add(function) for call in function.calls.itervalues(): if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is cycle: if ranks[callee] > ranks[function]: call_ratios[callee] = call_ratios.get(callee, 0.0) + call.ratio self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited)
[docs] def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent): if function not in partials: partial = partial_ratio*function[inevent] for call in function.calls.itervalues(): if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is not cycle: assert outevent in call partial += partial_ratio*call[outevent] else: if ranks[callee] > ranks[function]: callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent) call_ratio = ratio(call.ratio, call_ratios[callee]) call_partial = call_ratio*callee_partial try: call[outevent] += call_partial except UndefinedEvent: call[outevent] = call_partial partial += call_partial partials[function] = partial try: function[outevent] += partial except UndefinedEvent: function[outevent] = partial return partials[function]
[docs] def aggregate(self, event): """Aggregate an event for the whole profile.""" total = event.null() for function in self.functions.itervalues(): try: total = event.aggregate(total, function[event]) except UndefinedEvent: return self[event] = total
[docs] def ratio(self, outevent, inevent): assert outevent not in self assert inevent in self for function in self.functions.itervalues(): assert outevent not in function assert inevent in function function[outevent] = ratio(function[inevent], self[inevent]) for call in function.calls.itervalues(): assert outevent not in call if inevent in call: call[outevent] = ratio(call[inevent], self[inevent]) self[outevent] = 1.0
[docs] def prune(self, node_thres, edge_thres,restrict_string): """Prune the profile""" # compute the prune ratios for function in self.functions.itervalues(): try: function.weight = function[TOTAL_TIME_RATIO] except UndefinedEvent: pass for call in function.calls.itervalues(): callee = self.functions[call.callee_id] if TOTAL_TIME_RATIO in call: # handle exact cases first call.weight = call[TOTAL_TIME_RATIO] else: try: # make a safe estimate call.weight = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO]) except UndefinedEvent: pass # prune the nodes for function_id in self.functions.keys(): function = self.functions[function_id] if function.weight is not None: if function.weight < node_thres: del self.functions[function_id] for function_id in self.functions.keys(): function = self.functions[function_id] #print function #print function.path if not restrict_string in function.path: del self.functions[function_id] # prune the egdes for function in self.functions.itervalues(): for callee_id in function.calls.keys(): call = function.calls[callee_id] if callee_id not in self.functions or call.weight is not None and call.weight < edge_thres: del function.calls[callee_id]
[docs] def dump(self): for function in self.functions.itervalues(): sys.stderr.write('Function %s:\n' % (function.name,)) self._dump_events(function.events) for call in function.calls.itervalues(): callee = self.functions[call.callee_id] sys.stderr.write(' Call %s:\n' % (callee.name,)) self._dump_events(call.events) for cycle in self.cycles: sys.stderr.write('Cycle:\n') self._dump_events(cycle.events) for function in cycle.functions: sys.stderr.write(' Function %s\n' % (function.name,))
[docs] def _dump_events(self, events): for event, value in events.iteritems(): sys.stderr.write(' %s: %s\n' % (event.name, event.format(value)))
[docs]class Struct: """Masquerade a dictionary with a structure-like behavior."""
[docs] def __init__(self, attrs = None): if attrs is None: attrs = {} self.__dict__['_attrs'] = attrs
[docs] def __getattr__(self, name): try: return self._attrs[name] except KeyError: raise AttributeError(name)
[docs] def __setattr__(self, name, value): self._attrs[name] = value
[docs] def __str__(self): return str(self._attrs)
[docs] def __repr__(self): return repr(self._attrs)
[docs]class ParseError(Exception): """Raised when parsing to signal mismatches."""
[docs] def __init__(self, msg, line): self.msg = msg # TODO: store more source line information self.line = line
[docs] def __str__(self): return '%s: %r' % (self.msg, self.line)
[docs]class Parser: """Parser interface."""
[docs] def __init__(self): pass
[docs] def parse(self): raise NotImplementedError
[docs]class LineParser(Parser): """Base class for parsers that read line-based formats."""
[docs] def __init__(self, file): Parser.__init__(self) self._file = file self.__line = None self.__eof = False self.line_no = 0
[docs] def readline(self): line = self._file.readline() if not line: self.__line = '' self.__eof = True else: self.line_no += 1 self.__line = line.rstrip('\r\n')
[docs] def lookahead(self): assert self.__line is not None return self.__line
[docs] def consume(self): assert self.__line is not None line = self.__line self.readline() return line
[docs] def eof(self): assert self.__line is not None return self.__eof
XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4)
[docs]class XmlToken:
[docs] def __init__(self, type, name_or_data, attrs = None, line = None, column = None): assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF) self.type = type self.name_or_data = name_or_data self.attrs = attrs self.line = line self.column = column
[docs] def __str__(self): if self.type == XML_ELEMENT_START: return '<' + self.name_or_data + ' ...>' if self.type == XML_ELEMENT_END: return '</' + self.name_or_data + '>' if self.type == XML_CHARACTER_DATA: return self.name_or_data if self.type == XML_EOF: return 'end of file' assert 0
[docs]class XmlTokenizer: """Expat based XML tokenizer."""
[docs] def __init__(self, fp, skip_ws = True): self.fp = fp self.tokens = [] self.index = 0 self.final = False self.skip_ws = skip_ws self.character_pos = 0, 0 self.character_data = '' self.parser = xml.parsers.expat.ParserCreate() self.parser.StartElementHandler = self.handle_element_start self.parser.EndElementHandler = self.handle_element_end self.parser.CharacterDataHandler = self.handle_character_data
[docs] def handle_element_start(self, name, attributes): self.finish_character_data() line, column = self.pos() token = XmlToken(XML_ELEMENT_START, name, attributes, line, column) self.tokens.append(token)
[docs] def handle_element_end(self, name): self.finish_character_data() line, column = self.pos() token = XmlToken(XML_ELEMENT_END, name, None, line, column) self.tokens.append(token)
[docs] def handle_character_data(self, data): if not self.character_data: self.character_pos = self.pos() self.character_data += data
[docs] def finish_character_data(self): if self.character_data: if not self.skip_ws or not self.character_data.isspace(): line, column = self.character_pos token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column) self.tokens.append(token) self.character_data = ''
[docs] def next(self): size = 16*1024 while self.index >= len(self.tokens) and not self.final: self.tokens = [] self.index = 0 data = self.fp.read(size) self.final = len(data) < size try: self.parser.Parse(data, self.final) except xml.parsers.expat.ExpatError, e: #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS: if e.code == 3: pass else: raise e if self.index >= len(self.tokens): line, column = self.pos() token = XmlToken(XML_EOF, None, None, line, column) else: token = self.tokens[self.index] self.index += 1 return token
[docs] def pos(self): return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
[docs]class XmlTokenMismatch(Exception):
[docs] def __init__(self, expected, found): self.expected = expected self.found = found
[docs] def __str__(self): return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found))
[docs]class XmlParser(Parser): """Base XML document parser."""
[docs] def __init__(self, fp): Parser.__init__(self) self.tokenizer = XmlTokenizer(fp) self.consume()
[docs] def consume(self): self.token = self.tokenizer.next()
[docs] def match_element_start(self, name): return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name
[docs] def match_element_end(self, name): return self.token.type == XML_ELEMENT_END and self.token.name_or_data == name
[docs] def element_start(self, name): while self.token.type == XML_CHARACTER_DATA: self.consume() if self.token.type != XML_ELEMENT_START: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) if self.token.name_or_data != name: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) attrs = self.token.attrs self.consume() return attrs
[docs] def element_end(self, name): while self.token.type == XML_CHARACTER_DATA: self.consume() if self.token.type != XML_ELEMENT_END: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) if self.token.name_or_data != name: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) self.consume()
[docs] def character_data(self, strip = True): data = '' while self.token.type == XML_CHARACTER_DATA: data += self.token.name_or_data self.consume() if strip: data = data.strip() return data
[docs]class GprofParser(Parser): """Parser for GNU gprof output. .. seealso:: - Chapter "Interpreting gprof's Output" from the GNU gprof manual http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph - File "cg_print.c" from the GNU gprof source code http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src """
[docs] def __init__(self, fp): Parser.__init__(self) self.fp = fp self.functions = {} self.cycles = {}
[docs] def readline(self): line = self.fp.readline() if not line: sys.stderr.write('error: unexpected end of file\n') sys.exit(1) line = line.rstrip('\r\n') return line
_int_re = re.compile(r'^\d+$') _float_re = re.compile(r'^\d+\.\d+$')
[docs] def translate(self, mo): """Extract a structure from a match object, while translating the types in the process.""" attrs = {} groupdict = mo.groupdict() for name, value in groupdict.iteritems(): if value is None: value = None elif self._int_re.match(value): value = int(value) elif self._float_re.match(value): value = float(value) attrs[name] = (value) return Struct(attrs)
_cg_header_re = re.compile( # original gprof header r'^\s+called/total\s+parents\s*$|' + r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' + r'^\s+called/total\s+children\s*$|' + # GNU gprof header r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$' ) _cg_ignore_re = re.compile( # spontaneous r'^\s+<spontaneous>\s*$|' # internal calls (such as "mcount") r'^.*\((\d+)\)$' ) _cg_primary_re = re.compile( r'^\[(?P<index>\d+)\]?' + r'\s+(?P<percentage_time>\d+\.\d+)' + r'\s+(?P<self>\d+\.\d+)' + r'\s+(?P<descendants>\d+\.\d+)' + r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' + r'\s+(?P<name>\S.*?)' + r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + r'\s\[(\d+)\]$' ) _cg_parent_re = re.compile( r'^\s+(?P<self>\d+\.\d+)?' + r'\s+(?P<descendants>\d+\.\d+)?' + r'\s+(?P<called>\d+)(?:/(?P<called_total>\d+))?' + r'\s+(?P<name>\S.*?)' + r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + r'\s\[(?P<index>\d+)\]$' ) _cg_child_re = _cg_parent_re _cg_cycle_header_re = re.compile( r'^\[(?P<index>\d+)\]?' + r'\s+(?P<percentage_time>\d+\.\d+)' + r'\s+(?P<self>\d+\.\d+)' + r'\s+(?P<descendants>\d+\.\d+)' + r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' + r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' + r'\s\[(\d+)\]$' ) _cg_cycle_member_re = re.compile( r'^\s+(?P<self>\d+\.\d+)?' + r'\s+(?P<descendants>\d+\.\d+)?' + r'\s+(?P<called>\d+)(?:\+(?P<called_self>\d+))?' + r'\s+(?P<name>\S.*?)' + r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + r'\s\[(?P<index>\d+)\]$' ) _cg_sep_re = re.compile(r'^--+$')
[docs] def parse_function_entry(self, lines): parents = [] children = [] while True: if not lines: sys.stderr.write('warning: unexpected end of entry\n') line = lines.pop(0) if line.startswith('['): break # read function parent line mo = self._cg_parent_re.match(line) if not mo: if self._cg_ignore_re.match(line): continue sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) else: parent = self.translate(mo) parents.append(parent) # read primary line mo = self._cg_primary_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) return else: function = self.translate(mo) while lines: line = lines.pop(0) # read function subroutine line mo = self._cg_child_re.match(line) if not mo: if self._cg_ignore_re.match(line): continue sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) else: child = self.translate(mo) children.append(child) function.parents = parents function.children = children self.functions[function.index] = function
[docs] def parse_cycle_entry(self, lines): # read cycle header line line = lines[0] mo = self._cg_cycle_header_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) return cycle = self.translate(mo) # read cycle member lines cycle.functions = [] for line in lines[1:]: mo = self._cg_cycle_member_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) continue call = self.translate(mo) cycle.functions.append(call) self.cycles[cycle.cycle] = cycle
[docs] def parse_cg_entry(self, lines): if lines[0].startswith("["): self.parse_cycle_entry(lines) else: self.parse_function_entry(lines)
[docs] def parse_cg(self): """Parse the call graph.""" # skip call graph header while not self._cg_header_re.match(self.readline()): pass line = self.readline() while self._cg_header_re.match(line): line = self.readline() # process call graph entries entry_lines = [] while line != '\014': # form feed if line and not line.isspace(): if self._cg_sep_re.match(line): self.parse_cg_entry(entry_lines) entry_lines = [] else: entry_lines.append(line) line = self.readline()
[docs] def parse(self): self.parse_cg() self.fp.close() profile = Profile() profile[TIME] = 0.0 cycles = {} for index in self.cycles.iterkeys(): cycles[index] = Cycle() for entry in self.functions.itervalues(): # populate the function function = Function(entry.index, entry.name) function[TIME] = entry.self if entry.called is not None: function.called = entry.called if entry.called_self is not None: call = Call(entry.index) call[CALLS] = entry.called_self function.called += entry.called_self # populate the function calls for child in entry.children: call = Call(child.index) assert child.called is not None call[CALLS] = child.called if child.index not in self.functions: # NOTE: functions that were never called but were discovered by gprof's # static call graph analysis dont have a call graph entry so we need # to add them here missing = Function(child.index, child.name) function[TIME] = 0.0 function.called = 0 profile.add_function(missing) function.add_call(call) profile.add_function(function) if entry.cycle is not None: try: cycle = cycles[entry.cycle] except KeyError: sys.stderr.write('warning: <cycle %u as a whole> entry missing\n' % entry.cycle) cycle = Cycle() cycles[entry.cycle] = cycle cycle.add_function(function) profile[TIME] = profile[TIME] + function[TIME] for cycle in cycles.itervalues(): profile.add_cycle(cycle) # Compute derived events profile.validate() profile.ratio(TIME_RATIO, TIME) profile.call_ratios(CALLS) profile.integrate(TOTAL_TIME, TIME) profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) return profile
[docs]class CallgrindParser(LineParser): """Parser for valgrind's callgrind tool. See also: - http://valgrind.org/docs/manual/cl-format.html """ _call_re = re.compile('^calls=\s*(\d+)\s+((\d+|\+\d+|-\d+|\*)\s+)+$')
[docs] def __init__(self, infile): LineParser.__init__(self, infile) # Textual positions self.position_ids = {} self.positions = {} # Numeric positions self.num_positions = 1 self.cost_positions = ['line'] self.last_positions = [0] # Events self.num_events = 0 self.cost_events = [] self.profile = Profile() self.profile[SAMPLES] = 0
[docs] def parse(self): # read lookahead self.readline() self.parse_key('version') self.parse_key('creator') self.parse_part() # compute derived data self.profile.validate() self.profile.find_cycles() self.profile.ratio(TIME_RATIO, SAMPLES) self.profile.call_ratios(CALLS) self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return self.profile
[docs] def parse_part(self): while self.parse_header_line(): pass while self.parse_body_line(): pass if not self.eof() and False: sys.stderr.write('warning: line %u: unexpected line\n' % self.line_no) sys.stderr.write('%s\n' % self.lookahead()) return True
[docs] def parse_header_line(self): return \ self.parse_empty() or \ self.parse_comment() or \ self.parse_part_detail() or \ self.parse_description() or \ self.parse_event_specification() or \ self.parse_cost_line_def() or \ self.parse_cost_summary()
_detail_keys = set(('cmd', 'pid', 'thread', 'part'))
[docs] def parse_part_detail(self): return self.parse_keys(self._detail_keys)
[docs] def parse_description(self): return self.parse_key('desc') is not None
[docs] def parse_event_specification(self): event = self.parse_key('event') if event is None: return False return True
[docs] def parse_cost_line_def(self): pair = self.parse_keys(('events', 'positions')) if pair is None: return False key, value = pair items = value.split() if key == 'events': self.num_events = len(items) self.cost_events = items if key == 'positions': self.num_positions = len(items) self.cost_positions = items self.last_positions = [0]*self.num_positions return True
[docs] def parse_cost_summary(self): pair = self.parse_keys(('summary', 'totals')) if pair is None: return False return True
[docs] def parse_body_line(self): return \ self.parse_empty() or \ self.parse_comment() or \ self.parse_cost_line() or \ self.parse_position_spec() or \ self.parse_association_spec()
__subpos_re = r'(0x[0-9a-fA-F]+|\d+|\+\d+|-\d+|\*)' _cost_re = re.compile(r'^' + __subpos_re + r'( +' + __subpos_re + r')*' + r'( +\d+)*' + '$')
[docs] def parse_cost_line(self, calls=None): line = self.lookahead().rstrip() mo = self._cost_re.match(line) if not mo: return False function = self.get_function() values = line.split(' ') assert len(values) <= self.num_positions + self.num_events positions = values[0 : self.num_positions] events = values[self.num_positions : ] events += ['0']*(self.num_events - len(events)) for i in range(self.num_positions): position = positions[i] if position == '*': position = self.last_positions[i] elif position[0] in '-+': position = self.last_positions[i] + int(position) elif position.startswith('0x'): position = int(position, 16) else: position = int(position) self.last_positions[i] = position events = map(float, events) if calls is None: function[SAMPLES] += events[0] self.profile[SAMPLES] += events[0] else: callee = self.get_callee() callee.called += calls try: call = function.calls[callee.id] except KeyError: call = Call(callee.id) call[CALLS] = calls call[SAMPLES] = events[0] function.add_call(call) else: call[CALLS] += calls call[SAMPLES] += events[0] self.consume() return True
[docs] def parse_association_spec(self): line = self.lookahead() if not line.startswith('calls='): return False _, values = line.split('=', 1) values = values.strip().split() calls = int(values[0]) call_position = values[1:] self.consume() self.parse_cost_line(calls) return True
_position_re = re.compile('^(?P<position>[cj]?(?:ob|fl|fi|fe|fn))=\s*(?:\((?P<id>\d+)\))?(?:\s*(?P<name>.+))?') _position_table_map = { 'ob': 'ob', 'fl': 'fl', 'fi': 'fl', 'fe': 'fl', 'fn': 'fn', 'cob': 'ob', 'cfl': 'fl', 'cfi': 'fl', 'cfe': 'fl', 'cfn': 'fn', 'jfi': 'fl', } _position_map = { 'ob': 'ob', 'fl': 'fl', 'fi': 'fl', 'fe': 'fl', 'fn': 'fn', 'cob': 'cob', 'cfl': 'cfl', 'cfi': 'cfl', 'cfe': 'cfl', 'cfn': 'cfn', 'jfi': 'jfi', }
[docs] def parse_position_spec(self): line = self.lookahead() if line.startswith('jump=') or line.startswith('jcnd='): self.consume() return True mo = self._position_re.match(line) if not mo: return False position, id, name = mo.groups() if id: table = self._position_table_map[position] if name: self.position_ids[(table, id)] = name else: name = self.position_ids.get((table, id), '') self.positions[self._position_map[position]] = name self.consume() return True
[docs] def parse_empty(self): if self.eof(): return False line = self.lookahead() if line.strip(): return False self.consume() return True
[docs] def parse_comment(self): line = self.lookahead() if not line.startswith('#'): return False self.consume() return True
_key_re = re.compile(r'^(\w+):')
[docs] def parse_key(self, key): pair = self.parse_keys((key,)) if not pair: return None key, value = pair return value
[docs] def parse_keys(self, keys): line = self.lookahead() mo = self._key_re.match(line) if not mo: return None key, value = line.split(':', 1) if key not in keys: return None value = value.strip() self.consume() return key, value
[docs] def make_function(self, module, filename, name): # FIXME: module and filename are not being tracked reliably #id = '|'.join((module, filename, name)) id = name try: function = self.profile.functions[id] except KeyError: function = Function(id, name) function[SAMPLES] = 0 function.called = 0 self.profile.add_function(function) return function
[docs] def get_function(self): module = self.positions.get('ob', '') filename = self.positions.get('fl', '') function = self.positions.get('fn', '') return self.make_function(module, filename, function)
[docs] def get_callee(self): module = self.positions.get('cob', '') filename = self.positions.get('cfi', '') function = self.positions.get('cfn', '') return self.make_function(module, filename, function)
[docs]class OprofileParser(LineParser): """Parser for oprofile callgraph output. See also: - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph """ _fields_re = { 'samples': r'(\d+)', '%': r'(\S+)', 'linenr info': r'(?P<source>\(no location information\)|\S+:\d+)', 'image name': r'(?P<image>\S+(?:\s\(tgid:[^)]*\))?)', 'app name': r'(?P<application>\S+)', 'symbol name': r'(?P<symbol>\(no symbols\)|.+?)', }
[docs] def __init__(self, infile): LineParser.__init__(self, infile) self.entries = {} self.entry_re = None
[docs] def add_entry(self, callers, function, callees): try: entry = self.entries[function.id] except KeyError: self.entries[function.id] = (callers, function, callees) else: callers_total, function_total, callees_total = entry self.update_subentries_dict(callers_total, callers) function_total.samples += function.samples self.update_subentries_dict(callees_total, callees)
[docs] def update_subentries_dict(self, totals, partials): for partial in partials.itervalues(): try: total = totals[partial.id] except KeyError: totals[partial.id] = partial else: total.samples += partial.samples
[docs] def parse(self): # read lookahead self.readline() self.parse_header() while self.lookahead(): self.parse_entry() profile = Profile() reverse_call_samples = {} # populate the profile profile[SAMPLES] = 0 for _callers, _function, _callees in self.entries.itervalues(): function = Function(_function.id, _function.name) function[SAMPLES] = _function.samples profile.add_function(function) profile[SAMPLES] += _function.samples if _function.application: function.process = os.path.basename(_function.application) if _function.image: function.module = os.path.basename(_function.image) total_callee_samples = 0 for _callee in _callees.itervalues(): total_callee_samples += _callee.samples for _callee in _callees.itervalues(): if not _callee.self: call = Call(_callee.id) call[SAMPLES2] = _callee.samples function.add_call(call) # compute derived data profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile
[docs] def parse_header(self): while not self.match_header(): self.consume() line = self.lookahead() fields = re.split(r'\s\s+', line) entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P<self>\s+\[self\])?$' self.entry_re = re.compile(entry_re) self.skip_separator()
[docs] def parse_entry(self): callers = self.parse_subentries() if self.match_primary(): function = self.parse_subentry() if function is not None: callees = self.parse_subentries() self.add_entry(callers, function, callees) self.skip_separator()
[docs] def parse_subentries(self): subentries = {} while self.match_secondary(): subentry = self.parse_subentry() subentries[subentry.id] = subentry return subentries
[docs] def parse_subentry(self): entry = Struct() line = self.consume() mo = self.entry_re.match(line) if not mo: raise ParseError('failed to parse', line) fields = mo.groupdict() entry.samples = int(mo.group(1)) if 'source' in fields and fields['source'] != '(no location information)': source = fields['source'] filename, lineno = source.split(':') entry.filename = filename entry.lineno = int(lineno) else: source = '' entry.filename = None entry.lineno = None entry.image = fields.get('image', '') entry.application = fields.get('application', '') if 'symbol' in fields and fields['symbol'] != '(no symbols)': entry.symbol = fields['symbol'] else: entry.symbol = '' if entry.symbol.startswith('"') and entry.symbol.endswith('"'): entry.symbol = entry.symbol[1:-1] entry.id = ':'.join((entry.application, entry.image, source, entry.symbol)) entry.self = fields.get('self', None) != None if entry.self: entry.id += ':self' if entry.symbol: entry.name = entry.symbol else: entry.name = entry.image return entry
[docs] def skip_separator(self): while not self.match_separator(): self.consume() self.consume()
[docs] def match_header(self): line = self.lookahead() return line.startswith('samples')
[docs] def match_separator(self): line = self.lookahead() return line == '-'*len(line)
[docs] def match_primary(self): line = self.lookahead() return not line[:1].isspace()
[docs] def match_secondary(self): line = self.lookahead() return line[:1].isspace()
[docs]class HProfParser(LineParser): """Parser for java hprof output See also: - http://java.sun.com/developer/technicalArticles/Programming/HPROF.html """ trace_re = re.compile(r'\t(.*)\((.*):(.*)\)') trace_id_re = re.compile(r'^TRACE (\d+):$')
[docs] def __init__(self, infile): LineParser.__init__(self, infile) self.traces = {} self.samples = {}
[docs] def parse(self): # read lookahead self.readline() while not self.lookahead().startswith('------'): self.consume() while not self.lookahead().startswith('TRACE '): self.consume() self.parse_traces() while not self.lookahead().startswith('CPU'): self.consume() self.parse_samples() # populate the profile profile = Profile() profile[SAMPLES] = 0 functions = {} # build up callgraph for id, trace in self.traces.iteritems(): if not id in self.samples: continue mtime = self.samples[id][0] last = None for func, file, line in trace: if not func in functions: function = Function(func, func) function[SAMPLES] = 0 profile.add_function(function) functions[func] = function function = functions[func] # allocate time to the deepest method in the trace if not last: function[SAMPLES] += mtime profile[SAMPLES] += mtime else: c = function.get_call(last) c[SAMPLES2] += mtime last = func # compute derived data profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile
[docs] def parse_traces(self): while self.lookahead().startswith('TRACE '): self.parse_trace()
[docs] def parse_trace(self): l = self.consume() mo = self.trace_id_re.match(l) tid = mo.group(1) last = None trace = [] while self.lookahead().startswith('\t'): l = self.consume() match = self.trace_re.search(l) if not match: #sys.stderr.write('Invalid line: %s\n' % l) break else: function_name, file, line = match.groups() trace += [(function_name, file, line)] self.traces[int(tid)] = trace
[docs] def parse_samples(self): self.consume() self.consume() while not self.lookahead().startswith('CPU'): rank, percent_self, percent_accum, count, traceid, method = self.lookahead().split() self.samples[int(traceid)] = (int(count), method) self.consume()
[docs]class SysprofParser(XmlParser):
[docs] def __init__(self, stream): XmlParser.__init__(self, stream)
[docs] def parse(self): objects = {} nodes = {} self.element_start('profile') while self.token.type == XML_ELEMENT_START: if self.token.name_or_data == 'objects': assert not objects objects = self.parse_items('objects') elif self.token.name_or_data == 'nodes': assert not nodes nodes = self.parse_items('nodes') else: self.parse_value(self.token.name_or_data) self.element_end('profile') return self.build_profile(objects, nodes)
[docs] def parse_items(self, name): assert name[-1] == 's' items = {} self.element_start(name) while self.token.type == XML_ELEMENT_START: id, values = self.parse_item(name[:-1]) assert id not in items items[id] = values self.element_end(name) return items
[docs] def parse_item(self, name): attrs = self.element_start(name) id = int(attrs['id']) values = self.parse_values() self.element_end(name) return id, values
[docs] def parse_values(self): values = {} while self.token.type == XML_ELEMENT_START: name = self.token.name_or_data value = self.parse_value(name) assert name not in values values[name] = value return values
[docs] def parse_value(self, tag): self.element_start(tag) value = self.character_data() self.element_end(tag) if value.isdigit(): return int(value) if value.startswith('"') and value.endswith('"'): return value[1:-1] return value
[docs] def build_profile(self, objects, nodes): profile = Profile() profile[SAMPLES] = 0 for id, object in objects.iteritems(): # Ignore fake objects (process names, modules, "Everything", "kernel", etc.) if object['self'] == 0: continue function = Function(id, object['name']) function[SAMPLES] = object['self'] profile.add_function(function) profile[SAMPLES] += function[SAMPLES] for id, node in nodes.iteritems(): # Ignore fake calls if node['self'] == 0: continue # Find a non-ignored parent parent_id = node['parent'] while parent_id != 0: parent = nodes[parent_id] caller_id = parent['object'] if objects[caller_id]['self'] != 0: break parent_id = parent['parent'] if parent_id == 0: continue callee_id = node['object'] assert objects[caller_id]['self'] assert objects[callee_id]['self'] function = profile.functions[caller_id] samples = node['self'] try: call = function.calls[callee_id] except KeyError: call = Call(callee_id) call[SAMPLES2] = samples function.add_call(call) else: call[SAMPLES2] += samples # Compute derived events profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile
[docs]class SharkParser(LineParser): """Parser for MacOSX Shark output. Author: tom@dbservice.com """
[docs] def __init__(self, infile): LineParser.__init__(self, infile) self.stack = [] self.entries = {}
[docs] def add_entry(self, function): try: entry = self.entries[function.id] except KeyError: self.entries[function.id] = (function, { }) else: function_total, callees_total = entry function_total.samples += function.samples
[docs] def add_callee(self, function, callee): func, callees = self.entries[function.id] try: entry = callees[callee.id] except KeyError: callees[callee.id] = callee else: entry.samples += callee.samples
[docs] def parse(self): self.readline() self.readline() self.readline() self.readline() match = re.compile(r'(?P<prefix>[|+ ]*)(?P<samples>\d+), (?P<symbol>[^,]+), (?P<image>.*)') while self.lookahead(): line = self.consume() mo = match.match(line) if not mo: raise ParseError('failed to parse', line) fields = mo.groupdict() prefix = len(fields.get('prefix', 0)) / 2 - 1 symbol = str(fields.get('symbol', 0)) image = str(fields.get('image', 0)) entry = Struct() entry.id = ':'.join([symbol, image]) entry.samples = int(fields.get('samples', 0)) entry.name = symbol entry.image = image # adjust the callstack if prefix < len(self.stack): del self.stack[prefix:] if prefix == len(self.stack): self.stack.append(entry) # if the callstack has had an entry, it's this functions caller if prefix > 0: self.add_callee(self.stack[prefix - 1], entry) self.add_entry(entry) profile = Profile() profile[SAMPLES] = 0 for _function, _callees in self.entries.itervalues(): function = Function(_function.id, _function.name) function[SAMPLES] = _function.samples profile.add_function(function) profile[SAMPLES] += _function.samples if _function.image: function.module = os.path.basename(_function.image) for _callee in _callees.itervalues(): call = Call(_callee.id) call[SAMPLES] = _callee.samples function.add_call(call) # compute derived data profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile
[docs]class XPerfParser(Parser): """Parser for CSVs generted by XPerf, from Microsoft Windows Performance Tools. """
[docs] def __init__(self, stream): Parser.__init__(self) self.stream = stream self.profile = Profile() self.profile[SAMPLES] = 0 self.column = {}
[docs] def parse(self): import csv reader = csv.reader( self.stream, delimiter = ',', quotechar = None, escapechar = None, doublequote = False, skipinitialspace = True, lineterminator = '\r\n', quoting = csv.QUOTE_NONE) it = iter(reader) row = reader.next() self.parse_header(row) for row in it: self.parse_row(row) # compute derived data self.profile.validate() self.profile.find_cycles() self.profile.ratio(TIME_RATIO, SAMPLES) self.profile.call_ratios(SAMPLES2) self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return self.profile
[docs] def parse_header(self, row): for column in range(len(row)): name = row[column] assert name not in self.column self.column[name] = column
[docs] def parse_row(self, row): fields = {} for name, column in self.column.iteritems(): value = row[column] for factory in int, float: try: value = factory(value) except ValueError: pass else: break fields[name] = value process = fields['Process Name'] symbol = fields['Module'] + '!' + fields['Function'] weight = fields['Weight'] count = fields['Count'] function = self.get_function(process, symbol) function[SAMPLES] += weight * count self.profile[SAMPLES] += weight * count stack = fields['Stack'] if stack != '?': stack = stack.split('/') assert stack[0] == '[Root]' if stack[-1] != symbol: # XXX: some cases the sampled function does not appear in the stack stack.append(symbol) caller = None for symbol in stack[1:]: callee = self.get_function(process, symbol) if caller is not None: try: call = caller.calls[callee.id] except KeyError: call = Call(callee.id) call[SAMPLES2] = count caller.add_call(call) else: call[SAMPLES2] += count caller = callee
[docs] def get_function(self, process, symbol): function_id = process + '!' + symbol try: function = self.profile.functions[function_id] except KeyError: module, name = symbol.split('!', 1) function = Function(function_id, name) function.process = process function.module = module function[SAMPLES] = 0 self.profile.add_function(function) return function
[docs]class SleepyParser(Parser): """Parser for GNU gprof output. See also: - http://www.codersnotes.com/sleepy/ - http://sleepygraph.sourceforge.net/ """
[docs] def __init__(self, filename): Parser.__init__(self) from zipfile import ZipFile self.database = ZipFile(filename) self.symbols = {} self.calls = {} self.profile = Profile()
_symbol_re = re.compile( r'^(?P<id>\w+)' + r'\s+"(?P<module>[^"]*)"' + r'\s+"(?P<procname>[^"]*)"' + r'\s+"(?P<sourcefile>[^"]*)"' + r'\s+(?P<sourceline>\d+)$' )
[docs] def parse_symbols(self): lines = self.database.read('symbols.txt').splitlines() for line in lines: mo = self._symbol_re.match(line) if mo: symbol_id, module, procname, sourcefile, sourceline = mo.groups() function_id = ':'.join([module, procname]) try: function = self.profile.functions[function_id] except KeyError: function = Function(function_id, procname) function.module = module function[SAMPLES] = 0 self.profile.add_function(function) self.symbols[symbol_id] = function
[docs] def parse_callstacks(self): lines = self.database.read("callstacks.txt").splitlines() for line in lines: fields = line.split() samples = int(fields[0]) callstack = fields[1:] callstack = [self.symbols[symbol_id] for symbol_id in callstack] callee = callstack[0] callee[SAMPLES] += samples self.profile[SAMPLES] += samples for caller in callstack[1:]: try: call = caller.calls[callee.id] except KeyError: call = Call(callee.id) call[SAMPLES2] = samples caller.add_call(call) else: call[SAMPLES2] += samples callee = caller
[docs] def parse(self): profile = self.profile profile[SAMPLES] = 0 self.parse_symbols() self.parse_callstacks() # Compute derived events profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile
[docs]class AQtimeTable:
[docs] def __init__(self, name, fields): self.name = name self.fields = fields self.field_column = {} for column in range(len(fields)): self.field_column[fields[column]] = column self.rows = []
[docs] def __len__(self): return len(self.rows)
[docs] def __iter__(self): for values, children in self.rows: fields = {} for name, value in zip(self.fields, values): fields[name] = value children = dict([(child.name, child) for child in children]) yield fields, children raise StopIteration
[docs] def add_row(self, values, children=()): self.rows.append((values, children))
[docs]class AQtimeParser(XmlParser):
[docs] def __init__(self, stream): XmlParser.__init__(self, stream) self.tables = {}
[docs] def parse(self): self.element_start('AQtime_Results') self.parse_headers() results = self.parse_results() self.element_end('AQtime_Results') return self.build_profile(results)
[docs] def parse_headers(self): self.element_start('HEADERS') while self.token.type == XML_ELEMENT_START: self.parse_table_header() self.element_end('HEADERS')
[docs] def parse_table_header(self): attrs = self.element_start('TABLE_HEADER') name = attrs['NAME'] id = int(attrs['ID']) field_types = [] field_names = [] while self.token.type == XML_ELEMENT_START: field_type, field_name = self.parse_table_field() field_types.append(field_type) field_names.append(field_name) self.element_end('TABLE_HEADER') self.tables[id] = name, field_types, field_names
[docs] def parse_table_field(self): attrs = self.element_start('TABLE_FIELD') type = attrs['TYPE'] name = self.character_data() self.element_end('TABLE_FIELD') return type, name
[docs] def parse_results(self): self.element_start('RESULTS') table = self.parse_data() self.element_end('RESULTS') return table
[docs] def parse_data(self): rows = [] attrs = self.element_start('DATA') table_id = int(attrs['TABLE_ID']) table_name, field_types, field_names = self.tables[table_id] table = AQtimeTable(table_name, field_names) while self.token.type == XML_ELEMENT_START: row, children = self.parse_row(field_types) table.add_row(row, children) self.element_end('DATA') return table
[docs] def parse_row(self, field_types): row = [None]*len(field_types) children = [] self.element_start('ROW') while self.token.type == XML_ELEMENT_START: if self.token.name_or_data == 'FIELD': field_id, field_value = self.parse_field(field_types) row[field_id] = field_value elif self.token.name_or_data == 'CHILDREN': children = self.parse_children() else: raise XmlTokenMismatch("<FIELD ...> or <CHILDREN ...>", self.token) self.element_end('ROW') return row, children
[docs] def parse_field(self, field_types): attrs = self.element_start('FIELD') id = int(attrs['ID']) type = field_types[id] value = self.character_data() if type == 'Integer': value = int(value) elif type == 'Float': value = float(value) elif type == 'Address': value = int(value) elif type == 'String': pass else: assert False self.element_end('FIELD') return id, value
[docs] def parse_children(self): children = [] self.element_start('CHILDREN') while self.token.type == XML_ELEMENT_START: table = self.parse_data() assert table.name not in children children.append(table) self.element_end('CHILDREN') return children
[docs] def build_profile(self, results): assert results.name == 'Routines' profile = Profile() profile[TIME] = 0.0 for fields, tables in results: function = self.build_function(fields) children = tables['Children'] for fields, _ in children: call = self.build_call(fields) function.add_call(call) profile.add_function(function) profile[TIME] = profile[TIME] + function[TIME] profile[TOTAL_TIME] = profile[TIME] profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) return profile
[docs] def build_function(self, fields): function = Function(self.build_id(fields), self.build_name(fields)) function[TIME] = fields['Time'] function[TOTAL_TIME] = fields['Time with Children'] #function[TIME_RATIO] = fields['% Time']/100.0 #function[TOTAL_TIME_RATIO] = fields['% with Children']/100.0 return function
[docs] def build_call(self, fields): call = Call(self.build_id(fields)) call[TIME] = fields['Time'] call[TOTAL_TIME] = fields['Time with Children'] #call[TIME_RATIO] = fields['% Time']/100.0 #call[TOTAL_TIME_RATIO] = fields['% with Children']/100.0 return call
[docs] def build_id(self, fields): return ':'.join([fields['Module Name'], fields['Unit Name'], fields['Routine Name']])
[docs] def build_name(self, fields): # TODO: use more fields return fields['Routine Name']
[docs]class PstatsParser: """Parse Python profiling statistics saved with te pstats module."""
[docs] def __init__(self, *filename): import pstats try: self.stats = pstats.Stats(*filename) except ValueError: import hotshot.stats self.stats = hotshot.stats.load(filename[0]) self.profile = Profile() self.function_ids = {}
[docs] def get_function_name(self, (filename, line, name)): module = os.path.splitext(filename)[0] module = os.path.basename(module) return "%s:%d:%s" % (module, line, name)
[docs] def get_function(self, key): try: id = self.function_ids[key] except KeyError: id = len(self.function_ids) name = self.get_function_name(key) path = key[0] function = Function(id, name, path) self.profile.functions[id] = function self.function_ids[key] = id else: function = self.profile.functions[id] return function
[docs] def parse(self): self.profile[TIME] = 0.0 self.profile[TOTAL_TIME] = self.stats.total_tt for fn, (cc, nc, tt, ct, callers) in self.stats.stats.iteritems(): callee = self.get_function(fn) callee.called = nc callee[TOTAL_TIME] = ct callee[TIME] = tt self.profile[TIME] += tt self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct) for fn, value in callers.iteritems(): caller = self.get_function(fn) call = Call(callee.id) if isinstance(value, tuple): for i in xrange(0, len(value), 4): nc, cc, tt, ct = value[i:i+4] if CALLS in call: call[CALLS] += cc else: call[CALLS] = cc if TOTAL_TIME in call: call[TOTAL_TIME] += ct else: call[TOTAL_TIME] = ct else: call[CALLS] = value call[TOTAL_TIME] = ratio(value, nc)*ct caller.add_call(call) #self.stats.print_stats() #self.stats.print_callees() # Compute derived events self.profile.validate() self.profile.ratio(TIME_RATIO, TIME) self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) return self.profile
class Theme:

    def __init__(self,
            bgcolor = (0.0, 0.0, 1.0),
            mincolor = (0.0, 0.0, 0.0),
            maxcolor = (0.0, 0.0, 1.0),
            fontname = "Arial",
            minfontsize = 10.0,
            maxfontsize = 10.0,
            minpenwidth = 0.5,
            maxpenwidth = 4.0,
            gamma = 2.2,
            skew = 1.0):
        self.bgcolor = bgcolor
        self.mincolor = mincolor
        self.maxcolor = maxcolor
        self.fontname = fontname
        self.minfontsize = minfontsize
        self.maxfontsize = maxfontsize
        self.minpenwidth = minpenwidth
        self.maxpenwidth = maxpenwidth
        self.gamma = gamma
        self.skew = skew

    def graph_bgcolor(self):
        return self.hsl_to_rgb(*self.bgcolor)

    def graph_fontname(self):
        return self.fontname

    def graph_fontsize(self):
        return self.minfontsize

    def node_bgcolor(self, weight):
        return self.color(weight)

    def node_fgcolor(self, weight):
        return self.graph_bgcolor()

    def node_fontsize(self, weight):
        return self.fontsize(weight)

    def edge_color(self, weight):
        return self.color(weight)

    def edge_fontsize(self, weight):
        return self.fontsize(weight)

    def edge_penwidth(self, weight):
        return max(weight*self.maxpenwidth, self.minpenwidth)

    def edge_arrowsize(self, weight):
        return 0.5 * math.sqrt(self.edge_penwidth(weight))

    def fontsize(self, weight):
        return max(weight**2 * self.maxfontsize, self.minfontsize)

    def color(self, weight):
        weight = min(max(weight, 0.0), 1.0)

        hmin, smin, lmin = self.mincolor
        hmax, smax, lmax = self.maxcolor

        if self.skew < 0:
            raise ValueError("Skew must be greater than 0")
        elif self.skew == 1.0:
            h = hmin + weight*(hmax - hmin)
            s = smin + weight*(smax - smin)
            l = lmin + weight*(lmax - lmin)
        else:
            base = self.skew
            h = hmin + ((hmax-hmin)*(-1.0 + (base ** weight)) / (base - 1.0))
            s = smin + ((smax-smin)*(-1.0 + (base ** weight)) / (base - 1.0))
            l = lmin + ((lmax-lmin)*(-1.0 + (base ** weight)) / (base - 1.0))

        return self.hsl_to_rgb(h, s, l)

    def hsl_to_rgb(self, h, s, l):
        """Convert a color from HSL color-model to RGB.

        See also:

        - http://www.w3.org/TR/css3-color/#hsl-color
        """

        h = h % 1.0
        s = min(max(s, 0.0), 1.0)
        l = min(max(l, 0.0), 1.0)

        if l <= 0.5:
            m2 = l*(s + 1.0)
        else:
            m2 = l + s - l*s
        m1 = l*2.0 - m2
        r = self._hue_to_rgb(m1, m2, h + 1.0/3.0)
        g = self._hue_to_rgb(m1, m2, h)
        b = self._hue_to_rgb(m1, m2, h - 1.0/3.0)

        # Apply gamma correction
        r **= self.gamma
        g **= self.gamma
        b **= self.gamma

        return (r, g, b)

    def _hue_to_rgb(self, m1, m2, h):
        if h < 0.0:
            h += 1.0
        elif h > 1.0:
            h -= 1.0
        if h*6 < 1.0:
            return m1 + (m2 - m1)*h*6.0
        elif h*2 < 1.0:
            return m2
        elif h*3 < 2.0:
            return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0
        else:
            return m1
TEMPERATURE_COLORMAP = Theme(
    mincolor = (2.0/3.0, 0.80, 0.25), # dark blue
    maxcolor = (0.0, 1.0, 0.5), # saturated red
    gamma = 1.0
)

PINK_COLORMAP = Theme(
    mincolor = (0.0, 1.0, 0.90), # pink
    maxcolor = (0.0, 1.0, 0.5), # saturated red
)

GRAY_COLORMAP = Theme(
    mincolor = (0.0, 0.0, 0.85), # light gray
    maxcolor = (0.0, 0.0, 0.0), # black
)

BW_COLORMAP = Theme(
    minfontsize = 8.0,
    maxfontsize = 24.0,
    mincolor = (0.0, 0.0, 0.0), # black
    maxcolor = (0.0, 0.0, 0.0), # black
    minpenwidth = 0.1,
    maxpenwidth = 8.0,
)
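# Illustrative sketch (not part of the original script): how a weight in [0, 1]
# becomes a DOT hex color via the skewed HSL interpolation in Theme.color() and
# the gamma correction in Theme.hsl_to_rgb(). The helper name
# '_example_theme_color' is an assumption made for this example.
def _example_theme_color(weight=0.5, theme=TEMPERATURE_COLORMAP):
    r, g, b = theme.color(weight)  # gamma-corrected RGB components in [0, 1]
    return '#%02x%02x%02x' % tuple(int(255.0*c + 0.5) for c in (r, g, b))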
class DotWriter:
    """Writer for the DOT language.

    .. seealso::

        - "The DOT Language" specification
          http://www.graphviz.org/doc/info/lang.html
    """

    def __init__(self, fp):
        self.fp = fp

    def graph(self, profile, theme):
        self.begin_graph()

        fontname = theme.graph_fontname()

        self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125)
        self.attr('node', fontname=fontname, shape="box", style="filled", fontcolor="white", width=0, height=0)
        self.attr('edge', fontname=fontname)

        for function in profile.functions.itervalues():
            labels = []
            if function.process is not None:
                labels.append(function.process)
            if function.module is not None:
                labels.append(function.module)
            labels.append(function.name)
            for event in TOTAL_TIME_RATIO, TIME_RATIO:
                if event in function.events:
                    label = event.format(function[event])
                    labels.append(label)
            if function.called is not None:
                labels.append(u"%u\xd7" % (function.called,))

            if function.weight is not None:
                weight = function.weight
            else:
                weight = 0.0

            label = '\n'.join(labels)
            self.node(function.id,
                label = label,
                color = self.color(theme.node_bgcolor(weight)),
                fontcolor = self.color(theme.node_fgcolor(weight)),
                fontsize = "%.2f" % theme.node_fontsize(weight),
            )

            for call in function.calls.itervalues():
                callee = profile.functions[call.callee_id]

                labels = []
                for event in TOTAL_TIME_RATIO, CALLS:
                    if event in call.events:
                        label = event.format(call[event])
                        labels.append(label)

                if call.weight is not None:
                    weight = call.weight
                elif callee.weight is not None:
                    weight = callee.weight
                else:
                    weight = 0.0

                label = '\n'.join(labels)

                self.edge(function.id, call.callee_id,
                    label = label,
                    color = self.color(theme.edge_color(weight)),
                    fontcolor = self.color(theme.edge_color(weight)),
                    fontsize = "%.2f" % theme.edge_fontsize(weight),
                    penwidth = "%.2f" % theme.edge_penwidth(weight),
                    labeldistance = "%.2f" % theme.edge_penwidth(weight),
                    arrowsize = "%.2f" % theme.edge_arrowsize(weight),
                )

        self.end_graph()

    def begin_graph(self):
        self.write('digraph {\n')

    def end_graph(self):
        self.write('}\n')

    def attr(self, what, **attrs):
        self.write("\t")
        self.write(what)
        self.attr_list(attrs)
        self.write(";\n")

    def node(self, node, **attrs):
        self.write("\t")
        self.id(node)
        self.attr_list(attrs)
        self.write(";\n")

    def edge(self, src, dst, **attrs):
        self.write("\t")
        self.id(src)
        self.write(" -> ")
        self.id(dst)
        self.attr_list(attrs)
        self.write(";\n")

    def attr_list(self, attrs):
        if not attrs:
            return
        self.write(' [')
        first = True
        for name, value in attrs.iteritems():
            if first:
                first = False
            else:
                self.write(", ")
            self.id(name)
            self.write('=')
            self.id(value)
        self.write(']')

    def id(self, id):
        if isinstance(id, (int, float)):
            s = str(id)
        elif isinstance(id, basestring):
            if id.isalnum() and not id.startswith('0x'):
                s = id
            else:
                s = self.escape(id)
        else:
            raise TypeError
        self.write(s)

    def color(self, (r, g, b)):

        def float2int(f):
            if f <= 0.0:
                return 0
            if f >= 1.0:
                return 255
            return int(255.0*f + 0.5)

        return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)])

    def escape(self, s):
        s = s.encode('utf-8')
        s = s.replace('\\', r'\\')
        s = s.replace('\n', r'\n')
        s = s.replace('\t', r'\t')
        s = s.replace('"', r'\"')
        return '"' + s + '"'

    def write(self, s):
        self.fp.write(s)
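# Illustrative sketch (not part of the original script): emit a two-node call
# graph by hand with DotWriter; the resulting text can be rendered with
# Graphviz, e.g. ``dot -Tpng -o output.png``. The helper name
# '_example_dot_writer' and the node labels are assumptions made for this
# example.
def _example_dot_writer(fp=sys.stdout):
    dot = DotWriter(fp)
    dot.begin_graph()
    dot.attr('node', shape="box", style="filled", fontcolor="white")
    dot.node(0, label="caller\n75.00%", color="#0000ff", fontsize="10.00")
    dot.node(1, label="callee\n25.00%", color="#ff0000", fontsize="10.00")
    dot.edge(0, 1, label=u"3\xd7", penwidth="1.00", arrowsize="0.50")
    dot.end_graph()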
class Main:
    """Main program."""

    themes = {
        "color": TEMPERATURE_COLORMAP,
        "pink": PINK_COLORMAP,
        "gray": GRAY_COLORMAP,
        "bw": BW_COLORMAP,
    }
    def main(self):
        """Main program."""

        parser = optparse.OptionParser(
            usage="\n\t%prog [options] [file] ...",
            version="%%prog %s" % __version__)
        parser.add_option(
            '-o', '--output', metavar='FILE',
            type="string", dest="output",
            help="output filename [stdout]")
        parser.add_option(
            '-n', '--node-thres', metavar='PERCENTAGE',
            type="float", dest="node_thres", default=0.5,
            help="eliminate nodes below this threshold [default: %default]")
        parser.add_option(
            '-e', '--edge-thres', metavar='PERCENTAGE',
            type="float", dest="edge_thres", default=0.1,
            help="eliminate edges below this threshold [default: %default]")
        parser.add_option(
            '-f', '--format',
            type="choice", choices=('prof', 'callgrind', 'oprofile', 'hprof', 'sysprof', 'pstats', 'shark', 'sleepy', 'aqtime', 'xperf'),
            dest="format", default="prof",
            help="profile format: prof, callgrind, oprofile, hprof, sysprof, shark, sleepy, aqtime, pstats, or xperf [default: %default]")
        parser.add_option(
            '-c', '--colormap',
            type="choice", choices=('color', 'pink', 'gray', 'bw'),
            dest="theme", default="color",
            help="color map: color, pink, gray, or bw [default: %default]")
        parser.add_option(
            '-s', '--strip',
            action="store_true",
            dest="strip", default=False,
            help="strip function parameters, template parameters, and const modifiers from demangled C++ function names")
        parser.add_option(
            '-w', '--wrap',
            action="store_true",
            dest="wrap", default=False,
            help="wrap function names")
        parser.add_option(
            '-r', '--restrict',
            type="string", dest="restrict_string", default="",
            help="eliminate functions that do not contain this string in their pathname [default: None]")
        # add a new option to control skew of the colorization curve
        parser.add_option(
            '--skew',
            type="float", dest="theme_skew", default=1.0,
            help="skew the colorization curve. Values < 1.0 give more variety to lower percentages. Values > 1.0 give less variety to lower percentages")
        (self.options, self.args) = parser.parse_args(sys.argv[1:])

        if len(self.args) > 1 and self.options.format != 'pstats':
            parser.error('incorrect number of arguments')

        try:
            self.theme = self.themes[self.options.theme]
        except KeyError:
            parser.error('invalid colormap \'%s\'' % self.options.theme)

        # set skew on the theme now that it has been picked.
        if self.options.theme_skew:
            self.theme.skew = self.options.theme_skew

        if self.options.format == 'prof':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = GprofParser(fp)
        elif self.options.format == 'callgrind':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = CallgrindParser(fp)
        elif self.options.format == 'oprofile':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = OprofileParser(fp)
        elif self.options.format == 'sysprof':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = SysprofParser(fp)
        elif self.options.format == 'hprof':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = HProfParser(fp)
        elif self.options.format == 'pstats':
            if not self.args:
                parser.error('at least a file must be specified for pstats input')
            parser = PstatsParser(*self.args)
        elif self.options.format == 'xperf':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = XPerfParser(fp)
        elif self.options.format == 'shark':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = SharkParser(fp)
        elif self.options.format == 'sleepy':
            if len(self.args) != 1:
                parser.error('exactly one file must be specified for sleepy input')
            parser = SleepyParser(self.args[0])
        elif self.options.format == 'aqtime':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = AQtimeParser(fp)
        else:
            parser.error('invalid format \'%s\'' % self.options.format)

        self.profile = parser.parse()

        if self.options.output is None:
            self.output = sys.stdout
        else:
            self.output = open(self.options.output, 'wt')

        self.write_graph()
    _parenthesis_re = re.compile(r'\([^()]*\)')
    _angles_re = re.compile(r'<[^<>]*>')
    _const_re = re.compile(r'\s+const$')
    def strip_function_name(self, name):
        """Remove extraneous information from C++ demangled function names."""

        # Strip function parameters from name by recursively removing paired parenthesis
        while True:
            name, n = self._parenthesis_re.subn('', name)
            if not n:
                break

        # Strip const qualifier
        name = self._const_re.sub('', name)

        # Strip template parameters from name by recursively removing paired angles
        while True:
            name, n = self._angles_re.subn('', name)
            if not n:
                break

        return name

    def wrap_function_name(self, name):
        """Split the function name on multiple lines."""

        if len(name) > 32:
            ratio = 2.0/3.0
            height = max(int(len(name)/(1.0 - ratio) + 0.5), 1)
            width = max(len(name)/height, 32)
            # TODO: break lines in symbols
            name = textwrap.fill(name, width, break_long_words=False)

        # Take away spaces
        name = name.replace(", ", ",")
        name = name.replace("> >", ">>")
        name = name.replace("> >", ">>") # catch consecutive

        return name

    def compress_function_name(self, name):
        """Compress function name according to the user preferences."""

        if self.options.strip:
            name = self.strip_function_name(name)

        if self.options.wrap:
            name = self.wrap_function_name(name)

        # TODO: merge functions with same resulting name

        return name
    def write_graph(self):
        dot = DotWriter(self.output)
        profile = self.profile
        profile.prune(self.options.node_thres/100.0, self.options.edge_thres/100.0,
                      self.options.restrict_string)

        for function in profile.functions.itervalues():
            function.name = self.compress_function_name(function.name)

        dot.graph(profile, self.theme)
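# Illustrative sketch (not part of the original script): what the '--strip'
# option does to a demangled C++ name. The helper name '_example_strip_name'
# and the sample symbol are assumptions made for this example.
def _example_strip_name():
    name = 'std::vector<int, std::allocator<int> >::push_back(int const&) const'
    # Parameters, the trailing const qualifier, and template arguments are
    # removed, leaving 'std::vector::push_back'.
    return Main().strip_function_name(name)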
if __name__ == '__main__':
    Main().main()