Source code for pySPACE.tools.gprof2dot

#!/usr/bin/env python
""" Generate a dot graph from the output of several profilers.

    Original script by Jose Fonseca at 
    http://code.google.com/p/jrfonseca/wiki/Gprof2Dot
    
    Usage: python gprof2dot.py -f pstats profiling_file -r pySPACE | dot -Tpng -o output.png
    
    where profiling_file is the file that is generated by the cProfile module.
    
    The option '-r', '--restrict' eliminates functions from the profile
    whose pathname does not contain the given string [default: None].
    This option was added by Hendrik Woehrle and Mario Michael Krell.
    Furthermore, some minor documentation changes were applied.

    For profiling pySPACE, see the pySPACE documentation, especially launch.py,
    and use the `--profile` option.

    Original LGPL 3+ licensing text::

        Copyright 2008-2009 Jose Fonseca

        This program is free software: you can redistribute it and/or modify it
        under the terms of the GNU Lesser General Public License as published
        by the Free Software Foundation, either version 3 of the License, or
        (at your option) any later version.

        This program is distributed in the hope that it will be useful,
        but WITHOUT ANY WARRANTY; without even the implied warranty of
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        GNU Lesser General Public License for more details.

        You should have received a copy of the GNU Lesser General Public License
        along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""

__version__ = "1.0"


import sys
import math
import os.path
import re
import textwrap
import optparse
import xml.parsers.expat


try:
    # Debugging helper module
    import debug
except ImportError:
    pass


def times(x):
    return u"%u\xd7" % (x,)

def percentage(p):
    return "%.02f%%" % (p*100.0,)

def add(a, b):
    return a + b

def equal(a, b):
    if a == b:
        return a
    else:
        return None

def fail(a, b):
    assert False


tol = 2 ** -23

def ratio(numerator, denominator):
    try:
        ratio = float(numerator)/float(denominator)
    except ZeroDivisionError:
        # 0/0 is undefined, but 1.0 yields more useful results
        return 1.0
    if ratio < 0.0:
        if ratio < -tol:
            sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator))
        return 0.0
    if ratio > 1.0:
        if ratio > 1.0 + tol:
            sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator))
        return 1.0
    return ratio
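# Illustrative behaviour of ratio() (added comment, not in the original):
#   ratio(1, 4) -> 0.25
#   ratio(0, 0) -> 1.0    (0/0 is treated as 1.0 for more useful output)
#   ratio(5, 4) -> 1.0    (clamped; a warning is emitted when clearly above 1 + tol)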
class UndefinedEvent(Exception):
    """Raised when attempting to get an event which is undefined."""

    def __init__(self, event):
        Exception.__init__(self)
        self.event = event

    def __str__(self):
        return 'unspecified event %s' % self.event.name


class Event(object):
    """Describe a kind of event, and its basic operations."""

    def __init__(self, name, null, aggregator, formatter=str):
        self.name = name
        self._null = null
        self._aggregator = aggregator
        self._formatter = formatter

    def __eq__(self, other):
        return self is other

    def __hash__(self):
        return id(self)

    def null(self):
        return self._null

    def aggregate(self, val1, val2):
        """Aggregate two event values."""
        assert val1 is not None
        assert val2 is not None
        return self._aggregator(val1, val2)

    def format(self, val):
        """Format an event value."""
        assert val is not None
        return self._formatter(val)


CALLS = Event("Calls", 0, add, times)
SAMPLES = Event("Samples", 0, add)
SAMPLES2 = Event("Samples", 0, add)
TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')')
TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')')
TOTAL_TIME = Event("Total time", 0.0, fail)
TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage)
class Object(object):
    """Base class for all objects in profile which can store events."""

    def __init__(self, events=None):
        if events is None:
            self.events = {}
        else:
            self.events = events

    def __hash__(self):
        return id(self)

    def __eq__(self, other):
        return self is other

    def __contains__(self, event):
        return event in self.events

    def __getitem__(self, event):
        try:
            return self.events[event]
        except KeyError:
            raise UndefinedEvent(event)

    def __setitem__(self, event, value):
        if value is None:
            if event in self.events:
                del self.events[event]
        else:
            self.events[event] = value
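# Illustrative use of the event machinery (added comment, not in the original):
# profile objects store per-event values keyed by Event instances, e.g.
#
#   f = Function(1, 'main')    # Function is defined below
#   f[TIME] = 1.5              # Object.__setitem__ stores the value
#   TIME.format(f[TIME])       # -> '(1.5)'
#   f[CALLS]                   # raises UndefinedEvent, since CALLS was never set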
class Call(Object):
    """A call between functions.

    There should be at most one call object for every pair of functions.
    """

    def __init__(self, callee_id):
        Object.__init__(self)
        self.callee_id = callee_id
        self.ratio = None
        self.weight = None


class Function(Object):
    """A function."""

    def __init__(self, id, name, path=None):
        Object.__init__(self)
        self.id = id
        self.name = name
        self.module = None
        self.process = None
        self.calls = {}
        self.called = None
        self.weight = None
        self.cycle = None
        self.path = path

    def add_call(self, call):
        if call.callee_id in self.calls:
            sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id)))
        self.calls[call.callee_id] = call

    def get_call(self, callee_id):
        if not callee_id in self.calls:
            call = Call(callee_id)
            call[SAMPLES] = 0
            call[SAMPLES2] = 0
            call[CALLS] = 0
            self.calls[callee_id] = call
        return self.calls[callee_id]

    # TODO: write utility functions

    def __repr__(self):
        return self.name
class Cycle(Object):
    """A cycle made from recursive function calls."""

    def __init__(self):
        Object.__init__(self)
        # XXX: Do cycles need an id?
        self.functions = set()

    def add_function(self, function):
        assert function not in self.functions
        self.functions.add(function)
        # XXX: Aggregate events?
        if function.cycle is not None:
            for other in function.cycle.functions:
                if function not in self.functions:
                    self.add_function(other)
        function.cycle = self
class Profile(Object):
    """The whole profile."""

    def __init__(self):
        Object.__init__(self)
        self.functions = {}
        self.cycles = []

    def add_function(self, function):
        if function.id in self.functions:
            sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id)))
        self.functions[function.id] = function

    def add_cycle(self, cycle):
        self.cycles.append(cycle)

    def validate(self):
        """Validate the edges."""
        for function in self.functions.itervalues():
            for callee_id in function.calls.keys():
                assert function.calls[callee_id].callee_id == callee_id
                if callee_id not in self.functions:
                    sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name))
                    del function.calls[callee_id]
    def find_cycles(self):
        """Find cycles using Tarjan's strongly connected components algorithm."""
        # Apply Tarjan's algorithm successively until all functions are visited
        visited = set()
        for function in self.functions.itervalues():
            if function not in visited:
                self._tarjan(function, 0, [], {}, {}, visited)
        cycles = []
        for function in self.functions.itervalues():
            if function.cycle is not None and function.cycle not in cycles:
                cycles.append(function.cycle)
        self.cycles = cycles
        if 0:
            for cycle in cycles:
                sys.stderr.write("Cycle:\n")
                for member in cycle.functions:
                    sys.stderr.write("\tFunction %s\n" % member.name)

    def _tarjan(self, function, order, stack, orders, lowlinks, visited):
        """Tarjan's strongly connected components algorithm.

        See also:
        - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm
        """
        visited.add(function)
        orders[function] = order
        lowlinks[function] = order
        order += 1
        pos = len(stack)
        stack.append(function)
        for call in function.calls.itervalues():
            callee = self.functions[call.callee_id]
            # TODO: use a set to optimize lookup
            if callee not in orders:
                order = self._tarjan(callee, order, stack, orders, lowlinks, visited)
                lowlinks[function] = min(lowlinks[function], lowlinks[callee])
            elif callee in stack:
                lowlinks[function] = min(lowlinks[function], orders[callee])
        if lowlinks[function] == orders[function]:
            # Strongly connected component found
            members = stack[pos:]
            del stack[pos:]
            if len(members) > 1:
                cycle = Cycle()
                for member in members:
                    cycle.add_function(member)
        return order
[docs] def call_ratios(self, event): # Aggregate for incoming calls cycle_totals = {} for cycle in self.cycles: cycle_totals[cycle] = 0.0 function_totals = {} for function in self.functions.itervalues(): function_totals[function] = 0.0 for function in self.functions.itervalues(): for call in function.calls.itervalues(): if call.callee_id != function.id: callee = self.functions[call.callee_id] function_totals[callee] += call[event] if callee.cycle is not None and callee.cycle is not function.cycle: cycle_totals[callee.cycle] += call[event] # Compute the ratios for function in self.functions.itervalues(): for call in function.calls.itervalues(): assert call.ratio is None if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is not None and callee.cycle is not function.cycle: total = cycle_totals[callee.cycle] else: total = function_totals[callee] call.ratio = ratio(call[event], total)
[docs] def integrate(self, outevent, inevent): """Propagate function time ratio allong the function calls. Must be called after finding the cycles. See also: - http://citeseer.ist.psu.edu/graham82gprof.html """ # Sanity checking assert outevent not in self for function in self.functions.itervalues(): assert outevent not in function assert inevent in function for call in function.calls.itervalues(): assert outevent not in call if call.callee_id != function.id: assert call.ratio is not None # Aggregate the input for each cycle for cycle in self.cycles: total = inevent.null() for function in self.functions.itervalues(): total = inevent.aggregate(total, function[inevent]) self[inevent] = total # Integrate along the edges total = inevent.null() for function in self.functions.itervalues(): total = inevent.aggregate(total, function[inevent]) self._integrate_function(function, outevent, inevent) self[outevent] = total
[docs] def _integrate_function(self, function, outevent, inevent): if function.cycle is not None: return self._integrate_cycle(function.cycle, outevent, inevent) else: if outevent not in function: total = function[inevent] for call in function.calls.itervalues(): if call.callee_id != function.id: total += self._integrate_call(call, outevent, inevent) function[outevent] = total return function[outevent]
[docs] def _integrate_call(self, call, outevent, inevent): assert outevent not in call assert call.ratio is not None callee = self.functions[call.callee_id] subtotal = call.ratio *self._integrate_function(callee, outevent, inevent) call[outevent] = subtotal return subtotal
[docs] def _integrate_cycle(self, cycle, outevent, inevent): if outevent not in cycle: # Compute the outevent for the whole cycle total = inevent.null() for member in cycle.functions: subtotal = member[inevent] for call in member.calls.itervalues(): callee = self.functions[call.callee_id] if callee.cycle is not cycle: subtotal += self._integrate_call(call, outevent, inevent) total += subtotal cycle[outevent] = total # Compute the time propagated to callers of this cycle callees = {} for function in self.functions.itervalues(): if function.cycle is not cycle: for call in function.calls.itervalues(): callee = self.functions[call.callee_id] if callee.cycle is cycle: try: callees[callee] += call.ratio except KeyError: callees[callee] = call.ratio for member in cycle.functions: member[outevent] = outevent.null() for callee, call_ratio in callees.iteritems(): ranks = {} call_ratios = {} partials = {} self._rank_cycle_function(cycle, callee, 0, ranks) self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set()) partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent) assert partial == max(partials.values()) assert not total or abs(1.0 - partial/(call_ratio*total)) <= 0.001 return cycle[outevent]
[docs] def _rank_cycle_function(self, cycle, function, rank, ranks): if function not in ranks or ranks[function] > rank: ranks[function] = rank for call in function.calls.itervalues(): if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is cycle: self._rank_cycle_function(cycle, callee, rank + 1, ranks)
[docs] def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited): if function not in visited: visited.add(function) for call in function.calls.itervalues(): if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is cycle: if ranks[callee] > ranks[function]: call_ratios[callee] = call_ratios.get(callee, 0.0) + call.ratio self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited)
[docs] def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent): if function not in partials: partial = partial_ratio*function[inevent] for call in function.calls.itervalues(): if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is not cycle: assert outevent in call partial += partial_ratio*call[outevent] else: if ranks[callee] > ranks[function]: callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent) call_ratio = ratio(call.ratio, call_ratios[callee]) call_partial = call_ratio*callee_partial try: call[outevent] += call_partial except UndefinedEvent: call[outevent] = call_partial partial += call_partial partials[function] = partial try: function[outevent] += partial except UndefinedEvent: function[outevent] = partial return partials[function]
[docs] def aggregate(self, event): """Aggregate an event for the whole profile.""" total = event.null() for function in self.functions.itervalues(): try: total = event.aggregate(total, function[event]) except UndefinedEvent: return self[event] = total
[docs] def ratio(self, outevent, inevent): assert outevent not in self assert inevent in self for function in self.functions.itervalues(): assert outevent not in function assert inevent in function function[outevent] = ratio(function[inevent], self[inevent]) for call in function.calls.itervalues(): assert outevent not in call if inevent in call: call[outevent] = ratio(call[inevent], self[inevent]) self[outevent] = 1.0
[docs] def prune(self, node_thres, edge_thres,restrict_string): """Prune the profile""" # compute the prune ratios for function in self.functions.itervalues(): try: function.weight = function[TOTAL_TIME_RATIO] except UndefinedEvent: pass for call in function.calls.itervalues(): callee = self.functions[call.callee_id] if TOTAL_TIME_RATIO in call: # handle exact cases first call.weight = call[TOTAL_TIME_RATIO] else: try: # make a safe estimate call.weight = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO]) except UndefinedEvent: pass # prune the nodes for function_id in self.functions.keys(): function = self.functions[function_id] if function.weight is not None: if function.weight < node_thres: del self.functions[function_id] for function_id in self.functions.keys(): function = self.functions[function_id] #print function #print function.path if not restrict_string in function.path: del self.functions[function_id] # prune the egdes for function in self.functions.itervalues(): for callee_id in function.calls.keys(): call = function.calls[callee_id] if callee_id not in self.functions or call.weight is not None and call.weight < edge_thres: del function.calls[callee_id]
[docs] def dump(self): for function in self.functions.itervalues(): sys.stderr.write('Function %s:\n' % (function.name,)) self._dump_events(function.events) for call in function.calls.itervalues(): callee = self.functions[call.callee_id] sys.stderr.write(' Call %s:\n' % (callee.name,)) self._dump_events(call.events) for cycle in self.cycles: sys.stderr.write('Cycle:\n') self._dump_events(cycle.events) for function in cycle.functions: sys.stderr.write(' Function %s\n' % (function.name,))
[docs] def _dump_events(self, events): for event, value in events.iteritems(): sys.stderr.write(' %s: %s\n' % (event.name, event.format(value)))
[docs]class Struct: """Masquerade a dictionary with a structure-like behavior."""
[docs] def __init__(self, attrs = None): if attrs is None: attrs = {} self.__dict__['_attrs'] = attrs
[docs] def __getattr__(self, name): try: return self._attrs[name] except KeyError: raise AttributeError(name)
[docs] def __setattr__(self, name, value): self._attrs[name] = value
[docs] def __str__(self): return str(self._attrs)
[docs] def __repr__(self): return repr(self._attrs)
[docs]class ParseError(Exception): """Raised when parsing to signal mismatches."""
[docs] def __init__(self, msg, line): self.msg = msg # TODO: store more source line information self.line = line
[docs] def __str__(self): return '%s: %r' % (self.msg, self.line)
[docs]class Parser: """Parser interface."""
[docs] def __init__(self): pass
[docs] def parse(self): raise NotImplementedError
[docs]class LineParser(Parser): """Base class for parsers that read line-based formats."""
[docs] def __init__(self, file): Parser.__init__(self) self._file = file self.__line = None self.__eof = False self.line_no = 0
[docs] def readline(self): line = self._file.readline() if not line: self.__line = '' self.__eof = True else: self.line_no += 1 self.__line = line.rstrip('\r\n')
[docs] def lookahead(self): assert self.__line is not None return self.__line
[docs] def consume(self): assert self.__line is not None line = self.__line self.readline() return line
[docs] def eof(self): assert self.__line is not None return self.__eof
XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4)
[docs]class XmlToken:
[docs] def __init__(self, type, name_or_data, attrs = None, line = None, column = None): assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF) self.type = type self.name_or_data = name_or_data self.attrs = attrs self.line = line self.column = column
[docs] def __str__(self): if self.type == XML_ELEMENT_START: return '<' + self.name_or_data + ' ...>' if self.type == XML_ELEMENT_END: return '</' + self.name_or_data + '>' if self.type == XML_CHARACTER_DATA: return self.name_or_data if self.type == XML_EOF: return 'end of file' assert 0
[docs]class XmlTokenizer: """Expat based XML tokenizer."""
[docs] def __init__(self, fp, skip_ws = True): self.fp = fp self.tokens = [] self.index = 0 self.final = False self.skip_ws = skip_ws self.character_pos = 0, 0 self.character_data = '' self.parser = xml.parsers.expat.ParserCreate() self.parser.StartElementHandler = self.handle_element_start self.parser.EndElementHandler = self.handle_element_end self.parser.CharacterDataHandler = self.handle_character_data
[docs] def handle_element_start(self, name, attributes): self.finish_character_data() line, column = self.pos() token = XmlToken(XML_ELEMENT_START, name, attributes, line, column) self.tokens.append(token)
[docs] def handle_element_end(self, name): self.finish_character_data() line, column = self.pos() token = XmlToken(XML_ELEMENT_END, name, None, line, column) self.tokens.append(token)
[docs] def handle_character_data(self, data): if not self.character_data: self.character_pos = self.pos() self.character_data += data
[docs] def finish_character_data(self): if self.character_data: if not self.skip_ws or not self.character_data.isspace(): line, column = self.character_pos token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column) self.tokens.append(token) self.character_data = ''
[docs] def next(self): size = 16*1024 while self.index >= len(self.tokens) and not self.final: self.tokens = [] self.index = 0 data = self.fp.read(size) self.final = len(data) < size try: self.parser.Parse(data, self.final) except xml.parsers.expat.ExpatError, e: #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS: if e.code == 3: pass else: raise e if self.index >= len(self.tokens): line, column = self.pos() token = XmlToken(XML_EOF, None, None, line, column) else: token = self.tokens[self.index] self.index += 1 return token
[docs] def pos(self): return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
[docs]class XmlTokenMismatch(Exception):
[docs] def __init__(self, expected, found): self.expected = expected self.found = found
[docs] def __str__(self): return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found))
[docs]class XmlParser(Parser): """Base XML document parser."""
[docs] def __init__(self, fp): Parser.__init__(self) self.tokenizer = XmlTokenizer(fp) self.consume()
[docs] def consume(self): self.token = self.tokenizer.next()
[docs] def match_element_start(self, name): return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name
[docs] def match_element_end(self, name): return self.token.type == XML_ELEMENT_END and self.token.name_or_data == name
[docs] def element_start(self, name): while self.token.type == XML_CHARACTER_DATA: self.consume() if self.token.type != XML_ELEMENT_START: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) if self.token.name_or_data != name: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) attrs = self.token.attrs self.consume() return attrs
[docs] def element_end(self, name): while self.token.type == XML_CHARACTER_DATA: self.consume() if self.token.type != XML_ELEMENT_END: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) if self.token.name_or_data != name: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) self.consume()
[docs] def character_data(self, strip = True): data = '' while self.token.type == XML_CHARACTER_DATA: data += self.token.name_or_data self.consume() if strip: data = data.strip() return data
[docs]class GprofParser(Parser): """Parser for GNU gprof output. .. seealso:: - Chapter "Interpreting gprof's Output" from the GNU gprof manual http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph - File "cg_print.c" from the GNU gprof source code http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src """
[docs] def __init__(self, fp): Parser.__init__(self) self.fp = fp self.functions = {} self.cycles = {}
[docs] def readline(self): line = self.fp.readline() if not line: sys.stderr.write('error: unexpected end of file\n') sys.exit(1) line = line.rstrip('\r\n') return line
_int_re = re.compile(r'^\d+$') _float_re = re.compile(r'^\d+\.\d+$')
[docs] def translate(self, mo): """Extract a structure from a match object, while translating the types in the process.""" attrs = {} groupdict = mo.groupdict() for name, value in groupdict.iteritems(): if value is None: value = None elif self._int_re.match(value): value = int(value) elif self._float_re.match(value): value = float(value) attrs[name] = (value) return Struct(attrs)
_cg_header_re = re.compile( # original gprof header r'^\s+called/total\s+parents\s*$|' + r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' + r'^\s+called/total\s+children\s*$|' + # GNU gprof header r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$' ) _cg_ignore_re = re.compile( # spontaneous r'^\s+<spontaneous>\s*$|' # internal calls (such as "mcount") r'^.*\((\d+)\)$' ) _cg_primary_re = re.compile( r'^\[(?P<index>\d+)\]?' + r'\s+(?P<percentage_time>\d+\.\d+)' + r'\s+(?P<self>\d+\.\d+)' + r'\s+(?P<descendants>\d+\.\d+)' + r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' + r'\s+(?P<name>\S.*?)' + r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + r'\s\[(\d+)\]$' ) _cg_parent_re = re.compile( r'^\s+(?P<self>\d+\.\d+)?' + r'\s+(?P<descendants>\d+\.\d+)?' + r'\s+(?P<called>\d+)(?:/(?P<called_total>\d+))?' + r'\s+(?P<name>\S.*?)' + r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + r'\s\[(?P<index>\d+)\]$' ) _cg_child_re = _cg_parent_re _cg_cycle_header_re = re.compile( r'^\[(?P<index>\d+)\]?' + r'\s+(?P<percentage_time>\d+\.\d+)' + r'\s+(?P<self>\d+\.\d+)' + r'\s+(?P<descendants>\d+\.\d+)' + r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' + r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' + r'\s\[(\d+)\]$' ) _cg_cycle_member_re = re.compile( r'^\s+(?P<self>\d+\.\d+)?' + r'\s+(?P<descendants>\d+\.\d+)?' + r'\s+(?P<called>\d+)(?:\+(?P<called_self>\d+))?' + r'\s+(?P<name>\S.*?)' + r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + r'\s\[(?P<index>\d+)\]$' ) _cg_sep_re = re.compile(r'^--+$')
[docs] def parse_function_entry(self, lines): parents = [] children = [] while True: if not lines: sys.stderr.write('warning: unexpected end of entry\n') line = lines.pop(0) if line.startswith('['): break # read function parent line mo = self._cg_parent_re.match(line) if not mo: if self._cg_ignore_re.match(line): continue sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) else: parent = self.translate(mo) parents.append(parent) # read primary line mo = self._cg_primary_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) return else: function = self.translate(mo) while lines: line = lines.pop(0) # read function subroutine line mo = self._cg_child_re.match(line) if not mo: if self._cg_ignore_re.match(line): continue sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) else: child = self.translate(mo) children.append(child) function.parents = parents function.children = children self.functions[function.index] = function
[docs] def parse_cycle_entry(self, lines): # read cycle header line line = lines[0] mo = self._cg_cycle_header_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) return cycle = self.translate(mo) # read cycle member lines cycle.functions = [] for line in lines[1:]: mo = self._cg_cycle_member_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) continue call = self.translate(mo) cycle.functions.append(call) self.cycles[cycle.cycle] = cycle
[docs] def parse_cg_entry(self, lines): if lines[0].startswith("["): self.parse_cycle_entry(lines) else: self.parse_function_entry(lines)
[docs] def parse_cg(self): """Parse the call graph.""" # skip call graph header while not self._cg_header_re.match(self.readline()): pass line = self.readline() while self._cg_header_re.match(line): line = self.readline() # process call graph entries entry_lines = [] while line != '\014': # form feed if line and not line.isspace(): if self._cg_sep_re.match(line): self.parse_cg_entry(entry_lines) entry_lines = [] else: entry_lines.append(line) line = self.readline()
[docs] def parse(self): self.parse_cg() self.fp.close() profile = Profile() profile[TIME] = 0.0 cycles = {} for index in self.cycles.iterkeys(): cycles[index] = Cycle() for entry in self.functions.itervalues(): # populate the function function = Function(entry.index, entry.name) function[TIME] = entry.self if entry.called is not None: function.called = entry.called if entry.called_self is not None: call = Call(entry.index) call[CALLS] = entry.called_self function.called += entry.called_self # populate the function calls for child in entry.children: call = Call(child.index) assert child.called is not None call[CALLS] = child.called if child.index not in self.functions: # NOTE: functions that were never called but were discovered by gprof's # static call graph analysis dont have a call graph entry so we need # to add them here missing = Function(child.index, child.name) function[TIME] = 0.0 function.called = 0 profile.add_function(missing) function.add_call(call) profile.add_function(function) if entry.cycle is not None: try: cycle = cycles[entry.cycle] except KeyError: sys.stderr.write('warning: <cycle %u as a whole> entry missing\n' % entry.cycle) cycle = Cycle() cycles[entry.cycle] = cycle cycle.add_function(function) profile[TIME] = profile[TIME] + function[TIME] for cycle in cycles.itervalues(): profile.add_cycle(cycle) # Compute derived events profile.validate() profile.ratio(TIME_RATIO, TIME) profile.call_ratios(CALLS) profile.integrate(TOTAL_TIME, TIME) profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) return profile
[docs]class CallgrindParser(LineParser): """Parser for valgrind's callgrind tool. See also: - http://valgrind.org/docs/manual/cl-format.html """ _call_re = re.compile('^calls=\s*(\d+)\s+((\d+|\+\d+|-\d+|\*)\s+)+$')
[docs] def __init__(self, infile): LineParser.__init__(self, infile) # Textual positions self.position_ids = {} self.positions = {} # Numeric positions self.num_positions = 1 self.cost_positions = ['line'] self.last_positions = [0] # Events self.num_events = 0 self.cost_events = [] self.profile = Profile() self.profile[SAMPLES] = 0
[docs] def parse(self): # read lookahead self.readline() self.parse_key('version') self.parse_key('creator') self.parse_part() # compute derived data self.profile.validate() self.profile.find_cycles() self.profile.ratio(TIME_RATIO, SAMPLES) self.profile.call_ratios(CALLS) self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return self.profile
[docs] def parse_part(self): while self.parse_header_line(): pass while self.parse_body_line(): pass if not self.eof() and False: sys.stderr.write('warning: line %u: unexpected line\n' % self.line_no) sys.stderr.write('%s\n' % self.lookahead()) return True
[docs] def parse_header_line(self): return \ self.parse_empty() or \ self.parse_comment() or \ self.parse_part_detail() or \ self.parse_description() or \ self.parse_event_specification() or \ self.parse_cost_line_def() or \ self.parse_cost_summary()
_detail_keys = set(('cmd', 'pid', 'thread', 'part'))
[docs] def parse_part_detail(self): return self.parse_keys(self._detail_keys)
[docs] def parse_description(self): return self.parse_key('desc') is not None
[docs] def parse_event_specification(self): event = self.parse_key('event') if event is None: return False return True
[docs] def parse_cost_line_def(self): pair = self.parse_keys(('events', 'positions')) if pair is None: return False key, value = pair items = value.split() if key == 'events': self.num_events = len(items) self.cost_events = items if key == 'positions': self.num_positions = len(items) self.cost_positions = items self.last_positions = [0]*self.num_positions return True
[docs] def parse_cost_summary(self): pair = self.parse_keys(('summary', 'totals')) if pair is None: return False return True
[docs] def parse_body_line(self): return \ self.parse_empty() or \ self.parse_comment() or \ self.parse_cost_line() or \ self.parse_position_spec() or \ self.parse_association_spec()
__subpos_re = r'(0x[0-9a-fA-F]+|\d+|\+\d+|-\d+|\*)' _cost_re = re.compile(r'^' + __subpos_re + r'( +' + __subpos_re + r')*' + r'( +\d+)*' + '$')
[docs] def parse_cost_line(self, calls=None): line = self.lookahead().rstrip() mo = self._cost_re.match(line) if not mo: return False function = self.get_function() values = line.split(' ') assert len(values) <= self.num_positions + self.num_events positions = values[0 : self.num_positions] events = values[self.num_positions : ] events += ['0']*(self.num_events - len(events)) for i in range(self.num_positions): position = positions[i] if position == '*': position = self.last_positions[i] elif position[0] in '-+': position = self.last_positions[i] + int(position) elif position.startswith('0x'): position = int(position, 16) else: position = int(position) self.last_positions[i] = position events = map(float, events) if calls is None: function[SAMPLES] += events[0] self.profile[SAMPLES] += events[0] else: callee = self.get_callee() callee.called += calls try: call = function.calls[callee.id] except KeyError: call = Call(callee.id) call[CALLS] = calls call[SAMPLES] = events[0] function.add_call(call) else: call[CALLS] += calls call[SAMPLES] += events[0] self.consume() return True
[docs] def parse_association_spec(self): line = self.lookahead() if not line.startswith('calls='): return False _, values = line.split('=', 1) values = values.strip().split() calls = int(values[0]) call_position = values[1:] self.consume() self.parse_cost_line(calls) return True
_position_re = re.compile('^(?P<position>[cj]?(?:ob|fl|fi|fe|fn))=\s*(?:\((?P<id>\d+)\))?(?:\s*(?P<name>.+))?') _position_table_map = { 'ob': 'ob', 'fl': 'fl', 'fi': 'fl', 'fe': 'fl', 'fn': 'fn', 'cob': 'ob', 'cfl': 'fl', 'cfi': 'fl', 'cfe': 'fl', 'cfn': 'fn', 'jfi': 'fl', } _position_map = { 'ob': 'ob', 'fl': 'fl', 'fi': 'fl', 'fe': 'fl', 'fn': 'fn', 'cob': 'cob', 'cfl': 'cfl', 'cfi': 'cfl', 'cfe': 'cfl', 'cfn': 'cfn', 'jfi': 'jfi', }
[docs] def parse_position_spec(self): line = self.lookahead() if line.startswith('jump=') or line.startswith('jcnd='): self.consume() return True mo = self._position_re.match(line) if not mo: return False position, id, name = mo.groups() if id: table = self._position_table_map[position] if name: self.position_ids[(table, id)] = name else: name = self.position_ids.get((table, id), '') self.positions[self._position_map[position]] = name self.consume() return True
[docs] def parse_empty(self): if self.eof(): return False line = self.lookahead() if line.strip(): return False self.consume() return True
[docs] def parse_comment(self): line = self.lookahead() if not line.startswith('#'): return False self.consume() return True
_key_re = re.compile(r'^(\w+):')
[docs] def parse_key(self, key): pair = self.parse_keys((key,)) if not pair: return None key, value = pair return value
[docs] def parse_keys(self, keys): line = self.lookahead() mo = self._key_re.match(line) if not mo: return None key, value = line.split(':', 1) if key not in keys: return None value = value.strip() self.consume() return key, value
[docs] def make_function(self, module, filename, name): # FIXME: module and filename are not being tracked reliably #id = '|'.join((module, filename, name)) id = name try: function = self.profile.functions[id] except KeyError: function = Function(id, name) function[SAMPLES] = 0 function.called = 0 self.profile.add_function(function) return function
[docs] def get_function(self): module = self.positions.get('ob', '') filename = self.positions.get('fl', '') function = self.positions.get('fn', '') return self.make_function(module, filename, function)
[docs] def get_callee(self): module = self.positions.get('cob', '') filename = self.positions.get('cfi', '') function = self.positions.get('cfn', '') return self.make_function(module, filename, function)
[docs]class OprofileParser(LineParser): """Parser for oprofile callgraph output. See also: - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph """ _fields_re = { 'samples': r'(\d+)', '%': r'(\S+)', 'linenr info': r'(?P<source>\(no location information\)|\S+:\d+)', 'image name': r'(?P<image>\S+(?:\s\(tgid:[^)]*\))?)', 'app name': r'(?P<application>\S+)', 'symbol name': r'(?P<symbol>\(no symbols\)|.+?)', }
[docs] def __init__(self, infile): LineParser.__init__(self, infile) self.entries = {} self.entry_re = None
[docs] def add_entry(self, callers, function, callees): try: entry = self.entries[function.id] except KeyError: self.entries[function.id] = (callers, function, callees) else: callers_total, function_total, callees_total = entry self.update_subentries_dict(callers_total, callers) function_total.samples += function.samples self.update_subentries_dict(callees_total, callees)
[docs] def update_subentries_dict(self, totals, partials): for partial in partials.itervalues(): try: total = totals[partial.id] except KeyError: totals[partial.id] = partial else: total.samples += partial.samples
[docs] def parse(self): # read lookahead self.readline() self.parse_header() while self.lookahead(): self.parse_entry() profile = Profile() reverse_call_samples = {} # populate the profile profile[SAMPLES] = 0 for _callers, _function, _callees in self.entries.itervalues(): function = Function(_function.id, _function.name) function[SAMPLES] = _function.samples profile.add_function(function) profile[SAMPLES] += _function.samples if _function.application: function.process = os.path.basename(_function.application) if _function.image: function.module = os.path.basename(_function.image) total_callee_samples = 0 for _callee in _callees.itervalues(): total_callee_samples += _callee.samples for _callee in _callees.itervalues(): if not _callee.self: call = Call(_callee.id) call[SAMPLES2] = _callee.samples function.add_call(call) # compute derived data profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile
[docs] def parse_header(self): while not self.match_header(): self.consume() line = self.lookahead() fields = re.split(r'\s\s+', line) entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P<self>\s+\[self\])?$' self.entry_re = re.compile(entry_re) self.skip_separator()
[docs] def parse_entry(self): callers = self.parse_subentries() if self.match_primary(): function = self.parse_subentry() if function is not None: callees = self.parse_subentries() self.add_entry(callers, function, callees) self.skip_separator()
[docs] def parse_subentries(self): subentries = {} while self.match_secondary(): subentry = self.parse_subentry() subentries[subentry.id] = subentry return subentries
[docs] def parse_subentry(self): entry = Struct() line = self.consume() mo = self.entry_re.match(line) if not mo: raise ParseError('failed to parse', line) fields = mo.groupdict() entry.samples = int(mo.group(1)) if 'source' in fields and fields['source'] != '(no location information)': source = fields['source'] filename, lineno = source.split(':') entry.filename = filename entry.lineno = int(lineno) else: source = '' entry.filename = None entry.lineno = None entry.image = fields.get('image', '') entry.application = fields.get('application', '') if 'symbol' in fields and fields['symbol'] != '(no symbols)': entry.symbol = fields['symbol'] else: entry.symbol = '' if entry.symbol.startswith('"') and entry.symbol.endswith('"'): entry.symbol = entry.symbol[1:-1] entry.id = ':'.join((entry.application, entry.image, source, entry.symbol)) entry.self = fields.get('self', None) != None if entry.self: entry.id += ':self' if entry.symbol: entry.name = entry.symbol else: entry.name = entry.image return entry
[docs] def skip_separator(self): while not self.match_separator(): self.consume() self.consume()
[docs] def match_header(self): line = self.lookahead() return line.startswith('samples')
[docs] def match_separator(self): line = self.lookahead() return line == '-'*len(line)
[docs] def match_primary(self): line = self.lookahead() return not line[:1].isspace()
[docs] def match_secondary(self): line = self.lookahead() return line[:1].isspace()
[docs]class HProfParser(LineParser): """Parser for java hprof output See also: - http://java.sun.com/developer/technicalArticles/Programming/HPROF.html """ trace_re = re.compile(r'\t(.*)\((.*):(.*)\)') trace_id_re = re.compile(r'^TRACE (\d+):$')
[docs] def __init__(self, infile): LineParser.__init__(self, infile) self.traces = {} self.samples = {}
[docs] def parse(self): # read lookahead self.readline() while not self.lookahead().startswith('------'): self.consume() while not self.lookahead().startswith('TRACE '): self.consume() self.parse_traces() while not self.lookahead().startswith('CPU'): self.consume() self.parse_samples() # populate the profile profile = Profile() profile[SAMPLES] = 0 functions = {} # build up callgraph for id, trace in self.traces.iteritems(): if not id in self.samples: continue mtime = self.samples[id][0] last = None for func, file, line in trace: if not func in functions: function = Function(func, func) function[SAMPLES] = 0 profile.add_function(function) functions[func] = function function = functions[func] # allocate time to the deepest method in the trace if not last: function[SAMPLES] += mtime profile[SAMPLES] += mtime else: c = function.get_call(last) c[SAMPLES2] += mtime last = func # compute derived data profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile
[docs] def parse_traces(self): while self.lookahead().startswith('TRACE '): self.parse_trace()
[docs] def parse_trace(self): l = self.consume() mo = self.trace_id_re.match(l) tid = mo.group(1) last = None trace = [] while self.lookahead().startswith('\t'): l = self.consume() match = self.trace_re.search(l) if not match: #sys.stderr.write('Invalid line: %s\n' % l) break else: function_name, file, line = match.groups() trace += [(function_name, file, line)] self.traces[int(tid)] = trace
[docs] def parse_samples(self): self.consume() self.consume() while not self.lookahead().startswith('CPU'): rank, percent_self, percent_accum, count, traceid, method = self.lookahead().split() self.samples[int(traceid)] = (int(count), method) self.consume()
[docs]class SysprofParser(XmlParser):
[docs] def __init__(self, stream): XmlParser.__init__(self, stream)
[docs] def parse(self): objects = {} nodes = {} self.element_start('profile') while self.token.type == XML_ELEMENT_START: if self.token.name_or_data == 'objects': assert not objects objects = self.parse_items('objects') elif self.token.name_or_data == 'nodes': assert not nodes nodes = self.parse_items('nodes') else: self.parse_value(self.token.name_or_data) self.element_end('profile') return self.build_profile(objects, nodes)
[docs] def parse_items(self, name): assert name[-1] == 's' items = {} self.element_start(name) while self.token.type == XML_ELEMENT_START: id, values = self.parse_item(name[:-1]) assert id not in items items[id] = values self.element_end(name) return items
[docs] def parse_item(self, name): attrs = self.element_start(name) id = int(attrs['id']) values = self.parse_values() self.element_end(name) return id, values
[docs] def parse_values(self): values = {} while self.token.type == XML_ELEMENT_START: name = self.token.name_or_data value = self.parse_value(name) assert name not in values values[name] = value return values
[docs] def parse_value(self, tag): self.element_start(tag) value = self.character_data() self.element_end(tag) if value.isdigit(): return int(value) if value.startswith('"') and value.endswith('"'): return value[1:-1] return value
[docs] def build_profile(self, objects, nodes): profile = Profile() profile[SAMPLES] = 0 for id, object in objects.iteritems(): # Ignore fake objects (process names, modules, "Everything", "kernel", etc.) if object['self'] == 0: continue function = Function(id, object['name']) function[SAMPLES] = object['self'] profile.add_function(function) profile[SAMPLES] += function[SAMPLES] for id, node in nodes.iteritems(): # Ignore fake calls if node['self'] == 0: continue # Find a non-ignored parent parent_id = node['parent'] while parent_id != 0: parent = nodes[parent_id] caller_id = parent['object'] if objects[caller_id]['self'] != 0: break parent_id = parent['parent'] if parent_id == 0: continue callee_id = node['object'] assert objects[caller_id]['self'] assert objects[callee_id]['self'] function = profile.functions[caller_id] samples = node['self'] try: call = function.calls[callee_id] except KeyError: call = Call(callee_id) call[SAMPLES2] = samples function.add_call(call) else: call[SAMPLES2] += samples # Compute derived events profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile
[docs]class SharkParser(LineParser): """Parser for MacOSX Shark output. Author: tom@dbservice.com """
[docs] def __init__(self, infile): LineParser.__init__(self, infile) self.stack = [] self.entries = {}
[docs] def add_entry(self, function): try: entry = self.entries[function.id] except KeyError: self.entries[function.id] = (function, { }) else: function_total, callees_total = entry function_total.samples += function.samples
[docs] def add_callee(self, function, callee): func, callees = self.entries[function.id] try: entry = callees[callee.id] except KeyError: callees[callee.id] = callee else: entry.samples += callee.samples
[docs] def parse(self): self.readline() self.readline() self.readline() self.readline() match = re.compile(r'(?P<prefix>[|+ ]*)(?P<samples>\d+), (?P<symbol>[^,]+), (?P<image>.*)') while self.lookahead(): line = self.consume() mo = match.match(line) if not mo: raise ParseError('failed to parse', line) fields = mo.groupdict() prefix = len(fields.get('prefix', 0)) / 2 - 1 symbol = str(fields.get('symbol', 0)) image = str(fields.get('image', 0)) entry = Struct() entry.id = ':'.join([symbol, image]) entry.samples = int(fields.get('samples', 0)) entry.name = symbol entry.image = image # adjust the callstack if prefix < len(self.stack): del self.stack[prefix:] if prefix == len(self.stack): self.stack.append(entry) # if the callstack has had an entry, it's this functions caller if prefix > 0: self.add_callee(self.stack[prefix - 1], entry) self.add_entry(entry) profile = Profile() profile[SAMPLES] = 0 for _function, _callees in self.entries.itervalues(): function = Function(_function.id, _function.name) function[SAMPLES] = _function.samples profile.add_function(function) profile[SAMPLES] += _function.samples if _function.image: function.module = os.path.basename(_function.image) for _callee in _callees.itervalues(): call = Call(_callee.id) call[SAMPLES] = _callee.samples function.add_call(call) # compute derived data profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile
[docs]class XPerfParser(Parser): """Parser for CSVs generted by XPerf, from Microsoft Windows Performance Tools. """
[docs] def __init__(self, stream): Parser.__init__(self) self.stream = stream self.profile = Profile() self.profile[SAMPLES] = 0 self.column = {}
[docs] def parse(self): import csv reader = csv.reader( self.stream, delimiter = ',', quotechar = None, escapechar = None, doublequote = False, skipinitialspace = True, lineterminator = '\r\n', quoting = csv.QUOTE_NONE) it = iter(reader) row = reader.next() self.parse_header(row) for row in it: self.parse_row(row) # compute derived data self.profile.validate() self.profile.find_cycles() self.profile.ratio(TIME_RATIO, SAMPLES) self.profile.call_ratios(SAMPLES2) self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return self.profile
[docs] def parse_header(self, row): for column in range(len(row)): name = row[column] assert name not in self.column self.column[name] = column
[docs] def parse_row(self, row): fields = {} for name, column in self.column.iteritems(): value = row[column] for factory in int, float: try: value = factory(value) except ValueError: pass else: break fields[name] = value process = fields['Process Name'] symbol = fields['Module'] + '!' + fields['Function'] weight = fields['Weight'] count = fields['Count'] function = self.get_function(process, symbol) function[SAMPLES] += weight * count self.profile[SAMPLES] += weight * count stack = fields['Stack'] if stack != '?': stack = stack.split('/') assert stack[0] == '[Root]' if stack[-1] != symbol: # XXX: some cases the sampled function does not appear in the stack stack.append(symbol) caller = None for symbol in stack[1:]: callee = self.get_function(process, symbol) if caller is not None: try: call = caller.calls[callee.id] except KeyError: call = Call(callee.id) call[SAMPLES2] = count caller.add_call(call) else: call[SAMPLES2] += count caller = callee
[docs] def get_function(self, process, symbol): function_id = process + '!' + symbol try: function = self.profile.functions[function_id] except KeyError: module, name = symbol.split('!', 1) function = Function(function_id, name) function.process = process function.module = module function[SAMPLES] = 0 self.profile.add_function(function) return function
[docs]class SleepyParser(Parser): """Parser for GNU gprof output. See also: - http://www.codersnotes.com/sleepy/ - http://sleepygraph.sourceforge.net/ """
[docs] def __init__(self, filename): Parser.__init__(self) from zipfile import ZipFile self.database = ZipFile(filename) self.symbols = {} self.calls = {} self.profile = Profile()
_symbol_re = re.compile( r'^(?P<id>\w+)' + r'\s+"(?P<module>[^"]*)"' + r'\s+"(?P<procname>[^"]*)"' + r'\s+"(?P<sourcefile>[^"]*)"' + r'\s+(?P<sourceline>\d+)$' )
[docs] def parse_symbols(self): lines = self.database.read('symbols.txt').splitlines() for line in lines: mo = self._symbol_re.match(line) if mo: symbol_id, module, procname, sourcefile, sourceline = mo.groups() function_id = ':'.join([module, procname]) try: function = self.profile.functions[function_id] except KeyError: function = Function(function_id, procname) function.module = module function[SAMPLES] = 0 self.profile.add_function(function) self.symbols[symbol_id] = function
[docs] def parse_callstacks(self): lines = self.database.read("callstacks.txt").splitlines() for line in lines: fields = line.split() samples = int(fields[0]) callstack = fields[1:] callstack = [self.symbols[symbol_id] for symbol_id in callstack] callee = callstack[0] callee[SAMPLES] += samples self.profile[SAMPLES] += samples for caller in callstack[1:]: try: call = caller.calls[callee.id] except KeyError: call = Call(callee.id) call[SAMPLES2] = samples caller.add_call(call) else: call[SAMPLES2] += samples callee = caller
[docs] def parse(self): profile = self.profile profile[SAMPLES] = 0 self.parse_symbols() self.parse_callstacks() # Compute derived events profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile
[docs]class AQtimeTable:
[docs] def __init__(self, name, fields): self.name = name self.fields = fields self.field_column = {} for column in range(len(fields)): self.field_column[fields[column]] = column self.rows = []
[docs] def __len__(self): return len(self.rows)
[docs] def __iter__(self): for values, children in self.rows: fields = {} for name, value in zip(self.fields, values): fields[name] = value children = dict([(child.name, child) for child in children]) yield fields, children raise StopIteration
[docs] def add_row(self, values, children=()): self.rows.append((values, children))
[docs]class AQtimeParser(XmlParser):
[docs] def __init__(self, stream): XmlParser.__init__(self, stream) self.tables = {}
[docs] def parse(self): self.element_start('AQtime_Results') self.parse_headers() results = self.parse_results() self.element_end('AQtime_Results') return self.build_profile(results)
[docs] def parse_headers(self): self.element_start('HEADERS') while self.token.type == XML_ELEMENT_START: self.parse_table_header() self.element_end('HEADERS')
[docs] def parse_table_header(self): attrs = self.element_start('TABLE_HEADER') name = attrs['NAME'] id = int(attrs['ID']) field_types = [] field_names = [] while self.token.type == XML_ELEMENT_START: field_type, field_name = self.parse_table_field() field_types.append(field_type) field_names.append(field_name) self.element_end('TABLE_HEADER') self.tables[id] = name, field_types, field_names
[docs] def parse_table_field(self): attrs = self.element_start('TABLE_FIELD') type = attrs['TYPE'] name = self.character_data() self.element_end('TABLE_FIELD') return type, name
[docs] def parse_results(self): self.element_start('RESULTS') table = self.parse_data() self.element_end('RESULTS') return table
[docs] def parse_data(self): rows = [] attrs = self.element_start('DATA') table_id = int(attrs['TABLE_ID']) table_name, field_types, field_names = self.tables[table_id] table = AQtimeTable(table_name, field_names) while self.token.type == XML_ELEMENT_START: row, children = self.parse_row(field_types) table.add_row(row, children) self.element_end('DATA') return table
[docs] def parse_row(self, field_types): row = [None]*len(field_types) children = [] self.element_start('ROW') while self.token.type == XML_ELEMENT_START: if self.token.name_or_data == 'FIELD': field_id, field_value = self.parse_field(field_types) row[field_id] = field_value elif self.token.name_or_data == 'CHILDREN': children = self.parse_children() else: raise XmlTokenMismatch("<FIELD ...> or <CHILDREN ...>", self.token) self.element_end('ROW') return row, children
[docs] def parse_field(self, field_types): attrs = self.element_start('FIELD') id = int(attrs['ID']) type = field_types[id] value = self.character_data() if type == 'Integer': value = int(value) elif type == 'Float': value = float(value) elif type == 'Address': value = int(value) elif type == 'String': pass else: assert False self.element_end('FIELD') return id, value
[docs] def parse_children(self): children = [] self.element_start('CHILDREN') while self.token.type == XML_ELEMENT_START: table = self.parse_data() assert table.name not in children children.append(table) self.element_end('CHILDREN') return children
[docs] def build_profile(self, results): assert results.name == 'Routines' profile = Profile() profile[TIME] = 0.0 for fields, tables in results: function = self.build_function(fields) children = tables['Children'] for fields, _ in children: call = self.build_call(fields) function.add_call(call) profile.add_function(function) profile[TIME] = profile[TIME] + function[TIME] profile[TOTAL_TIME] = profile[TIME] profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) return profile
[docs] def build_function(self, fields): function = Function(self.build_id(fields), self.build_name(fields)) function[TIME] = fields['Time'] function[TOTAL_TIME] = fields['Time with Children'] #function[TIME_RATIO] = fields['% Time']/100.0 #function[TOTAL_TIME_RATIO] = fields['% with Children']/100.0 return function
[docs] def build_call(self, fields): call = Call(self.build_id(fields)) call[TIME] = fields['Time'] call[TOTAL_TIME] = fields['Time with Children'] #call[TIME_RATIO] = fields['% Time']/100.0 #call[TOTAL_TIME_RATIO] = fields['% with Children']/100.0 return call
[docs] def build_id(self, fields): return ':'.join([fields['Module Name'], fields['Unit Name'], fields['Routine Name']])
[docs] def build_name(self, fields): # TODO: use more fields return fields['Routine Name']
[docs]class PstatsParser: """Parse Python profiling statistics saved with te pstats module."""
[docs] def __init__(self, *filename): import pstats try: self.stats = pstats.Stats(*filename) except ValueError: import hotshot.stats self.stats = hotshot.stats.load(filename[0]) self.profile = Profile() self.function_ids = {}
[docs] def get_function_name(self, (filename, line, name)): module = os.path.splitext(filename)[0] module = os.path.basename(module) return "%s:%d:%s" % (module, line, name)
[docs] def get_function(self, key): try: id = self.function_ids[key] except KeyError: id = len(self.function_ids) name = self.get_function_name(key) path = key[0] function = Function(id, name, path) self.profile.functions[id] = function self.function_ids[key] = id else: function = self.profile.functions[id] return function
[docs] def parse(self): self.profile[TIME] = 0.0 self.profile[TOTAL_TIME] = self.stats.total_tt for fn, (cc, nc, tt, ct, callers) in self.stats.stats.iteritems(): callee = self.get_function(fn) callee.called = nc callee[TOTAL_TIME] = ct callee[TIME] = tt self.profile[TIME] += tt self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct) for fn, value in callers.iteritems(): caller = self.get_function(fn) call = Call(callee.id) if isinstance(value, tuple): for i in xrange(0, len(value), 4): nc, cc, tt, ct = value[i:i+4] if CALLS in call: call[CALLS] += cc else: call[CALLS] = cc if TOTAL_TIME in call: call[TOTAL_TIME] += ct else: call[TOTAL_TIME] = ct else: call[CALLS] = value call[TOTAL_TIME] = ratio(value, nc)*ct caller.add_call(call) #self.stats.print_stats() #self.stats.print_callees() # Compute derived events self.profile.validate() self.profile.ratio(TIME_RATIO, TIME) self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) return self.profile
class Theme:

    def __init__(self,
            bgcolor = (0.0, 0.0, 1.0),
            mincolor = (0.0, 0.0, 0.0),
            maxcolor = (0.0, 0.0, 1.0),
            fontname = "Arial",
            minfontsize = 10.0,
            maxfontsize = 10.0,
            minpenwidth = 0.5,
            maxpenwidth = 4.0,
            gamma = 2.2,
            skew = 1.0):
        self.bgcolor = bgcolor
        self.mincolor = mincolor
        self.maxcolor = maxcolor
        self.fontname = fontname
        self.minfontsize = minfontsize
        self.maxfontsize = maxfontsize
        self.minpenwidth = minpenwidth
        self.maxpenwidth = maxpenwidth
        self.gamma = gamma
        self.skew = skew

    def graph_bgcolor(self):
        return self.hsl_to_rgb(*self.bgcolor)

    def graph_fontname(self):
        return self.fontname

    def graph_fontsize(self):
        return self.minfontsize

    def node_bgcolor(self, weight):
        return self.color(weight)

    def node_fgcolor(self, weight):
        return self.graph_bgcolor()

    def node_fontsize(self, weight):
        return self.fontsize(weight)

    def edge_color(self, weight):
        return self.color(weight)

    def edge_fontsize(self, weight):
        return self.fontsize(weight)

    def edge_penwidth(self, weight):
        return max(weight*self.maxpenwidth, self.minpenwidth)

    def edge_arrowsize(self, weight):
        return 0.5 * math.sqrt(self.edge_penwidth(weight))

    def fontsize(self, weight):
        return max(weight**2 * self.maxfontsize, self.minfontsize)

    def color(self, weight):
        weight = min(max(weight, 0.0), 1.0)

        hmin, smin, lmin = self.mincolor
        hmax, smax, lmax = self.maxcolor

        if self.skew < 0:
            raise ValueError("Skew must be greater than 0")
        elif self.skew == 1.0:
            h = hmin + weight*(hmax - hmin)
            s = smin + weight*(smax - smin)
            l = lmin + weight*(lmax - lmin)
        else:
            base = self.skew
            h = hmin + ((hmax-hmin)*(-1.0 + (base ** weight)) / (base - 1.0))
            s = smin + ((smax-smin)*(-1.0 + (base ** weight)) / (base - 1.0))
            l = lmin + ((lmax-lmin)*(-1.0 + (base ** weight)) / (base - 1.0))

        return self.hsl_to_rgb(h, s, l)

    def hsl_to_rgb(self, h, s, l):
        """Convert a color from HSL color-model to RGB.

        See also:

        - http://www.w3.org/TR/css3-color/#hsl-color
        """

        h = h % 1.0
        s = min(max(s, 0.0), 1.0)
        l = min(max(l, 0.0), 1.0)

        if l <= 0.5:
            m2 = l*(s + 1.0)
        else:
            m2 = l + s - l*s
        m1 = l*2.0 - m2
        r = self._hue_to_rgb(m1, m2, h + 1.0/3.0)
        g = self._hue_to_rgb(m1, m2, h)
        b = self._hue_to_rgb(m1, m2, h - 1.0/3.0)

        # Apply gamma correction
        r **= self.gamma
        g **= self.gamma
        b **= self.gamma

        return (r, g, b)

    def _hue_to_rgb(self, m1, m2, h):
        if h < 0.0:
            h += 1.0
        elif h > 1.0:
            h -= 1.0
        if h*6 < 1.0:
            return m1 + (m2 - m1)*h*6.0
        elif h*2 < 1.0:
            return m2
        elif h*3 < 2.0:
            return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0
        else:
            return m1
TEMPERATURE_COLORMAP = Theme(
    mincolor = (2.0/3.0, 0.80, 0.25), # dark blue
    maxcolor = (0.0, 1.0, 0.5), # saturated red
    gamma = 1.0
)

PINK_COLORMAP = Theme(
    mincolor = (0.0, 1.0, 0.90), # pink
    maxcolor = (0.0, 1.0, 0.5), # saturated red
)

GRAY_COLORMAP = Theme(
    mincolor = (0.0, 0.0, 0.85), # light gray
    maxcolor = (0.0, 0.0, 0.0), # black
)

BW_COLORMAP = Theme(
    minfontsize = 8.0,
    maxfontsize = 24.0,
    mincolor = (0.0, 0.0, 0.0), # black
    maxcolor = (0.0, 0.0, 0.0), # black
    minpenwidth = 0.1,
    maxpenwidth = 8.0,
)
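# Illustrative sketch (not part of the original script): how a weight in [0, 1]
# becomes a DOT hex color via the skewed HSL interpolation in Theme.color() and
# the gamma correction in Theme.hsl_to_rgb(). The helper name
# '_example_theme_color' is an assumption made for this example.
def _example_theme_color(weight=0.5, theme=TEMPERATURE_COLORMAP):
    r, g, b = theme.color(weight)  # gamma-corrected RGB components in [0, 1]
    return '#%02x%02x%02x' % tuple(int(255.0*c + 0.5) for c in (r, g, b))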
class DotWriter:
    """Writer for the DOT language.

    .. seealso::

        - "The DOT Language" specification
          http://www.graphviz.org/doc/info/lang.html
    """

    def __init__(self, fp):
        self.fp = fp

    def graph(self, profile, theme):
        self.begin_graph()

        fontname = theme.graph_fontname()

        self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125)
        self.attr('node', fontname=fontname, shape="box", style="filled", fontcolor="white", width=0, height=0)
        self.attr('edge', fontname=fontname)

        for function in profile.functions.itervalues():
            labels = []
            if function.process is not None:
                labels.append(function.process)
            if function.module is not None:
                labels.append(function.module)
            labels.append(function.name)
            for event in TOTAL_TIME_RATIO, TIME_RATIO:
                if event in function.events:
                    label = event.format(function[event])
                    labels.append(label)
            if function.called is not None:
                labels.append(u"%u\xd7" % (function.called,))

            if function.weight is not None:
                weight = function.weight
            else:
                weight = 0.0

            label = '\n'.join(labels)
            self.node(function.id,
                label = label,
                color = self.color(theme.node_bgcolor(weight)),
                fontcolor = self.color(theme.node_fgcolor(weight)),
                fontsize = "%.2f" % theme.node_fontsize(weight),
            )

            for call in function.calls.itervalues():
                callee = profile.functions[call.callee_id]

                labels = []
                for event in TOTAL_TIME_RATIO, CALLS:
                    if event in call.events:
                        label = event.format(call[event])
                        labels.append(label)

                if call.weight is not None:
                    weight = call.weight
                elif callee.weight is not None:
                    weight = callee.weight
                else:
                    weight = 0.0

                label = '\n'.join(labels)

                self.edge(function.id, call.callee_id,
                    label = label,
                    color = self.color(theme.edge_color(weight)),
                    fontcolor = self.color(theme.edge_color(weight)),
                    fontsize = "%.2f" % theme.edge_fontsize(weight),
                    penwidth = "%.2f" % theme.edge_penwidth(weight),
                    labeldistance = "%.2f" % theme.edge_penwidth(weight),
                    arrowsize = "%.2f" % theme.edge_arrowsize(weight),
                )

        self.end_graph()

    def begin_graph(self):
        self.write('digraph {\n')

    def end_graph(self):
        self.write('}\n')

    def attr(self, what, **attrs):
        self.write("\t")
        self.write(what)
        self.attr_list(attrs)
        self.write(";\n")

    def node(self, node, **attrs):
        self.write("\t")
        self.id(node)
        self.attr_list(attrs)
        self.write(";\n")

    def edge(self, src, dst, **attrs):
        self.write("\t")
        self.id(src)
        self.write(" -> ")
        self.id(dst)
        self.attr_list(attrs)
        self.write(";\n")

    def attr_list(self, attrs):
        if not attrs:
            return
        self.write(' [')
        first = True
        for name, value in attrs.iteritems():
            if first:
                first = False
            else:
                self.write(", ")
            self.id(name)
            self.write('=')
            self.id(value)
        self.write(']')

    def id(self, id):
        if isinstance(id, (int, float)):
            s = str(id)
        elif isinstance(id, basestring):
            if id.isalnum() and not id.startswith('0x'):
                s = id
            else:
                s = self.escape(id)
        else:
            raise TypeError
        self.write(s)

    def color(self, (r, g, b)):

        def float2int(f):
            if f <= 0.0:
                return 0
            if f >= 1.0:
                return 255
            return int(255.0*f + 0.5)

        return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)])

    def escape(self, s):
        s = s.encode('utf-8')
        s = s.replace('\\', r'\\')
        s = s.replace('\n', r'\n')
        s = s.replace('\t', r'\t')
        s = s.replace('"', r'\"')
        return '"' + s + '"'

    def write(self, s):
        self.fp.write(s)
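# Illustrative sketch (not part of the original script): emit a two-node call
# graph by hand with DotWriter; the resulting text can be rendered with
# Graphviz, e.g. ``dot -Tpng -o output.png``. The helper name
# '_example_dot_writer' and the node labels are assumptions made for this
# example.
def _example_dot_writer(fp=sys.stdout):
    dot = DotWriter(fp)
    dot.begin_graph()
    dot.attr('node', shape="box", style="filled", fontcolor="white")
    dot.node(0, label="caller\n75.00%", color="#0000ff", fontsize="10.00")
    dot.node(1, label="callee\n25.00%", color="#ff0000", fontsize="10.00")
    dot.edge(0, 1, label=u"3\xd7", penwidth="1.00", arrowsize="0.50")
    dot.end_graph()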
class Main:
    """Main program."""

    themes = {
        "color": TEMPERATURE_COLORMAP,
        "pink": PINK_COLORMAP,
        "gray": GRAY_COLORMAP,
        "bw": BW_COLORMAP,
    }
    def main(self):
        """Main program."""

        parser = optparse.OptionParser(
            usage="\n\t%prog [options] [file] ...",
            version="%%prog %s" % __version__)
        parser.add_option(
            '-o', '--output', metavar='FILE',
            type="string", dest="output",
            help="output filename [stdout]")
        parser.add_option(
            '-n', '--node-thres', metavar='PERCENTAGE',
            type="float", dest="node_thres", default=0.5,
            help="eliminate nodes below this threshold [default: %default]")
        parser.add_option(
            '-e', '--edge-thres', metavar='PERCENTAGE',
            type="float", dest="edge_thres", default=0.1,
            help="eliminate edges below this threshold [default: %default]")
        parser.add_option(
            '-f', '--format',
            type="choice", choices=('prof', 'callgrind', 'oprofile', 'hprof', 'sysprof', 'pstats', 'shark', 'sleepy', 'aqtime', 'xperf'),
            dest="format", default="prof",
            help="profile format: prof, callgrind, oprofile, hprof, sysprof, shark, sleepy, aqtime, pstats, or xperf [default: %default]")
        parser.add_option(
            '-c', '--colormap',
            type="choice", choices=('color', 'pink', 'gray', 'bw'),
            dest="theme", default="color",
            help="color map: color, pink, gray, or bw [default: %default]")
        parser.add_option(
            '-s', '--strip',
            action="store_true",
            dest="strip", default=False,
            help="strip function parameters, template parameters, and const modifiers from demangled C++ function names")
        parser.add_option(
            '-w', '--wrap',
            action="store_true",
            dest="wrap", default=False,
            help="wrap function names")
        parser.add_option(
            '-r', '--restrict',
            type="string", dest="restrict_string", default="",
            help="eliminate functions that do not contain this string in their pathname [default: None]")
        # add a new option to control skew of the colorization curve
        parser.add_option(
            '--skew',
            type="float", dest="theme_skew", default=1.0,
            help="skew the colorization curve. Values < 1.0 give more variety to lower percentages. Values > 1.0 give less variety to lower percentages")
        (self.options, self.args) = parser.parse_args(sys.argv[1:])

        if len(self.args) > 1 and self.options.format != 'pstats':
            parser.error('incorrect number of arguments')

        try:
            self.theme = self.themes[self.options.theme]
        except KeyError:
            parser.error('invalid colormap \'%s\'' % self.options.theme)

        # set skew on the theme now that it has been picked.
        if self.options.theme_skew:
            self.theme.skew = self.options.theme_skew

        if self.options.format == 'prof':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = GprofParser(fp)
        elif self.options.format == 'callgrind':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = CallgrindParser(fp)
        elif self.options.format == 'oprofile':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = OprofileParser(fp)
        elif self.options.format == 'sysprof':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = SysprofParser(fp)
        elif self.options.format == 'hprof':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = HProfParser(fp)
        elif self.options.format == 'pstats':
            if not self.args:
                parser.error('at least a file must be specified for pstats input')
            parser = PstatsParser(*self.args)
        elif self.options.format == 'xperf':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = XPerfParser(fp)
        elif self.options.format == 'shark':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = SharkParser(fp)
        elif self.options.format == 'sleepy':
            if len(self.args) != 1:
                parser.error('exactly one file must be specified for sleepy input')
            parser = SleepyParser(self.args[0])
        elif self.options.format == 'aqtime':
            if not self.args:
                fp = sys.stdin
            else:
                fp = open(self.args[0], 'rt')
            parser = AQtimeParser(fp)
        else:
            parser.error('invalid format \'%s\'' % self.options.format)

        self.profile = parser.parse()

        if self.options.output is None:
            self.output = sys.stdout
        else:
            self.output = open(self.options.output, 'wt')

        self.write_graph()
    _parenthesis_re = re.compile(r'\([^()]*\)')
    _angles_re = re.compile(r'<[^<>]*>')
    _const_re = re.compile(r'\s+const$')
    def strip_function_name(self, name):
        """Remove extraneous information from C++ demangled function names."""

        # Strip function parameters from name by recursively removing paired parenthesis
        while True:
            name, n = self._parenthesis_re.subn('', name)
            if not n:
                break

        # Strip const qualifier
        name = self._const_re.sub('', name)

        # Strip template parameters from name by recursively removing paired angles
        while True:
            name, n = self._angles_re.subn('', name)
            if not n:
                break

        return name

    def wrap_function_name(self, name):
        """Split the function name on multiple lines."""

        if len(name) > 32:
            ratio = 2.0/3.0
            height = max(int(len(name)/(1.0 - ratio) + 0.5), 1)
            width = max(len(name)/height, 32)
            # TODO: break lines in symbols
            name = textwrap.fill(name, width, break_long_words=False)

        # Take away spaces
        name = name.replace(", ", ",")
        name = name.replace("> >", ">>")
        name = name.replace("> >", ">>") # catch consecutive

        return name

    def compress_function_name(self, name):
        """Compress function name according to the user preferences."""

        if self.options.strip:
            name = self.strip_function_name(name)

        if self.options.wrap:
            name = self.wrap_function_name(name)

        # TODO: merge functions with same resulting name

        return name
    def write_graph(self):
        dot = DotWriter(self.output)
        profile = self.profile
        profile.prune(self.options.node_thres/100.0, self.options.edge_thres/100.0,
                      self.options.restrict_string)

        for function in profile.functions.itervalues():
            function.name = self.compress_function_name(function.name)

        dot.graph(profile, self.theme)
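# Illustrative sketch (not part of the original script): what the '--strip'
# option does to a demangled C++ name. The helper name '_example_strip_name'
# and the sample symbol are assumptions made for this example.
def _example_strip_name():
    name = 'std::vector<int, std::allocator<int> >::push_back(int const&) const'
    # Parameters, the trailing const qualifier, and template arguments are
    # removed, leaving 'std::vector::push_back'.
    return Main().strip_function_name(name)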
if __name__ == '__main__':
    Main().main()