# Source code for pySPACE.run.launch

#!/usr/bin/env python
""" Main program to run pySPACE

For further instructions take a look at the pySPACE documentation and the tutorials 
in there!

.. note::
    Due to errors in configuration files, data or the software, the software may
    crash. Because of internal parallelization and threading, it is currently
    not possible to use ``ctrl + c``. So you should kill the processes manually,
    e.g.::

        ctrl + z
        kill -9 %1
        fg

**Profiling**

For profiling the software you should use the option ``--profile``,
when running pySPACE. Furthermore you should use the
:class:`~pySPACE.environments.backends.serial.SerialBackend`.
Otherwise the started subprocesses can not be examined.
The result is called `profile.pstat` and saved in your result folder.
To get a printout of this profiling, you can use gprof2dot.py in the
library folder. The following is a (partial) copy of its documentation:

    General usage:: 
    
        python gprof2dot.py -f pstats profiling_file -r pySPACE | dot -Tpng -o output.png
    
    or simple usage in result folder, when pyspace is on same level as general
    storage folder::

        python ../../../pyspace/pySPACE/tools/gprof2dot.py -f pstats profile.pstat | dot -Tpng -o output.png
    
    where profiling_file is the file that is generated by the cProfile module
    and output.png is the filename of the resulting picture.
    The option '-r', '--restrict' eliminates all functions from the
    profiling that do not contain the given string in
    their path name [default: None].

    .. note:: For creating the graphic, which is done with the 'dot' command,
              the GraphViz package needs to be installed.
"""
# general imports
import os
import shutil
import sys
import logging
from optparse import OptionParser
import cProfile
import yaml
import warnings

# Make sure the directory that contains the pySPACE package can be imported.
# The directory of this file is cut right before the path separator that
# precedes the last occurrence of 'pySPACE'.
file_path = os.path.dirname(os.path.realpath(__file__))
pyspace_path = file_path[:file_path.rfind('pySPACE') - 1]
if pyspace_path not in sys.path:
    sys.path.append(pyspace_path)

import pySPACE
# Directory the pySPACE package was actually imported from; warn if it is
# not the one this launch script belongs to.
import_path = os.path.realpath(
    os.path.join(os.path.dirname(pySPACE.__file__), os.pardir))
if import_path != pyspace_path:
    warnings.warn("Check your Python path! '%s' is the expected pySPACE path,"
                  " but '%s' is used." % (pyspace_path, import_path))

# pySPACE imports
from pySPACE.missions.operations.base import Operation, create_operation_from_file
from pySPACE.tools.filesystem import get_relative_path, create_source_archive
from pySPACE import create_backend
from pySPACE.environments.chains.operation_chain import create_operation_chain
from pySPACE.environments.big_bang import LOGGER

#import matplotlib
#matplotlib.use("MacOSX") #MacOSX")

class LaunchParser(OptionParser):
    """ Option parser that prints its epilog without reformatting

    The standard :class:`optparse.OptionParser` hands the epilog to its help
    formatter, which re-wraps the text. This subclass returns the epilog
    as it was given, so that newlines and indentation survive.

    Code taken from:

    http://stackoverflow.com/questions/5961160/displaying-newlines-in-the-help-message-when-using-pythons-optparse

    """
    def format_epilog(self, formatter):
        """ Return the epilog unchanged, ignoring the given *formatter*

        An unset epilog is returned as empty string, because
        :meth:`optparse.OptionParser.format_help` joins the returned value
        with the other help sections and would fail on ``None``.
        """
        return self.epilog or ""

# Help text shown after the option list; handed to LaunchParser as ``epilog``.
epilog = """
This is the script to launch pySPACE.

For detailed documentation on pySPACE refer to the online documentation at
http://pyspace.github.io/pyspace/index.html,
the __init__ file in the pySPACE folder, or the index.rst in the docs folder.

This script shall start pySPACE in the standard benchmark flow.
If you used the setup.py before or another installation program
all relevant files should be found in the folder `pySPACEcenter`
in your home directory. Otherwise it will be searched for in your
`PYSPACE_CONF_DIR`.

The main configuration is specified in the <config.yaml>. If you run pySPACE
for the first time, have a look at it and the therein specified environment
parameters.

Due to errors in configuration files, data or the software, the software may
crash. Because of internal parallelization and threading, it is currently
not possible to use ''ctrl + c''. So you should kill the processes manually
e.g.::

    ctrl + z
    kill -9 %1
    fg

"""

def run_operation(default_backend, operation, ex_timeout=1e6, re_timeout=1e6):
    """ Runs the given operation on the backend

    Runs the given operation *operation* either on the backend specified in the
    operation's spec file or (if none is specified) on the backend
    passed as *default_backend*.

    Different timeouts are required, because for the execute function get is
    called which does not accept too high timeouts without proper error
    handling on a Mac OS X whereas Linux systems are fine with larger timeouts.

    **Parameters**

        :default_backend:
            Backend used if ``operation.operation_spec`` has no "backend" entry.

        :operation:
            Operation object providing ``operation_spec`` and
            ``get_output_directory()``.

        :ex_timeout:
            Passed as *timeout* to ``backend.execute``.

        :re_timeout:
            Passed as *timeout* to ``backend.retrieve``.

    **Returns** the value of ``operation.get_output_directory()``.

    ``backend.cleanup()`` is called even if executing, retrieving or
    consolidating raises an exception.
    """
    # Check if default backend can be used or if we have to run on a separate
    # backend
    operation_spec = operation.operation_spec
    if "backend" in operation_spec:
        backend = create_backend(operation_spec["backend"])
        LOGGER.info(" --> For current operation using backend: \n\t\t %s."%str(backend))
    else:
        backend = default_backend
    # In case an operation_chain is executed the queue needs to be reset, since
    # the first terminated operation cleans and closes the queue.
    if str(backend) == "MulticoreBackend":
        backend.reset_queue()
    backend.stage_in(operation)
    try:
        backend.execute(timeout=ex_timeout)
        backend.retrieve(timeout=re_timeout)
        backend.consolidate()
        return operation.get_output_directory()
    finally:
        backend.cleanup()


def run_operation_chain(default_backend, operation_chain):
    """ Runs the given operation chain on the backend

    Runs the given operation chain *operation_chain* on the backend passed as
    *default_backend*.

    The entries "input_path" and "operations" are read from the chain,
    "prepare_operation" and "runs" (default: 1) are optional.
    If a prepare operation is given, it is executed first and its output
    directory is moved to a sibling directory named "prepare_operation".
    This directory is handed to every operation of the chain via the hidden
    pseudo parameter "__PREPARE_OPERATION__".

    Each entry of "operations" is either the name of an operation spec file
    (relative to ``<spec_dir>/operations``) or a dictionary with the optional
    keys "operation_spec" (such a file name) and "overridden_params"
    (entries that overwrite the loaded specification).
    The operations are executed sequentially and the output of each
    operation is the input of the next one.

    .. todo:: document override mode here and in tutorial
    .. todo:: documentation needed for prepare operation and hidden params
    .. todo:: parameter settings missing instead of parameter ranges?
    """
    base_result_dir = operation_chain.get_output_directory()

    input_path = operation_chain["input_path"]
    prepare_operation = operation_chain["prepare_operation"] \
                            if "prepare_operation" in operation_chain else None
    operations = operation_chain["operations"]
    runs = operation_chain["runs"] if "runs" in operation_chain else 1

    # Run prepare operation if requested
    if prepare_operation is not None:
        LOGGER.info("Running prepare operation of the operation chain")
        # Create operation object for specified prepare operation
        operation = create_operation_from_file(prepare_operation,
                                               base_result_dir)
        output_directory = run_operation(default_backend, operation)

        # Rename output_directory
        preparation_directory = os.path.join(
            os.path.dirname(output_directory), "prepare_operation")
        shutil.move(output_directory, preparation_directory)

    # Execute all operations of the operation chain sequentially
    for index, operation in enumerate(operations):
        overridden_params_dict = {}
        if isinstance(operation, str):
            op_spec_relative_filename = operation
        else:  # it should be a dictionary...
            op_spec_relative_filename = operation.get('operation_spec')
            overridden_params_dict = operation.get("overridden_params", {})

        if op_spec_relative_filename is not None:
            LOGGER.info("Running operation %s of the operation chain (%s/%s)" % \
                    (op_spec_relative_filename, index + 1, len(operations)))
            spec_file_name = os.path.join(pySPACE.configuration.spec_dir,
                                          "operations",
                                          op_spec_relative_filename)
            # Close the spec file again as soon as it has been parsed.
            # NOTE(review): yaml.load may construct arbitrary Python objects;
            # only use specification files from trusted sources.
            with open(spec_file_name, "r") as spec_file:
                operation_spec = yaml.load(spec_file)
        else:
            # we expect to get everything from overridden params
            operation_spec = {}
            operation_name = overridden_params_dict.get('operation_name',
                                                        "<unnamed>")
            LOGGER.info("Running operation %s of the operation chain (%s/%s)" % \
                    (operation_name, index + 1, len(operations)))

        operation_spec["input_path"] = input_path
        operation_spec["runs"] = runs

        # Add pseudo parameter "__PREPARE_OPERATION__" to parameter ranges
        # if there was a prepare operation
        if prepare_operation is not None:
            if "parameter_ranges" not in operation_spec:
                operation_spec["parameter_ranges"] = {}
            operation_spec["parameter_ranges"]["__PREPARE_OPERATION__"] = [preparation_directory]
            if "hide_parameters" not in operation_spec:
                operation_spec["hide_parameters"] = []
            operation_spec["hide_parameters"].append("__PREPARE_OPERATION__")

        # override params with any explicitly specified params in the
        # operation chain spec.
        operation_spec.update(overridden_params_dict)

        # use the operation factory method to create operation
        operation = Operation.create(operation_spec,
                                     base_result_dir=base_result_dir)

        # Run the operation
        output_directory = run_operation(default_backend, operation)
        # The output acts as input for the next operation of the operation chain
        input_path = get_relative_path(pySPACE.configuration.storage,
                                       output_directory)


def main():
    """ Parse the command line and run the requested operation (chain)

    The function

    - checks that this file is located at ``.../pySPACE/run/launch.py``,
    - parses the command line options,
    - loads the chosen configuration file with ``pySPACE.load_configuration``,
    - appends the directory of the eeg modules to ``sys.path``,
    - creates the default backend (serial, if no backend option is given),
    - stores a source archive in the output directory and runs either the
      given operation or the given operation chain, optionally under
      ``cProfile`` (result file ``profile.pstat`` in the output directory),
    - and finally shuts down logging and the logging of the backend.

    If neither an operation nor an operation chain is specified,
    ``parser.error`` is called.

    :raises RuntimeError: if the directory structure is not as expected
    """
    #### Find pySPACE package and import it ####

    # Determine path of current file
    path = os.path.realpath(__file__)

    # Move up to parent directory that contains the pySPACE tree
    suffix = []
    for i in range(3):
        path, tail = os.path.split(path)
        suffix.append(tail)
    parent_dir = path

    # Check proper directory structure
    if suffix != ['launch.py', 'run', 'pySPACE']:
        raise RuntimeError("Encountered incorrect directory structure. "
                           "launch.py needs to reside in $PARENT_DIR/pySPACE/run")


    # Workaround for eegserver crashing after 255 open ports
    # - Now it crashes after 4096 open ports ;-)
    #import resource
    #(fd1, fd2) = resource.getrlimit(resource.RLIMIT_NOFILE)
    #fd1 = 4096 if fd2 == resource.RLIM_INFINITY else fd2-1
    #resource.setrlimit(resource.RLIMIT_NOFILE, (fd1,fd2))
    # ------------------------------------------------------

    #########################################

    ### Parsing of command line arguments
    usage = "Usage: %prog [BACKEND_SPECIFICATION]  [--config <conf.yaml>] "\
            "[--operation <operation.yaml> | --operation_chain <operation_chain.yaml>] "\
            "[--profile]"\
            " where BACKEND_SPECIFICATION can be --serial, --mcore, --loadl or --mpi"

    parser = LaunchParser(usage=usage, epilog=epilog)

    # Configuration
    parser.add_option("-c", "--configuration",
                      default="config.yaml",
                      help="Choose the configuration file, which is looked up in PYSPACE_CONF_DIR",
                      action="store")
    # Backends
    parser.add_option("-s", "--serial", action="store_true", default=False,
                      help="Enables execution on the SerialBackend (one local process)")
    parser.add_option("-m", "--mcore", action="store_true", default=False,
                      help="Enables execution on the MulticoreBackend (one process per CPU core)")
    parser.add_option("-l", "--local", action="store_true", default=False,
                      help="Enables execution on the MulticoreBackend (one process per CPU core)")
    parser.add_option("-i", "--mpi", action="store_true", default=False,
                      help="Enables execution via MPI")
    parser.add_option("-L", "--loadl", action="store_true", default=False,
                      help="Enables execution via LoadLeveler.")
    # Operation / operation chain
    parser.add_option("-o", "--operation",
                      help="Chooses the operation that will be executed. The "
                           "operation specification file is looked up in "
                           "$SPEC_DIR/operations",
                      action="store")
    parser.add_option("-O", "-C", "--operation_chain",
                      help="Chooses the operation chain that will be executed. "
                           "The operation chain specification file is looked up "
                           "in $SPEC_DIR/operation_chains",
                      action="store")
    # Profiling
    parser.add_option("-p", "--profile",
                      help="Profiles execution.",
                      action="store_true", default=False,)

    (options, args) = parser.parse_args()

    # Load configuration file
    pySPACE.load_configuration(options.configuration)

    if hasattr(pySPACE.configuration, "eeg_acquisition_dir"):
        eeg_parent_dir = os.sep.join(
            pySPACE.configuration.eeg_acquisition_dir.split(os.sep)[:-1])
        # Only set the module path if the configuration does not define it.
        # (The former check tested "eeg_acquisition_dir" a second time and
        # could therefore never be true.)
        if not hasattr(pySPACE.configuration, "eeg_module_path"):
            pySPACE.configuration.eeg_module_path = eeg_parent_dir
    else:
        eeg_parent_dir, tail = os.path.split(parent_dir)
        eeg_parent_dir = os.path.join(eeg_parent_dir, "eeg_modules")
        pySPACE.configuration.eeg_module_path = eeg_parent_dir
    sys.path.append(eeg_parent_dir)

    # Create backend
    if options.serial:
        default_backend = create_backend("serial")
    elif options.mcore or options.local:
        default_backend = create_backend("mcore")
    elif options.mpi:
        default_backend = create_backend("mpi")
    elif options.loadl:
        default_backend = create_backend("loadl")
    else:  # Falling back to serial backend
        default_backend = create_backend("serial")

    LOGGER.info(" --> Using backend: \n\t\t %s."%str(default_backend))

    if options.operation is not None:
        # Create operation for the given name
        operation = create_operation_from_file(options.operation)
        # Store current source code for later inspection
        create_source_archive(archive_path=operation.get_output_directory())
        if not options.profile:
            # Execute the current operation
            run_operation(default_backend, operation)
        else:
            # Execute and profile operation; the same module level function
            # as in the unprofiled case is used (resolved via globals())
            cProfile.runctx('run_operation(default_backend, operation)',
                            globals(), locals(),
                            filename=os.path.join(
                                operation.get_output_directory(),
                                "profile.pstat"))
    elif options.operation_chain is not None:
        # Create operation chain for the given name
        operation_chain = create_operation_chain(options.operation_chain)
        # Store current source code for later inspection
        create_source_archive(archive_path=operation_chain.get_output_directory())

        if not options.profile:
            # Execute the current operation_chain
            run_operation_chain(default_backend, operation_chain)
        else:
            # Execute and profile operation chain; the same module level
            # function as in the unprofiled case is used
            cProfile.runctx('run_operation_chain(default_backend, operation_chain)',
                            globals(), locals(),
                            filename=os.path.join(
                                operation_chain.get_output_directory(),
                                "profile.pstat"))
    else:
        parser.error("Neither operation chain nor operation specification file given!")

    logging.shutdown()
    # Stop logger thread in backend
    default_backend._stop_logging()

    del default_backend

if __name__ == "__main__":
    # Run main with soft finishing. sys.exit raises SystemExit, so a
    # statement placed after it (like the former ``os._exit(0)``) is never
    # executed.
    sys.exit(main())