# Source code for pySPACE.run.launch

#!/usr/bin/env python
""" Main program to run pySPACE

For further instructions take a look at the pySPACE documentation and the tutorials 
in there!

.. note::
    Due to errors in configuration files, data or the software, the software may
    crash. Because of internal parallelization and threading, it is currently
    not possible to use ``ctrl + c``. So you should kill the processes manually
    e.g.::

        ctrl + z
        kill -9 %1
        fg

**Profiling**

For profiling the software you should use the option ``--profile``,
when running pySPACE. Furthermore you should use the
:class:`~pySPACE.environments.backends.serial.SerialBackend`.
Otherwise the started subprocesses can not be examined.
The result is called `profile.pstat` and saved in your result folder.
For getting a print out of this profiling, you can use gprof2dot.py in the
library folder. (partial) Copy from the documentation:

    General usage:: 
    
        python gprof2dot.py -f pstats profiling_file -r pySPACE | dot -Tpng -o output.png
    
    or simple usage in result folder, when pyspace is on same level as general
    storage folder::

        python ../../../pyspace/pySPACE/tools/gprof2dot.py -f pstats profile.pstat | dot -Tpng -o output.png
    
    where profiling_file is the file that is generated by the cProfile module
    and output.png is the filename of the resulting picture.
    The option '-r', '--restrict' is there to eliminate functions in the
    profiling that do not contain this string in
    their path name [default: None].

    .. note:: For creating the graphic, which is done with the 'dot' command,
              the GraphViz package needs to be installed.
"""
# general imports
import os
import shutil
import sys
import logging
from optparse import OptionParser
import cProfile
import yaml
import warnings

# Make the directory that contains the pySPACE package importable.
# The path of this file is cut right before the separator that precedes the
# last occurrence of 'pySPACE'.
file_path = os.path.dirname(os.path.realpath(__file__))
pyspace_path = file_path[:file_path.rfind('pySPACE') - 1]
if pyspace_path not in sys.path:
    sys.path.append(pyspace_path)

import pySPACE

# Warn if the package that was actually imported does not live in the
# directory derived from the location of this script.
import_path = os.path.realpath(
    os.path.join(os.path.dirname(pySPACE.__file__), os.path.pardir))
if import_path != pyspace_path:
    warnings.warn("".join([
        "Check your Python path! ",
        "'%s' is the expected pySPACE path," % pyspace_path,
        " but '%s' is used." % import_path]))

# pySPACE imports
from pySPACE.missions.operations.base import Operation, create_operation_from_file
from pySPACE.tools.filesystem import get_relative_path, create_source_archive
from pySPACE import create_backend
from pySPACE.environments.chains.operation_chain import create_operation_chain
from pySPACE.environments.big_bang import LOGGER

#import matplotlib
#matplotlib.use("MacOSX") #MacOSX")

class LaunchParser(OptionParser):
    """ Option parser that prints its epilog without reformatting it

    The epilog is returned exactly as it was given, so the line breaks in
    it show up unchanged in the help message.

    Code taken from:
    http://stackoverflow.com/questions/5961160/displaying-newlines-in-the-help-message-when-using-pythons-optparse
    """

    def format_epilog(self, formatter):
        """ Return the epilog string unchanged

        **Parameters**

            :formatter: Help formatter handed in by optparse. It is ignored,
                        because the epilog shall not be re-wrapped.

        **Returns** the epilog as passed to the constructor, or an empty
        string if the parser was created without an epilog.
        """
        # optparse joins this value into the help text with "".join(...),
        # so a missing epilog (None) has to be mapped to an empty string.
        return self.epilog or ""
# Help epilog that main() passes to LaunchParser. LaunchParser.format_epilog
# returns it unchanged, so it appears in the help message exactly as written.
epilog=\ """ This is the script to launch pySPACE. For detailed documentation on pySPACE refer to the online documentation at http://pyspace.github.io/pyspace/index.html, the __init__ file in the pySPACE folder, or the index.rst in the docs folder. This script shall start pySPACE in the standard benchmark flow. If you used the setup.py before or another installation program all relevant files should be found in the folder `pySPACEcenter` in your home directory. Otherwise it will be searched for in your `PYSPACE_CONF_DIR`. The main configuration is specified in the <config.yaml>. If you run pySPACE for the first time, have a look at it and the therein specified environment parameters. Due to errors in configuration files, data or the software, the software may crash. Because of internal parallelization and threading, it is currently not possible to use ''ctrl + c''. So you should kill the processes manually e.g.:: ctrl + z kill -9 %1 fg """
def run_operation(default_backend, operation, ex_timeout=1e6, re_timeout=1e6):
    """ Execute one operation and return its output directory

    The operation *operation* is run on the backend named under the key
    ``backend`` in its specification. If the specification names no backend,
    *default_backend* is used.

    **Parameters**

        :default_backend: Backend used when the operation does not request
                          one of its own.
        :operation:       Operation that is staged in and executed.
        :ex_timeout:      Timeout handed to the ``execute`` call of the
                          backend.
        :re_timeout:      Timeout handed to the ``retrieve`` call of the
                          backend.

    Two separate timeouts exist because ``execute`` relies on a ``get`` call
    which does not accept too high timeouts without proper error handling on
    Mac OS X, whereas Linux systems are fine with larger timeouts.

    **Returns** the output directory reported by the operation. Once the
    operation has been staged in, the backend is cleaned up even if one of
    the later steps raises.
    """
    spec = operation.operation_spec

    # Start from the default and switch only if the operation asks for
    # a backend of its own.
    backend = default_backend
    if "backend" in spec:
        backend = create_backend(spec["backend"])
        LOGGER.info(" --> For current operation using backend: \n\t\t %s." % str(backend))

    # Within an operation chain the queue has to be reset, because the first
    # terminated operation cleans and closes it.
    if backend.__str__() == "MulticoreBackend":
        backend.reset_queue()

    backend.stage_in(operation)
    try:
        backend.execute(timeout=ex_timeout)
        backend.retrieve(timeout=re_timeout)
        backend.consolidate()
        result_directory = operation.get_output_directory()
    finally:
        backend.cleanup()
    return result_directory
def run_operation_chain(default_backend, operation_chain):
    """ Runs the given operation chain on the backend

    Runs the given operation chain *operation_chain* on the backend passed as
    *default_backend*. The operations are executed one after the other and
    the output directory of each operation becomes the input path of the
    next one.

    **Parameters**

        :default_backend: Backend handed to :func:`run_operation` for every
                          operation of the chain.
        :operation_chain: Chain specification. It is queried for the keys
                          ``input_path`` and ``operations`` and for the
                          optional keys ``prepare_operation`` and ``runs``
                          (default: 1), and has to provide
                          ``get_output_directory()``.

    Each entry of ``operations`` is either a string, naming a specification
    file below ``<spec_dir>/operations``, or a dictionary with the optional
    keys ``operation_spec`` (such a file name) and ``overridden_params``
    (entries that overwrite those of the loaded specification).

    If a prepare operation is given, it is run first, its output directory
    is renamed to ``prepare_operation`` and handed to every operation via
    the hidden pseudo parameter ``__PREPARE_OPERATION__``.

    .. todo:: document override mode here and in tutorial
    .. todo:: documentation needed for prepare operation and hidden params
    .. todo:: parameter settings missing instead of parameter ranges?
    """
    base_result_dir = operation_chain.get_output_directory()
    input_path = operation_chain["input_path"]
    prepare_operation = operation_chain["prepare_operation"] \
        if "prepare_operation" in operation_chain else None
    operations = operation_chain["operations"]
    runs = operation_chain["runs"] if "runs" in operation_chain else 1

    # Run prepare operation if requested
    if prepare_operation is not None:
        LOGGER.info("Running prepare operation of the operation chain")
        # Create operation object for specified prepare operation
        operation = create_operation_from_file(prepare_operation,
                                               base_result_dir)
        output_directory = run_operation(default_backend, operation)
        # Rename output_directory
        preparation_directory = \
            os.sep.join(output_directory.split(os.sep)[:-1]) + \
            os.sep + "prepare_operation"
        shutil.move(output_directory, preparation_directory)

    # Execute all operations of the operation chain sequentially
    for index, operation in enumerate(operations):
        overridden_params_dict = {}
        if isinstance(operation, str):
            op_spec_relative_filename = operation
        else:  # it should be a dictionary...
            if 'operation_spec' in operation:
                op_spec_relative_filename = operation['operation_spec']
            else:
                op_spec_relative_filename = None
            try:
                overridden_params_dict = operation["overridden_params"]
            except KeyError:
                pass

        if op_spec_relative_filename is not None:
            LOGGER.info("Running operation %s of the operation chain (%s/%s)" % \
                        (op_spec_relative_filename, index + 1, len(operations)))
            spec_file_name = os.path.join(pySPACE.configuration.spec_dir,
                                          "operations",
                                          op_spec_relative_filename)
            # Close the spec file deterministically instead of leaving the
            # handle to the garbage collector.
            # NOTE(review): yaml.load can construct arbitrary Python objects;
            # this is only acceptable for trusted spec files. Consider
            # yaml.safe_load if the specs do not need custom tags.
            with open(spec_file_name, "r") as spec_file:
                operation_spec = yaml.load(spec_file)
        else:  # we expect to get everything from overridden params
            operation_spec = {}
            try:
                operation_name = overridden_params_dict['operation_name']
            except KeyError:
                operation_name = "<unnamed>"
            LOGGER.info("Running operation %s of the operation chain (%s/%s)" % \
                        (operation_name, index + 1, len(operations)))

        operation_spec["input_path"] = input_path
        operation_spec["runs"] = runs

        # Add pseudo parameter "__PREPARE_OPERATION__" to parameter ranges
        # if there was a prepare operation
        if prepare_operation is not None:
            if "parameter_ranges" not in operation_spec:
                operation_spec["parameter_ranges"] = {}
            operation_spec["parameter_ranges"]["__PREPARE_OPERATION__"] = \
                [preparation_directory]
            if "hide_parameters" not in operation_spec:
                operation_spec["hide_parameters"] = []
            operation_spec["hide_parameters"].append("__PREPARE_OPERATION__")

        # override params with any explicitly specified params in the
        # operation chain spec.
        operation_spec.update(overridden_params_dict)

        # use the operation factory method to create operation
        operation = Operation.create(operation_spec,
                                     base_result_dir=base_result_dir)

        # Run the operation
        output_directory = run_operation(default_backend, operation)

        # The output acts as input for the next operation of the operation
        # chain
        input_path = get_relative_path(pySPACE.configuration.storage,
                                       output_directory)
def main():
    """ Parse the command line and run the requested operation or chain

    The function checks that this file resides in ``$PARENT_DIR/pySPACE/run``,
    parses the command line options, loads the pySPACE configuration,
    creates the selected backend (the serial backend if none is selected)
    and then runs either the operation given with ``--operation`` or the
    operation chain given with ``--operation_chain``. With ``--profile`` the
    run is executed under cProfile and the result is stored as
    ``profile.pstat`` in the output directory.

    If neither an operation nor an operation chain is given, the parser
    reports an error. A ``RuntimeError`` is raised if the directory
    structure around this file is not the expected one.
    """
    #### Find pySPACE package and import it ####

    # Determine path of current file
    path = os.path.realpath(__file__)

    # Move up to parent directory that contains the pySPACE tree
    suffix = []
    for _ in range(3):
        path, tail = os.path.split(path)
        suffix.append(tail)
    parent_dir = path

    # Check proper directory structure
    if suffix != ['launch.py', 'run', 'pySPACE']:
        # Call form of raise: valid in Python 2 and Python 3
        raise RuntimeError("Encountered incorrect directory structure. "
                           "launch.py needs to reside in $PARENT_DIR/pySPACE/run")

    # Workaround for eegserver crashing after 255 open ports
    # - Now it crashes after 4096 open ports ;-)
    #import resource
    #(fd1, fd2) = resource.getrlimit(resource.RLIMIT_NOFILE)
    #fd1 = 4096 if fd2 == resource.RLIM_INFINITY else fd2-1
    #resource.setrlimit(resource.RLIMIT_NOFILE, (fd1,fd2))
    # ------------------------------------------------------

    #########################################

    ### Parsing of command line arguments
    usage = "Usage: %prog [BACKEND_SPECIFICATION] [--config <conf.yaml>] "\
            "[--operation <operation.yaml> | --operation_chain <operation_chain.yaml>] "\
            "[--profile]"\
            " where BACKEND_SPECIFICATION can be --serial, --mcore, --loadl or --mpi"

    parser = LaunchParser(usage=usage, epilog=epilog)

    # Configuration
    parser.add_option("-c", "--configuration",
                      default="config.yaml",
                      help="Choose the configuration file, which is looked up in PYSPACE_CONF_DIR",
                      action="store")
    # Backends
    parser.add_option("-s", "--serial",
                      action="store_true", default=False,
                      help="Enables execution on the SerialBackend (one local process)")
    parser.add_option("-m", "--mcore",
                      action="store_true", default=False,
                      help="Enables execution on the MulticoreBackend (one process per CPU core)")
    parser.add_option("-l", "--local",
                      action="store_true", default=False,
                      help="Enables execution on the MulticoreBackend (one process per CPU core)")
    parser.add_option("-i", "--mpi",
                      action="store_true", default=False,
                      help="Enables execution via MPI")
    parser.add_option("-L", "--loadl",
                      action="store_true", default=False,
                      help="Enables execution via LoadLeveler.")
    # Operation / operation chain
    parser.add_option("-o", "--operation",
                      help="Chooses the operation that will be executed. The "
                           "operation specification file is looked up in "
                           "$SPEC_DIR/operations",
                      action="store")
    parser.add_option("-O", "-C", "--operation_chain",
                      help="Chooses the operation chain that will be executed. "
                           "The operation chain specification file is looked up "
                           "in $SPEC_DIR/operation_chains",
                      action="store")
    # Profiling
    parser.add_option("-p", "--profile",
                      help="Profiles execution.",
                      action="store_true", default=False,)

    # Positional arguments are not used
    options, _ = parser.parse_args()

    # Load configuration file
    pySPACE.load_configuration(options.configuration)

    if hasattr(pySPACE.configuration, "eeg_acquisition_dir"):
        eeg_parent_dir = os.sep.join(
            pySPACE.configuration.eeg_acquisition_dir.split(os.sep)[:-1])
        # The original tested "eeg_acquisition_dir" a second time here, which
        # can never be missing inside this branch, so the assignment was
        # dead code. Test the attribute that is actually assigned.
        # NOTE(review): presumably a configured eeg_module_path shall win
        # over the derived one - confirm.
        if not hasattr(pySPACE.configuration, "eeg_module_path"):
            pySPACE.configuration.eeg_module_path = eeg_parent_dir
    else:
        eeg_parent_dir, tail = os.path.split(parent_dir)
        eeg_parent_dir = os.path.join(eeg_parent_dir, "eeg_modules")
        pySPACE.configuration.eeg_module_path = eeg_parent_dir

    sys.path.append(eeg_parent_dir)

    # Create backend
    if options.serial:
        default_backend = create_backend("serial")
    elif options.mcore or options.local:
        default_backend = create_backend("mcore")
    elif options.mpi:
        default_backend = create_backend("mpi")
    elif options.loadl:
        default_backend = create_backend("loadl")
    else:  # Falling back to serial backend
        default_backend = create_backend("serial")

    LOGGER.info(" --> Using backend: \n\t\t %s." % str(default_backend))

    # NOTE(review): the profiled statements below go through the pySPACE
    # package namespace, whereas the unprofiled branches call the functions
    # of this module directly - confirm that both resolve to the same code.
    if options.operation is not None:
        # Create operation for the given name
        operation = create_operation_from_file(options.operation)
        # Store current source code for later inspection
        create_source_archive(archive_path=operation.get_output_directory())
        if not options.profile:
            # Execute the current operation
            run_operation(default_backend, operation)
        else:
            # Execute and profile operation
            cProfile.runctx('pySPACE.run_operation(default_backend, operation)',
                            globals(), locals(),
                            filename=operation.get_output_directory()
                            + os.sep + "profile.pstat")
    elif options.operation_chain is not None:
        # Create operation chain for the given name
        operation_chain = create_operation_chain(options.operation_chain)
        # Store current source code for later inspection
        create_source_archive(archive_path=operation_chain.get_output_directory())
        if not options.profile:
            # Execute the current operation_chain
            run_operation_chain(default_backend, operation_chain)
        else:
            # Execute and profile operation
            cProfile.runctx('pySPACE.run_operation_chain(default_backend, operation_chain)',
                            globals(), locals(),
                            filename=operation_chain.get_output_directory()
                            + os.sep + "profile.pstat")
    else:
        parser.error("Neither operation chain nor operation specification file given!")

    logging.shutdown()
    # Stop logger thread in backend
    default_backend._stop_logging()

    del default_backend
if __name__ == "__main__":
    # Soft finish: hand the return value of main() to sys.exit
    exit_status = main()
    sys.exit(exit_status)
    # Hard finish
    # NOTE(review): sys.exit() raises SystemExit, so the following call is
    # never reached - confirm whether a forced exit is still wanted.
    os._exit(0)