Operation Examples

These are examples of YAML files that you can use as templates for your own operations. For details on operations, have a look at the respective documentation.

bench.yaml

# concatenation of algorithms
type: node_chain
# path relative to storage
input_path: "example_summary"
runs: 3 # number of repetitions to catch randomness
parameter_ranges:
    __C__: [0.05, 1.25]
    __Normalization__: [GaussianFeatureNormalization,
                        EuclideanFeatureNormalization]
node_chain: # the algorithms
    -   # load the data
        node: FeatureVectorSourceNode
    -   # random splitting: 40% training, 60% testing
        node: TrainTestSplitter
        parameters:
            train_ratio: 0.4
            # canonical lowercase boolean (parses identically, portable across YAML versions)
            random: true
    -   # normalize each feature
        node: ${__Normalization__}
    -   # standard svm classifier (affine version)
        node: SorSvmNode
        parameters:
            complexity: ${__C__}
            kernel_type: "LINEAR"
            class_labels: ["Standard", "Target"]
            max_iterations: 10
    -   # gather results and calculate performance
        node: PerformanceSinkNode
        parameters:
            ir_class: "Target"

classification.yaml

# concatenation of algorithms
type: node_chain
# path relative to storage
input_path: "example_summary"
runs: 2 # number of repetitions to catch randomness
node_chain: # the algorithms
    -   # load the data
        node: FeatureVectorSourceNode
    -   # random splitting into 60% training, 40% testing
        node: TrainTestSplitter
        parameters:
            train_ratio: 0.6
            # canonical lowercase boolean (parses identically, portable across YAML versions)
            random: true
    -   # normalize each feature
        node: GaussianFeatureNormalizationNode
    -   # a standard svm classifier (affine version)
        node: SorSvmNode
        parameters:
            complexity: 1
            kernel_type: "LINEAR"
            class_labels: ["Standard", "Target"]
            max_iterations: 10
    -   # gather results and calculate performance
        node: PerformanceSinkNode
        parameters:
            ir_class: "Target"

mmlf_analysis.yaml

type: analysis

input_path: "operation_results/20100120_09_49_27"

# We are interested in the effect of the three variables "gamma", "epsilon", and "lambda"
parameters: ["gamma", "epsilon", "lambda"]

# We are interested in the two simple metrics "accumulated_reward" and "offline_accumulated_reward"
# and in the sequence of rewards obtained during learning and testing.
metrics:
    - ["reward", "sequence", {'mwa_window_length': 25}]
    - "accumulated_reward"
    - ["offline_reward", "sequence", {'mwa_window_length': 25}]
    - "offline_accumulated_reward"

mmlf_operation.yaml

# This operation allows to conduct empirical evaluation in Reinforcement Learning
# scenarios. It is based on the MMLF software. MMLF is written in Python and can be
# obtained at http://mmlf.sourceforge.net/. Under this URL, documentation for the MMLF
# is also available that might be useful for understanding this operation.
#
# Further information about MMLF operations can be found in the corresponding tutorial
# "Empirical Evaluations in Reinforcement Learning with pySPACE and MMLF"
# :ref:`docs.tutorials.tutorial_interface_to_mmlf`
type: mmlf

# The path under which the MMLF package can be found
mmlf_path : "/home/user/python-packages/mmlf"

# Determines how many independent runs will be conducted for each parameter setting
runs : 1

# Determines how many episodes the agent can learn
learning_episodes : 500

# Determines how many episodes the policy learned by an agent is evaluated.
# This can be set to 1 for deterministic environments, but should be set to
# larger values for stochastic environments.
test_episodes : 100

# The name of the MMLF world that will be used.
# Available worlds are among others "mountain_car", "single_pole_balancing",
# "double_pole_balancing", and "maze2d".
world_name : "mountain_car"

# The template for the MMLF environment XML configuration.
# For more details, please take a look at the MMLF documentation.
# The __XX__ entries are placeholders of the template which are
# instantiated by all values given in generalized_domain
# to yield concrete environments.
# NOTE: the XML below is a multi-line plain YAML scalar; keep its
# indentation deeper than the key and avoid ": " or " #" inside it.
environment_template : 
   <environment environmentmodulename="mcar_env">
     <configDict maxStepsPerEpisode = "500"
                 accelerationFactor = "__acceleration_factor__"
                 maxGoalVelocity = "__max_goal_velocity__"
                 positionNoise = "0.0"
                 velocityNoise = "0.0"
     />
   </environment>

# In order to avoid method overfit (please take a look at the paper by
# Whiteson et al. "Generalized Domains for Empirical Evaluations in
# Reinforcement Learning" for more details), an agent should be tested
# not only in one specific instantiation of an environment, but in several
# slightly different versions of an environment. These differences
# can be obtained by varying certain parameters of the environment.
# This example will test each agent in four slightly different versions of
# the mountain car domain.
generalized_domain:
   - {"__acceleration_factor__": 0.001, "__max_goal_velocity__": 0.07}
   - {"__acceleration_factor__": 0.0075, "__max_goal_velocity__": 0.07}
   - {"__acceleration_factor__": 0.001, "__max_goal_velocity__": 0.02}
   - {"__acceleration_factor__": 0.0075, "__max_goal_velocity__": 0.02}

# The template for the MMLF agent XML configuration.
# For more details, please take a look at the MMLF documentation.
# The __XX__ entries are placeholders of the template which are
# instantiated by all values given in parameter_ranges,
# parameter_settings etc. (see below) to yield concrete agents.
agent_template : 
   <agent agentmodulename="td_lambda_agent">
    <configDict gamma = "__gamma__"
                 epsilon = "__epsilon__"
                 lambda = "__lambda__"
                 minTraceValue = "0.5"
                 defaultStateDimDiscretizations = "7"
                 defaultActionDimDiscretizations = "5"
                 update_rule = "'SARSA'"
                 function_approximator =  "dict(name = 'CMAC', learning_rate = 0.5, update_rule = 'exaggerator', number_of_tilings = 10, defaultQ = 0.0)"
                 policyLogFrequency = "250"
     />
   </agent>

# "parameter_ranges" is used to determine the values of each parameter
# that are tested. If there is more than one parameter, then each
# possible combination (i.e. the crossproduct) is tested.
# Please be aware of the potential combinatorial explosion.
# The given example would test 3*2*3=18 different agent
# configurations!
# Alternatively, one could also specify concrete parameter combinations
# that should be tested by using "parameter_settings" instead of
# "parameter_ranges". Please look at the weka_classification_operation.yaml
# example for more details on that.
parameter_ranges: 
    __gamma__: [0.9, 0.99, 1.0]
    __epsilon__ : [0.0, 0.1]
    __lambda__: [0.0, 0.5, 0.9]

node_chain.yaml

# An example of a *node_chain* specification file.
# The specified input is the value of the entry
# with the key "input_path", the templates the value of "templates".
# This template is parametrized with two parameters called
# "__LOWER_CUTOFF__" and "__UPPER_CUTOFF__". Optionally, some "constraints"
# on the allowed parameter combinations can be defined. For instance,
# the constraint "__LOWER_CUTOFF__ < __UPPER_CUTOFF__" prevents that
# the combination where both __LOWER_CUTOFF__ and __UPPER_CUTOFF__ are 2.0
# is tested. For each combination of the given values for these two parameters
# that fulfills all constraints and the datasets of the
# input summary, one result dataset is created. This result dataset
# consists of the results of 10 independent runs with the
# instantiated template performed on the respective input dataset.
# Each such run is an independent process.

# The optional parameter "backend" allows to overwrite the backend specification provided
# via the command-line. This is useful if the operation is part of a chain
# and different operations of the chain should
# not be executed on the same modality.

type: node_chain

input_path: "example_data"
templates: ["example_flow.yaml"]
backend: "local"
parameter_ranges:
    __LOWER_CUTOFF__: [0.1, 1.0, 2.0]
    __UPPER_CUTOFF__: [2.0, 4.0]
constraints:
    - "__LOWER_CUTOFF__ < __UPPER_CUTOFF__"

runs: 10

weka_analysis.yaml

type: analysis

input_path: "tutorial_data"

# Here the parameters of interest are specified; e.g. "__Dataset__",
# "__NUM_RETAINED_FEATURES__", "__SELF_DEFINED_PARAMETER__", "Kernel_Type",
# "Complexity", "Kernel_Exponent", "Kernel_Gamma", "Kernel_Offset",
# "Kernel_Weight", "Key_Scheme_options", "Key_Scheme".
# The parameter names have to be those from the results.csv file.
parameters: ["__Complexity__", "__Template__"]

# Here the metrics on which the evaluation is based are specified, e.g.
# "False_positive_rate", "False_negative_rate", "Percent_correct", "F_measure",
# "Area_under_ROC", "IR_recall", "IR_precision".
metrics: ["F_measure", "False_positive_rate", "False_negative_rate"]

weka_classification_operation.yaml

# An example of a WekaClassification-Operation specification file.
# The specified input is the value of the entry with the key
# "input_path", the weka template is "classification". The available
# templates are stored in specs/operations/weka_templates/.

type: weka_classification
input_path: "tutorial_data"
template: classification

# The specific classifiers to be used within the operation can be specified
# using the keyword "parameter_settings". The example below would compare four
# different parametrizations of a linear svm (complexity either 0.1 or 1.0 and
# weight for class 2 either 1.0 or 2.0). Note that 'libsvm_lin' is an
# abbreviation which must be defined in abbreviations.yaml.
# parameter_settings:
#     -
#         classifier: 'libsvm_lin'
#         # "ir_class_index": index of the class performance metrics are
#         # calculated for; index begins with 1
#         ir_class_index: 1
#         complexity: 0.1
#         w0: 1.0
#         w1: 1.0
#     -
#         classifier: 'libsvm_lin'
#         ir_class_index: 1
#         complexity: 1.0
#         w0: 1.0
#         w1: 1.0
#     -
#         classifier: 'libsvm_lin'
#         ir_class_index: 1
#         complexity: 0.1
#         w0: 1.0
#         w1: 2.0
#     -
#         classifier: 'libsvm_lin'
#         ir_class_index: 1
#         complexity: 1.0
#         w0: 1.0
#         w1: 2.0

# Alternatively to specific parameter settings one could also specify ranges for
# each parameter. This is indicated by the usage of "parameter_ranges" instead of
# "parameter_settings". *parameter_ranges* are automatically converted into
# *parameter_settings* by creating the crossproduct of all parameter ranges. The
# parameter_ranges in the comment below
# result in the same parameter_settings as the ones given above.
#
# parameter_ranges:
#     complexity: [0.1, 1.0]
#     w0: [1.0]
#     w1: [1.0, 2.0]
#     ir_class_index: [1]
#     classifier: ['libsvm_lin']
parameter_ranges:
    complexity: [0.1, 0.5, 1.0, 5.0, 10.0]
    w0: [1.0]
    w1: [1.0]
    ir_class_index: [1]
    classifier: ['libsvm_lin']

weka_feature_selection.yaml

# An example of a WekaFilter-Operation specification file. The specified
# input is the value of the entry with the key "input_path". The weka
# template is one of the available templates stored in specs/operations/weka_templates/,
# here for example "feature_selection".

type: weka_filter

input_path: "tutorial_data"

template: feature_selection

# Parameters to specify the feature selection can be set here.
# "feature_selector" specifies the evaluation method and "ranker"
# the search method of the feature selection. "num_retained_features" is the
# number of attributes that will be kept after the filtering and thus
# determines the reduction of attributes.
# Note that 'chi_squared' or 'ranker' are abbreviations which must be defined
# in abbreviations.yaml and correspond to specific weka classes.
parameter_ranges:
    # e.g. chi_squared, info_gain, gain_ratio, relief, CFS
    feature_selector: ['chi_squared', 'info_gain', 'gain_ratio', 'relief']
    ranker: ['ranker'] # e.g. ranker, greedy_stepwise, best_first
    num_retained_features: [10] # -1 means all features are retained

# "hide_parameters" means that these parameters will not be mapped to
# the directory names in the result structure {parameter_name/parameter-value}.
# This is sensible, e.g., when only one parameter value is used.
hide_parameters: ['ranker', 'num_retained_features']

windower.yaml

type: node_chain
templates: ["examples/example_offline_windower.yaml"]

input_path: "eeg/Brio_Oddball"

runs: 1