# Source code for pySPACE.missions.nodes.source.feature_vector_source

""" Source for :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector` elements e.g. from arff or csv files

.. note::    Nearly a total copy of the :mod:`~pySPACE.missions.nodes.source.time_series_source`.
            The important part of the code can be found in the corresponding
            metadata.yaml .
"""

from pySPACE.missions.nodes.base_node import BaseNode
from pySPACE.tools.memoize_generator import MemoizeGenerator


class FeatureVectorSourceNode(BaseNode):
    """ Source for samples of type :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector`

    This node reads
    :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector`
    elements accumulated in a
    :mod:`~pySPACE.resources.dataset_defs.feature_vector` and passes them
    into the :mod:`~pySPACE.environments.chains.node_chain`.
    As described in :mod:`~pySPACE.resources.dataset_defs.feature_vector`
    it is important that the storage format is correctly specified in the
    metadata.yaml. If the dataset has been constructed by pySPACE, this is
    done automatically.

    **Parameters**

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : Feature_Vector_Source

    :Author: Jan Hendrik Metzen (jhm@informatik.uni-bremen.de)
    :Created: 2008/11/25
    """
    input_types = ["FeatureVector"]

    def __init__(self, **kwargs):
        """ Forward all keyword arguments unchanged to the base class """
        super(FeatureVectorSourceNode, self).__init__(**kwargs)

    def set_input_dataset(self, dataset):
        """ Sets the dataset from which this node reads the data """
        self.set_permanent_attributes(dataset=dataset)

    def register_input_node(self, node):
        """ Register the given node as input

        Always raises an ``Exception``, since a source node is the start of
        a chain and cannot have input nodes.
        """
        raise Exception("No nodes can be registered as inputs for source nodes")

    def use_next_split(self):
        """ Use the next split of the data into training and test data.

        Returns True if more splits are available, otherwise False.
        This node never offers a further split, so the result is always False.

        This method is useful for benchmarking
        """
        return False

    def train_sweep(self, use_test_data):
        """ Performs the actual training of the node.

        .. note:: Source nodes cannot be trained, so this always raises
                  an ``Exception``.
        """
        raise Exception("Source nodes cannot be trained")

    def _get_data_key(self, train_test):
        """ Return the ``(run, split, train_test)`` key into the dataset

        If the input dataset consists only of one single run, we use this
        run (index 0) as input for all runs to be conducted (i.e. we rely
        on later randomization of the order). Otherwise we use the data
        for the current run number.

        *train_test* is the string ``"train"`` or ``"test"``.

        ``run_number`` and ``current_split`` are not set in this class;
        presumably they are provided by the base class -- to be confirmed.
        """
        run = self.run_number if self.dataset.meta_data["runs"] > 1 else 0
        return (run, self.current_split, train_test)

    def request_data_for_training(self, use_test_data):
        """ Returns the feature vectors that can be used for training of subsequent nodes

        If *use_test_data* is true, the test data is returned instead, as
        there is no additional data that was dedicated for training.
        Otherwise a fresh generator over the training data of the current
        run and split is returned; it is immediately exhausted when the
        dataset holds no training data for that key.
        """
        if use_test_data:
            # Return the test data as there is no additional data that
            # was dedicated for training
            return self.request_data_for_testing()

        key = self._get_data_key("train")
        # Check if there is training data for the current split and run
        if key in self.dataset.data:
            self._log("Accessing input dataset's training feature vector windows.")
            training_data = iter(self.dataset.get_data(*key))
        else:
            # Use an iterator over an empty sequence (i.e. an iterator that
            # is immediately exhausted), since this node does not provide
            # any data that is explicitly dedicated for training
            self._log("No training data available.")
            training_data = iter([])

        self.data_for_training = MemoizeGenerator(training_data,
                                                  caching=self.caching)
        # Return a fresh copy of the generator
        return self.data_for_training.fresh()

    def request_data_for_testing(self):
        """ Returns the data that can be used for testing of subsequent nodes

        The test data of the current run and split is wrapped into a
        :class:`MemoizeGenerator` on first access only; every call returns
        a fresh copy of that generator.
        """
        # If we haven't read the data for testing yet
        if self.data_for_testing is None:
            self._log("Accessing input dataset's test feature vector windows.")
            key = self._get_data_key("test")
            test_data_generator = iter(self.dataset.get_data(*key))
            self.data_for_testing = MemoizeGenerator(test_data_generator,
                                                     caching=self.caching)

        # Return a fresh copy of the generator
        return self.data_for_testing.fresh()

    def get_metadata(self, key):
        """ Return the value corresponding to the given key from the dataset meta data of this source node.

        The lookup uses ``meta_data.get``, so no exception is raised here
        for a missing *key*.
        """
        return self.dataset.meta_data.get(key)

# Maps node specification names to the implementing class; both names refer
# to the same node class.
_NODE_MAPPING = {
    "Feature_Vector_Source": FeatureVectorSourceNode,
    "Labeled_Feature_Vector_Source": FeatureVectorSourceNode,
}