Source code for pySPACE.missions.nodes.sink.feature_vector_sink
""" Collect feature vectors """
import copy
from pySPACE.missions.nodes.base_node import BaseNode
from pySPACE.resources.dataset_defs.feature_vector import FeatureVectorDataset
class FeatureVectorSinkNode(BaseNode):
""" Collect all :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector` elements
that are passed through it in a collection of type :mod:`~pySPACE.resources.dataset_defs.feature_vector`.
    **Parameters**

        :classes_names:
            Names of the classes, passed on to the created
            :class:`~pySPACE.resources.dataset_defs.feature_vector.FeatureVectorDataset`.

            (*optional, default: []*)

        :num_features:
            Number of features; if not specified, it is determined
            from the first incoming feature vector.

            (*optional, default: None*)

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : FeatureVectorSink

    :input:  FeatureVector
    :output: FeatureVectorDataset
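
    For illustration, a minimal Python sketch of driving the sink directly
    (normally the node is instantiated from a YAML specification as above;
    ``some_feature_source`` is a hypothetical placeholder for any upstream
    node yielding
    :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector`
    objects):

    .. code-block:: python

        sink = FeatureVectorSinkNode()
        sink.input_node = some_feature_source  # hypothetical upstream node
        sink.process_current_split()           # collect train/test samples
        dataset = sink.get_result_dataset()    # -> FeatureVectorDataset
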
:Author: Jan Hendrik Metzen (jhm@informatik.uni-bremen.de)
:Created: 2008/09/02
"""
input_types = ["FeatureVector"]
    def __init__(self, classes_names=None, num_features=None, **kwargs):
        super(FeatureVectorSinkNode, self).__init__(**kwargs)
        self.set_permanent_attributes(
            # use None instead of a mutable default argument
            classes_names=classes_names if classes_names is not None else [],
            num_features=num_features,
            feature_vector_collection=None,  # created lazily on first sample
        )
    def reset(self):
"""
Reset the state of the object to the clean state it had after its
initialization
"""
# We have to create a temporary reference since we remove
# the self.permanent_state reference in the next step by overwriting
# self.__dict__
tmp = self.permanent_state
        # Keep the collected dataset in the permanent state so that it
        # survives resets and accumulates samples across splits and runs
        tmp["feature_vector_collection"] = self.feature_vector_collection
self.__dict__ = copy.copy(tmp)
self.permanent_state = tmp
    def is_trainable(self):
        """ Return whether this node is trainable """
        # Although this node cannot actually be trained, it returns True
        # so that it gets passed the training data. The samples are
        # collected in process_current_split() and stored in the result
        # dataset, from which they can later be exported (e.g. to an ARFF
        # file for use in Weka).
        return True
    def is_supervised(self):
""" Returns whether this node requires supervised training """
return True
    def _train(self, data, label):
        """ Do nothing; samples are collected in process_current_split """
        pass
    def _create_result_sets(self, num_features, feature_names=None):
        """
        Set the object members that could not be set during __init__
        since they depend on the dimensionality of the data
        (i.e. the number of features)
        """
        # Create the labeled sample set lazily
        self.num_features = num_features
        if feature_names is None:
            # fall back to generic names, e.g. ["f0", "f1", ...]
            feature_names = ["f%s" % i for i in range(self.num_features)]
        self.feature_vector_collection = \
            FeatureVectorDataset(classes_names=self.classes_names,
                                 feature_names=feature_names,
                                 num_features=self.num_features)
    def process_current_split(self):
"""
Compute the results of this sink node for the current split of the data
into train and test data
"""
        # Collect the feature vectors of the data used for training
        for feature_vector, label in \
                self.input_node.request_data_for_training(False):
            # Lazily initialize the result collection based on the first
            # incoming feature vector
            if self.feature_vector_collection is None:
                feature_names = getattr(feature_vector, "feature_names", None)
                self._create_result_sets(feature_vector.size, feature_names)
            # Add sample
            self.feature_vector_collection.add_sample(feature_vector,
                                                      label=label,
                                                      train=True,
                                                      split=self.current_split,
                                                      run=self.run_number)
        # Collect the feature vectors of the data used for testing
        for feature_vector, label in self.input_node.request_data_for_testing():
            # Lazily initialize the result collection
            # (maybe there were no training examples)
            if self.feature_vector_collection is None:
                feature_names = getattr(feature_vector, "feature_names", None)
                self._create_result_sets(feature_vector.size, feature_names)
            # Add sample
            self.feature_vector_collection.add_sample(feature_vector,
                                                      label=label,
                                                      train=False,
                                                      split=self.current_split,
                                                      run=self.run_number)
    def get_result_dataset(self):
        """ Return the collected feature vector dataset """
        return self.feature_vector_collection
_NODE_MAPPING = {"Labeled_Feature_Vector_Sink": FeatureVectorSinkNode,
"Feature_Vector_Sink": FeatureVectorSinkNode}