# Source code for pySPACE.missions.nodes.type_manipulation.type_conversion

""" Convert feature to prediction vectors and TimeSeries and vice versa

**Known issues**
    No unit tests!
"""
import logging
import numpy

from pySPACE.missions.nodes.base_node import BaseNode
from pySPACE.resources.data_types.prediction_vector import PredictionVector
from pySPACE.resources.data_types.feature_vector import FeatureVector

from pySPACE.resources.data_types.time_series import TimeSeries


class Prediction2FeaturesNode(BaseNode):
    """ Turn prediction values into features

    This node converts a
    :class:`~pySPACE.resources.data_types.prediction_vector.PredictionVector`
    into a
    :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector`.
    It is required whenever classification predictions shall be fed into
    a node that expects feature vectors (e.g. gating functions).

    **Parameters**

        :name:
            String. A prefix of the new feature.

            (*optional, default: ''*)

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : Prediction2Features
            parameters :
                name : "SVM_"

    :Author: Mario Krell (Mario.Krell@dfki.de)
    :Created: 2010/08/06
    """
    input_types = ["PredictionVector"]

    def __init__(self, name='', **kwargs):
        """ Store the feature name prefix as permanent attribute """
        super(Prediction2FeaturesNode, self).__init__(**kwargs)

        self.set_permanent_attributes(name=name,
                                      feature_names=[],
                                      label=None)

    def _execute(self, data):
        """ Build a feature vector from the prediction value(s) of *data*

        A prediction of type ``list`` yields one feature per entry, named
        ``<name>prediction_<index>``. Any other prediction is treated as a
        single value and yields one feature named ``<name>prediction``.

        .. todo:: Give the possibility to give the new feature names to
                  the transformation manually.
                  Especially useful for ensemble approaches.
        """
        assert type(data) == PredictionVector, (
            "Prediction2FeaturesNode requires PredictionVector inputs "
            "not %s" % type(data))

        prediction = data.prediction
        if type(prediction) == list:
            # one feature per list entry, numbered in list order
            names = [self.name + "prediction_" + str(position)
                     for position, _ in enumerate(prediction)]
            return FeatureVector(numpy.array([prediction]), names)

        # single prediction value -> feature vector with exactly one entry
        single_name = self.name + "prediction"
        return FeatureVector(numpy.array([[prediction]]), [single_name])

    def get_output_type(self, input_type, as_string=True):
        """ Report "FeatureVector" as output type (as name or as class) """
        if not as_string:
            return self.string_to_class("FeatureVector")
        return "FeatureVector"
class Features2PredictionNode(BaseNode):
    """ Use the feature vectors as prediction values

    This node converts the type
    :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector`
    to the type
    :class:`~pySPACE.resources.data_types.prediction_vector.PredictionVector`.
    The feature values are used as individual predictions and the labels
    are created based on the passed parameter *class_labels*.

    **Parameters**

        :class_labels:
            List of length two of class labels.
            If a feature's value is larger than 0, the second class label
            is used as the prediction vector's label, otherwise the first.

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : Features2Prediction
            parameters :
                class_labels : ['Standard', 'Target']

    :Author: Jan Hendrik Metzen (jhm@informatik.uni-bremen.de)
    :Created: 2010/089/24
    """
    input_types = ["FeatureVector"]

    def __init__(self, class_labels, **kwargs):
        """ Store the two class labels as permanent attribute """
        super(Features2PredictionNode, self).__init__(**kwargs)

        self.set_permanent_attributes(class_labels=class_labels)

    def _execute(self, data):
        """ Map every feature value to a prediction and a class label

        Returns a PredictionVector whose *prediction* is the list of feature
        values of the first row and whose *label* is the list of the
        corresponding class labels.
        """
        assert (type(data) == FeatureVector), \
            "Features2PredictionNode requires FeatureVector inputs " \
            "not %s" % type(data)

        values = data.view(numpy.ndarray)[0, :]
        # Build a real list: ``map`` would only return a lazy iterator on
        # Python 3, whereas a list of labels (one per prediction) is wanted.
        # The comparison is kept as ``<= 0`` so that NaN values still map to
        # the second label, exactly as before.
        labels = [self.class_labels[0] if value <= 0 else self.class_labels[1]
                  for value in values]
        return PredictionVector(label=labels, prediction=list(values))

    def get_output_type(self, input_type, as_string=True):
        """ Report "PredictionVector" as output type (as name or as class) """
        if as_string:
            return "PredictionVector"
        else:
            return self.string_to_class("PredictionVector")
def uniquify_list(seq):
    """ Drop duplicates from *seq*, keeping the first occurrence of each item

    The relative order of the remaining items is the order of *seq*.
    Items have to be hashable, since a set tracks what was already seen.
    """
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
class FeatureVector2TimeSeriesNode(BaseNode):
    """ Convert feature vector to time series

    This node converts the type
    :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector`
    to :class:`~pySPACE.resources.data_types.time_series.TimeSeries`.
    The feature values are extracted and put into their respective place
    of sensor name and time.
    The *sampling_frequency* is also calculated.

    .. todo:: In the case of not using *reshape*, the code needs some tuning.
              An index mapping needs to be determined and
              for new samples only to be applied.

    **Parameters**

        :reshape:
            Assuming, that the data is in a simple structure
            (the feature at position ``t * number_of_sensors + s`` belongs
            to the t-th time point and the s-th sensor),
            a simple reshape is required and no complex iteration over all
            entries. This speeds up the transformation and is turned on
            by this parameter.
            If you are unsure, just leave the parameter as it is.
            With the first incoming sample, the structure will be checked
            and if possible the parameter changed.
            If the structure of your data changes, you should reset this
            node.

            (*optional, default: False*)

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : FeatureVector2TimeSeries

    :Author: Mario Michael Krell
    :Created: 2011/09/23
    :Refactored: 2013/04/24
    """
    input_types = ["FeatureVector"]

    def __init__(self, reshape=False, **kwargs):
        """ Initialize the cached name/time information as unknown """
        super(FeatureVector2TimeSeriesNode, self).__init__(**kwargs)

        self.set_permanent_attributes(sensor_names=None,
                                      times=None,
                                      feat_sensor_names=None,
                                      feat_times=None,
                                      reshape=reshape,
                                      frequency=None,
                                      shape_test=False)  # test if reshape can be active

    def _reshape_is_applicable(self):
        """ Check whether the features are laid out like a (time, sensor) grid

        Returns True only if there is exactly one feature per
        (time, sensor) pair and the feature at flat position
        ``i * number_of_sensors + j`` belongs to ``times[i]`` and
        ``sensor_names[j]``. This is the row-major order used by
        ``reshape(len(times), len(sensor_names))``.
        """
        n_sensors = len(self.sensor_names)
        # Missing or duplicated (time, sensor) pairs can never be reshaped
        # and would otherwise lead to out-of-range indices below.
        if len(self.feat_times) != len(self.times) * n_sensors:
            return False
        for i, time in enumerate(self.times):
            for j, sensor in enumerate(self.sensor_names):
                # row stride is the number of sensors (columns),
                # not the number of time points
                index = i * n_sensors + j
                if (self.feat_times[index] != time or
                        self.feat_sensor_names[index] != sensor):
                    return False
        return True

    def _execute(self, data):
        """ Extract feature values and match them to sensor name and time

        The sensor names, time points and the sampling frequency are parsed
        from the feature names of the first sample and cached afterwards.
        Returns a TimeSeries with one row per time point and one column
        per sensor.
        """
        assert (type(data) == FeatureVector), \
            "FeatureVector2TimeSeries requires FeatureVector inputs " \
            "not %s" % type(data)

        # sensor name is what comes after the first underscore
        if self.sensor_names is None:
            self.feat_sensor_names = [name.split('_')[1]
                                      for name in data.feature_names]
            self.sensor_names = uniquify_list(self.feat_sensor_names)
        # time is what comes after the second underscore; the last three
        # characters are cut off (presumably a unit suffix like "sec" --
        # confirm against the feature generators)
        if self.times is None:
            self.feat_times = [float((name.split('_')[2])[:-3])
                               for name in data.feature_names]
            self.times = sorted(set(self.feat_times))
        if self.frequency is None:
            try:
                # sampling frequency from the distance of the first two times
                self.frequency = 1.0 / (self.times[1] - self.times[0])
            except IndexError:
                self.frequency = 1.0
                self._log("Unable to determine sampling frequency! Setting to 1.",
                          level=logging.ERROR)

        # check once, if the structure of the feature names fits to the
        # reshape approach
        if not self.reshape and not self.shape_test:
            self.reshape = self._reshape_is_applicable()
            self.shape_test = True
            if not self.reshape:
                self._log("Reshaping deactivated.", level=logging.INFO)

        data_array = data.view(numpy.ndarray)
        n_times = len(self.times)
        n_sensors = len(self.sensor_names)
        if not self.reshape:
            # Start with zeros, so (time, sensor) pairs without a feature
            # stay zero, and put every feature value at its place.
            matrix = numpy.zeros((n_times, n_sensors))
            time_index = dict((time, i) for i, time in enumerate(self.times))
            sensor_index = dict((sensor, j)
                                for j, sensor in enumerate(self.sensor_names))
            for i in range(len(data.feature_names)):
                row = time_index[self.feat_times[i]]
                col = sensor_index[self.feat_sensor_names[i]]
                matrix[row][col] = data_array[0][i]
        else:
            matrix = data_array.reshape(n_times, n_sensors)

        # generate new time series object from the rearranged values
        new_data = TimeSeries(matrix,
                              channel_names=self.sensor_names,
                              sampling_frequency=self.frequency)
        return new_data

    def get_output_type(self, input_type, as_string=True):
        """ Report "TimeSeries" as output type (as name or as class) """
        if as_string:
            return "TimeSeries"
        else:
            return self.string_to_class("TimeSeries")
class Feature2MonoTimeSeriesNode(BaseNode):
    """ Convert feature vector to time series with only one time stamp

    This node converts the type *FeatureVector* to *TimeSeries*.
    The features are not mapped to a sensor/time position of a real time
    series. Instead, every feature is identified with a channel.

    The purpose of this node is to enable the user
    to use time series nodes on feature vectors,
    especially on feature vectors without any time structure.

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : Feature2MonoTimeSeries

    :Author: Mario Krell (mario.krell@dfki.de)
    :Created: 2012/08/31
    """
    input_types = ["FeatureVector"]

    def _execute(self, data):
        """ Use the feature names as channel names of a new time series """
        assert type(data) == FeatureVector, (
            "Feature2MonoTimeSeries requires FeatureVector inputs "
            "not %s" % type(data))
        # the sampling frequency is fixed to 1.0
        return TimeSeries(numpy.atleast_2d(data.view(numpy.ndarray)),
                          channel_names=data.feature_names,
                          sampling_frequency=1.0)

    def is_invertable(self):
        """ The node can be inverted, since only names are mapped """
        return True

    def _invert(self, data):
        """ Map a one-row time series back to a feature vector

        This function is needed for the inverse node. The channel names
        become the feature names.
        """
        assert type(data) == TimeSeries, (
            "Feature2MonoTimeSeries inversion requires TimeSeries inputs "
            "not %s" % type(data))
        n_rows = data.shape[0]
        assert n_rows == 1, "Wrong array shape: %s." % n_rows
        return FeatureVector(data.view(numpy.ndarray),
                             feature_names=data.channel_names)

    def get_output_type(self, input_type, as_string=True):
        """ Report "TimeSeries" as output type (as name or as class) """
        if not as_string:
            return self.string_to_class("TimeSeries")
        return "TimeSeries"
class MonoTimeSeries2FeatureNode(Feature2MonoTimeSeriesNode):
    """ Convert time series with only one time stamp to feature vector

    This node converts the type *TimeSeries* to *FeatureVector*.
    Each channel is mapped to one feature.

    The purpose of this node is to enable the user
    to use time series nodes on feature vectors,
    especially on feature vectors without any time structure.
    Therefore this node is the back transformation of the
    :class:`pySPACE.missions.nodes.type_manipulation.type_conversion.Feature2MonoTimeSeriesNode`

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : MonoTimeSeries2Feature

    :Author: Mario Krell (mario.krell@dfki.de)
    :Created: 2012/08/31
    """
    input_types = ["TimeSeries"]

    def _execute(self, data):
        """ Use the channel names as feature names

        The forward direction of this node is the inversion of the
        parent class.
        """
        feature_vector = super(MonoTimeSeries2FeatureNode, self)._invert(data)
        return feature_vector

    def _invert(self, data):
        """ Inversion, only provided for completeness

        The inversion of this node is the forward direction of the
        parent class.
        """
        time_series = super(MonoTimeSeries2FeatureNode, self)._execute(data)
        return time_series

    def get_output_type(self, input_type, as_string=True):
        """ Report "FeatureVector" as output type (as name or as class) """
        if not as_string:
            return self.string_to_class("FeatureVector")
        return "FeatureVector"
class CastDatatypeNode(BaseNode):
    """ Change the datatype of the data

    **Parameters**

        :datatype:
            Type to cast to, handed over to ``astype`` of the data.

            (*optional, default: numpy.int16*)

        :selected_channels:
            Accepted by the constructor, but currently not used.

            (*optional, default: None*)

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : CastDatatype

    :Authors: Hendrik Woehrle (hendrik.woehrle@dfki.de)
    :Created: 2012/03/29
    """
    input_types = ["TimeSeries"]

    def __init__(self, datatype=numpy.int16, selected_channels=None,
                 **kwargs):
        """ Store the target datatype as permanent attribute """
        super(CastDatatypeNode, self).__init__(**kwargs)

        # NOTE: selected_channels is not stored; the cast is always applied
        # to the complete data.
        self.set_permanent_attributes(datatype=datatype)

    def _execute(self, data):
        """ Apply the cast to the complete time series """
        self._log("Cast data")
        # replace_data is given the original object and the casted values
        return TimeSeries.replace_data(data, data.astype(self.datatype))

    def get_output_type(self, input_type, as_string=True):
        """ Report "TimeSeries" as output type (as name or as class) """
        if not as_string:
            return self.string_to_class("TimeSeries")
        return "TimeSeries"
# Explicit node names for the specification files. Two names
# ("LabeledFeature2TimeSeries" and "Feature2TimeSeries") refer to the
# same class.
_NODE_MAPPING = {
    "Prediction2Features": Prediction2FeaturesNode,
    "Features2Prediction": Features2PredictionNode,
    "LabeledFeature2TimeSeries": FeatureVector2TimeSeriesNode,
    "Feature2TimeSeries": FeatureVector2TimeSeriesNode,
    "CastDatatype": CastDatatypeNode,
}