# Source code for pySPACE.resources.data_types.prediction_vector

""" 1d array of prediction values with properties (labels, reference to the predictor)

"""

import numpy
from pySPACE.resources.data_types import base


class PredictionVector(base.BaseData):
    """ Represents a prediction vector

    It contains a label, a prediction and a reference to the predictor.
    It doesn't matter if it uses one or multiple predictions.
    The object might be even used for regression, where no label is needed.
    In contrast to :class:`~pySPACE.resources.data_types.time_series.TimeSeries`
    or :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector`
    objects, prediction vectors are currently generated in a node chain
    with classifiers for example and not loaded.
    For evaluation the
    :class:`~pySPACE.missions.nodes.sink.classification_performance_sink.PerformanceSinkNode`
    can be used to evaluate the predictions.

    For multiple predictions, nodes from the
    :mod:`~pySPACE.missions.nodes.classification.ensemble` module can be used.

    For creating a prediction vector, there are four

    **Parameters**

        :input_array:
            The prediction vector is (for historical reasons) a 2d numpy array
            with some additional (more important) parameters.
            The content of the input_array should be/is the same
            as used in the *prediction* parameter.
            If you do not specify this parameter, it is generated from
            the *prediction* and vice versa.
            Any object, which can be converted to a 2d-numpy array can be
            used to specify this parameter.

        :label:
            The label normally gives a semantic meaning to the prediction
            value and is a string, e.g., "ill" or "Target".
            For regression this parameter can be ignored and is set to None.
            For multiple predictions, it is a list.

        :prediction:
            For regression, this is the regression value and for binary
            classification it is the prediction value.
            For SVMs it can be any real value and for algorithms
            with probabilistic output it should be the probability
            of the respective data belonging to the second and not the
            first class or vice versa.
            For multiple predictions this is not a single number,
            but a list of floats.
            The prediction value is used to generate the *input_array*
            parameter or vice versa.

        :predictor:
            For accessing special parameters of the decision algorithm,
            this parameter is used (default: None).
            It is typically a pointer to the Node, which created the vector.
            For multiple predictions, a list might be used, which might
            be replaced during the processing by an ensemble classifier.
            One main usage is when reading out additional metrics in the
            evaluation process like convergence behaviour or weights of a
            linear classifier.

    The last 3 parameters are directly mapped to object variables
    with the same name.
    Currently, the object is by default like an array, with access to the
    different other parameters.
    For future developments, only these parameters should be used.

    .. todo:: Implement a method _generate_tag for BaseData type (if desired)

    .. todo:: Eliminate 2d-array behaviour incl. modifications in some nodes

    :Author: Mario Micheal Krell
    :Created: 2010/07/28
    """

    def __new__(subtype, input_array=None, label=None, prediction=None,
                predictor=None, tag=None, **kwargs):
        """ Create the object including several type mappings

        If *input_array* is not given, it is built from *prediction*:
        a list becomes a single row, an ndarray is promoted to 2d and a
        scalar is converted to a float and wrapped as ``[[prediction]]``.
        Non-finite entries are replaced before the array is created.
        If *prediction* is not given, it is read back from the first row
        of *input_array*.

        Raises a TypeError if neither *input_array* nor *prediction*
        is given. Additional keyword arguments are accepted and ignored.
        """
        if input_array is None:
            if type(prediction) == list:
                input_array = [prediction]
            elif type(prediction) == numpy.ndarray:
                input_array = numpy.atleast_2d(prediction)
            elif prediction is None:
                raise TypeError(
                    "You should at least give a prediction value " +
                    "of 1 or -1 in the input array or the prediction component")
            else:
                # scalar prediction: map it to a floating point type
                if type(prediction) == numpy.float64:
                    pass
                elif type(prediction) == float:
                    prediction = numpy.float64(prediction)
                elif type(prediction) == int or type(prediction) == numpy.int64:
                    prediction *= 1.0
                else:
                    import warnings
                    warnings.warn("Type mismatch in Prediction Vector: %s!"%type(prediction))
                    prediction = float(prediction)
                input_array = [[prediction]]

        if not numpy.isfinite(input_array).all():
            if type(prediction) == list:
                # multiple predictions: the array content is zeroed,
                # the prediction list itself is left untouched
                input_array = [0 for _ in prediction]
            elif prediction is None:
                # Only the input array was given, so there is no scalar to
                # compare against zero. Saturate every non-finite entry with
                # the same rule as the scalar case below (positive -> 1e9,
                # everything else including NaN -> -1e9), keep finite ones.
                raw = numpy.atleast_2d(numpy.array(input_array, dtype=float))
                with numpy.errstate(invalid="ignore"):
                    saturated = numpy.where(raw > 0, 1e9, -1e9)
                input_array = numpy.where(numpy.isfinite(raw), raw, saturated)
            elif prediction > 0:
                prediction = 10**9
                input_array = [[float(prediction)]]
            else:
                # also reached for NaN, since NaN > 0 is False
                prediction = -10**9
                input_array = [[float(prediction)]]

        obj = base.BaseData.__new__(subtype, input_array)

        # add subclass attributes to the created instance
        obj.label = label
        obj.predictor = predictor
        if prediction is None:
            # no explicit prediction: take it from the first row of the array
            first_row = list(input_array[0])
            if len(first_row) == 1:
                obj.prediction = first_row[0]
            else:
                obj.prediction = first_row
        else:
            obj.prediction = prediction
        if tag is not None:
            obj.tag = tag
        # Finally, we must return the newly created object:
        return obj

    def __array_finalize__(self, obj):
        """ Copy label, predictor and prediction from *obj* or reset them

        The attributes are taken over (with None as fallback) when *obj*
        is neither None nor a plain numpy.ndarray; otherwise all three
        are set to None.
        """
        super(PredictionVector, self).__array_finalize__(obj)
        # set default values for attributes, since normally they are not
        # needed when taking just the values
        if not (obj is None) and not (type(obj) == numpy.ndarray):
            # reset the attributes from passed original object
            self.label = getattr(obj, 'label', None)
            self.predictor = getattr(obj, 'predictor', None)
            self.prediction = getattr(obj, 'prediction', None)
        else:
            self.label = None
            self.predictor = None
            self.prediction = None

    # which is a good printing format? "label, value"?
    def __str__(self):
        """ Return "label : prediction" pairs, one per prediction

        A string (or bytes) label is always treated as one single label,
        even though strings are iterable; any other iterable label is
        paired element-wise with the predictions.
        """
        label = self.label
        has_multiple_labels = (hasattr(label, "__iter__")
                               and not isinstance(label, (str, bytes)))
        if has_multiple_labels:
            return "".join(
                "%s : %.4f \t" % (single_label, single_prediction)
                for single_label, single_prediction
                in zip(label, self.prediction))
        return "%s : %.4f \t" % (label, self.prediction)

    def __reduce__(self):
        """ Append label, predictor and prediction to the pickled state

        Refer to
        http://www.mail-archive.com/numpy-discussion@scipy.org/msg02446.html#
        for infos about pickling ndarray subclasses
        """
        object_state = list(super(PredictionVector, self).__reduce__())
        subclass_state = (self.label, self.predictor, self.prediction)
        # Concatenation works for a list as well as for a tuple coming from
        # the parent class and does not modify the parent's object in place.
        object_state[2] = tuple(object_state[2]) + (subclass_state,)
        return tuple(object_state)

    def __setstate__(self, state):
        """ Restore the state written by :meth:`__reduce__`

        *state* has to consist of exactly three parts: the first two are
        handed to the parent class, the third one holds
        (label, predictor, prediction).
        """
        nd_state, base_state, own_state = state
        super(PredictionVector, self).__setstate__((nd_state, base_state))
        (self.label, self.predictor, self.prediction) = own_state

    def __eq__(self, other):
        """ Same label and prediction value

        Objects of a different type are never equal.
        Two missing (None) predictions count as equal, a missing and an
        existing one as different. Predictions, which can not be
        broadcast against each other, are different.
        """
        if type(other) != type(self):
            return False
        if not (self.label == other.label):
            return False
        own_prediction = self.prediction
        other_prediction = other.prediction
        if own_prediction is None or other_prediction is None:
            # numpy.allclose can not handle None
            return own_prediction is None and other_prediction is None
        try:
            return bool(numpy.allclose(own_prediction, other_prediction))
        except ValueError:
            # raised for shapes that can not be broadcast
            return False