
""" Discriminant analysis type classifiers """
import numpy
from copy import deepcopy
import warnings

from pySPACE.missions.nodes.base_node import BaseNode
from pySPACE.resources.data_types.prediction_vector import PredictionVector

class DiscriminantAnalysisClassifierBase(BaseNode):
    """ Template for discriminant analysis type classifier nodes

    This base class provides the is_trainable and is_supervised methods as
    well as a generic training method that simply collects all training data.
    A classifier that inherits from it should implement _stop_training and
    _execute.

    **Parameters**

        :class_labels:
            Determines the order of the two classes.
            This is important if you want the prediction value to be negative
            for the first class and positive for the other one. If not
            specified, this variable is filled by adding the labels in the
            order in which they first occur.

            (*optional, default: []*)

        :prior_probability:
            The prior probability for any given sample to belong to either
            class. Pass a list with two entries in the same order as in
            class_labels. The values in prior_probability don't have to be
            actual probabilities, i.e., they don't have to add up to 1:
            [1,3] is equivalent to [.25,.75].
            Note that this parameter is in some sense inverse to the SVM
            weights: the underrepresented class will typically get assigned
            a higher SVM weight but the smaller prior probability.

            (*optional, default: [1.,1.]*)

    :Author: David Feess (David.Feess@dfki.de)
    :Created: 2012/05/30
    """
    def __init__(self, prior_probability=[1., 1.], class_labels=[], **kwargs):
        super(DiscriminantAnalysisClassifierBase, self).__init__(**kwargs)
        self.set_permanent_attributes(classes=class_labels,
                                      prior_probability=prior_probability,
                                      x=None,  # training data
                                      y=None)  # training labels
    def is_trainable(self):
        """ Returns whether this node is trainable """
        return True

    def is_supervised(self):
        """ Returns whether this node requires supervised training """
        return True
    def _train(self, data, label):
        """ Train node on given example *data* for class *label*

        In this method, all data items and labels are buffered in a matrix
        for batch training.
        """
        # construct list of all labels
        if label not in self.classes:
            warnings.warn("Please give the expected classes to the classifier!"
                          + " %s unknown. " % label
                          + "Therefore define the variable 'class_labels' in "
                          + "your spec file, where you use your classifier. "
                          + "For further info look at the node documentation.")
            self.classes.append(label)
        # map label to [-1,1], assuming that the "target" label is at
        # position 0 and the "standard" label at position 1;
        # skip all other labels
        label_index = self.classes.index(label)
        if label_index == 0:
            label_index = 1
        elif label_index == 1:
            label_index = -1
        else:
            return
        if self.x is None:  # initialize data variables
            self.x = deepcopy(data)
            self.y = numpy.array(label_index)
        else:  # stack data
            self.x = numpy.vstack((self.x, data))
            self.y = numpy.vstack((self.y, label_index))
class LinearDiscriminantAnalysisClassifierNode(DiscriminantAnalysisClassifierBase):
    """ Classify by linear discriminant analysis

    A detailed description can be found in:

    [1] Bishop, 2006
        C.M. Bishop, "Pattern recognition and machine learning",
        Springer (2006), 4.1.3-4.1.5

    Implementation strategies originate from

    [2] Schloegl et al., Adaptive Methods in BCI Research -
        An Introductory Tutorial. Brain-Computer Interfaces (2010) pp. 331

    **Parameters**

    See description of :class:`~DiscriminantAnalysisClassifierBase`

    **Exemplary Call**

    .. code-block:: yaml

        - node : LDA
          parameters :
              class_labels : ["Target","Standard"]
              prior_probability : [1,6]

    :Author: David Feess (David.Feess@dfki.de)
    :Created: 2012/05/29
    """
    def __init__(self, class_labels=[], **kwargs):
        super(LinearDiscriminantAnalysisClassifierNode, self).__init__(
            class_labels=class_labels, **kwargs)
        self.set_permanent_attributes(iECM=None,   # inv extended cov matrix
                                      mu_m1=None,  # class specific means
                                      mu_p1=None)  # ,
                                      # bw=None)   # classification vector
    def _stop_training(self, debug=False):
        """ Perform the actual model building """
        # these calculations closely follow [2]
        self.x = self.x.T  # transpose to channels x samples
        self.y = self.y.T
        # stack a row of ones on top of the data --> (channels + 1) x samples
        self.x = numpy.vstack((numpy.ones_like(self.x[0, :]), self.x))
        # calculate extended cov matrix and pseudo inverse
        # ECM has entries [a,b;c,D] with a = NrSamples, b.T=c=data mean, D=cov
        ECM = numpy.dot(self.x, self.x.T)  # eq. 16 in [2]
        self.iECM = numpy.linalg.inv(ECM / ECM[0, 0])
        # calculate class-specific means
        self.mu_m1 = numpy.mean(self.x[1:, self.y[0, :] == -1], axis=1)
        self.mu_p1 = numpy.mean(self.x[1:, self.y[0, :] == 1], axis=1)
        ### analytically equivalent but more elegant: use of w and b ###
        ## calculate w and b (eqs. 40f in [2])
        # w = numpy.dot((self.mu_p1 - self.mu_m1), self.iECM[1:,1:]) # delta_mu*inv(cov)
        # b = -numpy.dot(ECM[1:,0]/ECM[0,0], w.T) # -mu_x*w.T
        ## stack b and w to a joint model parameter
        # self.bw = numpy.hstack((b,w))
        ## and then in execute: prediction is [b,w]*[1,x].T (eq. 39 in [2])
        # m = float(numpy.dot(self.bw, data))
    def _execute(self, data):
        """ Executes the classifier on the given data vector """
        predicted_class = None
        # add a feature that is constantly one (bias term)
        data = numpy.vstack((numpy.array([1]), data.T))
        # offset due to prior probabilities
        prior_shift = numpy.log(float(self.prior_probability[0]) /
                                float(self.prior_probability[1]))
        # prediction is [0,delta mu]*iECM*[1,x] (eq. 45 in [2])
        # (this is equivalent to [b,w]*[1,x].T (eq. 39))
        m = numpy.dot(numpy.dot(
            numpy.hstack((numpy.array([0]), self.mu_p1 - self.mu_m1)).T,
            self.iECM), data)[0] + prior_shift
        if m > 0:
            predicted_class = self.classes[0]
        else:
            predicted_class = self.classes[1]
        return PredictionVector(label=predicted_class,
                                prediction=m,
                                predictor=self)
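
# --------------------------------------------------------------------------
# Illustrative sketch, not part of the original pySPACE API: a small
# numerical check that the ECM-based decision value of eq. 45 in [2], as used
# in LinearDiscriminantAnalysisClassifierNode._execute above, coincides with
# the explicit w, b formulation of eqs. 39f in [2]. The helper name and all
# local variables are hypothetical and introduced only for this demonstration.
def _check_lda_formulations(n_samples=200, n_channels=4, seed=0):
    """ Compare [0,delta mu]*iECM*[1,x] with w*x + b on random data """
    rng = numpy.random.RandomState(seed)
    x = rng.randn(n_channels, n_samples)                 # channels x samples
    y = numpy.sign(rng.randn(n_samples))                 # labels in {-1, +1}
    x_ext = numpy.vstack((numpy.ones(n_samples), x))     # prepend row of ones
    ECM = numpy.dot(x_ext, x_ext.T)                      # eq. 16 in [2]
    iECM = numpy.linalg.inv(ECM / ECM[0, 0])
    delta_mu = (numpy.mean(x[:, y == 1], axis=1)
                - numpy.mean(x[:, y == -1], axis=1))
    sample = numpy.hstack((1.0, rng.randn(n_channels)))  # one extended sample
    # eq. 45: decision value from the inverse extended covariance matrix
    m_ecm = numpy.dot(numpy.dot(numpy.hstack((0.0, delta_mu)), iECM), sample)
    # eqs. 39f: explicit weight vector and bias
    w = numpy.dot(delta_mu, iECM[1:, 1:])                # delta_mu * inv(cov)
    b = -numpy.dot(ECM[1:, 0] / ECM[0, 0], w)            # -mu_x * w
    m_wb = numpy.dot(w, sample[1:]) + b
    return numpy.allclose(m_ecm, m_wb)
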
class QuadraticDiscriminantAnalysisClassifierNode(DiscriminantAnalysisClassifierBase):
    """ Classify by quadratic discriminant analysis

    Performs a QDA classification (basically evaluates the log of a
    likelihood ratio test).

    Implementation originates from

    [1] Schloegl et al., Adaptive Methods in BCI Research -
        An Introductory Tutorial. Brain-Computer Interfaces (2010) pp. 331

    **Parameters**

    See description of :class:`~DiscriminantAnalysisClassifierBase`

    **Exemplary Call**

    .. code-block:: yaml

        - node : QDA
          parameters :
              class_labels : ["Target","Standard"]
              prior_probability : [1,6]

    :Author: David Feess (David.Feess@dfki.de)
    :Created: 2012/05/29
    """
    def __init__(self, class_labels=[], **kwargs):
        super(QuadraticDiscriminantAnalysisClassifierNode, self).__init__(
            class_labels=class_labels, **kwargs)
        self.set_permanent_attributes(ECM_p1=None,     # class +1 ext cov matrix
                                      ECM_m1=None,     # class -1 ECM
                                      iECM_p1=None,    # class +1 inv ext cov matrix
                                      iECM_m1=None,    # class -1 iECM
                                      logdet_p1=None,  # logdet of class +1 cov
                                      logdet_m1=None)  # logdet of class -1 cov
    def _stop_training(self, debug=False):
        """ Perform the actual model building """
        # these calculations closely follow [1]
        # stack a row of ones on top of the data --> (channels + 1) x samples
        self.x = numpy.vstack((numpy.ones_like(self.x[:, 0]), self.x.T))
        self.y = self.y.T
        # data for each class individually:
        x_p1 = self.x[:, self.y[0, :] == 1]
        x_m1 = self.x[:, self.y[0, :] == -1]
        # calculate extended cov matrix and pseudo inverse for each class
        # ECM has entries [a,b;c,D] with a = NrSamples, b.T=c=data mean, D=cov
        # the logdet terms are needed for the classification function
        self.ECM_p1 = numpy.dot(x_p1, x_p1.T)  # eq. 16 in [1]
        # the paper does not really use the inverse but the scaled inverse
        self.iECM_p1 = numpy.linalg.inv(self.ECM_p1 / self.ECM_p1[0, 0])
        self.logdet_p1 = self.logdet_from_ECM(self.ECM_p1)
        self.ECM_m1 = numpy.dot(x_m1, x_m1.T)  # eq. 16 in [1]
        # the paper does not really use the inverse but the scaled inverse
        self.iECM_m1 = numpy.linalg.inv(self.ECM_m1 / self.ECM_m1[0, 0])
        self.logdet_m1 = self.logdet_from_ECM(self.ECM_m1)
    def _execute(self, data):
        """ Executes the classifier on the given data vector """
        predicted_class = None
        # add a feature that is constantly one (bias term)
        data = numpy.vstack((numpy.array([1]), data.T)).T
        # The QDA evaluation currently uses the Wikipedia formula, because
        # I didn't find a textbook that has it -.-
        # Basically, we perform a likelihood ratio test. The likelihood for
        # class j is
        #     (2*pi*det(Sigma_j))^(-1/2) * exp(-1/2 xF_jx.T)   with
        #     F_j = (x-mu_j) * iSigma_j * (x-mu_j).T
        #         = [1,x] * {iECM_j - [1,0; 0,0]} * [1,x].T
        # We use the log of the likelihood ratio, which boils down to:
        #     {xFx + log(det(Sigma))}_i - {xFx + log(det(Sigma))}_j
        c = numpy.zeros_like(self.iECM_p1)
        c[0, 0] = 1  # c := [1,0; 0,0]
        # xFx terms:
        xFx_p1 = float(numpy.dot(data, numpy.dot(self.iECM_p1 - c, data.T)))
        xFx_m1 = float(numpy.dot(data, numpy.dot(self.iECM_m1 - c, data.T)))
        # offset due to prior probabilities; since a small D favours the
        # first class, the prior ratio enters inverted compared to the LDA node
        prior_shift = 2 * numpy.log(float(self.prior_probability[1]) /
                                    float(self.prior_probability[0]))
        D = (xFx_p1 + self.logdet_p1) - (xFx_m1 + self.logdet_m1) + prior_shift
        if D < 0:
            predicted_class = self.classes[0]
        else:
            predicted_class = self.classes[1]
        return PredictionVector(label=predicted_class,
                                prediction=D,
                                predictor=self)
    def logdet_from_ECM(self, ECM):
        """ Compute logdet of cov matrix from extended cov matrix (ECM) """
        # This has to be done for both classes in training.
        # first extract the cov matrix from the extended cov matrix:
        # Sigma = E[x x.T] - mu mu.T, with mu = ECM[1:,0]/ECM[0,0]
        mu = ECM[1:, 0] / ECM[0, 0]
        Sigma = ECM[1:, 1:] / ECM[0, 0] - numpy.outer(mu, mu)
        return numpy.linalg.slogdet(Sigma)[1]
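
# --------------------------------------------------------------------------
# Illustrative sketch, not part of the original pySPACE API: checks that the
# covariance extracted inside logdet_from_ECM agrees with an explicit
# maximum-likelihood covariance estimate on random data. The helper name and
# the direct node instantiation are assumptions made only for this example.
def _check_logdet_from_ECM(n_samples=500, n_channels=3, seed=1):
    rng = numpy.random.RandomState(seed)
    x = rng.randn(n_channels, n_samples)               # channels x samples
    x_ext = numpy.vstack((numpy.ones(n_samples), x))   # extended data
    ECM = numpy.dot(x_ext, x_ext.T)                    # extended cov matrix
    Sigma_ml = numpy.cov(x, bias=1)                    # ML estimate (norm. by N)
    node = QuadraticDiscriminantAnalysisClassifierNode()
    return numpy.allclose(node.logdet_from_ECM(ECM),
                          numpy.linalg.slogdet(Sigma_ml)[1])
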
_NODE_MAPPING = {"QDA": QuadraticDiscriminantAnalysisClassifierNode, "LDA": LinearDiscriminantAnalysisClassifierNode}
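
# --------------------------------------------------------------------------
# Minimal usage sketch (illustrative only): trains both nodes on synthetic
# two-class Gaussian data. Plain 1 x n numpy arrays are used as stand-ins
# for pySPACE feature vectors, and the private _train/_stop_training/_execute
# methods defined in this module are called directly; in a real node chain
# the pySPACE framework drives these calls and supplies the proper data types.
if __name__ == "__main__":
    rng = numpy.random.RandomState(0)
    for node in [LinearDiscriminantAnalysisClassifierNode(
                     class_labels=["Target", "Standard"]),
                 QuadraticDiscriminantAnalysisClassifierNode(
                     class_labels=["Target", "Standard"])]:
        for _ in range(100):
            node._train(rng.randn(1, 4) + 1.0, "Target")
            node._train(rng.randn(1, 4) - 1.0, "Standard")
        node._stop_training()
        prediction = node._execute(rng.randn(1, 4) + 1.0)
        print(prediction.label, prediction.prediction)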