# Source code for pySPACE.missions.nodes.classification.discriminant_analysis_classifier

""" Discriminant analysis type classifiers """
import numpy
from copy import deepcopy
import warnings

from pySPACE.missions.nodes.base_node import BaseNode
from pySPACE.resources.data_types.prediction_vector import PredictionVector

class DiscriminantAnalysisClassifierBase(BaseNode):
    """ Template for discriminant analysis type classifier nodes

    This class provides ``is_trainable`` and ``is_supervised`` as well as a
    generic training method, which simply collects all training data.
    A classifier that inherits from here should implement ``_stop_training``
    (model building) and ``_execute`` (classification of new data).

    **Parameters**

        :class_labels:
            Determines the order of the two classes.
            This is important, because the sign of the prediction value
            depends on this order: internally, the first label is mapped
            to +1 and the second one to -1. Samples with any further label
            are skipped during training.
            Labels that are not given here are added (with a warning)
            in the order in which they first occur.

            (*optional, default: []*)

        :prior_probability:
            The prior probability for any given sample to belong to either class.
            Pass a list with two entries in the same order as in class_labels. The
            values in prior_probability don't have to be actual probabilities, i.e.,
            they don't have to add up to 1: [1,3] is equivalent to [.25,.75].
            Note that this parameter is in some sense inverse to the SVM weights:
            The underrepresented class will typically get assigned a higher SVM
            weight but the smaller prior probability.

            (*optional, default: [1.,1.]*)

    :Author: David Feess (David.Feess@dfki.de)
    :Created: 2012/05/30
    """
    def __init__(self, prior_probability=None, class_labels=None, **kwargs):
        """ Store the parameters and initialize the training data buffers

        ``None`` stands for the documented defaults ([1.,1.] and []).
        """
        super(DiscriminantAnalysisClassifierBase, self).__init__(**kwargs)

        if prior_probability is None:
            prior_probability = [1., 1.]
        # _train appends unknown labels to self.classes. Work on a private
        # copy, so that neither a list object shared as default value nor
        # the list handed in by the caller is modified by the training.
        if class_labels is None:
            class_labels = []
        else:
            class_labels = list(class_labels)

        self.set_permanent_attributes(classes=class_labels,
                                      prior_probability=prior_probability,
                                      x=None,  # training data
                                      y=None)  # training labels

    def is_trainable(self):
        """ Returns whether this node is trainable. """
        return True

    def is_supervised(self):
        """ Returns whether this node requires supervised training """
        return True

    def _train(self, data, label):
        """ Train node on given example *data* for class *label*.

        In this method, all data items and labels are buffered
        in a matrix for batch training:
        *data* is stacked row-wise into ``self.x`` and the numerical
        label (+1 or -1) is stacked into the column ``self.y``.
        Samples whose label is neither the first nor the second entry of
        ``self.classes`` are ignored.
        """
        # construct list of all labels
        if label not in self.classes:
            warnings.warn("Please give the expected classes to the classifier!"
                +" %s unknown. "%label +"Therefore define the variable "
                +"'class_labels' in your spec file, where you use your "
                +"classifier. For further info look at the node documentation.")
            self.classes.append(label)

        # map label to [-1,1], assuming that "target label"
        # is at position 0 and standards at position 1; skip all other labels
        label_index = self.classes.index(label)
        if label_index == 0:
            target_value = 1
        elif label_index == 1:
            target_value = -1
        else:
            return

        if self.x is None:  # initialize data variables
            self.x = deepcopy(data)
            # start with a 1 x 1 column, so that y is two dimensional
            # (one row per training item) from the first sample on
            self.y = numpy.array([[target_value]])
        else:  # stack data
            self.x = numpy.vstack((self.x, data))
            self.y = numpy.vstack((self.y, target_value))

class LinearDiscriminantAnalysisClassifierNode(DiscriminantAnalysisClassifierBase):
    """ Classify by linear discriminant analysis

    A detailed description can be found in:
    [1] Bishop, 2006 C.M. Bishop, "Pattern recognition and machine learning",
    Springer (2006), 4.1.3-4.1.5

    Implementation strategies originate from
    [2] Schloegl et al., Adaptive Methods in BCI Research - An Introductory
    Tutorial. Brain-Computer Interfaces (2010) pp. 331

    The prediction value is positive, if the first class label is predicted,
    and zero or negative, if the second class label is predicted.

    **Parameters**

        See description of :class:`~DiscriminantAnalysisClassifierBase`

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : LDA
            parameters :
                class_labels : ["Target","Standard"]
                prior_probability : [1,6]

    :Author: David Feess (David.Feess@dfki.de)
    :Created: 2012/05/29
    """
    def __init__(self, class_labels=None, **kwargs):
        """ Forward the parameters and initialize the model attributes

        ``None`` for *class_labels* stands for the empty list.
        """
        # Hand a private list to the base class: the label list is extended
        # during training, so it must not be a default object shared by
        # all instances or the caller's own list.
        if class_labels is None:
            class_labels = []
        else:
            class_labels = list(class_labels)
        super(LinearDiscriminantAnalysisClassifierNode, self).__init__(
            class_labels=class_labels, **kwargs)

        self.set_permanent_attributes(iECM=None,  # inv extended cov matrix
                                      mu_m1=None,  # class specific means
                                      mu_p1=None)

    def _stop_training(self, debug=False):
        """ Perform the actual model building

        Computes the inverse of the scaled extended covariance matrix of
        all buffered training data and the mean of each of the two classes.
        The buffers ``self.x`` and ``self.y`` are transformed in place.
        """
        # these calculations strongly follow [2]
        # transpose the buffers: afterwards every column of x is one training
        # item and y is a single row with one label per training item
        self.x = self.x.T
        self.y = self.y.T
        # stack a row of ones on top of the data (constant bias feature)
        self.x = numpy.vstack((numpy.ones_like(self.x[0, :]), self.x))
        # calculate extended cov matrix and its inverse
        # ECM has entries [a,b;c,D] with a = NrSamples, b.T = c = sum of the
        # data, D = sum of the outer products; division by a = ECM[0,0]
        # turns these sums into means
        ECM = numpy.dot(self.x, self.x.T)  # eq. 16 in [2]
        self.iECM = numpy.linalg.inv(ECM / ECM[0, 0])
        # calculate class-specific means (without the bias feature)
        labels = self.y[0, :]
        self.mu_m1 = numpy.mean(self.x[1:, labels == -1], axis=1)
        self.mu_p1 = numpy.mean(self.x[1:, labels == 1], axis=1)

        # NOTE: analytically equivalent alternative (eqs. 39-41 in [2]):
        #   w = numpy.dot((mu_p1 - mu_m1), self.iECM[1:,1:]) # delta_mu*inv(cov)
        #   b = -numpy.dot(ECM[1:,0],w.T)                    # -mu_x*w.T
        #   bw = numpy.hstack((b,w))
        # and the prediction in _execute would be [b,w]*[1,x].T

    def _execute(self, data):
        """ Executes the classifier on the given data vector

        Returns a PredictionVector with the predicted class label
        and the value of the linear discriminant function as prediction.
        """
        # add feature that is constantly one (bias term)
        data = numpy.vstack((numpy.array([1]), data.T))

        # offset due to prior probabilities
        prior_shift = numpy.log(float(self.prior_probability[0]) /
                                float(self.prior_probability[1]))

        # prediction is [0,delta mu]*iECM*[1,x] (eq. 45 in [2])
        # (this is equivalent to [b,w]*[1,x].T (eq. 39))
        delta_mu = numpy.hstack((numpy.array([0]), self.mu_p1 - self.mu_m1))
        m = numpy.dot(numpy.dot(delta_mu, self.iECM), data)[0] + prior_shift

        # class +1 is the first class label (see the label mapping in _train)
        if m > 0:
            predicted_class = self.classes[0]
        else:
            predicted_class = self.classes[1]

        return PredictionVector(label=predicted_class,
                                prediction=m,
                                predictor=self)

class QuadraticDiscriminantAnalysisClassifierNode(DiscriminantAnalysisClassifierBase):
    """ Classify by quadratic discriminant analysis

    Performs a QDA classification (basically evaluates the log of a
    likelihood ratio test).

    Implementation originates from
    [1] Schloegl et al., Adaptive Methods in BCI Research - An Introductory
    Tutorial. Brain-Computer Interfaces (2010) pp. 331

    The prediction value is negative, if the first class label is predicted,
    and zero or positive, if the second class label is predicted.

    **Parameters**

        See description of DiscriminantAnalysisClassifierBase

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : QDA
            parameters :
                class_labels : ["Target","Standard"]
                prior_probability : [1,6]

    :Author: David Feess (David.Feess@dfki.de)
    :Created: 2012/05/29
    """
    def __init__(self, class_labels=None, **kwargs):
        """ Forward the parameters and initialize the model attributes

        ``None`` for *class_labels* stands for the empty list.
        """
        # Hand a private list to the base class: the label list is extended
        # during training, so it must not be a default object shared by
        # all instances or the caller's own list.
        if class_labels is None:
            class_labels = []
        else:
            class_labels = list(class_labels)
        super(QuadraticDiscriminantAnalysisClassifierNode, self).__init__(
            class_labels=class_labels, **kwargs)

        self.set_permanent_attributes(ECM_p1=None,  # class +1 ext cov matrix
                                      ECM_m1=None,  # class -1 ECM
                                      iECM_p1=None,  # class +1 inv ext cov matrix
                                      iECM_m1=None,  # class -1 iECM
                                      # logdet of the class covariances as
                                      # written by _stop_training and
                                      # read by _execute
                                      logdet_p1=None,
                                      logdet_m1=None,
                                      # formerly registered names, which are
                                      # not used in this class; only kept
                                      # for backward compatibility
                                      logdetCM_p1=None,
                                      logdetCM_m1=None)

    def _stop_training(self, debug=False):
        """ Perform the actual model building

        Computes for each class the extended covariance matrix, the inverse
        of its scaled version and the log determinant of the covariance.
        The buffers ``self.x`` and ``self.y`` are transformed in place.
        """
        # these calculations strongly follow [1]
        # transpose the data and stack a row of ones on top (constant bias
        # feature): afterwards every column of x is one training item
        self.x = numpy.vstack((numpy.ones_like(self.x[:, 0]), self.x.T))
        self.y = self.y.T  # one row with one label per training item
        # data for each class individually:
        labels = self.y[0, :]
        x_p1 = self.x[:, labels == 1]
        x_m1 = self.x[:, labels == -1]

        # calculate extended cov matrix and its inverse for each class
        # ECM has entries [a,b;c,D] with a = NrSamples, b.T = c = sum of the
        # data, D = sum of the outer products
        # the logdet terms are needed for the classification function
        self.ECM_p1 = numpy.dot(x_p1, x_p1.T)  # eq. 16 in [1]
        # the paper does not really use the inverse but the scaled inverse
        self.iECM_p1 = numpy.linalg.inv(self.ECM_p1 / self.ECM_p1[0, 0])
        self.logdet_p1 = self.logdet_from_ECM(self.ECM_p1)

        self.ECM_m1 = numpy.dot(x_m1, x_m1.T)  # eq. 16 in [1]
        self.iECM_m1 = numpy.linalg.inv(self.ECM_m1 / self.ECM_m1[0, 0])
        self.logdet_m1 = self.logdet_from_ECM(self.ECM_m1)

    def _execute(self, data):
        """ Executes the classifier on the given data vector

        Returns a PredictionVector with the predicted class label and the
        value of the quadratic discriminant function as prediction.
        """
        # add feature that is constantly one (bias term)
        data = numpy.vstack((numpy.array([1]), data.T)).T
        # Basically, we perform a likelihood ratio test. The likelihood for
        # class j is
        # (det(2*pi*Sigma_j))^(-1/2) * exp(-1/2 * F_j) where
        # F_j = (x-mu_j) *        iSigma_j       * (x-mu_j).T
        #     =    [1,x] * {iECM_j - [1,0; 0,0]} * [1,x].T
        # Weighted with the prior p_j, minus two times the logarithm is
        # (up to a constant, which is the same for both classes)
        # F_j + log(det(Sigma_j)) - 2*log(p_j)
        c = numpy.zeros_like(self.iECM_p1)
        c[0, 0] = 1  # c:=[1,0; 0,0]
        # xFx terms:
        xFx_p1 = float(numpy.dot(data, numpy.dot(self.iECM_p1 - c, data.T)))
        xFx_m1 = float(numpy.dot(data, numpy.dot(self.iECM_m1 - c, data.T)))

        # offset due to prior probabilities
        prior_shift = 2 * numpy.log(float(self.prior_probability[0]) /
                                    float(self.prior_probability[1]))

        # D compares the two "-2*log" terms. The first class (+1) is chosen
        # for D < 0, so a larger prior of the first class has to decrease D:
        # the prior shift is subtracted.
        D = (xFx_p1 + self.logdet_p1) - (xFx_m1 + self.logdet_m1) - prior_shift
        if D < 0:
            predicted_class = self.classes[0]
        else:
            predicted_class = self.classes[1]

        return PredictionVector(label=predicted_class,
                                prediction=D,
                                predictor=self)

    def logdet_from_ECM(self, ECM):
        """ Compute logdet of cov matrix from extended cov matrix (ECM)

        With ECM = [a,b;c,D] (a = NrSamples, b.T = c = sum of the data,
        D = sum of the outer products), the covariance matrix is
        D/a - mu*mu.T with the mean mu = c/a.
        Returns the logarithm of the absolute value of its determinant.
        """
        # This has to be done for both classes in training
        # first extract cov matrix from extended cov matrix:
        n_samples = ECM[0, 0]
        mu = ECM[1:, 0] / n_samples
        # outer product: a matrix of the same shape as ECM[1:,1:]
        Sigma = ECM[1:, 1:] / n_samples - numpy.outer(mu, mu)
        return numpy.linalg.slogdet(Sigma)[1]

# Short names of the classifier nodes (used as ``node`` entry in the
# exemplary calls in the class documentations)
_NODE_MAPPING = {
    "QDA": QuadraticDiscriminantAnalysisClassifierNode,
    "LDA": LinearDiscriminantAnalysisClassifierNode,
}