""" Wrapper around external SVM variant implementations like LibSVM or LIBLINEAR """

import logging
import warnings

try:
#    import matplotlib as mpl
#    mpl.rcParams['text.usetex'] = True
#    mpl.rcParams['text.latex.unicode'] = True
    import matplotlib.pyplot as plt
except ImportError:
    pass

import scipy.spatial.distance
from pySPACE.missions.nodes.decorators import NoOptimizationParameter, ChoiceParameter

# import the external libraries
try: # Liblinear
    import liblinearutil
except ImportError:
    pass
try:  # Libsvm
    import svmutil
except ImportError:
    pass

# representation of the linear classification vector
from pySPACE.resources.data_types.feature_vector import FeatureVector

# the output is a prediction vector
from pySPACE.resources.data_types.prediction_vector import PredictionVector

# array handling
import numpy

# base class
from pySPACE.missions.nodes.classification.base import RegularizedClassifierBase


@NoOptimizationParameter("regression")
class LibSVMClassifierNode(RegularizedClassifierBase):
    """ Classify like a standard SVM with the LibSVM settings

    This node is a wrapper around the *current* libsvm implementation of a
    SVM.

    http://www.csie.ntu.edu.tw/~cjlin/libsvm/oldfiles/

    **Parameters**

    Some general parameters are only documented in the
    :class:`RegularizedClassifierBase
    <pySPACE.missions.nodes.classification.base.RegularizedClassifierBase>`.

    :svm_type:
        Defines the used SVM type.
        One of the following strings: 'C-SVC', 'one-class SVM',
        'epsilon-SVR', 'nu-SVR'.
        The last two types are for regression, the first for classification.

        .. warning::
            For using "one-class SVM" better use the
            :class:`~pySPACE.missions.nodes.classification.one_class.LibsvmOneClassNode`.

        (*optional, default: 'C-SVC'*)

    :complexity:
        Defines the parameter for 'C-SVC', 'epsilon-SVR' and 'nu-SVR'.
        The complexity sets the weighting of the punishment for
        misclassification in comparison to generalizing classification from
        the data. It equals the parameter /cost/ or /C/ in the libsvm package
        and takes values in the range from 0 to infinity.

        (*optional, default: 1*)

    :str_label_function:
        A string representing a Python eval()-able function that transforms
        the labels (list). It is only meaningful for numeric labels, e.g.,
        "lambda liste: [exp(-0.0001*elem**2) for elem in liste]".

        (*optional, default: None*)

    :debug:
        If *debug* is True, one gets additional output concerning the
        classification.

        .. note:: This is only meaningful for the 'LINEAR'-*kernel_type*.

        (*optional, default: False*)

    :store:
        Parameter of the super-class. If *store* is True, the classification
        vector is stored as a feature vector.

        .. note:: This is only meaningful for the 'LINEAR'-*kernel_type*.

        (*optional, default: False*)

    :max_iterations:
        Restricts the solver inside the LibSVM to use at most N iterations,
        where N is the product of *max_iterations* and the number of samples
        used to train the classifier. If omitted or set to zero, the solver
        takes as many iterations as it needs to calculate the model.

        .. note:: This number has to be an integer and is very important if
                  you expect the classifier not to converge.

        .. note:: To use this feature you will need the modified libsvm of
                  the external folder in a compiled version. Furthermore, you
                  should make sure that this version is imported, e.g., by
                  adding the path at the beginning of the configuration file
                  paths.

        (*optional, default: 0*)

    :complexities_path:
        If a complexities_path is given, the complexity is read from a YAML
        file. This file holds a dict with channel numbers as keys and the
        corresponding complexity as value. Additionally, a
        'features_per_channel' entry can be set to calculate the channel
        number based on the number of features. If no 'features_per_channel'
        is given, a factor of 1 is assumed; this can be used to specify the
        number of features in the file instead of the number of sensor
        channels. A minimal example for the file content could be::

            {32: 0.081, 62: 0.019, features_per_channel: 6}

        'complexities_path' will overwrite 'complexity'.

        (*optional, default: None*)

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : LibSVM_Classifier
            parameters :
                svm_type : "C-SVC"
                complexity : 1
                kernel_type : "LINEAR"
                class_labels : ['Standard', 'Target']
                weight : [1, 3]
                debug : True
                store : True
                max_iterations : 100

    :input:    FeatureVector
    :output:   PredictionVector
    :Author:   Jan Hendrik Metzen (jhm@informatik.uni-bremen.de) &
               Mario Krell (Mario.krell@dfki.de)
    :Created:  2009/07/02
    :Revised:  2010/04/09
    :Last change: 2011/05/06 Mario Krell old version deleted
    """
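
    # A minimal usage sketch in plain Python (commented out so that importing
    # this module has no side effects). It assumes the standard pySPACE node
    # interface, i.e. that the public ``train``/``stop_training``/``execute``
    # wrappers of the base node call the ``_train``/``_stop_training``/
    # ``_execute`` methods below; the toy feature values are made up:
    #
    # import numpy
    # node = LibSVMClassifierNode(class_labels=['Standard', 'Target'],
    #                             kernel_type='LINEAR', complexity=1)
    # for value, label in [(0.0, 'Standard'), (0.1, 'Standard'),
    #                      (0.9, 'Target'), (1.0, 'Target')]:
    #     fv = FeatureVector(numpy.array([[value, 1.0 - value]]),
    #                        ['f0', 'f1'])
    #     node.train(fv, label)
    # node.stop_training()
    # print node.execute(fv).label  # e.g. 'Target'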

    def __init__(self, svm_type='C-SVC', max_iterations=0,
                 str_label_function=None, complexities_path=None,
                 **kwargs):
        # fall back to the default svm_type 'C-SVC' if an unsupported
        # svm_type is selected
        supported_types = ["C-SVC", "one-class SVM", "epsilon-SVR", "nu-SVR"]
        if svm_type not in supported_types:
            svm_type = 'C-SVC'
            warnings.warn("SVM-type unknown. C-SVC will be used!")
        if svm_type == 'C-SVC':
            regression = False
        else:
            regression = True
        super(LibSVMClassifierNode, self).__init__(
            regression=regression, **kwargs)
        # check if the svm module has been correctly imported
        try:
            import svmutil
        except ImportError as e:
            self._log("svmutil.py could not be imported.")
            message = "Using the LibSVMClassifierNode requires " \
                      "the Python svm module provided by libsvm. " \
                      "For installation hints see the documentation " \
                      "or http://www.csie.ntu.edu.tw/~cjlin/libsvm/. " \
                      "Furthermore, try to import the path to the " \
                      "external folder."
            args = e.args
            if not args:
                e.args = (message,)  # args must be a tuple, not a string
            else:
                e.args = (message,) + args
            raise
        self.set_permanent_attributes(str_label_function=str_label_function,
                                      svm_type=svm_type,
                                      max_iterations=int(max_iterations),
                                      store_all_samples=True,
                                      predictor_iterations=numpy.Inf)

    def _stop_training(self, debug=False):
        """ Finish the training, i.e. train the SVM """
        self._complete_training(debug)
        self.relabel_training_set()

    def _complete_training(self, debug=False):
        """ Iterate over the complete data to get the initial model """
        ########## read the complexities file if given ##########
        if self.complexities_path is not None:
            import yaml
            complexities_file = open(self.complexities_path, 'r')
            complexities = yaml.load(complexities_file)
            # nr of channels = nr of features (== dim) / features_per_channel
            if 'features_per_channel' not in complexities:
                complexities['features_per_channel'] = 1
            self.complexity = complexities[
                round(self.dim / complexities['features_per_channel'])]
            self._log("Read complexity %s from file. Dimension is %s"
                      % (self.complexity, self.dim), level=logging.INFO)

        # not compatible with regression!
        # self._log("Instances of Class %s: %s, %s: %s" \
        #           % (self.classes[0],
        #              self.labels.count(self.classes.index(self.classes[0])),
        #              self.classes[1],
        #              self.labels.count(self.classes.index(self.classes[1]))))
        # instead this?:
        self._log("Performing training of SVM.")

        ########## calculation of the default gamma ##########
        self.calculate_gamma()
        self.num_samples = len(self.samples)

        # nr_weight is the number of elements in the arrays weight_label and
        # weight. Each weight[i] corresponds to weight_label[i], meaning that
        # the penalty of class weight_label[i] is scaled by a factor of
        # weight[i]. If you do not want to change the penalty for any of the
        # classes, just set nr_weight to 0.

        ########## preparation of the libsvm command ##########
        # for probability output add "-b 1" to options
        options = \
            "-c %.42f -d %d -g %.42f -r %.42f -n %.42f -p %.42f " \
            "-e %.20f -m %.42f" % \
            (self.complexity, self.exponent, self.gamma, self.offset,
             self.nu, self.epsilon, self.tolerance,
             1000)  # use 1000MB instead of 100MB (default)
        # options += " -b 1"  # un-comment this for probabilistic output!
        if self.multinomial:
            options += " -b 1"
        for i, w in enumerate(self.weight):
            options += " -w%d %.42f" % (i, w)
        if self.kernel_type == 'LINEAR':
            options += " -t 0"
        elif self.kernel_type == 'POLY':
            options += " -t 1"
        elif self.kernel_type == 'RBF':
            options += " -t 2"
        elif self.kernel_type == 'SIGMOID':
            options += " -t 3"
        else:
            self.kernel_type = 'LINEAR'
            options += " -t 0"
            warnings.warn("Kernel unknown! Precomputed kernels are not "
                          "yet implemented. Linear kernel used.")
            # PRECOMPUTED: kernel values in training_set_file
            # (not yet implemented)
        if self.svm_type == 'C-SVC':
            options += " -s 0"
        elif self.svm_type == 'nu-SVR':
            # bug fix: libsvm uses "-s 4" for nu-SVR ("-s 1" selects nu-SVC)
            options += " -s 4"
        elif self.svm_type == 'one-class SVM':
            options += " -s 2"
        elif self.svm_type == 'epsilon-SVR':
            options += " -s 3"
        else:
            options += " -s 0"
            self.svm_type = 'C-SVC'
            warnings.warn("SVM-type unknown. C-SVC will be used!")
        if not self.debug:
            options += " -q"
            self._log("Libsvm is now quiet!")

        old_libsvm_options = options
        if self.max_iterations != 0:
            options += " -i %d" % self.max_iterations
        try:
            param = svmutil.svm_parameter(options)
        except ValueError:
            param = svmutil.svm_parameter(old_libsvm_options)
            self._log("Using max_iterations is not supported by the "
                      "standard LIBSVM. Change your Python path to our "
                      "customized version!", level=logging.CRITICAL)

        # transform the labels with *label_function*
        if self.str_label_function is not None:
            self.label_function = eval(self.str_label_function)
            self.labels = self.label_function(self.labels)

        # build the classifier
        # h = [map(float, list(data)) for data in self.samples]
        problem = svmutil.svm_problem(self.labels, [
            map(float, list(data)) for data in self.samples])
        model = svmutil.svm_train(problem, param)
        if not self.multinomial:
            if (self.svm_type == 'C-SVC' or self.svm_type == 'one-class SVM') \
                    and self.kernel_type == 'LINEAR':
                self.calculate_classification_vector(model)
                if self.debug:
                    # this calculation is needed for further analysis
                    self.calculate_slack_variables(model)
                    print "LIBSVM Parameter:"
                    self.print_variables()
            else:
                # Slack variables are the same no matter which kernel is
                # used. This method is mainly used to reduce the number of
                # samples being stored later on.
                if self.debug:
                    self.calculate_slack_variables(model)
                self.model = model
        else:
            self.model = model

        # read the number of iterations needed to solve the problem
        if self.max_iterations != 0:
            try:
                predictor_iterations = model.get_num_iterations()
                self.classifier_information["~~Solver_Iterations~~"] = \
                    predictor_iterations
                if predictor_iterations == 0 or \
                        predictor_iterations == numpy.Inf:
                    self.classifier_information["~~SVM_Converged~~"] = False
                else:
                    self.classifier_information["~~SVM_Converged~~"] = True
            except:
                warnings.warn("Could not read the state of the LibSVM "
                              "solver from the C library!")
        try:
            self.classifier_information["~~offset~~"] = self.b
            self.classifier_information["~~w0~~"] = self.w[0]
            self.classifier_information["~~w1~~"] = self.w[1]
        except:
            pass
        self.delete_training_data()
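
    # The option string assembled in ``_complete_training`` follows the
    # standard libsvm command-line syntax. A standalone sketch of the same
    # call sequence on toy data (commented out; ``svmutil`` ships with
    # libsvm, the sample values are made up):
    #
    # import svmutil
    # labels = [0, 0, 1, 1]
    # samples = [[0.0, 0.0], [0.1, 0.1], [0.9, 1.0], [1.0, 1.0]]
    # param = svmutil.svm_parameter("-s 0 -t 0 -c 1 -q")  # C-SVC, linear
    # problem = svmutil.svm_problem(labels, samples)
    # model = svmutil.svm_train(problem, param)
    # p_labs, p_acc, p_vals = svmutil.svm_predict(
    #     [0], [[1.0, 0.9]], model, "-q")  # p_labs[0] should be 1.0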

    def _execute(self, x):
        """ Executes the classifier on the given data vector x

        prediction value = <w, data> + b in the linear case.
        """
        data = x.view(numpy.ndarray)
        if self.svm_type == 'C-SVC':
            if self.kernel_type == 'LINEAR' and not self.multinomial:
                return super(LibSVMClassifierNode, self)._execute(x)
            else:
                # for probability output add "-b 1" as 4th parameter
                if self.multinomial:
                    try:
                        p_labs, p_acc, p_vals = svmutil.svm_predict([0], [
                            map(float, list(data[0, :]))], self.model,
                            '-q -b 1')
                    except ValueError:  # wrong options
                        p_labs, p_acc, p_vals = svmutil.svm_predict([0], [
                            map(float, list(data[0, :]))], self.model,
                            '-b 1')
                else:
                    try:
                        prediction_value = svmutil.svm_predict([0], [
                            map(float, list(data[0, :]))], self.model,
                            '-q')[2][0][0]
                    except ValueError:  # wrong options
                        prediction_value = svmutil.svm_predict([0], [
                            map(float, list(data[0, :]))],
                            self.model)[2][0][0]
                    except IndexError:
                        warnings.warn("Probably your classification failed!")
                        prediction_value = 0
                    # The new version has only one output of the score.
                    # The ordering can be obtained by model.labels and if it
                    # is not [1, 0] we have to change the sign of the score
                    # to be comparable with the old libsvm AND to do the
                    # right mapping back to the binary labels.
                    if self.model.get_labels() == [0, 1]:
                        prediction_value = -prediction_value
                # Look up class label
                # prediction_value --> {-1,1} --> {0,1} --> Labels
                if self.multinomial:
                    prediction = self.classes[int(p_labs[0])]
                    prediction_value = p_vals[0][int(p_labs[0])]
                else:
                    if prediction_value > 0:
                        prediction = self.classes[1]
                    else:
                        prediction = self.classes[0]
                prediction_vector = PredictionVector(
                    label=prediction,
                    prediction=prediction_value,
                    predictor=self)
                return prediction_vector
        elif self.svm_type == 'one-class SVM':
            # one-class! TODO: Extra Node?
            # for probability output add "-b 1" as 4th parameter
            # get prediction as mentioned above
            if not self.kernel_type == "LINEAR" and not self.multinomial:
                try:
                    prediction = svmutil.svm_predict([0], [
                        map(float, list(data[0, :]))], self.model, "-q")
                except ValueError:
                    prediction = svmutil.svm_predict([0], [
                        map(float, list(data[0, :]))], self.model)
                prediction_value = prediction[2][0][0]
                if prediction_value >= 0:
                    label = self.classes[0]
                else:
                    label = self.classes[1]
                return PredictionVector(prediction=prediction_value,
                                        predictor=self,
                                        label=label)
            else:
                result = super(LibSVMClassifierNode, self)._execute(x)
                # invert label
                result.label = \
                    self.classes[1 - self.classes.index(result.label)]
                return result
        else:
            # regression! TODO: Extra Node?
            # for probability output add "-b 1" as 4th parameter
            try:
                prediction_value = svmutil.svm_predict([0], [
                    map(float, list(data[0, :]))], self.model, "-q")
            except ValueError:
                prediction_value = svmutil.svm_predict([0], [
                    map(float, list(data[0, :]))], self.model)
            prediction_value = prediction_value[2][0][0]
            return PredictionVector(prediction=prediction_value,
                                    predictor=self)
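
    # Sign convention used in ``_execute``: libsvm reports decision values
    # with respect to the first label of its internal ordering
    # (``model.get_labels()[0]``), i.e. a positive value votes for that
    # label. If the ordering is [0, 1], a positive score would mean class 0
    # (``self.classes[0]``), so the score is negated to keep the convention
    # "positive score <=> self.classes[1]" used for the label lookup above:
    #
    # score = svmutil.svm_predict([0], [x], model, '-q')[2][0][0]
    # if model.get_labels() == [0, 1]:
    #     score = -score  # now score > 0 means self.classes[1]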

    def save_model(self, filename):
        svmutil.svm_save_model(filename, self.model)

    def load_model(self, filename):
        self._log("Loading model from %s." % filename)
        self.model = svmutil.svm_load_model(filename)

    def calculate_slack_variables(self, model):
        """ Calculate the slack variables of the classification from the given SVM model """
        self.t = []
        self.num_sv = 0
        self.num_nsv = 0
        self.inner_margin = 0
        self.ti = []
        dropped_samples = []
        dropped_labels = []
        for i in range(self.num_samples):
            # ctypes libsvm bindings
            try:
                p = svmutil.svm_predict([0], [
                    map(float, list(self.samples[i - self.num_nsv]))],
                    model, "-q")[2][0][0]
            except ValueError:
                p = svmutil.svm_predict([0], [
                    map(float, list(self.samples[i - self.num_nsv]))],
                    model)[2][0][0]
            except IndexError:
                self._log("Classification failed. "
                          "Did you specify the parameters correctly?",
                          level=logging.ERROR)
                p = 0
            if model.get_labels() == [0, 1]:
                p = -p
            # map the labels from {0, 1} to {-1, 1} and compute y * f(x)
            p *= 2 * (self.labels[i - self.num_nsv] - 0.5)
            if p > 1:
                # sample lies outside the margin: zero slack, drop it
                self.t.append(0)
                self.ti.append(0)
                dropped_samples.append(self.samples.pop(i - self.num_nsv))
                dropped_labels.append(self.labels.pop(i - self.num_nsv))
                self.num_nsv += 1
            else:
                self.t.append(1 - p)
                self.num_sv += 1
                if 1 - p < 1e-5:
                    p = 1
                    self.ti.append(0)
                else:
                    self.ti.append(1 - p)
                    self.inner_margin += 1
        # if self.store_all_samples:
        for i in range(len(dropped_samples)):
            self.samples.append(dropped_samples[i])
            self.labels.append(dropped_labels[i])
        del dropped_samples
        del dropped_labels
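
    # Slack values in numbers: with the labels mapped from {0, 1} to
    # {-1, +1}, the quantity p computed above is the margin value y * f(x).
    # p > 1 means the sample lies strictly outside the margin (slack 0);
    # p = 0.6 gives slack t = 1 - p = 0.4 (inside the margin); p < 0 means
    # the sample is additionally misclassified (t > 1).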

    def calculate_classification_vector(self, model):
        """ Calculate the classification vector w and the offset b """
        # ctypes libsvm bindings
        # TODO: getting the parameters could maybe be done in an easier way
        try:
            self.b = svmutil.svm_predict([0], [[0.0] * self.dim], model,
                                         "-q")[2][0][0]
        except ValueError:
            self.b = svmutil.svm_predict([0], [[0.0] * self.dim],
                                         model)[2][0][0]
        except IndexError:
            self._log("Classification failed. "
                      "Did you specify the parameters correctly?",
                      level=logging.ERROR)
            self.b = 0
            self.w = numpy.zeros(self.dim)
            self.features = FeatureVector(
                numpy.atleast_2d(self.w).astype(numpy.float64),
                self.feature_names)
        if model.get_labels() == [0, 1]:
            self.b = -self.b
        self.w = numpy.zeros(self.dim)
        for i in range(self.dim):
            e = [0.0] * self.dim
            e[i] = 1.0
            try:
                self.w[i] = svmutil.svm_predict([0], [e], model,
                                                "-q")[2][0][0]
            except ValueError:
                try:
                    self.w[i] = svmutil.svm_predict([0], [e],
                                                    model)[2][0][0]
                except IndexError:
                    pass
            except IndexError:
                pass
            if model.get_labels() == [0, 1]:
                self.w[i] = -self.w[i]
            # f(e_i) = <w, e_i> + b = w_i + b, so subtract b to recover w_i
            self.w[i] -= self.b
        self.features = FeatureVector(
            numpy.atleast_2d(self.w).astype(numpy.float64),
            self.feature_names)
        try:
            wf = []
            for i, feature in enumerate(self.feature_names):
                if not self.w[i] == 0:
                    wf.append((self.w[i], feature))
            wf.sort()
            w = numpy.array(wf, dtype='|S200')
        except ValueError:
            self._log('w could not be converted.', level=logging.WARNING)
        except IndexError:
            self._log('There are more feature names than features. '
                      'Please check your feature generation and input data.',
                      level=logging.CRITICAL)
            self.b = 0
            w = numpy.zeros(self.dim)
            self.w = w
        # only features with a non-zero multiplier are relevant
        self.num_retained_features = len(w)
        self.classifier_information["~~Num_Retained_Features~~"] = \
            self.num_retained_features
        self.print_w = w
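
    # Why the probing in ``calculate_classification_vector`` works: for a
    # linear model the decision function is f(x) = <w, x> + b, so evaluating
    # it on the zero vector yields f(0) = b, and on the i-th unit vector
    # f(e_i) = w_i + b. A standalone sketch (commented out; assumes a trained
    # linear libsvm ``model`` with input dimension ``dim``):
    #
    # import numpy, svmutil
    # b = svmutil.svm_predict([0], [[0.0] * dim], model, "-q")[2][0][0]
    # w = numpy.array(
    #     [svmutil.svm_predict([0], [e], model, "-q")[2][0][0] - b
    #      for e in numpy.eye(dim).tolist()])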

    def remove_no_border_points(self, retraining_required):
        """ Discard method to remove all samples from the training set that
        are not in the border of their class.

        The border is determined by a minimum distance from the center of
        the class and a maximum distance.

        :param retraining_required: flag if retraining is required (the new
            point is a potential SV or a removed one was an SV)
        """
        # get the centers of each class
        targetSamples = [s for (s, l) in zip(self.samples, self.labels)
                         if l == 1]  # self.classes.index("Target")]
        standardSamples = [s for (s, l) in zip(self.samples, self.labels)
                           if l == 0]  # self.classes.index("Standard")]

        if self.training_set_ratio == "KEEP_RATIO_AS_IT_IS":
            num_target = len(targetSamples)
            num_standard = len(standardSamples)
            num_target = 1.0 * num_target / (num_target + num_standard) * \
                self.basket_size
            num_standard = self.basket_size - num_target

        # mean vector of each class (its center)
        mTarget = numpy.mean(targetSamples, axis=0)
        mStandard = numpy.mean(standardSamples, axis=0)

        # euclidean distance between the class centers
        R = scipy.spatial.distance.euclidean(mTarget, mStandard)

        if self.show_plot:
            dim = numpy.shape(self.samples)[1]
            if dim == 2:
                self.plot_class_borders(
                    mStandard, mTarget, R,
                    self.scale_factor_small, self.scale_factor_tall)

        # get the distance of each point to its class center
        distances = []
        for i, (s, l) in enumerate(zip(self.samples, self.labels)):
            if l == self.classes.index("Target"):
                r_1 = scipy.spatial.distance.euclidean(s, mTarget)
                r_2 = scipy.spatial.distance.euclidean(s, mStandard)
                distances.append([i, s, l, r_1, r_2 / (r_1 + r_2)])
            else:
                r_1 = scipy.spatial.distance.euclidean(s, mStandard)
                r_2 = scipy.spatial.distance.euclidean(s, mTarget)
                distances.append([i, s, l, r_1, r_2 / (r_1 + r_2)])

        if self.border_handling == "USE_ONLY_BORDER_POINTS":
            # remove all points that are not in the border (i.e. within a
            # specific ring) around the center; this does not guarantee that
            # the demanded number of samples is contained in the new
            # training set
            distances = filter(lambda x: (
                self.scale_factor_small * R < x[3] <
                self.scale_factor_tall * R), distances)
            # sort according to the weight (bug fix: the weight is the fifth
            # entry, i.e. index 4; the original index 5 was out of range)
            distances.sort(key=lambda x: x[4])
            # pay attention to the basket size
            distances = distances[:self.basket_size]
        elif self.border_handling == "USE_DIFFERENCE":
            # take the points that differ most:
            # sort by distance, then sort by weight
            distances.sort(key=lambda x: (
                abs(x[3] - ((self.scale_factor_tall -
                             self.scale_factor_small) / 2.0) * R) *
                (x[0] != len(self.samples)), x[4]))

        if self.border_handling == "USE_ONLY_BORDER_POINTS":
            # pay attention to the basket size
            distances = distances[:self.basket_size]
        elif self.training_set_ratio == "KEEP_RATIO_AS_IT_IS":
            distances_tmp = []
            for d in distances:
                if d[2] == 1 and num_target > 0:
                    num_target -= 1
                    distances_tmp.append(d)
                elif d[2] == 0 and num_standard > 0:
                    num_standard -= 1
                    distances_tmp.append(d)
            distances = distances_tmp
        elif self.training_set_ratio == "BALANCED_RATIO":
            distances_tmp = []
            num_target = 0
            num_standard = 0
            for d in distances:
                if d[2] == 1 and num_target < (self.basket_size / 2):
                    num_target += 1
                    distances_tmp.append(d)
                elif d[2] == 0 and num_standard < (self.basket_size / 2):
                    num_standard += 1
                    distances_tmp.append(d)
            distances = distances_tmp
        else:
            # pay attention to the basket size
            distances = distances[:self.basket_size]

        [idxs, _, _, _, _] = zip(*distances)
        retraining_required = self.remove_samples(list(
            set(numpy.arange(self.num_samples)) - set(idxs))) \
            or retraining_required
        return retraining_required
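
    # Border criterion in numbers (illustrative values): with the class
    # centers at distance R = 2.0, scale_factor_small = 0.25 and
    # scale_factor_tall = 0.75, "USE_ONLY_BORDER_POINTS" keeps only samples
    # whose distance r_1 to their own class center satisfies
    # 0.5 < r_1 < 1.5, i.e. samples inside a ring around the center.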

    def add_new_sample(self, data, class_label=None, default=False):
        """ Add a new sample to the training set

        :param data: A new sample for the training set.
        :type data: list of float
        :param class_label: The label of the new sample.
        :type class_label: str
        :param default: Specifies if the sample is added to the current
            training set or to a future training set.
        :type default: bool
        """
        # use a separate knowledge base when old samples will be totally
        # removed
        if (self.discard_type == "CDT" or self.discard_type == "INC_BATCH") \
                and default is False:
            self.future_samples.append(data)
            self.future_labels.append(class_label)
            # the sample size for the new knowledge base is limited to the
            # basket size, so pop the oldest samples
            while len(self.future_samples) > self.basket_size:
                self.future_samples.pop(0)
                self.future_labels.pop(0)
        else:
            # add the new data
            self._train_sample(data, class_label)
            self.num_samples += 1

    def remove_samples(self, idxs):
        """ Remove the samples at the given indices from the training set

        :param idxs: Indices of the samples to remove.
        :type idxs: list of int
        :rtype: bool - True if a support vector was removed.
        """
        idxs.sort(reverse=True)
        for idx in idxs:
            self.samples.pop(idx)
            self.labels.pop(idx)
            if self.add_type == "UNSUPERVISED_PROB":
                self.decisions.pop(idx)
            self.num_samples -= 1
        return True

    def visualize(self):
        """ Show the training samples, the SVs and the current decision function """
        dim = numpy.shape(self.samples)[1]
        if dim == 2:
            ax = plt.gca()
            ax.set_xlabel(r'$x_0$')
            ax.set_ylabel(r'$x_1$')
            self.plot_samples()
            self.plot_hyperplane()
        elif dim == 3:
            ax = plt.gca(projection='3d')
            ax.set_xlabel(r'$x_0$')
            ax.set_ylabel(r'$x_1$')
            ax.set_zlabel(r'$x_2$')
            self.plot_samples_3D()
            self.plot_hyperplane_3D()
        if dim == 2 or dim == 3:
            plt.draw()
            if self.save_plot is True:
                imagename = "%s/tmp%010d.png" \
                    % (self.plot_storage, self.m_counter_i)
                self.m_counter_i += 1
                plt.savefig(imagename)
@NoOptimizationParameter("use_list") @ChoiceParameter("svm_type", choices=[0, 1, 2, 3, 4, 5, 6, 7])
class LiblinearClassifierNode(LibSVMClassifierNode):
    """ Code integration of the external linear SVM classifier program LIBLINEAR

    http://www.csie.ntu.edu.tw/~cjlin/liblinear/

    LIBLINEAR was implemented by the LIBSVM programmers. It is important to
    mention that here (partially) the same modified SVM model is used as in
    the SOR variant
    (:mod:`pySPACE.missions.nodes.classification.svm_variants.SOR`).

    **Parameters**

    Some general parameters are only documented in the
    :class:`RegularizedClassifierBase
    <pySPACE.missions.nodes.classification.base.RegularizedClassifierBase>`.

    :svm_type:
        :0: L2-regularized logistic regression (primal)
        :1: L2-regularized L2-loss support vector classification (dual)
        :2: L2-regularized L2-loss support vector classification (primal)
        :3: L2-regularized L1-loss support vector classification (dual)
        :4: multi-class support vector classification by Crammer and Singer
        :5: L1-regularized L2-loss support vector classification
        :6: L1-regularized logistic regression
        :7: L2-regularized logistic regression (dual)

        Type 3 is the standard SVM, with b used in the target function as a
        component of w (offset = True) or b set to zero.

        (*optional, default: 3*)

    :tolerance:
        Tolerance of the termination criterion, same default as in libsvm.

        .. todo:: Same variable name in the upper class for epsilon-SVR
                  instead of tolerance.

        (*optional, default: 0.001*)

    :offset:
        If True, x is internally replaced by (x, 1) to get an artificial
        offset b. Probably, in this case b is regularized. Otherwise, the
        offset b in the classifier function (w^T x + b) is set to zero.

        (*optional, default: True*)

    :store:
        Parameter of the super-class. If *store* is True, the classification
        vector is stored as a feature vector.

        (*optional, default: False*)

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : lSVM
            parameters :
                class_labels : ["Target", "Standard"]

    :Author: Mario Michael Krell (mario.krell@dfki.de)
    :Created: 2012/01/19
    """

    def __init__(self, tolerance=0.001, svm_type=3, offset=True, **kwargs):
        if offset:
            offset = 1
        else:
            offset = -1
        super(LiblinearClassifierNode, self).__init__(use_list=True, **kwargs)
        # svm_type is renamed to alg_num such that C-SVC is still used in the
        # super class; this is currently especially advantageous in the
        # execute method
        self.set_permanent_attributes(
            tolerance=tolerance, alg_num=svm_type, offset=offset)

    def _train(self, data, class_label):
        """ Train the classifier on the given data

        It is assumed that the class_label parameter contains information
        about the true class the data belongs to.

        .. todo:: Check in a new version of liblinear if ndarrays are
                  accepted and the method from libsvm can be used.
        """
        self._train_phase_started = True
        if self.feature_names is None:
            try:
                self.feature_names = data.feature_names
            except AttributeError as e:
                warnings.warn(
                    "Use a feature generator node before a "
                    "classification node.")
                raise e
        if self.dim is None:
            self.dim = data.shape[1]
        if self.samples is None:
            self.samples = []
        if self.labels is None:
            self.labels = []
        if class_label not in self.classes:
            warnings.warn(
                "Please give the expected classes to the classifier! "
                "%s unknown. Therefore, define the variable 'class_labels' "
                "in your spec file, where you use your classifier. "
                "For further information refer to the node documentation."
                % class_label)
            self.classes.append(class_label)
            self.set_permanent_attributes(classes=self.classes)
        # collect the data
        data_array = data.view(numpy.ndarray)
        self.samples.append(map(float, list(data_array[0, :])))
        self.labels.append(self.classes.index(class_label))

    def _stop_training(self, debug=False):
        """ Finish the training, i.e. train the SVM """
        self._complete_training(debug)
        self.relabel_training_set()

    def _complete_training(self, debug=False):
        """ Forward the data to external training and extract classifier information """
        if self.str_label_function is not None:
            self.label_function = eval(self.str_label_function)
            self.labels = self.label_function()
        options = "-c %.42f -e %.42f -s %d -B %d" % \
            (self.complexity, self.tolerance, self.alg_num, self.offset)
        for i, w in enumerate(self.weight):
            options += " -w%d %.42f" % (i, w)
        if not self.debug:
            options += " -q"
            self._log("Liblinear is now quiet!")
        import liblinearutil
        param = liblinearutil.parameter(options)
        problem = liblinearutil.problem(self.labels, self.samples)
        model = liblinearutil.train(problem, param)
        self.calculate_classification_vector(model)
        if self.debug:
            print self.print_w
            print self.b
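
    # Standalone sketch of the liblinear call sequence used above on toy
    # data (commented out; ``liblinearutil`` ships with LIBLINEAR, the
    # sample values are made up):
    #
    # import liblinearutil
    # labels = [0, 0, 1, 1]
    # samples = [[0.0, 0.0], [0.1, 0.1], [0.9, 1.0], [1.0, 1.0]]
    # # -s 3: L2-regularized L1-loss SVC (dual), -B 1: artificial offset
    # param = liblinearutil.parameter("-s 3 -c 1 -B 1 -q")
    # problem = liblinearutil.problem(labels, samples)
    # model = liblinearutil.train(problem, param)
    # p_labs, p_acc, p_vals = liblinearutil.predict(
    #     [0], [[1.0, 0.9]], model, "-q")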

    def calculate_classification_vector(self, model):
        """ Calculate the classification vector w and the offset b from the given SVM model """
        # ctypes liblinear bindings
        if self.offset == 1:
            self.b = model.w[self.dim]
        else:
            self.b = 0
        self.w = numpy.zeros(self.dim)
        for i in range(self.dim):
            self.w[i] = model.w[i]
        if model.get_labels() == [0, 1]:
            self.w = -1 * self.w
            self.b = -1 * self.b
        self.features = FeatureVector(
            numpy.atleast_2d(self.w).astype(numpy.float64),
            self.feature_names)
        try:
            wf = []
            for i, feature in enumerate(self.feature_names):
                if not self.w[i] == 0:
                    wf.append((self.w[i], feature))
            wf.sort()
            w = numpy.array(wf, dtype='|S20')
        except ValueError:
            self._log('w could not be converted.', level=logging.WARNING)
        except IndexError:
            self._log('There are more feature names than features. '
                      'Please check your feature generation and input data.',
                      level=logging.CRITICAL)
            self.b = 0
            w = numpy.zeros(self.dim)
            self.w = w
        # only features with a non-zero multiplier are relevant
        self.num_retained_features = len(w)
        self.classifier_information["~~Num_Retained_Features~~"] = \
            self.num_retained_features
        self.print_w = w
_NODE_MAPPING = {"LibSVM_Classifier": LibSVMClassifierNode, "2SVM": LibSVMClassifierNode, "lSVM": LiblinearClassifierNode, }