# Source code for pySPACE.missions.nodes.visualization.feature_vector_vis

""" Visualize :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector` elements"""
import itertools
import os
import warnings
import pylab
import numpy
from collections import defaultdict
from pySPACE.resources.data_types.prediction_vector import PredictionVector
from pySPACE.tools.filesystem import create_directory

# MDP is an optional dependency: only LLEVisNode needs mdp.nodes (LLENode).
# Catch only ImportError -- a bare "except:" would also swallow
# SystemExit/KeyboardInterrupt and hide unrelated startup errors.
try:
    import mdp.nodes
except ImportError:
    # LLEVisNode._stop_training will raise NameError if MDP is missing;
    # all other nodes in this module work without it.
    pass

from pySPACE.missions.nodes.base_node import BaseNode


class LLEVisNode(BaseNode):
    """ Show a 2d scatter plot of all :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector` based on Locally Linear Embedding (LLE) from MDP

    This node collects all training examples it obtains along with their
    label. It then computes an embedding of all these examples in a 2d
    space using the "Locally Linear Embedding" algorithm and plots a
    scatter plot of the examples in this space.

    **Parameters**

        :neighbors:
            The number of neighbor vectors that should be considered for each
            instance during locally linear embedding

            (*optional, default: 15*)

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : Time_Series_Source
        -
            node : All_Train_Splitter
        -
            node : Time_Domain_Features
        -
            node : LLE_Vis
            parameters :
                neighbors : 10
        -
            node : Nil_Sink

    Known Issues:
    This node will use pylab.show() to show the figure. There is no store
    method implemented yet. On Macs, pylab.show() might sometimes fail due to
    a wrong plotting backend. A possible workaround in that case is to
    manually set the plotting backend to 'MacOSX'. This has to be done before
    pylab is imported, so one can temporarily add
    "import matplotlib; matplotlib.use('MacOSX')" to the very beginning of
    launch.py.

    :Author: Jan Hendrik Metzen (jhm@informatik.uni-bremen.de)
    :Created: 2009/07/07
    """
    input_types = ["FeatureVector"]

    def __init__(self, neighbors=15, **kwargs):
        super(LLEVisNode, self).__init__(**kwargs)

        self.set_permanent_attributes(
            neighbors=neighbors,
            # A set of colors that can be used to distinguish different
            # classes; one color is popped per newly seen class, so only
            # two distinct classes are supported out of the box.
            colors=set(["r", "b"]),
            # A mapping from class label to its color in the plot
            class_colors=dict(),
            # Remembers the classes (colors) of the instances seen
            instance_colors=[],
            # Collected training instances (stacked in _stop_training)
            instances=[])

        # Interactive mode so the figure can be shown during training
        pylab.ion()
        figure = pylab.figure(figsize=(21, 11))
        figure.subplots_adjust(left=0.01, bottom=0.01, right=0.99, top=0.99,
                               wspace=0.2, hspace=0.2)
        pylab.draw()

    def is_trainable(self):
        """ Returns whether this node is trainable. """
        # Though this node is not really trainable, it returns true in order
        # to get trained. The reason is that during this training phase,
        # it visualizes all samples that are passed as arguments
        return True

    def is_supervised(self):
        """ Returns whether this node requires supervised training """
        return True

    def _get_train_set(self, use_test_data):
        """ Returns the data that can be used for training """
        # We take data that is provided by the input node for training
        # NOTE: This might involve training of the preceding nodes
        train_set = self.input_node.request_data_for_training(use_test_data)

        # Add the data provided by the input node for testing to the
        # training set
        # NOTE: This node is not really learning but just collecting all
        #       examples. Because of that it must take
        #       all data for training (even when use_test_data is False)
        train_set = itertools.chain(train_set,
                                    self.input_node.request_data_for_testing())
        return train_set

    def _train(self, data, label):
        """ This node is not really trained but uses the labeled examples to
        generate a scatter plot.
        """
        # Determine color of this class if not yet done
        if label not in self.class_colors.keys():
            self.class_colors[label] = self.colors.pop()

        # Store the given example along with its class (encoded in the color)
        self.instances.append(data)
        self.instance_colors.append(self.class_colors[label])

    def _stop_training(self, debug=False):
        """ Stops the training, i.e. create the 2d representation

        Uses the Locally Linear Embedding algorithm to create a 2d
        representation of the data and creates a 2d scatter plot.
        """
        instances = numpy.vstack(self.instances)

        # Compute LLE and project the data
        # NOTE: requires the optional MDP package (see module-level import)
        lle_projected_data = mdp.nodes.LLENode(k=self.neighbors,
                                               output_dim=2)(instances)

        # Create scatter plot of the projected data
        pylab.scatter(lle_projected_data[:, 0], lle_projected_data[:, 1],
                      c=self.instance_colors)
        pylab.show()

    def _execute(self, data):
        # We simply pass the given data on to the next node
        return data
class MnistVizNode(BaseNode):
    """ Node for plotting MNIST Data

    **Parameters**

        :mode:
            One of *FeatureVector*, *PredictionVector*, and *nonlinear*.

            If *FeatureVector* is taken, the data is assumed to be in the
            28x28 format and can be visualized like the original data.

            If *PredictionVector* is chosen, the affine backtransformation
            approach is used. If possible, the visualization is enhanced by
            the average data found in the data history at the *history_index*.

            If *nonlinear* is used, a nonlinear processing chain is assumed
            for calculating the backtransformation with derivatives at the
            sample found at the *history_index*.

            If not specified, the input data type is used.

            (*recommended, default: input type*)

        :history_index:
            Index for determining the averaging data or the data for
            calculating the derivative from prediction vectors.
            To save the respective data, the *keep_in_history* parameter
            has to be used, in the node, which produces the needed data.
            This can be a Noop node at the beginning.
            By default the last stored sample is used.

            (*recommended, default: last sample*)

        :max_samples:
            In case of the *nonlinear* mode, a backtransformation graphic
            must be generated for every data sample. To reduce memory usage,
            only the first *max_samples* training samples are used.

            (*optional, default: 10*)

    **Exemplary Call**

    .. code-block:: yaml

        -   node : MnistViz
    """

    def __init__(self, mode=None, history_index=0, max_samples=10, **kwargs):
        super(MnistVizNode, self).__init__(**kwargs)
        self.set_permanent_attributes(
            averages=None,       # label -> summed 28x28 arrays (lazy init)
            counter=None,        # label -> number of summed samples
            mode=mode,           # resolved from first data type if None
            history_index=history_index,
            inputs=None,         # collected raw samples (lazy init)
            max_samples=max_samples,
        )

    def _train(self, data, label):
        """ Average data with labels (no real training)"""
        # Resolve the mode lazily from the first sample's type
        if self.mode is None:
            self.mode = type(data).__name__
        if (self.mode == "PredictionVector") and data.has_history():
            # Replace the prediction vector by the stored history element
            # (history_index is 1-based; 0 yields index -1 == last sample)
            new_data = data.history[self.history_index - 1]
            del(data)
            data = new_data
        if self.mode == "nonlinear":
            if self.inputs is None:
                self.inputs = []
            self.inputs.append(data)
        # BUGFIX: was `self.mode in "FeatureVector"`, which is a substring
        # test (also true for e.g. "Feature" or ""); equality is intended.
        if self.mode == "FeatureVector" or (
                (self.mode == "PredictionVector") and
                not type(data) == PredictionVector):
            if self.averages is None or self.counter is None:
                self.averages = defaultdict(lambda: numpy.zeros((28, 28)))
                self.counter = defaultdict(float)
            # Average the given example along with its class
            data.view(numpy.ndarray)
            number_array = data.reshape((28, 28))
            self.averages[label] += number_array
            self.counter[label] += 1
            if self.inputs is None:
                self.inputs = []
            # Keep at most max_samples raw samples for the sample plots
            if not len(self.inputs) == self.max_samples:
                self.inputs.append(number_array)

    def store_state(self, result_dir, index=None):
        """ Main method which generates and stores the graphics """
        if self.store:
            # set the specific directory for this particular node
            node_dir = os.path.join(result_dir, self.__class__.__name__)
            # do we have an index-number?
            if not index is None:
                # add the index-number...
                node_dir += "_%d" % int(index)
            create_directory(node_dir)
            # contour colors per class, indexed by label position
            colors = ["white", "black", "blue", "red"]
            if self.mode == "FeatureVector":
                # one averaged picture per class label
                for label in self.averages:
                    self.averages[label] *= 1.0 / self.counter[label]
                    # http://wiki.scipy.org/Cookbook/Matplotlib/Show_colormaps
                    pylab.figure(figsize=(4, 4), dpi=300)
                    pylab.contourf(self.averages[label], 50, cmap="jet",
                                   origin="image")
                    pylab.xticks(())
                    pylab.yticks(())
                    # pylab.colorbar()
                    f_name = str(node_dir)+str(os.sep)+str(label)+"average"
                    pylab.savefig(f_name + ".png", bbox_inches='tight')
                # plus one picture per stored raw sample
                for index, input in enumerate(self.inputs):
                    pylab.figure(figsize=(4, 4), dpi=300)
                    pylab.contourf(input, 50, cmap="binary", origin="image")
                    pylab.xticks(())
                    pylab.yticks(())
                    # pylab.colorbar()
                    f_name = str(node_dir)+str(os.sep)+"sample"+str(index)
                    pylab.savefig(f_name + ".png", bbox_inches='tight')
            elif self.mode == "PredictionVector":
                # affine backtransformation: visualize classifier weights
                trafos = self.get_previous_transformations()[-1]
                trafo = trafos[0]
                trafo.view(numpy.ndarray)
                covariance = trafos[1][1]
                trafo_covariance = numpy.dot(covariance, trafo.flatten())

                # covariance free picture
                number_array = trafo.reshape((28, 28))
                fig = pylab.figure(figsize=(4, 4), dpi=300)
                pylab.contourf(number_array, 50, cmap="jet", origin="image",
                               vmax=abs(number_array).max(),
                               vmin=-abs(number_array).max())
                pylab.xticks(())
                pylab.yticks(())
                # pylab.colorbar()
                if not self.averages is None:
                    # overlay class-average contours on the weight picture
                    for label in self.averages:
                        self.averages[label] *= 1.0 / self.counter[label]
                        pylab.contour(
                            self.averages[label], levels=[50],
                            colors=colors[self.averages.keys().index(label)],
                            linewidths=3, origin="image")
                f_name = str(node_dir)+str(os.sep)+"classifier"
                pylab.savefig(f_name + ".png", bbox_inches='tight')
                pylab.close(fig)

                # covariance picture (similar code as before)
                number_array = trafo_covariance.reshape((28, 28))
                fig = pylab.figure(figsize=(4, 4), dpi=300)
                pylab.contourf(number_array, 50, cmap="jet", origin="image",
                               vmax=abs(number_array).max(),
                               vmin=-abs(number_array).max())
                pylab.xticks(())
                pylab.yticks(())
                # pylab.colorbar()
                if not self.averages is None:
                    for label in self.averages:
                        pylab.contour(
                            self.averages[label], levels=[50],
                            linewidths=3,
                            colors=colors[self.averages.keys().index(label)],
                            origin="image")
                f_name = str(node_dir)+str(os.sep)+"classifier_cov"
                pylab.savefig(f_name + ".png", bbox_inches='tight')
                pylab.close(fig)
            elif self.mode == "nonlinear":
                # one backtransformation graphic per stored sample,
                # additionally collected into a single PDF
                from matplotlib.backends.backend_pdf import PdfPages
                import datetime
                with PdfPages(str(node_dir)+str(os.sep)+'sample_vis.pdf') \
                        as pdf:
                    index = 0
                    for sample in self.inputs:
                        index += 1
                        base_vector = sample.history[self.history_index-1]
                        trafos = \
                            self.get_previous_transformations(base_vector)[-1]
                        trafo = trafos[0]
                        trafo.view(numpy.ndarray)
                        covariance = trafos[1][1]
                        trafo_covariance = \
                            numpy.dot(covariance, trafo.flatten())
                        covariance_array = trafo_covariance.reshape((28, 28))

                        base_array = base_vector.reshape((28, 28))
                        trafo_array = trafo.reshape((28, 28))

                        # fig = pylab.figure(figsize=(5, 5), dpi=300)
                        # pylab.suptitle(sample.label)
                        # SUBPLOT 1: plot of the derivative
                        # pylab.subplot(2, 2, 1)
                        # pylab.title("Backtransformation")
                        fig = pylab.figure(figsize=(4, 4), dpi=300)
                        pylab.contourf(trafo_array, 50, cmap="jet",
                                       origin="image",
                                       vmax=abs(trafo_array).max(),
                                       vmin=-abs(trafo_array).max())
                        pylab.xticks(())
                        pylab.yticks(())
                        # pylab.colorbar()
                        pylab.contour(
                            base_array, levels=[50], colors=colors[1],
                            origin="image")
                        # store and clean
                        f_name = str(node_dir) + str(os.sep) + "classifier_" \
                            + str(index)
                        pylab.savefig(f_name + ".png", bbox_inches='tight')
                        pylab.close(fig)
                        fig = pylab.figure(figsize=(4, 4), dpi=300)
                        # SUBPLOT 2: plot of the derivative multiplied with
                        # covariance
                        # pylab.subplot(2,2,2)
                        pylab.title("Backtransformation times Covariance")
                        pylab.contourf(covariance_array, 50, cmap="jet",
                                       origin="image",
                                       vmax=abs(covariance_array).max(),
                                       vmin=-abs(covariance_array).max())
                        pylab.xticks(())
                        pylab.yticks(())
                        # pylab.colorbar()
                        pylab.contour(
                            base_array, levels=[50], colors=colors[1],
                            origin="image")
                        # # SUBPLOT 2: plot of the original feature vector
                        # pylab.subplot(2,2,2)
                        # pylab.title("Original data")
                        #
                        # pylab.contourf(base_array, 50, cmap="binary", origin="image")
                        # pylab.xticks(())
                        # pylab.yticks(())
                        # pylab.colorbar()
                        # # SUBPLOT 3: plot of the difference between vectors
                        # pylab.subplot(2,2,3)
                        # pylab.title("Addition")
                        #
                        # pylab.contourf(trafo_array+base_array, 50, cmap="spectral", origin="image")
                        # pylab.xticks(())
                        # pylab.yticks(())
                        # pylab.colorbar()
                        #
                        # # SUBPLOT 4: plot of the difference between vectors
                        # pylab.subplot(2,2,4)
                        # pylab.title("Subtraction")
                        #
                        # pylab.contourf(base_array-trafo_array, 50, cmap="spectral", origin="image")
                        # pylab.xticks(())
                        # pylab.yticks(())
                        # pylab.colorbar()
                        #
                        pdf.savefig(fig, bbox_inches='tight')
                        # store and clean
                        f_name = str(node_dir) + str(os.sep) + \
                            "classifier_cov_" + str(index)
                        pylab.savefig(f_name + ".png", bbox_inches='tight')
                        pylab.close(fig)
                        if index == self.max_samples:
                            break
                    # d = pdf.infodict()
                    # d['Title'] = 'Sample visualization'
                    # # d['Author'] = ''
                    # # d['Subject'] = ''
                    # # d['Keywords'] = ''
                    # d['CreationDate'] = datetime.datetime.today()
                    # d['ModDate'] = datetime.datetime.today()
            pylab.close('all')

    def is_trainable(self):
        """ Labels are required for visualization """
        return True

    def is_supervised(self):
        """ Labels are required for visualization """
        return True
# Alias table: lets node specification files reference this node by the
# short name "LLE_Vis" instead of the class name LLEVisNode.
_NODE_MAPPING = {"LLE_Vis": LLEVisNode}