Source code for pySPACE.missions.nodes.source.random_time_series_source

""" Generate random data for TimeSeries """

from pySPACE.missions.nodes.base_node import BaseNode
from pySPACE.missions.nodes.source.time_series_source import TimeSeriesSourceNode
from pySPACE.tools.memoize_generator import MemoizeGenerator
from pySPACE.resources.data_types.time_series import TimeSeries

import random
import numpy


class RandomTimeSeriesSourceNode(TimeSeriesSourceNode):
    """ Generate random data and act as a source for windowed TimeSeries

    This node acts as a source for windowed TimeSeries. The TimeSeries
    are generated randomly according to the given parameters and
    forwarded.

    The time series are generated by the given generating functions.
    The class label of each instance is chosen by comparing the value
    returned by the class choice function with a given threshold.
    Only two classes are supported by now.

    **Parameters**

        :num_instances:
            The number of instances to be generated.

            (*optional, default: 20*)

        :generating_function_class_0:
            A function to generate data for class 0.
            Receives an index, which states the
            number of already generated samples.

            (*optional, default: lambda i: numpy.ones((2,2))*i*)

        :generating_function_class_1:
            A function to generate data for class 1.
            Receives an index, which states the
            number of already generated samples.

            (*optional, default: lambda i: numpy.ones((2,2))*i*)

        :channel_names: Channel names of the time series objects.

            (*optional, default: ["X", "Y"]*)

        :class_labels: The class labels of the generated time series.
            Exactly two labels have to be given.

            (*optional, default: ['A', 'B']*)

        :class_choice_function:
            A function without arguments that returns the number r,
            which is compared with *choice_threshold* to choose the class
            of each generated instance.

            (*optional, default: random.random*)

        :choice_threshold:
            The threshold class assignment. The classes are
            generated randomly by generating a random number r
            (between 0 and 1 for the default choice function).
            If r < threshold, the class label is
            class_labels[0], and class_labels[1] otherwise.

            (*optional, default: 0.33*)

        :sampling_frequency:
            Sampling frequency of the generated time series.

            (*optional, default: 2*)

        :random:
            If true, the order of the data is randomly shuffled.

            .. note:: This is not a named argument of this node's
                      constructor and it is not evaluated in this class.
                      If given, it is forwarded with the other keyword
                      arguments to the base class (to be confirmed
                      against the base class implementation).

            (*optional, default: True*)

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : RandomTimeSeriesSource

    :Author: Hendrik Woehrle (hendrik.woehrle@dfki.de)
    :Created: 2010/09/22
    """

    # NOTE: The list defaults below are shared between all calls that rely on
    # them; they are only stored and read here and must not be mutated in place.
    def __init__(self, num_instances = 20,
                 generating_function_class_0 = lambda i: numpy.ones((2,2))*i,
                 generating_function_class_1 = lambda i: numpy.ones((2,2))*i,
                 channel_names = ["X", "Y"],
                 class_labels = ['A','B'],
                 class_choice_function = random.random,
                 choice_threshold = 0.33,
                 sampling_frequency = 2,
                 **kwargs):
        super(RandomTimeSeriesSourceNode, self).__init__(**kwargs)

        # There is no real data set behind this node, so we create a dummy
        # collection object that only carries the attributes set below.
        class DummyObject(object): pass
        collection = DummyObject()
        collection.meta_data = {'runs' : 1}
        collection.data = {}

        # Only binary classification is supported by now.
        # (Kept as an assertion so that callers still see an AssertionError;
        # be aware that assertions are skipped when Python runs with -O.)
        assert len(class_labels) == 2, \
            "RandomTimeSeriesSourceNode requires exactly two class labels."

        self.set_permanent_attributes(
            collection = collection,
            num_instances = num_instances,
            generating_function_class_0 = generating_function_class_0,
            generating_function_class_1 = generating_function_class_1,
            channel_names = channel_names,
            class_labels = class_labels,
            class_choice_function = class_choice_function,
            choice_threshold = choice_threshold,
            sampling_frequency = sampling_frequency)

    def generate_random_data(self):
        """ Generate *num_instances* labeled time series objects

        This method is invoked by the train and test data generation
        functions. For each index i, the class choice function is called
        once. If the returned value is smaller than the choice threshold,
        the data is created by the generating function of class 0 and
        labeled with the first class label, otherwise the generating function
        of class 1 and the second class label are used.

        Returns a list of (TimeSeries, label) tuples.
        """
        generated_data = []

        for i in range(self.num_instances):
            choice = self.class_choice_function()

            if choice < self.choice_threshold:
                input_array = self.generating_function_class_0(i)
                label = self.class_labels[0]
            else:
                input_array = self.generating_function_class_1(i)
                label = self.class_labels[1]

            time_series = TimeSeries(
                input_array = input_array,
                channel_names = self.channel_names,
                sampling_frequency = self.sampling_frequency)
            generated_data.append((time_series, label))

        return generated_data

    def _create_memoized_random_data(self):
        """ Generate a new random data set wrapped in a MemoizeGenerator

        Shared helper of the test and training data requests.
        """
        generated_data = self.generate_random_data()

        # Create a generator that emits the (sample, label) tuples
        data_generator = ((sample, label)
                          for (sample, label) in generated_data)

        return MemoizeGenerator(data_generator, caching = True)

    def request_data_for_testing(self):
        """ Return the data that can be used for testing of subsequent nodes

        The random test data is generated on the first request only and
        kept in a caching MemoizeGenerator. Every call returns
        the result of *fresh()* of this memoized generator.
        """
        # If we haven't generated the data for testing yet
        if self.data_for_testing is None:
            self.data_for_testing = self._create_memoized_random_data()

        # Return a fresh copy of the generator
        return self.data_for_testing.fresh()

    def request_data_for_training(self, use_test_data):
        """ Return the data that can be used for training of subsequent nodes

        If *use_test_data* is true, the data for testing is returned instead
        (see :func:`request_data_for_testing`). Otherwise, a separate random
        data set is generated on the first request only and kept in a
        caching MemoizeGenerator. Every call returns the result of
        *fresh()* of this memoized generator.
        """
        if use_test_data:
            return self.request_data_for_testing()

        # If we haven't generated the data for training yet
        if self.data_for_training is None:
            self.data_for_training = self._create_memoized_random_data()

        # Return a fresh copy of the generator
        return self.data_for_training.fresh()

    def get_metadata(self, key):
        """ This source node does not contain collection meta data.

        Returns None for every *key*.
        """
        return None