# Source code for pySPACE.missions.nodes.sink.nil_sink
""" Store only meta data of dataset
This is useful to save
disk space in scenarios, where only the output of any preceding node
is desired, and where there is no need to save the whole data set again.
"""
from pySPACE.missions.nodes.sink.time_series_sink import TimeSeriesSinkNode
from pySPACE.resources.dataset_defs.dummy import DummyDataset
class NilSinkNode(TimeSeriesSinkNode):
    """ Store only meta information and perform training and testing

    The node inherits from TimeSeriesSinkNode, but instead of
    collecting the data, this node passes a DummyDataset.

    **Parameters**

    **Exemplary Call**

    .. code-block:: yaml

        -
            node: Nil_Sink

    :Input: any
    :Output: DummyDataset
    :Author: David Feess (david.feess@dfki.de)
    :Created: 2010/03/30
    """
    input_types = ["TimeSeries", "FeatureVector", "PredictionVector"]

    def __init__(self, **kwargs):
        super(NilSinkNode, self).__init__(**kwargs)
        # Register the dataset via set_permanent_attributes *after* the super
        # call, consistent with OnlyTrainSinkNode, so the attribute is part of
        # the node's permanent state and survives resets between splits.
        # (Previously this was assigned as a plain attribute before __init__.)
        self.set_permanent_attributes(dummy_collection=DummyDataset())

    def process_current_split(self):
        """ Request the data from the input node and count splits. """
        # Exhaust both generators; the yielded (data, label) pairs are
        # deliberately discarded -- only the processing side effects matter.
        for _, _ in self.input_node.request_data_for_training(False):
            pass
        for _, _ in self.input_node.request_data_for_testing():
            pass
        # Count splits for the meta data. Usually this is done by
        # BaseDataset.add_sample. But here, obviously, no samples are added.
        if self.current_split + 1 > self.dummy_collection.meta_data["splits"]:
            self.dummy_collection.meta_data["splits"] = self.current_split + 1

    def get_result_dataset(self):
        """ Return the empty dummy collection """
        return self.dummy_collection
class OnlyTrainSinkNode(NilSinkNode):
    """ Store only meta information and perform training but not testing

    The node performs only training on the node chain,
    so that the test procedure can be performed manually,
    e.g. for debug and testing reasons.
    The node is very similar to the NilSinkNode.

    .. todo:: Merge the nil-nodes
    .. todo:: Change name to more meaningful.

    **Parameters**

    **Exemplary Call**

    .. code-block:: yaml

        -
            node: Only_Train_Sink

    :Author: Hendrik Woehrle (hendrik.woehrle@dfki.de)
    :Created: 2011/07/14
    """
    def __init__(self, **kwargs):
        super(OnlyTrainSinkNode, self).__init__(**kwargs)
        self.set_permanent_attributes(dummy_collection=DummyDataset())

    def process_current_split(self):
        """ Request the data from the input node and count splits """
        # Drain the training generator; each yielded (sample, label) pair is
        # discarded -- only the side effects of running the chain matter.
        for _sample, _label in self.input_node.request_data_for_training(False):
            pass
        # Track the split count in the meta data ourselves, because
        # BaseDataset.add_sample (which normally does this) is never called.
        seen_splits = self.current_split + 1
        if seen_splits > self.dummy_collection.meta_data["splits"]:
            self.dummy_collection.meta_data["splits"] = seen_splits

    def request_data_for_testing(self):
        """ Request data for testing, just call the predecessors method

        This is possible, since this node does not process any data and slightly
        shortens processing time.
        """
        return self.input_node.request_data_for_testing()
# Map the YAML node-specification names onto their implementing classes.
_NODE_MAPPING = dict(Nil_Sink=NilSinkNode,
                     Only_Train_Sink=OnlyTrainSinkNode)