Source code for pySPACE.missions.nodes.feature_selection.random_feature_selection

""" Randomly select a number of features 

.. todo:: Generalize this node or merge it with another feature selection node.
"""

import os
import cPickle
import random

from pySPACE.missions.nodes.base_node import BaseNode
from pySPACE.resources.data_types.feature_vector import FeatureVector

from pySPACE.tools.filesystem import  create_directory

class RandomFeatureSelectionNode(BaseNode):
    """ Randomly select a given number of features

    The selection is either loaded from a pickle file or drawn at random
    (without replacement) the first time a feature vector is processed.
    Afterwards the same feature indices are used for every feature vector.

    **Parameters**

        :num_retained_features:
            The number of features that should be retained by the node.
            This information must be specified if
            selected_features_path is not specified.

            (*optional, default: None*)

        :selected_features_path:
            An absolute path from which the selected features are loaded.
            If not specified, the features are drawn randomly when the
            first feature vector is processed. In this case,
            num_retained_features must be specified.

            (*optional, default: None*)

    **Exemplary Call**

    .. code-block:: yaml

        -
            node : RandomFeatureSelection
            parameters :
                num_retained_features : 1

    :Author: Jan Hendrik Metzen (jhm@informatik.uni-bremen.de)
    :Created: 2009/02/03
    """

    def __init__(self, num_retained_features=None,
                 selected_features_path=None, **kwargs):
        """ Set up the node and optionally load a stored feature selection

        If *selected_features_path* is given, the pickled sequence of
        feature indices is loaded from it. If *num_retained_features* is
        given as well, the loaded sequence is truncated to that length; a
        warning is issued when fewer indices are available than requested.
        """
        super(RandomFeatureSelectionNode, self).__init__(**kwargs)

        retained_feature_indices = None
        # Load the selected feature indices from file if requested
        if selected_features_path is not None:
            # NOTE: unpickling can execute arbitrary code. Only pass paths
            # to files that come from a trusted source.
            # Binary mode matches the binary pickle protocol that
            # store_state uses for writing; the context manager closes the
            # file even if loading fails.
            with open(selected_features_path, 'rb') as features_file:
                retained_feature_indices = cPickle.load(features_file)

            if num_retained_features is not None:
                num_available = len(retained_feature_indices)
                if num_available > num_retained_features:
                    # Keep only the first num_retained_features indices
                    retained_feature_indices = \
                        retained_feature_indices[:num_retained_features]
                elif num_available < num_retained_features:
                    import warnings
                    warnings.warn("Only %s features available, cannot retain "
                                  "%s features!" % (num_available,
                                                    num_retained_features))

        self.set_permanent_attributes(
            retained_feature_indices=retained_feature_indices,
            num_retained_features=num_retained_features,
            feature_names=None)

    def _execute(self, feature_vector):
        """ Projects the feature vector onto the retained features

        On the first call without a loaded selection, the retained indices
        are sampled randomly from the columns of *feature_vector* and the
        feature names of that vector are remembered for store_state.

        Returns a new FeatureVector containing only the retained columns
        and the matching feature names.

        Raises a ValueError if no selection was loaded and
        num_retained_features was not specified.
        """
        # Identity check: an equality comparison against None would be
        # evaluated element-wise if the loaded indices are an array.
        if self.retained_feature_indices is None:
            if self.num_retained_features is None:
                raise ValueError("Either 'num_retained_features' or "
                                 "'selected_features_path' must be "
                                 "specified!")
            num_available = feature_vector.shape[1]
            if self.num_retained_features > num_available:
                self._log("Too large 'num_retained_features' (%s)!"
                          % self.num_retained_features)
                # Cannot retain more features than the data provides
                self.set_permanent_attributes(
                    num_retained_features=num_available)
            # The indices of the features that will be retained
            self.retained_feature_indices = random.sample(
                range(num_available), self.num_retained_features)
            self.feature_names = feature_vector.feature_names

        # Project the features onto the selected subspace
        proj_features = feature_vector[:, self.retained_feature_indices]
        # Update the feature_names list
        feature_names = [feature_vector.feature_names[index]
                         for index in self.retained_feature_indices]
        # Create feature vector instance
        projected_feature_vector = FeatureVector(proj_features, feature_names)
        return projected_feature_vector

    def store_state(self, result_dir, index=None):
        """ Stores this node in the given directory *result_dir*

        Nothing is written unless ``self.store`` is true. Otherwise a
        subdirectory named after the class is created, containing the
        pickled retained feature indices and a text file with the
        remembered feature names of the current split.
        """
        if not self.store:
            return

        node_dir = os.path.join(result_dir, self.__class__.__name__)
        create_directory(node_dir)

        # This node only stores the order of the selected features' indices
        name = "%s_sp%s.pickle" % ("selected_features", self.current_split)
        with open(os.path.join(node_dir, name), "wb") as result_file:
            result_file.write(cPickle.dumps(self.retained_feature_indices,
                                            protocol=2))

        # Store feature names
        name = "feature_names_sp%s.txt" % self.current_split
        with open(os.path.join(node_dir, name), "w") as result_file:
            result_file.write("%s" % self.feature_names)