# Source code for pySPACE.missions.nodes.scikit_nodes

# -*- coding:utf-8; -*-
""" Wrap the algorithms defined in `scikit.learn <http://scikit-learn.org/>`_ in pySPACE nodes

For details on parameter usage look at the
`scikit documentation <http://scikit-learn.org/>`_ or
the wrapped documentation of pySPACE: :ref:`scikit_nodes`.
The parameters given in the node specification are filtered, to check if they
are available, and then directly forwarded to the scikit algorithm.

This module is based heavily on the scikit.learn wrapper for the "Modular
toolkit for Data Processing"
(MDP, version 3.3, http://mdp-toolkit.sourceforge.net/).
All credit goes to the MDP authors.

MDP (version 3.3) is distributed under the following BSD license::

    This file is part of Modular toolkit for Data Processing (MDP).
    All the code in this package is distributed under the following conditions:

    Copyright (c) 2003-2012, MDP Developers <mdp-toolkit-devel@lists.sourceforge.net>

    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

        * Redistributions of source code must retain the above copyright
          notice, this list of conditions and the following disclaimer.
        * Redistributions in binary form must reproduce the above copyright
          notice, this list of conditions and the following disclaimer in the
          documentation and/or other materials provided with the distribution.
        * Neither the name of the Modular toolkit for Data Processing (MDP)
          nor the names of its contributors may be used to endorse or promote
          products derived from this software without specific prior written
          permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
__docformat__ = "restructuredtext en"

try:
    import sklearn
    _sklearn_prefix = 'sklearn'
except ImportError:
    try:
        import scikits.learn as sklearn
        _sklearn_prefix = 'scikits.learn'
    except ImportError:
        _sklearn_prefix = False

import inspect
import re

import numpy
import logging
import warnings
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import sys

from pySPACE.missions.nodes.base_node import BaseNode
from pySPACE.missions.nodes import NODE_MAPPING, DEFAULT_NODE_MAPPING
from pySPACE.resources.data_types.prediction_vector import PredictionVector
from pySPACE.resources.data_types.feature_vector import FeatureVector


class ScikitException(Exception):
    """Base class for exceptions in nodes wrapping scikit algorithms."""
# import all submodules of sklearn (to work around lazy import)
def _version_too_old(version, known_good):
    """Tell whether *version* is older than the release *known_good*.

    version -- dotted version string, e.g. ``'0.16.1'``
    known_good -- tuple of integers, e.g. ``(0, 17)``

    Only as many components as *known_good* has are compared. Returns True
    if *version* is older, False if it is equal or newer and None if a
    compared component does not start with a number.
    """
    for part, expected in zip(version.split('.'), known_good):
        # only the leading digits count, so that suffixed components such as
        # '16b1' or '17rc1' can still be compared
        digits = re.match(r'\d+', part)
        if digits is None:
            return None
        number = int(digits.group())
        if number < expected:
            return True
        if number > expected:
            break
    return False


if not _sklearn_prefix:
    scikit_modules = []
elif _version_too_old(sklearn.__version__, (0, 8)):
    scikit_modules = ['ann', 'cluster', 'covariance', 'feature_extraction',
                      'feature_selection', 'features', 'gaussian_process',
                      'glm', 'linear_model', 'preprocessing', 'svm', 'pca',
                      'lda', 'hmm', 'fastica', 'grid_search', 'mixture',
                      'naive_bayes', 'neighbors', 'qda']
elif _version_too_old(sklearn.__version__, (0, 9)):
    # package structure has been changed in 0.8
    scikit_modules = ['svm', 'linear_model', 'naive_bayes', 'neighbors',
                      'mixture', 'hmm', 'cluster', 'decomposition', 'lda',
                      'covariance', 'cross_val', 'grid_search',
                      'feature_selection.rfe', 'feature_extraction.image',
                      'feature_extraction.text', 'pipelines', 'pls',
                      'gaussian_process', 'qda']
elif _version_too_old(sklearn.__version__, (0, 11)):
    # from release 0.9 cross_val becomes cross_validation and hmm is deprecated
    scikit_modules = ['svm', 'linear_model', 'naive_bayes', 'neighbors',
                      'mixture', 'cluster', 'decomposition', 'lda',
                      'covariance', 'cross_validation', 'grid_search',
                      'feature_selection.rfe', 'feature_extraction.image',
                      'feature_extraction.text', 'pipelines', 'pls',
                      'gaussian_process', 'qda', 'ensemble', 'manifold',
                      'metrics', 'preprocessing', 'tree']
elif _version_too_old(sklearn.__version__, (0, 17)):
    scikit_modules = ['svm', 'linear_model', 'naive_bayes', 'neighbors',
                      'mixture', 'cluster', 'decomposition', 'lda',
                      'covariance', 'cross_validation', 'grid_search',
                      'feature_selection', 'feature_extraction', 'pipeline',
                      'pls', 'gaussian_process', 'qda', 'ensemble',
                      'manifold', 'metrics', 'preprocessing',
                      'semi_supervised', 'tree', 'hmm']
else:
    scikit_modules = ['calibration', 'cluster', 'covariance',
                      'cross_decomposition', 'cross_validation',
                      'decomposition', 'discriminant_analysis', 'ensemble',
                      'feature_extraction', 'feature_selection',
                      'gaussian_process', 'grid_search', 'isotonic',
                      'kernel_approximation', 'kernel_ridge',
                      'learning_curve', 'linear_model', 'manifold',
                      'metrics', 'mixture', 'multiclass', 'naive_bayes',
                      'neighbors', 'neural_network', 'preprocessing',
                      'random_projection', 'semi_supervised', 'svm', 'tree']

for name in scikit_modules:
    # not all modules may be available due to missing dependencies
    # on the user system.
    # we just ignore failing imports
    try:
        __import__(_sklearn_prefix + '.' + name)
    except ImportError:
        pass

_WS_LINE_RE = re.compile(r'^\s*$')
_WS_PREFIX_RE = re.compile(r'^(\s*)')
_HEADINGS_RE = re.compile(r'''^(Parameters|Attributes|Methods|Examples|Notes)\n
                           (----+|====+)''', re.M + re.X)
_UNDERLINE_RE = re.compile(r'----+|====+')
_VARWITHUNDER_RE = re.compile(r'(\s|^)([a-zA-Z_][a-zA-Z0-9_]*_)(\s|$|[,.])')

_HEADINGS = set(['Parameters', 'Attributes', 'Methods', 'Examples',
                 'Notes', 'References'])

_DOC_TEMPLATE = """
%s

This node has been automatically generated by wrapping the
`%s.%s <http://scikit-learn.org/stable/modules/generated/%s.%s.html>`_ class
from the ``sklearn`` library. The wrapped instance can be accessed
through the ``scikit_alg`` attribute.

%s
"""


def _gen_docstring(object, docsource=None):
    """ Generate and modify the docstring for each wrapped node

    object -- the wrapped class; its module and name go into the generated
              text and the documentation link
    docsource -- object whose ``__doc__`` is converted; defaults to *object*,
                 which is also the fallback if *docsource* has no docstring

    The first paragraph of the source docstring becomes the header, the
    remainder is reformatted (headings in bold, underlines dropped, names
    with a trailing underscore quoted, blocks after a line ending in ``:``
    turned into list items) and both are inserted into ``_DOC_TEMPLATE``.
    """
    module = object.__module__
    name = object.__name__

    # Link to the parent package if it exposes the class as well.
    # ``__import__`` returns the *top-level* package for dotted names, so the
    # parent module itself has to be fetched from ``sys.modules``.
    link_module = module
    parent_module = module.rpartition('.')[0]
    if parent_module:
        try:
            __import__(parent_module)
        except ImportError:
            pass
        else:
            if hasattr(sys.modules.get(parent_module), name):
                link_module = parent_module

    # search for documentation string
    if docsource is None:
        docsource = object
    docstring = docsource.__doc__
    if docstring is None:
        docstring = object.__doc__
    if docstring is None:
        docstring = "This algorithm contains no documentation."

    # the header ends at the first blank line; without any blank line the
    # whole docstring is the header
    lines = docstring.strip().split('\n')
    for split_at, line in enumerate(lines):
        if _WS_LINE_RE.match(line):
            break
    else:
        split_at = len(lines)
    header = [line.strip() for line in lines[:split_at]]
    therest = [line.rstrip() for line in lines[split_at + 1:]]
    body = []

    if therest:
        # common indentation of all non-empty lines
        prefix = min(len(_WS_PREFIX_RE.match(line).group(1))
                     for line in therest if line)
        # indentation of the block following a line that ends with ':'
        quoteind = None
        for index, line in enumerate(therest):
            line = line[prefix:]
            if line in _HEADINGS:
                body.append('**%s**' % line)
            elif _UNDERLINE_RE.match(line):
                body.append('')
            else:
                line = _VARWITHUNDER_RE.sub(r'\1``\2``\3', line)
                if quoteind:
                    if len(_WS_PREFIX_RE.match(line).group(1)) >= quoteind:
                        line = quoteind * ' ' + '- ' + line[quoteind:]
                    else:
                        quoteind = None
                        body.append('')
                body.append(line)

            if line.endswith(':'):
                body.append('')
                if index + 1 < len(therest):
                    following = therest[index + 1][prefix:]
                    quoteind = len(_WS_PREFIX_RE.match(following).group(1))

    return _DOC_TEMPLATE % ('\n'.join(header), module, name, link_module,
                            name, '\n'.join(body))

# TODO: generalize dtype support
# TODO: have a look at predict_proba for Classifier.prob
# TODO: inverse <-> generate/rvs
# TODO: deal with input_dim/output_dim
# TODO: change signature of overwritten functions
# TODO: wrap_scikit_instance
# TODO: add sklearn availability to test info strings
# TODO: which tests ? (test that particular algorithm are / are not trainable)
# XXX: if class defines n_components, allow output_dim, otherwise throw exception
#      also for classifiers (overwrite _set_output_dim)
#      Problem: sometimes they call it 'k' (e.g., algorithms in sklearn.cluster)
def apply_to_scikit_algorithms(current_module, action,
                               processed_modules=None,
                               processed_classes=None):
    """ Function that traverses a module to find scikit algorithms.

    'sklearn' algorithms are identified by the 'fit' 'predict',
    or 'transform' methods. The 'action' function is applied to each found
    algorithm.

    current_module -- module whose public members are inspected; member
                      modules whose name starts with the sklearn prefix are
                      traversed recursively
    action -- a function that is called with as ``action(class_)``, where
              ``class_`` is a class that defines the 'fit', 'predict' or
              'transform' method
    processed_modules -- list of modules visited so far (extended in place)
    processed_classes -- list of classes handled so far (extended in place)

    Returns the list of processed classes, or None if *current_module* had
    already been visited. A failing *action* does not abort the traversal;
    a warning is issued instead.
    """
    # only consider modules and classes once
    if processed_modules is None:
        processed_modules = []
    if processed_classes is None:
        processed_classes = []

    if current_module in processed_modules:
        return
    processed_modules.append(current_module)

    # iterate over a snapshot: imports triggered by *action* may add names
    # to the module while it is being traversed
    for member_name, member in list(current_module.__dict__.items()):
        if member_name.startswith('_'):
            continue

        # classes
        if inspect.isclass(member) and member not in processed_classes:
            is_algorithm = (hasattr(member, 'fit')
                            or hasattr(member, 'predict')
                            or hasattr(member, 'transform'))
            if is_algorithm and not member.__module__.endswith('_'):
                processed_classes.append(member)
                try:
                    action(member)
                except Exception as error:
                    # best effort: one failing class must not prevent the
                    # others from being wrapped
                    warnings.warn(
                        "Could not wrap sklearn nodes. Failed for %s: %r"
                        % (member.__name__, error))
        # other modules
        elif (inspect.ismodule(member) and
                member.__name__.startswith(_sklearn_prefix)):
            apply_to_scikit_algorithms(member, action, processed_modules,
                                       processed_classes)
    return processed_classes
# Message of the ScikitException raised by the transformer and predictor
# wrappers in this module when their constructor receives an 'output_dim'.
_OUTPUTDIM_ERROR = """'output_dim' keyword not supported. Please set the output dimensionality using sklearn keyword arguments (e.g., 'n_components', or 'k'). See the docstring of this class for details."""
def wrap_scikit_classifier(scikit_class):
    """Wrap a sklearn classifier as a BaseNode subclass.

    The wrapper maps these node methods to their sklearn equivalents:

    - _stop_training -> fit
    - _execute -> predict

    scikit_class -- the sklearn classifier class to wrap

    Returns the generated node class. The class is additionally stored in
    the globals of this module under its generated name
    (``<Name>SklearnNode`` or ``<Name>ClassifierSklearnNode``).
    """
    # getargspec does not exist on recent interpreters, getfullargspec does
    # not exist on old ones; both return the argument names as first item
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec

    def split_kwargs(kwargs):
        """Return (keywords for the base node, keywords for sklearn)."""
        try:
            accepted_args = getargspec(scikit_class.__init__)[0]
        except TypeError:  # happens for GaussianNBSklearnNode
            return kwargs, {}
        base_kwargs = {}
        # iterate over a copy of the keys, kwargs is modified in the loop
        for key in list(kwargs):
            if key not in accepted_args:
                base_kwargs[key] = kwargs.pop(key)
        return base_kwargs, kwargs

    def prefix_node_name(error, node):
        """Prepend the name of *node* to the message of *error* in place."""
        message = error.args[0] if error.args else ''
        error.args = (("in node %s:\n\t%s"
                       % (node.__class__.__name__, message),)
                      + tuple(error.args[1:]))

    def plain_function(method):
        """Return the function behind a (un)bound method, else the object."""
        return getattr(method, '__func__', method)

    # create a wrapper class for a sklearn classifier
    class ScikitClassifier(BaseNode):
        input_types = ["FeatureVector"]

        def __init__(self, input_dim=None, output_dim=None, dtype=None,
                     class_labels=None, **kwargs):
            if output_dim is not None:
                # output_dim and n_components cannot be defined at the same time
                if 'n_components' in kwargs:
                    msg = ("Dimensionality set both by "
                           "output_dim=%d and n_components=%d""")
                    raise ScikitException(msg % (output_dim,
                                                 kwargs['n_components']))

            # keywords unknown to the sklearn constructor go to the base node
            base_kwargs, kwargs = split_kwargs(kwargs)

            super(ScikitClassifier, self).__init__(
                input_dim=input_dim, output_dim=output_dim, dtype=dtype,
                **base_kwargs)
            self.kwargs = kwargs

            self.set_permanent_attributes(kwargs=kwargs,
                                          scikit_alg=scikit_class(**self.kwargs),
                                          data=[],
                                          labels=[],
                                          class_labels=class_labels)

        # ---- re-direct training and execution to the wrapped algorithm

        def _train(self, data, y):
            # collect the first row of the sample and its label; the actual
            # fit happens in _stop_training
            x = data.view(numpy.ndarray)
            self.data.append(x[0])
            self.labels.append(y)

        def _stop_training(self, **kwargs):
            super(ScikitClassifier, self)._stop_training(self)
            if self.class_labels is None:
                self.class_labels = sorted(list(set(self.labels)))

            data = numpy.array(self.data)
            # sklearn is trained on the index of each label in class_labels
            label_values = numpy.array(
                [self.class_labels.index(label) for label in self.labels])
            try:
                return self.scikit_alg.fit(data, label_values, **kwargs)
            except Exception as error:
                # same exception type and traceback, message names the node
                prefix_node_name(error, self)
                raise

        def _execute(self, data):
            x = data.view(numpy.ndarray)
            try:
                prediction = self.scikit_alg.predict(x)[0]
            except Exception as error:
                prefix_node_name(error, self)
                raise

            # score preference: predict_proba, decision_function, prediction
            if hasattr(self.scikit_alg, "predict_proba"):
                try:
                    score = self.scikit_alg.predict_proba(x)[0, 1]
                except Exception as error:
                    warnings.warn("%s in node %s:\n\t%s"
                                  % (type(error).__name__,
                                     self.__class__.__name__, error))
                    try:
                        score = self.scikit_alg.decision_function(x)[0]
                    except Exception:
                        score = prediction
            elif hasattr(self.scikit_alg, "decision_function"):
                score = self.scikit_alg.decision_function(x)[0]
            else:
                score = prediction

            # map the predicted index back to the original label
            label = self.class_labels[prediction]
            return PredictionVector(label=label, prediction=score,
                                    predictor=self)

        @classmethod
        def get_output_type(cls, input_type, as_string=True):
            if as_string:
                return "PredictionVector"
            else:
                return PredictionVector

        # ---- administrative details

        @staticmethod
        def is_trainable():
            """Return True if the node can be trained, False otherwise."""
            return hasattr(scikit_class, 'fit')

        @staticmethod
        def is_supervised():
            """Return True if the node requires labels for training, False otherwise."""
            return True

        # NOTE: at this point scikit nodes can only support up to
        # 64-bits floats because some call numpy.linalg.svd, which for
        # some reason does not support higher precisions
        def _get_supported_dtypes(self):
            """Return the list of dtypes supported by this node.
            The types can be specified in any format allowed by numpy.dtype."""
            return ['float32', 'float64']

    # modify class name and docstring
    if "Classifier" not in scikit_class.__name__:
        ScikitClassifier.__name__ = scikit_class.__name__ + \
            'ClassifierSklearnNode'
    else:
        ScikitClassifier.__name__ = scikit_class.__name__ + 'SklearnNode'
    ScikitClassifier.__doc__ = _gen_docstring(scikit_class)

    # Class must be permanently accessible from module level
    globals()[ScikitClassifier.__name__] = ScikitClassifier

    # change the docstring of the methods to match the ones in sklearn
    # methods_dict maps ScikitNode method names to sklearn method names
    # NOTE(review): 'stop_training' and 'execute' are not defined by the
    # wrapper class itself, so this presumably rewrites the docstring of the
    # inherited implementation -- confirm that this is intended
    methods_dict = {'__init__': '__init__',
                    'stop_training': 'fit',
                    'execute': 'predict'}
    for pyspace_name, scikit_name in methods_dict.items():
        pyspace_method = getattr(ScikitClassifier, pyspace_name)
        scikit_function = plain_function(getattr(scikit_class, scikit_name))
        if inspect.isfunction(scikit_function):
            # some scikit algorithms do not define an __init__ method
            # the one inherited from 'object' is a
            # "<slot wrapper '__init__' of 'object' objects>"
            # which is not a Python function and is therefore skipped
            plain_function(pyspace_method).__doc__ = _gen_docstring(
                scikit_class, scikit_function)

    if scikit_class.__init__.__doc__ is None:
        plain_function(ScikitClassifier.__init__).__doc__ = \
            _gen_docstring(scikit_class)

    return ScikitClassifier
def wrap_scikit_transformer(scikit_class):
    """ Wrap a sklearn transformer as a pySPACE BaseNode subclass

    The wrapper maps these pySPACE methods to their sklearn equivalents:

    - _stop_training -> fit
    - _execute -> transform

    scikit_class -- the sklearn transformer class to wrap

    Returns the generated node class. The class is additionally stored in
    the globals of this module under its generated name
    (``<Name>SklearnNode`` or ``<Name>TransformerSklearnNode``).
    """
    # getargspec does not exist on recent interpreters, getfullargspec does
    # not exist on old ones; both return the argument names as first item
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec

    def split_kwargs(kwargs):
        """Return (keywords for the base node, keywords for sklearn)."""
        try:
            accepted_args = getargspec(scikit_class.__init__)[0]
        except TypeError:
            # no introspectable constructor: everything goes to the base
            # node, as in the classifier wrapper of this module
            return kwargs, {}
        base_kwargs = {}
        # iterate over a copy of the keys, kwargs is modified in the loop
        for key in list(kwargs):
            if key not in accepted_args:
                base_kwargs[key] = kwargs.pop(key)
        return base_kwargs, kwargs

    def plain_function(method):
        """Return the function behind a (un)bound method, else the object."""
        return getattr(method, '__func__', method)

    # create a wrapper class for a sklearn transformer
    class ScikitTransformer(BaseNode):
        input_types = ["FeatureVector"]

        def __init__(self, input_dim=None, output_dim=None, dtype=None, **kwargs):
            if output_dim is not None:
                raise ScikitException(_OUTPUTDIM_ERROR)

            # keywords unknown to the sklearn constructor go to the base node
            base_kwargs, kwargs = split_kwargs(kwargs)

            super(ScikitTransformer, self).__init__(
                input_dim=input_dim, output_dim=output_dim, dtype=dtype,
                **base_kwargs)
            self.kwargs = kwargs

            self.set_permanent_attributes(kwargs=kwargs,
                                          scikit_alg=scikit_class(**self.kwargs),
                                          data=[],
                                          feature_names=None)

        # ---- re-direct training and execution to the wrapped algorithm

        def _train(self, data):
            assert isinstance(data, FeatureVector), \
                "Scikit-Learn Transformer nodes only support FeatureVector inputs."
            # collect the first row of the sample; the actual fit happens in
            # _stop_training
            x = data.view(numpy.ndarray)
            self.data.append(x[0])

        def _stop_training(self, **kwargs):
            super(ScikitTransformer, self)._stop_training(self)
            data = numpy.array(self.data)
            return self.scikit_alg.fit(data, **kwargs)

        def _execute(self, data):
            x = data.view(numpy.ndarray)
            # hand over the 2-D view: the column count of the result is
            # needed below, and sklearn expects (n_samples, n_features)
            # assumes x holds a single row, as _train does -- TODO confirm
            out = self.scikit_alg.transform(x)

            # feature names are generated once from the output width
            if self.feature_names is None:
                self.feature_names = \
                    ["%s_%s" % (self.__class__.__name__, i)
                     for i in range(out.shape[1])]

            return FeatureVector(out, self.feature_names)

        # ---- administrative details

        @staticmethod
        def is_trainable():
            """Return True if the node can be trained, False otherwise."""
            return hasattr(scikit_class, 'fit')

        @staticmethod
        def is_supervised():
            """Return True if the node requires labels for training, False otherwise."""
            return False

        # NOTE: at this point scikit nodes can only support up to
        # 64-bits floats because some call numpy.linalg.svd, which for
        # some reason does not support higher precisions
        def _get_supported_dtypes(self):
            """Return the list of dtypes supported by this node.
            The types can be specified in any format allowed by numpy.dtype."""
            return ['float32', 'float64']

        @classmethod
        def get_output_type(cls, input_type, as_string=True):
            if as_string:
                return "FeatureVector"
            else:
                return FeatureVector

    # modify class name and docstring
    if "Transformer" not in scikit_class.__name__:
        ScikitTransformer.__name__ = scikit_class.__name__ + \
            'TransformerSklearnNode'
    else:
        ScikitTransformer.__name__ = scikit_class.__name__ + 'SklearnNode'
    ScikitTransformer.__doc__ = _gen_docstring(scikit_class)

    # Class must be permanently accessible from module level
    globals()[ScikitTransformer.__name__] = ScikitTransformer

    # change the docstring of the methods to match the ones in sklearn
    # methods_dict maps ScikitNode method names to sklearn method names
    # NOTE(review): 'stop_training' and 'execute' are not defined by the
    # wrapper class itself, so this presumably rewrites the docstring of the
    # inherited implementation -- confirm that this is intended
    methods_dict = {'__init__': '__init__',
                    'stop_training': 'fit',
                    'execute': 'transform'}
    for pyspace_name, scikit_name in methods_dict.items():
        pyspace_method = getattr(ScikitTransformer, pyspace_name)
        scikit_function = plain_function(
            getattr(scikit_class, scikit_name, None))
        if inspect.isfunction(scikit_function):
            # some scikit algorithms do not define an __init__ method
            # the one inherited from 'object' is a
            # "<slot wrapper '__init__' of 'object' objects>"
            # which is not a Python function and is therefore skipped
            plain_function(pyspace_method).__doc__ = _gen_docstring(
                scikit_class, scikit_function)

    if scikit_class.__init__.__doc__ is None:
        plain_function(ScikitTransformer.__init__).__doc__ = \
            _gen_docstring(scikit_class)

    return ScikitTransformer
def wrap_scikit_predictor(scikit_class):
    """ Wrap a sklearn predictor as an pySPACE BaseNode subclass

    The wrapper maps these pySPACE methods to their sklearn equivalents:

    * _stop_training -> fit
    * _execute -> predict

    scikit_class -- the sklearn predictor (regressor) class to wrap

    Returns the generated node class. The class is additionally stored in
    the globals of this module under its generated name
    (``<Name>SklearnNode`` or ``<Name>RegressorSklearnNode``).
    """
    # getargspec does not exist on recent interpreters, getfullargspec does
    # not exist on old ones; both return the argument names as first item
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec

    def split_kwargs(kwargs):
        """Return (keywords for the base node, keywords for sklearn)."""
        try:
            accepted_args = getargspec(scikit_class.__init__)[0]
        except TypeError:
            # no introspectable constructor: everything goes to the base
            # node, as in the classifier wrapper of this module
            return kwargs, {}
        base_kwargs = {}
        # iterate over a copy of the keys, kwargs is modified in the loop
        for key in list(kwargs):
            if key not in accepted_args:
                base_kwargs[key] = kwargs.pop(key)
        return base_kwargs, kwargs

    def prefix_node_name(error, node):
        """Prepend the name of *node* to the message of *error* in place."""
        message = error.args[0] if error.args else ''
        error.args = (("in node %s:\n\t%s"
                       % (node.__class__.__name__, message),)
                      + tuple(error.args[1:]))

    def plain_function(method):
        """Return the function behind a (un)bound method, else the object."""
        return getattr(method, '__func__', method)

    # create a wrapper class for a sklearn predictor
    class ScikitPredictor(BaseNode):
        input_types = ["FeatureVector"]

        def __init__(self, input_dim=None, output_dim=None, dtype=None, **kwargs):
            if output_dim is not None:
                raise ScikitException(_OUTPUTDIM_ERROR)

            # keywords unknown to the sklearn constructor go to the base node
            base_kwargs, kwargs = split_kwargs(kwargs)

            super(ScikitPredictor, self).__init__(
                input_dim=input_dim, output_dim=output_dim, dtype=dtype,
                **base_kwargs)
            self.kwargs = kwargs

            self.set_permanent_attributes(kwargs=kwargs,
                                          data=[],
                                          labels=[],
                                          scikit_alg=scikit_class(**self.kwargs))

        # ---- re-direct training and execution to the wrapped algorithm

        def _train(self, data, y):
            # collect the first row of the sample and its target as float;
            # the actual fit happens in _stop_training
            x = data.view(numpy.ndarray)
            self.data.append(x[0])
            self.labels.append(numpy.float64(y))

        def _stop_training(self, **kwargs):
            super(ScikitPredictor, self)._stop_training(self)
            data = numpy.array(self.data)
            label_values = numpy.array(self.labels)
            try:
                return self.scikit_alg.fit(data, label_values, **kwargs)
            except Exception as error:
                # same exception type and traceback, message names the node
                prefix_node_name(error, self)
                raise

        def _execute(self, data):
            x = data.view(numpy.ndarray)
            try:
                prediction = self.scikit_alg.predict(x)[0]
            except Exception as error:
                prefix_node_name(error, self)
                raise

            # score preference: predict_proba, decision_function, prediction
            if hasattr(self.scikit_alg, "predict_proba"):
                try:
                    score = self.scikit_alg.predict_proba(x)[0, 1]
                except Exception as error:
                    warnings.warn("%s in node %s:\n\t%s"
                                  % (type(error).__name__,
                                     self.__class__.__name__, error))
                    try:
                        score = self.scikit_alg.decision_function(x)[0]
                    except Exception:
                        score = prediction
            elif hasattr(self.scikit_alg, "decision_function"):
                score = self.scikit_alg.decision_function(x)[0]
            else:
                # if nothing else works, we set the score of the
                # prediction to be equal to the prediction itself.
                score = prediction

            return PredictionVector(label=prediction, prediction=score,
                                    predictor=self)

        # ---- administrative details

        def is_trainable(self):
            """Return True if the node can be trained, False otherwise."""
            return hasattr(scikit_class, 'fit')

        # NOTE: at this point scikit nodes can only support up to 64-bits floats
        # because some call numpy.linalg.svd, which for some reason does not
        # support higher precisions
        def _get_supported_dtypes(self):
            """Return the list of dtypes supported by this node.
            The types can be specified in any format allowed by numpy.dtype."""
            return ['float32', 'float64']

        def is_supervised(self):
            """Return True if the node requires labels, i.e. if it is trainable."""
            return self.is_trainable()

        @classmethod
        def get_output_type(cls, input_type, as_string=True):
            if as_string:
                return "PredictionVector"
            else:
                return PredictionVector

    # modify class name and docstring
    if "Regression" not in scikit_class.__name__ and \
            "Regressor" not in scikit_class.__name__:
        ScikitPredictor.__name__ = scikit_class.__name__ + \
            'RegressorSklearnNode'
    else:
        ScikitPredictor.__name__ = scikit_class.__name__ + 'SklearnNode'
    ScikitPredictor.__doc__ = _gen_docstring(scikit_class)

    # Class must be permanently accessible from module level
    globals()[ScikitPredictor.__name__] = ScikitPredictor

    # change the docstring of the methods to match the ones in sklearn
    # methods_dict maps ScikitPredictor method names to sklearn method names
    # NOTE(review): 'stop_training' and 'execute' are not defined by the
    # wrapper class itself, so this presumably rewrites the docstring of the
    # inherited implementation -- confirm that this is intended
    methods_dict = {'__init__': '__init__',
                    'stop_training': 'fit',
                    'execute': 'predict'}
    for pyspace_name, scikit_name in methods_dict.items():
        pyspace_method = getattr(ScikitPredictor, pyspace_name)
        scikit_function = plain_function(getattr(scikit_class, scikit_name))
        if inspect.isfunction(scikit_function):
            # some scikit algorithms do not define an __init__ method
            # the one inherited from 'object' is a
            # "<slot wrapper '__init__' of 'object' objects>"
            # which is not a Python function and is therefore skipped
            plain_function(pyspace_method).__doc__ = _gen_docstring(
                scikit_class, scikit_function)

    if scikit_class.__init__.__doc__ is None:
        plain_function(ScikitPredictor.__init__).__doc__ = \
            _gen_docstring(scikit_class)

    return ScikitPredictor
#list candidate nodes
def wrap_scikit_algorithms(scikit_class, nodes_list):
    """ Check *scikit_class* and append new wrapped class to *nodes_list*

    Currently only classifiers subclassing ``sklearn.base.ClassifierMixin``
    and having a *fit* method were integrated and tested.
    Algorithms with the *transform* function are also available.
    *predict* nodes will be available soon but require more testing especially
    of regression in pySPACE.

    scikit_class -- candidate class found in the sklearn package
    nodes_list -- list that receives the generated node class (in place)

    Classes named ``LinearModel`` or whose name starts with ``Base``,
    ``EllipticEnvelop`` or ``ForestClassifier`` are skipped, as are classes
    without a *fit* method. Nothing is returned.
    """
    class_name = scikit_class.__name__
    skipped_prefixes = ('Base', 'EllipticEnvelop', 'ForestClassifier')
    if class_name == 'LinearModel' or class_name.startswith(skipped_prefixes):
        return

    # every wrapper needs 'fit'
    # Some (abstract) transformers do not implement fit.
    if not hasattr(scikit_class, 'fit'):
        return

    if sklearn.base.is_classifier(scikit_class):
        nodes_list.append(wrap_scikit_classifier(scikit_class))
    elif sklearn.base.is_regressor(scikit_class):
        # WARNING: Regression is not sufficiently tested in pySPACE
        nodes_list.append(wrap_scikit_predictor(scikit_class))
    elif hasattr(scikit_class, 'transform'):
        nodes_list.append(wrap_scikit_transformer(scikit_class))
# Wrap every algorithm found in the sklearn package and register the
# generated node classes; nothing happens if sklearn could not be imported.
if _sklearn_prefix:
    scikit_nodes = []
    apply_to_scikit_algorithms(
        sklearn, lambda c: wrap_scikit_algorithms(c, scikit_nodes))
    # add scikit nodes to dictionary
    for wrapped_c in scikit_nodes:
        DEFAULT_NODE_MAPPING[wrapped_c.__name__] = wrapped_c
        NODE_MAPPING[wrapped_c.__name__] = wrapped_c
        # second entry without the last four characters of the name
        # (the generated names all end with 'Node')
        NODE_MAPPING[wrapped_c.__name__[:-4]] = wrapped_c
    # the loop variable only exists if at least one node was wrapped
    if scikit_nodes:
        del wrapped_c