# Source code for breton_cretenet.data_preprocessor

from sklearn.preprocessing import (
    MinMaxScaler,
    PolynomialFeatures,
    RobustScaler,
    StandardScaler,
)


def preprocess(X_train, X_test, method="standardize", degree=2):
    """
    Preprocess the training and test features with a scikit-learn transformer.

    The transformer is fitted on ``X_train`` only and the fitted transformer
    is then applied to both ``X_train`` and ``X_test``, so no statistics are
    computed from the test set.

    Parameters
    ----------
    X_train : numpy.ndarray
        Array containing the features of the training set.
    X_test : numpy.ndarray
        Array containing the features of the test set.
    method : string, optional
        Selects the preprocessing method to apply. Accepted values are:

        - ``"standardize"`` : ``StandardScaler`` (default)
        - ``"minmax"`` : ``MinMaxScaler``
        - ``"poly"`` : ``PolynomialFeatures``
        - ``"robust"`` : ``RobustScaler``

        Any other value (including ``None``) prints a warning to standard
        output and falls back to ``"standardize"``.
    degree : int, optional
        Selects the degree of the polynomial features. Is only used if the
        method is "poly".

    Returns
    -------
    numpy.ndarray
        An array containing the preprocessed features of the training set.
    numpy.ndarray
        An array containing the preprocessed features of the test set.
    """
    # Single source of truth for the accepted method names: the warning
    # below is built from these keys so it cannot drift out of sync with
    # the methods that are actually supported.
    factories = {
        "standardize": StandardScaler,
        "minmax": MinMaxScaler,
        "poly": lambda: PolynomialFeatures(degree=degree),
        "robust": RobustScaler,
    }

    # Non-string values (e.g. None or an unhashable object) are treated as
    # "no valid method" instead of raising during the dictionary lookup.
    factory = factories.get(method) if isinstance(method, str) else None

    if factory is None:
        valid_methods = ", ".join("'{}'".format(name) for name in factories)
        print(
            "WARNING : 'method' can only be set to {}.\n"
            "No valid method was selected, 'standardize' is selected by "
            "default.".format(valid_methods)
        )
        factory = StandardScaler

    preprocessor = factory()

    # Fit on the training features only, then reuse the fitted parameters
    # for the test features.
    X_train_pp = preprocessor.fit_transform(X_train)
    X_test_pp = preprocessor.transform(X_test)

    return X_train_pp, X_test_pp