# Source code for breton_cretenet.test

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

from . import algorithm, data_preparator, data_preprocessor


def rand_data():
    """Build a random dataset for the tests.

    Returns
    -------
    numpy.ndarray
        An array of shape (10, 6): five random feature columns
        followed by one random label column, all drawn from [0, 1).
    """
    features = np.random.rand(10, 5)
    labels = np.random.rand(10, 1)
    return np.hstack([features, labels])
def test_preparator_is_random_if_no_seed():
    """Check that data_preparator.prepare shuffles and is non-deterministic
    when no seed is given.

    NOTE(review): this is a probabilistic test — a random shuffle could, very
    rarely, reproduce the original order and make an assertion fail.
    """
    dataset = rand_data()
    first_split = data_preparator.prepare(dataset)
    X_train, X_test, y_train, y_test = first_split

    # The prepared data must not come back in the original row order.
    assert not np.allclose(
        np.concatenate([X_train, X_test], axis=0), dataset[:, :-1], atol=1e-12
    )
    assert not np.allclose(
        np.concatenate([y_train, y_test], axis=0), dataset[:, -1], atol=1e-12
    )

    # Without a seed, two consecutive preparations should disagree.
    second_split = data_preparator.prepare(dataset)
    for part_a, part_b in zip(first_split, second_split):
        assert not np.allclose(part_a, part_b, atol=1e-12)
def test_preparator_with_seed():
    """Check that a fixed random_state makes data_preparator.prepare
    return identical splits on repeated calls.
    """
    dataset = rand_data()
    first = data_preparator.prepare(dataset, random_state=99)
    second = data_preparator.prepare(dataset, random_state=99)
    # Same seed, same split: every returned array must match.
    for part_a, part_b in zip(first, second):
        assert np.allclose(part_a, part_b, atol=1e-12)
def test_preparator_xy_alignement():
    """Check that the preparator keeps features and labels aligned after
    shuffling: y_train[i] must still belong to X_train[i, :], and likewise
    for the test split.

    Fix: the original checked the train split with np.allclose but the test
    split with an exact `==` comparison; both loops now use the same
    np.allclose tolerance for consistency with the rest of the file.
    """
    dataset = rand_data()
    X = dataset[:, :-1]
    y = dataset[:, -1]
    X_train, X_test, y_train, y_test = data_preparator.prepare(dataset, random_state=99)
    # Labels are random floats, so each one identifies its original row:
    # look the row up by label and compare with the shuffled feature row.
    for i in range(len(y_train)):
        assert np.allclose(X_train[i, :], X[y == y_train[i], :], atol=1e-12)
    for i in range(len(y_test)):
        assert np.allclose(X_test[i, :], X[y == y_test[i], :], atol=1e-12)
def test_linear_regression_algorithm():
    """Check that linear_regression_algorithm returns a LinearRegression
    instance when given a small training set.
    """
    features = np.array([[1, 2], [3, 4], [5, 6]])
    targets = np.array([10, 20, 30])
    feature_names = ["feature1", "feature2"]
    fitted = algorithm.linear_regression_algorithm(features, targets, feature_names)
    assert isinstance(fitted, LinearRegression)
def test_decision_tree_regressor_algorithm():
    """Check that decision_tree_regressor_algorithm returns a
    DecisionTreeRegressor instance when given a small training set.
    """
    features = np.array([[1, 2], [3, 4], [5, 6]])
    targets = np.array([10, 20, 30])
    feature_names = ["feature1", "feature2"]
    fitted = algorithm.decision_tree_regressor_algorithm(
        features, targets, feature_names, 2  # max_depth
    )
    assert isinstance(fitted, DecisionTreeRegressor)
def test_predict_from_regressor():
    """Check that predict_from_regressor returns a 1-D ndarray of
    predictions with one entry per input row.
    """
    features = np.array([[1, 2], [3, 4], [5, 6]])
    targets = np.array([10, 20, 30])
    feature_names = ["feature1", "feature2"]
    fitted = algorithm.linear_regression_algorithm(features, targets, feature_names)

    # Predict on the same rows the model was fitted with.
    predictions = algorithm.predict_from_regressor(fitted, features, feature_names)
    assert isinstance(predictions, np.ndarray)
    assert predictions.shape == (len(features),)
def test_lasso_regression_feature_selection():
    """Check the return types of lasso_regression_feature_selection:
    a numpy array of selected columns and a list of their labels.
    """
    features = np.array([[1, 2, 0], [2, 4, 0], [3, 6, 0]])
    targets = np.array([10, 20, 30])
    feature_names = ["feature1", "feature2", "feature3"]
    selected_features, selected_names = algorithm.lasso_regression_feature_selection(
        features, targets, feature_names
    )
    assert isinstance(selected_features, np.ndarray)
    assert isinstance(selected_names, list)
def test_preprocessor_standard():
    """Check the 'standardize' preprocessing method against a manual
    z-score computed from the training statistics.
    """
    X_train = np.random.rand(15, 5)
    X_test = np.random.rand(10, 5)

    # Reference: center and scale both sets with the TRAIN mean/std.
    mu = X_train.mean(axis=0)
    sigma = X_train.std(axis=0)
    expected_train = (X_train - mu) / sigma
    expected_test = (X_test - mu) / sigma

    got_train, got_test = data_preprocessor.preprocess(
        X_train, X_test, method="standardize"
    )
    assert np.allclose(got_train, expected_train, atol=1e-12)
    assert np.allclose(got_test, expected_test, atol=1e-12)
def test_preprocessor_minmax():
    """Check the 'minmax' preprocessing method against a manual
    min-max scaling computed from the training extrema.
    """
    X_train = np.random.rand(15, 5)
    X_test = np.random.rand(10, 5)

    # Reference: rescale both sets with the TRAIN min/max.
    lo = X_train.min(axis=0)
    hi = X_train.max(axis=0)
    expected_train = (X_train - lo) / (hi - lo)
    expected_test = (X_test - lo) / (hi - lo)

    got_train, got_test = data_preprocessor.preprocess(
        X_train, X_test, method="minmax"
    )
    assert np.allclose(got_train, expected_train, atol=1e-12)
    assert np.allclose(got_test, expected_test, atol=1e-12)
def test_preprocessor_robust():
    """Check the 'robust' preprocessing method against a manual robust
    scaling (center on the median, scale by the interquartile range)
    computed from the training data.

    Fix: removed a leftover debug print of the max absolute difference,
    which polluted the test output.
    """
    X_train = np.random.rand(15, 5)
    X_test = np.random.rand(10, 5)

    # Reference: center/scale both sets with the TRAIN median and IQR.
    median = np.median(X_train, axis=0)
    iqr = np.percentile(X_train, 75, axis=0) - np.percentile(X_train, 25, axis=0)
    expected_train = (X_train - median) / iqr
    expected_test = (X_test - median) / iqr

    got_train, got_test = data_preprocessor.preprocess(
        X_train, X_test, method="robust"
    )
    assert np.allclose(got_train, expected_train, atol=1e-12)
    assert np.allclose(got_test, expected_test, atol=1e-12)
def test_preprocessor_polynomial():
    """Check the 'poly' preprocessing method: a degree-3 polynomial
    expansion of two columns must produce the bias column plus every
    monomial up to total degree 3, in the standard order.
    """
    X = np.random.rand(10, 2)
    a = X[:, :1]  # first column, kept 2-D for hstack
    b = X[:, 1:]  # second column, kept 2-D for hstack
    expected = np.hstack(
        [
            np.ones((10, 1)),                 # bias
            a, b,                             # degree 1
            a**2, a * b, b**2,                # degree 2
            a**3, a**2 * b, a * b**2, b**3,   # degree 3
        ]
    )
    transformed, _ = data_preprocessor.preprocess(X, X, method="poly", degree=3)
    assert np.allclose(transformed, expected, atol=1e-12)
def test_preprocessor_inexistant_method():
    """Check that an unknown method name falls back to standardization."""
    X_train = np.random.rand(15, 5)
    X_test = np.random.rand(10, 5)

    # Reference: plain z-score from the TRAIN statistics.
    mu = X_train.mean(axis=0)
    sigma = X_train.std(axis=0)
    expected_train = (X_train - mu) / sigma
    expected_test = (X_test - mu) / sigma

    got_train, got_test = data_preprocessor.preprocess(
        X_train, X_test, method="wrong_name"
    )
    # An unrecognized method must behave exactly like "standardize".
    assert np.allclose(got_train, expected_train, atol=1e-12)
    assert np.allclose(got_test, expected_test, atol=1e-12)