import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from . import algorithm, data_preparator, data_preprocessor


def rand_data():
    """
    A function that returns a random dataset for the tests.

    Parameters:
    -----------
    None

    Returns:
    --------
    numpy.ndarray
        An array of size (10, 6) with random features and labels
    """
    X = np.random.rand(10, 5)
    y = np.random.rand(10, 1)
    return np.concatenate([X, y], axis=1)


def test_preparator_is_random_if_no_seed():
    """
    Test function to ensure that the preparator returns random splits.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    dataset = rand_data()
    X_train, X_test, y_train, y_test = data_preparator.prepare(dataset)
    # After preparation, the data should not be in the exact same order as at the beginning
    assert not np.allclose(
        np.concatenate([X_train, X_test], axis=0), dataset[:, :-1], atol=1e-12
    )
    assert not np.allclose(
        np.concatenate([y_train, y_test], axis=0), dataset[:, -1], atol=1e-12
    )
    # Without a seed, two preparations should give different results
    X_train2, X_test2, y_train2, y_test2 = data_preparator.prepare(dataset)
    assert not np.allclose(X_train, X_train2, atol=1e-12)
    assert not np.allclose(y_train, y_train2, atol=1e-12)
    assert not np.allclose(X_test, X_test2, atol=1e-12)
    assert not np.allclose(y_test, y_test2, atol=1e-12)


def test_preparator_with_seed():
    """
    Test function to ensure that the preparator gives fixed splits if the seed is set.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    dataset = rand_data()
    X_train, X_test, y_train, y_test = data_preparator.prepare(dataset, random_state=99)
    X_train2, X_test2, y_train2, y_test2 = data_preparator.prepare(
        dataset, random_state=99
    )
    # With the same seed, both preparations should be identical
    assert np.allclose(X_train, X_train2, atol=1e-12)
    assert np.allclose(y_train, y_train2, atol=1e-12)
    assert np.allclose(X_test, X_test2, atol=1e-12)
    assert np.allclose(y_test, y_test2, atol=1e-12)


def test_preparator_xy_alignment():
    """
    Test function to ensure that the preparator keeps the features and the labels
    grouped correctly after the shuffling.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    dataset = rand_data()
    X = dataset[:, :-1]
    y = dataset[:, -1]
    X_train, X_test, y_train, y_test = data_preparator.prepare(dataset, random_state=99)
    # X and y are shuffled, but they must stay aligned: y_train[i] must still
    # correspond to X_train[i, :] (and likewise for the test split)
    for i in range(len(y_train)):
        assert np.allclose(X_train[i, :], X[y == y_train[i], :], atol=1e-12)
    for i in range(len(y_test)):
        assert np.allclose(X_test[i, :], X[y == y_test[i], :], atol=1e-12)
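

# The three tests above pin down the contract assumed for data_preparator.prepare:
# shuffle rows (seedable via random_state), keep each X row paired with its y, and
# split off a test set. A minimal sketch under those assumptions (hypothetical, not
# the real implementation; the helper name and split ratio are illustrative):
def _prepare_sketch(dataset, test_size=0.3, random_state=None):
    rng = np.random.default_rng(random_state)
    # Permuting whole rows shuffles X and y together, preserving alignment
    shuffled = dataset[rng.permutation(len(dataset))]
    split = int(len(dataset) * (1 - test_size))
    X_train, X_test = shuffled[:split, :-1], shuffled[split:, :-1]
    y_train, y_test = shuffled[:split, -1], shuffled[split:, -1]
    return X_train, X_test, y_train, y_test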


def test_linear_regression_algorithm():
    """
    Test function to ensure that the linear_regression_algorithm function returns an
    instance of the LinearRegression class.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    X_train = np.array([[1, 2], [3, 4], [5, 6]])
    y_train = np.array([10, 20, 30])
    X_train_labels = ["feature1", "feature2"]
    model = algorithm.linear_regression_algorithm(X_train, y_train, X_train_labels)
    assert isinstance(model, LinearRegression)
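

# Only the return type is asserted above, so linear_regression_algorithm is assumed
# to fit and return a LinearRegression estimator. A hedged sketch (the label list is
# presumably kept for reporting and is ignored here):
def _linear_regression_sketch(X_train, y_train, X_train_labels):
    model = LinearRegression()
    model.fit(X_train, y_train)
    return model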


def test_decision_tree_regressor_algorithm():
    """
    Test function to ensure that the decision_tree_regressor_algorithm function returns
    an instance of the DecisionTreeRegressor class.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    X_train = np.array([[1, 2], [3, 4], [5, 6]])
    y_train = np.array([10, 20, 30])
    X_train_labels = ["feature1", "feature2"]
    max_depth = 2
    model = algorithm.decision_tree_regressor_algorithm(
        X_train, y_train, X_train_labels, max_depth
    )
    assert isinstance(model, DecisionTreeRegressor)
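

# Likewise for the tree variant: a sketch assuming decision_tree_regressor_algorithm
# simply forwards max_depth to a fitted DecisionTreeRegressor (hypothetical):
def _decision_tree_sketch(X_train, y_train, X_train_labels, max_depth):
    model = DecisionTreeRegressor(max_depth=max_depth)
    model.fit(X_train, y_train)
    return model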


def test_predict_from_regressor():
    """
    Test function to ensure that the predict_from_regressor function returns an array
    of predictions with the same length as the input array.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    X_train = np.array([[1, 2], [3, 4], [5, 6]])
    y_train = np.array([10, 20, 30])
    X_train_labels = ["feature1", "feature2"]
    model = algorithm.linear_regression_algorithm(X_train, y_train, X_train_labels)
    X = np.array([[1, 2], [3, 4], [5, 6]])
    X_labels = ["feature1", "feature2"]
    y_predicted = algorithm.predict_from_regressor(model, X, X_labels)
    assert isinstance(y_predicted, np.ndarray)
    assert y_predicted.shape == (len(X),)
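

# The shape assertion above implies predictions come back as a flat (n_samples,)
# array regardless of how the model returns them; a sketch under that assumption
# (X_labels is presumably used to order/validate columns and is ignored here):
def _predict_sketch(model, X, X_labels):
    return np.asarray(model.predict(X)).ravel()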


def test_lasso_regression_feature_selection():
    """
    Test the `lasso_regression_feature_selection` function.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    X_train = np.array([[1, 2, 0], [2, 4, 0], [3, 6, 0]])
    y_train = np.array([10, 20, 30])
    X_train_labels = ["feature1", "feature2", "feature3"]
    (
        X_train_selected,
        X_train_labels_selected,
    ) = algorithm.lasso_regression_feature_selection(X_train, y_train, X_train_labels)
    assert isinstance(X_train_selected, np.ndarray)
    assert isinstance(X_train_labels_selected, list)
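

# The test only checks return types, but the fixture is telling: feature1 and
# feature2 are perfectly collinear and feature3 is constant, exactly the setup where
# an L1 penalty should zero out redundant coefficients. A sketch of the assumed
# selection logic (hypothetical; alpha is illustrative):
def _lasso_selection_sketch(X_train, y_train, X_train_labels, alpha=0.1):
    from sklearn.linear_model import Lasso

    lasso = Lasso(alpha=alpha)
    lasso.fit(X_train, y_train)
    mask = lasso.coef_ != 0  # keep only features with nonzero coefficients
    selected_labels = [label for label, keep in zip(X_train_labels, mask) if keep]
    return X_train[:, mask], selected_labels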


def test_preprocessor_standard():
    """
    Test function to ensure that the standard method of the preprocessor is correctly
    implemented.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    X_train = np.random.rand(15, 5)
    X_test = np.random.rand(10, 5)
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)
    X_train_standardized = (X_train - mean) / std
    X_test_standardized = (X_test - mean) / std
    X_train_check, X_test_check = data_preprocessor.preprocess(
        X_train, X_test, method="standardize"
    )
    assert np.allclose(X_train_check, X_train_standardized, atol=1e-12)
    assert np.allclose(X_test_check, X_test_standardized, atol=1e-12)


def test_preprocessor_minmax():
    """
    Test function to ensure that the MinMax method of the preprocessor is correctly
    implemented.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    X_train = np.random.rand(15, 5)
    X_test = np.random.rand(10, 5)
    minimum = np.min(X_train, axis=0)
    maximum = np.max(X_train, axis=0)
    X_train_minmax = (X_train - minimum) / (maximum - minimum)
    X_test_minmax = (X_test - minimum) / (maximum - minimum)
    X_train_check, X_test_check = data_preprocessor.preprocess(
        X_train, X_test, method="minmax"
    )
    assert np.allclose(X_train_check, X_train_minmax, atol=1e-12)
    assert np.allclose(X_test_check, X_test_minmax, atol=1e-12)


def test_preprocessor_robust():
    """
    Test function to ensure that the robust scaler method of the preprocessor is
    correctly implemented.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    X_train = np.random.rand(15, 5)
    X_test = np.random.rand(10, 5)
    median = np.median(X_train, axis=0)
    interquartile = np.percentile(X_train, 75, axis=0) - np.percentile(
        X_train, 25, axis=0
    )
    X_train_robust = (X_train - median) / interquartile
    X_test_robust = (X_test - median) / interquartile
    X_train_check, X_test_check = data_preprocessor.preprocess(
        X_train, X_test, method="robust"
    )
    assert np.allclose(X_train_check, X_train_robust, atol=1e-12)
    assert np.allclose(X_test_check, X_test_robust, atol=1e-12)


def test_preprocessor_polynomial():
    """
    Test function to ensure that the Polynomial Features method of the preprocessor is
    correctly implemented.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    X = np.random.rand(10, 2)
    bias = np.ones((10, 1))
    col1 = X[:, 0].reshape(-1, 1)
    col2 = X[:, 1].reshape(-1, 1)
    X_poly = np.concatenate(
        [
            bias,
            col1,
            col2,
            col1**2,
            col1 * col2,
            col2**2,
            col1**3,
            col1**2 * col2,
            col1 * col2**2,
            col2**3,
        ],
        axis=1,
    )
    X_check, _ = data_preprocessor.preprocess(X, X, method="poly", degree=3)
    assert np.allclose(X_check, X_poly, atol=1e-12)
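

# The expected column order above (1, x1, x2, x1^2, x1*x2, x2^2, x1^3, x1^2*x2,
# x1*x2^2, x2^3) is exactly what sklearn's PolynomialFeatures produces for two
# inputs at degree 3, so the "poly" method could be backed by it (a sketch,
# assuming that backend):
def _poly_sketch(X, degree=3):
    from sklearn.preprocessing import PolynomialFeatures

    return PolynomialFeatures(degree=degree).fit_transform(X)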


def test_preprocessor_nonexistent_method():
    """
    Test function to ensure that if a non-existent method is selected, standardization
    is applied by default.

    Parameters:
    -----------
    None

    Returns:
    --------
    None
    """
    X_train = np.random.rand(15, 5)
    X_test = np.random.rand(10, 5)
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)
    X_train_standardized = (X_train - mean) / std
    X_test_standardized = (X_test - mean) / std
    # An unknown method name should fall back to standardization
    X_train_check, X_test_check = data_preprocessor.preprocess(
        X_train, X_test, method="wrong_name"
    )
    assert np.allclose(X_train_check, X_train_standardized, atol=1e-12)
    assert np.allclose(X_test_check, X_test_standardized, atol=1e-12)
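

# Taken together, the preprocessor tests pin down a dispatcher like the sketch below
# (hypothetical, not the real data_preprocessor.preprocess): statistics are computed
# on the training split only, both splits are transformed with them, and unknown
# method names fall back to standardization.
def _preprocess_sketch(X_train, X_test, method="standardize", degree=3):
    if method == "minmax":
        minimum = np.min(X_train, axis=0)
        spread = np.max(X_train, axis=0) - minimum
        return (X_train - minimum) / spread, (X_test - minimum) / spread
    if method == "robust":
        median = np.median(X_train, axis=0)
        iqr = np.percentile(X_train, 75, axis=0) - np.percentile(X_train, 25, axis=0)
        return (X_train - median) / iqr, (X_test - median) / iqr
    if method == "poly":
        from sklearn.preprocessing import PolynomialFeatures

        poly = PolynomialFeatures(degree=degree).fit(X_train)
        return poly.transform(X_train), poly.transform(X_test)
    # Default path, including unrecognized method names: standardize
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)
    return (X_train - mean) / std, (X_test - mean) / std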