Source code for breton_cretenet.data_preparator

from sklearn.model_selection import train_test_split


def prepare(dataset, random_state=None, stratify=None, *, test_size=0.5):
    """
    Create a training set and a test set from the features X and labels y in dataset.

    The last column of ``dataset`` is taken as the labels ``y``; every other
    column is taken as the features ``X``. The actual split is delegated to
    ``sklearn.model_selection.train_test_split``.

    Parameters
    ----------
    dataset : numpy.ndarray
        Two-dimensional array of shape (n_samples, n_features + 1), with the
        labels in the last column and the features in the other columns.
    random_state : int, optional
        Seed chosen for the train test split. If no argument is given, the
        seed is not fixed and the split differs from call to call.
    stratify : array-like, optional
        If not None, the dataset is split in a stratified fashion, using this
        as the class labels. It is forwarded unchanged to
        ``train_test_split``.
    test_size : float or int, optional
        Size of the test set, forwarded unchanged to ``train_test_split``.
        Defaults to 0.5, i.e. half of the samples go to the test set.

    Returns
    -------
    X_train : numpy.ndarray
        Features of the training set.
    X_test : numpy.ndarray
        Features of the test set.
    y_train : numpy.ndarray
        Labels of the training set.
    y_test : numpy.ndarray
        Labels of the test set.
    """
    # Split the dataset between features X and labels y (y is the last column).
    X = dataset[:, :-1]
    y = dataset[:, -1]
    # Split into train and test sets; a fixed random_state makes the split
    # reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=stratify,
    )
    # Return an explicit tuple in the documented order.
    return X_train, X_test, y_train, y_test