Source code for breton_cretenet.data_preparator

from sklearn.model_selection import train_test_split


def prepare(dataset, random_state=None, stratify=None, *, test_size=0.5):
    """
    Create a training set and a test set from the features X and labels y
    stored in ``dataset``.

    Parameters
    ----------
    dataset : numpy.ndarray
        Dataset of shape (n_samples, n_features + 1), with the labels in the
        last column and the features in all the other columns.
    random_state : int, optional
        Seed used for the train/test split. If no argument is given, the
        seed is not fixed and the split differs from one call to the next.
    stratify : array-like, optional
        If not None, the dataset is split in a stratified fashion, using
        this as the class labels.
    test_size : float or int, optional
        Size of the test set, forwarded unchanged to ``train_test_split``.
        Defaults to 0.5 (half of the samples go to the test set), which is
        the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        X_train, an array containing the features of the training set.
    numpy.ndarray
        X_test, an array containing the features of the test set.
    numpy.ndarray
        y_train, an array containing the labels of the training set.
    numpy.ndarray
        y_test, an array containing the labels of the test set.
    """
    # Separate features X (every column but the last) from labels y (the
    # last column).
    X = dataset[:, :-1]
    y = dataset[:, -1]

    # Split into train and test sets; passing a fixed random_state makes the
    # split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=stratify,
    )

    return X_train, X_test, y_train, y_test