Source code for datafusiontools.machine_learning.mpl

from dataclasses import dataclass
from typing import List, Union
from pathlib import Path
import os
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l2

from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import sklearn
import matplotlib.pylab as plt

from .neural_networks import NeuralNetwork


@dataclass
class MPL(NeuralNetwork):
    """
    Multi-layer perceptron built on top of ``NeuralNetwork``.

    The class adds the per-layer neuron counts and provides the training
    entry points for classification and regression, plus a confusion-matrix
    plot. All other settings read here (``training_data``, ``target``,
    ``nb_hidden_layers``, ``activation_fct``, ``regularisation``, ``weights``,
    ``epochs``, ``batch``, ``validation_features``, ``validation_targets``,
    ``classification``, ``prediction``) and the helpers
    ``rescale_training_data`` / ``compile_model`` are expected to come from
    ``NeuralNetwork``, which is not defined in this module.

    :param nb_neurons: Number of neurons in each hidden layer
    """

    nb_neurons: Union[List, None, np.ndarray] = None

    def __build_model(
        self, scaled_training_data, target, output_activation_function: str
    ) -> None:
        """
        Function that builds a NN model and stores it in ``self.model``.

        :param scaled_training_data: Callable applied to the input tensor
            before the hidden layers (presumably the scaling layer returned
            by ``rescale_training_data`` -- confirm against ``NeuralNetwork``)
        :param target: Target data of the model; ``target.shape[1]`` sets the
            number of output units
        :param output_activation_function: Activation function of the output
            layer of the NN
        :raises ValueError: if hidden layers are requested but ``nb_neurons``
            does not provide a size for each of them
        """
        nb_hidden = self.nb_hidden_layers
        nb_defined = 0 if self.nb_neurons is None else len(self.nb_neurons)
        if nb_defined < nb_hidden:
            # Fail early with a readable message instead of an index/type
            # error in the middle of the layer loop.
            raise ValueError(
                f"nb_neurons defines {nb_defined} layer size(s) but "
                f"nb_hidden_layers is {nb_hidden}."
            )

        # A 1-tuple is accepted by every Keras version, whereas a bare int
        # is rejected by newer shape standardisation.
        inputs = keras.Input(shape=(self.training_data.shape[1],))
        x = scaled_training_data(inputs)

        # hidden layers
        for i in range(nb_hidden):
            x = layers.Dense(
                self.nb_neurons[i],
                activation=self.activation_fct.value,
                kernel_regularizer=l2(self.regularisation),
            )(x)

        # output layer
        outputs = layers.Dense(
            target.shape[1],
            activation=output_activation_function,
            kernel_regularizer=l2(self.regularisation),
        )(x)

        # NN model
        self.model = keras.Model(inputs, outputs)
        self.model.summary()

    def __encode_target_data(self):
        """
        Method that encodes target data for classification.

        Fits a ``LabelEncoder`` on ``self.target`` (kept in ``self.encoder``),
        stores the discovered classes in ``self.target_label`` and returns the
        targets as a one-hot matrix with one column per class.
        """
        self.encoder = preprocessing.LabelEncoder()
        self.encoder.fit(self.target)
        self.target_label = self.encoder.classes_  # classes
        integer_labels = self.encoder.transform(self.target)
        return to_categorical(integer_labels, num_classes=len(self.target_label))

    def __calculate_weights_for_classification(self):
        """
        Method that calculates weights in case of imbalanced data.

        :return: ``None`` unless ``self.weights == "Auto"``; otherwise a dict
            mapping the position of each class in ``np.unique(self.target)``
            to its "balanced" weight
        """
        if self.weights != "Auto":
            return None
        # ``classes`` and ``y`` are keyword-only in recent scikit-learn
        # releases; keywords are accepted by older releases as well.
        weights = sklearn.utils.class_weight.compute_class_weight(
            class_weight="balanced",
            classes=np.unique(self.target),
            y=self.target,
        )
        return dict(enumerate(weights))

    def train_classification(self) -> None:
        """
        Method that trains a NN classification model.

        Builds a softmax-output model, fits it on ``self.training_data``
        against the one-hot encoded targets, then evaluates on the same
        training data. The fit history is stored in ``self.history`` and the
        second evaluation score in ``self.accuracy``.
        """
        target_encoded = self.__encode_target_data()
        class_weight = self.__calculate_weights_for_classification()
        scaled_training_data = self.rescale_training_data()

        self.__build_model(scaled_training_data, target_encoded, "softmax")
        self.compile_model(["accuracy"])

        # Fit the model
        self.history = self.model.fit(
            self.training_data,
            target_encoded,
            epochs=self.epochs,
            batch_size=self.batch,
            class_weight=class_weight,
        )

        # Evaluate
        scores = self.model.evaluate(self.training_data, target_encoded)
        print(
            f"{self.model.metrics_names[1]} of training: {round(scores[1] * 100, 2)} %"
        )
        self.accuracy = scores[1]

    def train_regression(self) -> None:
        """
        Method that trains a NN regression model.

        Builds a linear-output model and fits it on ``self.training_data``
        against ``self.target``. When both ``validation_features`` and
        ``validation_targets`` are set they are passed to ``fit`` as
        validation data. The fit history is stored in ``self.history``.

        :raises ValueError: if only one of ``validation_features`` and
            ``validation_targets`` is set
        """
        has_features = self.validation_features is not None
        has_targets = self.validation_targets is not None
        if has_features != has_targets:
            # A half-specified validation set would otherwise reach Keras as
            # a tuple containing None.
            raise ValueError(
                "validation_features and validation_targets must be provided together."
            )

        # scale training data
        scaled_training_data = self.rescale_training_data()
        self.__build_model(scaled_training_data, self.target, "linear")
        self.compile_model(["mse", "mae"])

        # Fit the model
        fit_options = {"epochs": self.epochs, "batch_size": self.batch}
        if has_features:
            fit_options["validation_data"] = (
                self.validation_features,
                self.validation_targets,
            )
        self.history = self.model.fit(
            self.training_data, self.target, **fit_options
        )

        # Evaluate
        scores = self.model.evaluate(self.training_data, self.target)
        print(f"{self.model.metrics_names[1]}: {round(scores[1], 2)}")

    def plot_confusion(
        self, validation: np.ndarray, output_folder: Path = Path("./")
    ) -> None:
        """
        Plots the confusion matrix for the validation dataset.

        The true labels in ``validation`` are compared with
        ``self.prediction`` (which is converted to a numpy array in place).
        The matrix is printed and saved as a PNG in ``output_folder``.

        :param validation: Validation data at the predicted points
        :param output_folder: location where the plot is saved; created if it
            does not exist
        :raises ReferenceError: if ``self.classification`` is falsy
        """
        if not self.classification:
            raise ReferenceError(
                "Method plot_confusion can only be used after performing classification process. "
            )

        # exist_ok avoids the check-then-create race of isdir + makedirs
        Path(output_folder).mkdir(parents=True, exist_ok=True)

        validation = np.array(list(validation))
        self.prediction = np.array(list(self.prediction))

        # compute confusion matrix
        confusion = confusion_matrix(
            self.encoder.transform(validation),
            self.encoder.transform(self.prediction),
            labels=self.encoder.transform(self.target_label),
        )
        print(f"Confusion matrix:\n {confusion}")

        disp = ConfusionMatrixDisplay(
            confusion_matrix=confusion, display_labels=self.target_label
        )
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.set_position([0.15, 0.15, 0.8, 0.8])
        disp.plot(cmap="binary", ax=ax)

        # %g keeps fractional regularisation values in the file name; %d
        # would truncate e.g. 0.01 to 0 and make different runs collide.
        figure_path = Path(
            output_folder,
            "confusion_matrix_epochsize%d_batchsize%d_regularization%g.png"
            % (self.epochs, self.batch, self.regularisation),
        )
        fig.savefig(figure_path)
        plt.close(fig)