# Source code for cortecs.opt.optimize_neural_net

"""
Performs simple optimization of neural network hyperparameters.
"""
import math

import numpy as np
from tqdm.autonotebook import tqdm

from cortecs.fit.fit_neural_net import *



# [docs]
def optimize_neural_net(
    max_size,
    max_evaluations,
    opac,
    min_layers=2,
    min_neurons=2,
    max_layers=3,
    max_neurons=13,
    min_learn_rate=0.01,
    max_learn_rate=0.1,
):
    """
    Performs a simple grid search over neural network hyperparameters.

    A grid is built over the number of layers, the number of neurons per
    layer, the activation function ("sigmoid" or "relu") and the learning
    rate. ``fit_neural_net`` is run once per grid point and the combination
    with the smallest final training loss is returned.

    Inputs
    ------
    max_size: float
        maximum size of file in kB.
        NOTE: this value is currently not used to restrict the search; the
        grid is bounded only by max_layers / max_neurons.
    max_evaluations: int
        maximum number of evaluations of the neural network. Each numeric
        axis of the grid gets floor(max_evaluations ** (1/4)) steps, and the
        activation axis always has 2 entries, so at most 2 * n_steps ** 3
        networks are trained (this is 2 when n_steps is 1).
    opac: object
        object exposing ``cross_section``, ``T`` and ``P`` attributes. Only
        the slice ``cross_section[:, :, -2]`` is fitted.
    min_layers, max_layers: int
        bounds (inclusive) on the number of layers tried.
    min_neurons, max_neurons: int
        bounds (inclusive) on the number of neurons per layer tried.
    min_learn_rate, max_learn_rate: float
        bounds (inclusive) on the learning rate; sampled geometrically.

    Outputs
    -------
    best_params: dict
        keys "n_layers" (int), "n_neurons" (int), "activation" (str) and
        "learn_rate" (float) of the grid point with the lowest final loss.

    Raises
    ------
    ValueError
        if max_evaluations is less than 1.
    """
    if max_evaluations < 1:
        raise ValueError("max_evaluations must be greater than 0.")

    cross_section = opac.cross_section
    T = opac.T
    P = opac.P

    # The hyperparameters varied are the number of layers, the number of
    # neurons per layer, the activation function and the learning rate.
    #
    # TODO: restrict the grid based on max_size. A rough calibration is
    # 1.1 kB for 3 layers with 8 nodes / layer (= 216 weights), i.e. a budget
    # of about 216 * max_size / 1.1 weights. For a fully connected network
    # with a bias term, one layer of m neurons has m * (m + 1) weights, so n
    # layers have n * (m^2 + m); this grows faster with m than with n.

    n_steps = math.floor(np.power(max_evaluations, 1 / 4))
    n_weights = max_neurons * (max_neurons + 1) * max_layers

    print("max number of weights: ", n_weights)

    # Layer and neuron counts must be whole numbers. Truncating the linspace
    # can create repeated values (e.g. [2, 2, 3]), so drop duplicates to avoid
    # training the same architecture more than once per learning rate.
    n_layers_range = np.unique(
        np.linspace(min_layers, max_layers, n_steps).astype(int)
    )
    n_neurons_range = np.unique(
        np.linspace(min_neurons, max_neurons, n_steps).astype(int)
    )
    activation_range = ["sigmoid", "relu"]
    learn_rate_range = np.geomspace(min_learn_rate, max_learn_rate, n_steps)

    print("max number of layers: ", max_layers)
    print("max number of neurons / layer: ", max_neurons)

    grids = np.meshgrid(
        n_layers_range, n_neurons_range, activation_range, learn_rate_range
    )
    # Flatten each grid once (rather than on every loop iteration) and convert
    # the numpy scalars to native Python types for the fitting routine and for
    # the returned dictionary.
    candidates = [
        {
            "n_layers": int(n_layers),
            "n_neurons": int(n_neurons),
            "activation": str(activation),
            "learn_rate": float(learn_rate),
        }
        for n_layers, n_neurons, activation, learn_rate in zip(
            *(grid.ravel() for grid in grids)
        )
    ]

    final_losses = []
    for params in tqdm(
        candidates,
        desc="Optimizing neural network hyperparameters",
    ):
        history, _ = fit_neural_net(
            cross_section[:, :, -2],
            T,
            P,
            None,
            n_layers=params["n_layers"],
            n_neurons=params["n_neurons"],
            activation=params["activation"],
            learn_rate=params["learn_rate"],
            loss="mean_squared_error",
            epochs=2000,
            verbose=0,
            sequential_model=None,
            plot=False,
        )
        final_losses.append(history.history["loss"][-1])

    # np.argmin would pick a NaN entry if any run diverged; rank NaN losses as
    # the worst possible outcome so a diverged fit is never reported as best.
    losses = np.asarray(final_losses, dtype=float)
    losses = np.where(np.isnan(losses), np.inf, losses)

    # return the best-performing hyperparameters
    return candidates[int(np.argmin(losses))]