# Source code for cortecs.opt.optimize_neural_net

"""
Performs simple optimization of neural network hyperparameters.
"""
import math

import numpy as np
from tqdm.autonotebook import tqdm

from cortecs.fit.fit_neural_net import *


def optimize_neural_net(
    max_size,
    max_evaluations,
    opac,
    min_layers=2,
    min_neurons=2,
    max_layers=3,
    max_neurons=13,
    min_learn_rate=0.01,
    max_learn_rate=0.1,
):
    """
    Performs simple optimization of neural network hyperparameters.

    A regular grid is built over the number of layers, the number of neurons
    per layer, the activation function ("sigmoid" or "relu") and the learning
    rate. Every grid point is fitted with ``fit_neural_net`` (2000 epochs,
    mean-squared-error loss) on the slice ``opac.cross_section[:, :, -2]``,
    and the combination with the lowest final training loss is returned.

    Inputs
    ------
    max_size: float
        maximum size of file in kB. NOTE: currently this is not used to
        constrain the search; the grid is bounded only by ``max_layers`` and
        ``max_neurons``.
    max_evaluations: int
        maximum number of evaluations of the neural network. Each numeric
        hyperparameter is sampled at ``floor(max_evaluations ** (1 / 4))``
        points. Both activation functions are always tried, so at least two
        fits are run.
    opac: object
        object providing ``cross_section``, ``T`` and ``P`` attributes.
        ``cross_section`` is indexed as ``[:, :, -2]``, so it must have at
        least three dimensions.
    min_layers, max_layers: int
        bounds (inclusive) on the number of layers.
    min_neurons, max_neurons: int
        bounds (inclusive) on the number of neurons per layer.
    min_learn_rate, max_learn_rate: float
        bounds (inclusive) on the learning rate, sampled geometrically.

    Outputs
    -------
    best_params: dict
        the best-performing hyperparameters, with keys ``"n_layers"`` (int),
        ``"n_neurons"`` (int), ``"activation"`` (str) and ``"learn_rate"``
        (float).

    Raises
    ------
    ValueError
        if ``max_evaluations`` is less than 1.
    """
    cross_section = opac.cross_section
    T = opac.T
    P = opac.P

    if max_evaluations < 1:
        raise ValueError("max_evaluations must be greater than 0.")

    # TODO: restrict the grid based on max_size. As a reference point, a
    # network with 3 layers and 8 nodes / layer (216 weights) takes ~1.1 kB.
    # For a fully connected network with a bias term, one layer of m neurons
    # has m * (m + 1) weights, so n layers have n * (m^2 + m) weights.
    n_weights = max_neurons * (max_neurons + 1) * max_layers
    print("max number of weights: ", n_weights)

    # Four hyperparameters are scanned, so each axis gets the fourth root of
    # the evaluation budget.
    n_steps = math.floor(np.power(max_evaluations, 1 / 4))

    # Layer and neuron counts must be integers. Truncating the linspace can
    # produce repeated values, so drop duplicates to avoid refitting
    # identical configurations.
    n_layers_range = np.unique(
        np.linspace(min_layers, max_layers, n_steps).astype(int)
    )
    n_neurons_range = np.unique(
        np.linspace(min_neurons, max_neurons, n_steps).astype(int)
    )
    activation_range = ["sigmoid", "relu"]
    learn_rate_range = np.geomspace(min_learn_rate, max_learn_rate, n_steps)

    print("max number of layers: ", max_layers)
    print("max number of neurons / layer: ", max_neurons)

    grids = np.meshgrid(
        n_layers_range, n_neurons_range, activation_range, learn_rate_range
    )
    # Flatten each grid once, up front, rather than on every iteration.
    flat_layers, flat_neurons, flat_activations, flat_learn_rates = (
        grid.ravel() for grid in grids
    )

    # Hand native Python scalars to the fitting routine and to the caller,
    # rather than NumPy scalar types.
    candidates = [
        {
            "n_layers": int(n_layers),
            "n_neurons": int(n_neurons),
            "activation": str(activation),
            "learn_rate": float(learn_rate),
        }
        for n_layers, n_neurons, activation, learn_rate in zip(
            flat_layers, flat_neurons, flat_activations, flat_learn_rates
        )
    ]

    final_losses = []
    for params in tqdm(
        candidates,
        desc="Optimizing neural network hyperparameters",
    ):
        history, _ = fit_neural_net(
            cross_section[:, :, -2],
            T,
            P,
            None,
            n_layers=params["n_layers"],
            n_neurons=params["n_neurons"],
            activation=params["activation"],
            learn_rate=params["learn_rate"],
            loss="mean_squared_error",
            epochs=2000,
            verbose=0,
            sequential_model=None,
            plot=False,
        )
        final_losses.append(history.history["loss"][-1])

    # np.argmin treats NaN as the minimum, which would select a diverged fit
    # as the "best" one. Rank NaN losses last instead.
    losses = np.asarray(final_losses, dtype=float)
    losses = np.where(np.isnan(losses), np.inf, losses)

    # return the best-performing hyperparameters
    best_params = dict(candidates[int(np.argmin(losses))])
    return best_params