Source code for cortecs.opt.optimize_pca
"""
Performs simple optimization of PCA hyperparameters — i.e., number of components and wavelength index
for computing eigenvectors.
"""
import math
import numpy as np
from tqdm.autonotebook import tqdm
from cortecs.fit.fit import Fitter
from cortecs.fit.fit_neural_net import *
from cortecs.fit.fit_pca import *
[docs]
def optimize_pca(
    max_size,
    max_evaluations,
    opac,
    min_components=3,
    max_components=5,
    wav_ind_start=3573,
):
    """
    Grid-search the PCA hyperparameters: the number of components and the
    wavelength index used for computing eigenvectors.

    Each of the two axes is sampled ``floor(sqrt(max_evaluations))`` times, so
    at most ``max_evaluations`` fits are run. Every grid point is fitted with
    ``Fitter(opac, method="pca", ...)`` and scored by the mean of the squared
    absolute differences returned by ``calc_metrics``. A fit that raises
    ``ValueError`` or yields a NaN score is given an infinite error.

    Inputs
    ------
    max_size: float
        maximum size of file in kB. Currently unused.
    max_evaluations: int
        maximum number of evaluations of the fitter. Must be at least 1.
    opac: object
        opacity object handed to ``Fitter``. Its ``wl`` attribute sets the upper
        end of the wavelength-index axis (``len(opac.wl) - 1``).
    min_components: int
        smallest number of PCA components to try.
    max_components: int
        largest number of PCA components to try.
    wav_ind_start: int
        first wavelength index to try.

    Outputs
    -------
    best_params: dict
        ``{"n_pc": ..., "wav_ind": ...}`` for the grid point with the lowest
        error. If every evaluation failed, this is the first grid point.

    Raises
    ------
    ValueError
        if ``max_evaluations`` is smaller than 1 (no grid point can be tested).
    """
    # Fail early and clearly instead of erroring out on an empty grid later.
    if max_evaluations < 1:
        raise ValueError(
            "max_evaluations must be at least 1; got {!r}.".format(max_evaluations)
        )

    wl = opac.wl

    # each axis — the wavelength index being tested and the number of
    # components — is tested n times, with n * n <= max_evaluations.
    n_test_each_axis = math.floor(math.sqrt(max_evaluations))

    # Casting the linspace samples to int can yield repeated values; dropping
    # them avoids re-running identical fits. Order is kept, so the grid point
    # that wins a tie is the same one as with the repeats left in
    # (assuming the fit is deterministic — TODO confirm).
    n_pc_range = _unique_in_order(
        np.linspace(min_components, max_components, n_test_each_axis).astype(int)
    )
    wav_ind_range = _unique_in_order(
        np.linspace(wav_ind_start, len(wl) - 1, n_test_each_axis).astype(int)
    )
    print("len wl")
    print(len(wl))
    print("wl range")
    print(wav_ind_range)

    n_pc_grid, wav_ind_grid = np.meshgrid(n_pc_range, wav_ind_range)
    # Flatten once, outside the loop, rather than copying the grids per sample.
    n_pc_samples = n_pc_grid.ravel()
    wav_ind_samples = wav_ind_grid.ravel()

    # max_size currently isn't used.
    final_errors = []
    for n_pc, wav_ind in tqdm(
        zip(n_pc_samples, wav_ind_samples),
        total=len(n_pc_samples),
        desc="Optimizing PCA hyperparameters",
    ):
        fitter = Fitter(opac, method="pca", wav_ind=wav_ind, nc=n_pc)
        try:
            fitter.fit(verbose=0)
            # evaluate the fit
            _, _, abs_diffs, _ = calc_metrics(fitter, plot=False)
            mse = np.mean(np.square(abs_diffs))
        except ValueError:
            # A failed fit must never be selected as the best one.
            mse = np.inf
        else:
            # np.argmin returns the position of the first NaN, so a NaN score
            # would otherwise be picked as the "minimum".
            if np.isnan(mse):
                mse = np.inf
        final_errors.append(mse)

    # return the best-performing hyperparameters
    best_sample_ind = int(np.argmin(final_errors))
    best_params = {
        "n_pc": n_pc_samples[best_sample_ind],
        "wav_ind": wav_ind_samples[best_sample_ind],
    }
    return best_params


def _unique_in_order(values):
    """Return the 1-D array ``values`` without repeats, keeping first-seen order."""
    _, first_seen = np.unique(values, return_index=True)
    return values[np.sort(first_seen)]