Hyperparameter Tuning using Optuna

Hyperparameter optimization is one of the crucial steps in training machine learning models. It is often a tedious process, with many parameters to optimize and long training times for models. Optuna is an automatic hyperparameter optimization software framework, particularly designed for machine learning. You can find more about Optuna here.

Optuna can be installed using pip -

$ pip install optuna

or using conda -

$ conda install -c conda-forge optuna
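Optuna works by repeatedly calling a user-defined objective function with a trial object that suggests hyperparameter values, and searching for the values that maximize (or minimize) the returned score. As a minimal standalone sketch of the API (not specific to KD_Lib, just to illustrate the idea) -

import optuna

# The objective receives a trial, asks it for parameter values,
# and returns the quantity to be optimized
def objective(trial):
    x = trial.suggest_float("x", -10.0, 10.0)
    return (x - 2) ** 2

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)
print(study.best_params)  # best value of x found so far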

To search for the best hyperparameters for the VanillaKD algorithm -

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from KD_Lib.KD import VanillaKD

import optuna
import joblib

# Define datasets, dataloaders, models and optimizers

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        "mnist_data",
        train=True,
        download=True,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
    ),
    batch_size=32,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        "mnist_data",
        train=False,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
    ),
    batch_size=32,
    shuffle=True,
)

# Set device to be trained on

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Optuna requires defining an objective function
# The hyperparameters are then optimized for maximizing/minimizing this objective function

def tune_VanillaKD(trial):

    teacher_model = <your model>
    student_model = <your model>

    # Define the hyperparameters and the ranges they should be trialled over

    lr = trial.suggest_float("lr", 1e-4, 1e-1)
    momentum = trial.suggest_float("momentum", 0.9, 0.99)

    # suggest_categorical choices must be simple serializable values,
    # so suggest the optimizer by name and construct it afterwards
    optimizer_name = trial.suggest_categorical("optimizer", ["SGD", "Adam"])

    if optimizer_name == "SGD":
        teacher_optimizer = optim.SGD(teacher_model.parameters(), lr=lr, momentum=momentum)
        student_optimizer = optim.SGD(student_model.parameters(), lr=lr, momentum=momentum)
    else:
        # Adam does not take a momentum argument
        teacher_optimizer = optim.Adam(teacher_model.parameters(), lr=lr)
        student_optimizer = optim.Adam(student_model.parameters(), lr=lr)

    temperature = trial.suggest_float("temperature", 5.0, 20.0)
    distil_weight = trial.suggest_float("distil_weight", 0.0, 1.0)

    # Likewise, suggest the loss by name and instantiate it afterwards
    loss_name = trial.suggest_categorical("loss_fn", ["KLDivLoss", "MSELoss"])
    loss_fn = nn.KLDivLoss() if loss_name == "KLDivLoss" else nn.MSELoss()

    # Instantiate the distiller object using KD_Lib and train

    distiller = VanillaKD(teacher_model, student_model, train_loader, test_loader,
                          teacher_optimizer, student_optimizer, loss_fn,
                          temperature, distil_weight, device)
    distiller.train_teacher(epochs=10)
    distiller.train_student(epochs=10)
    test_accuracy = distiller.evaluate()

    # The objective function must return the quantity we're trying to maximize/minimize

    return test_accuracy

# Create a study

study = optuna.create_study(study_name="Hyperparameter Optimization",
                            direction="maximize")
study.optimize(tune_VanillaKD, n_trials=10)
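
# Optional (a sketch, not part of the original recipe): an explicit sampler
# can be passed when creating the study, e.g. the TPE sampler that Optuna
# uses by default, seeded here for reproducibility -
#
# study = optuna.create_study(study_name="Hyperparameter Optimization",
#                             direction="maximize",
#                             sampler=optuna.samplers.TPESampler(seed=42))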

# Access results

results = study.trials_dataframe()
results.head()

# Get the best values of the hyperparameters

for key, value in study.best_trial.__dict__.items():
    print("{} : {}".format(key, value))
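
# Optuna also exposes the best trial directly (equivalent shortcuts)

print(study.best_params)  # dict of the best hyperparameter values
print(study.best_value)   # best objective value (test accuracy here)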

# Write results of the study

joblib.dump(study, <your path>)
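
# Alternatively (a sketch, not from the original snippet): Optuna can persist
# the study itself through an RDB backend such as SQLite -
#
# study = optuna.create_study(study_name="Hyperparameter Optimization",
#                             direction="maximize",
#                             storage="sqlite:///kd_study.db",
#                             load_if_exists=True)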

# Access results at a later time

study = joblib.load(<your path>)
results = study.trials_dataframe()
results.head()
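
# The loaded study can also be resumed to run additional trials
# (assumes tune_VanillaKD is still defined in the current session)

study.optimize(tune_VanillaKD, n_trials=10)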