Sequential Classification Task¶
- Sequential classification of a tabular MNIST dataset (Task 3573) using a simple neural network.
In [ ]:
# openml imports
import openml
import openml_pytorch as op
from openml_pytorch.callbacks import TestCallback
from openml_pytorch.metrics import accuracy
# pytorch imports
import torch
# other imports
import logging
import warnings
# set up logging
openml.config.logger.setLevel(logging.DEBUG)
op.config.logger.setLevel(logging.DEBUG)
warnings.simplefilter(action='ignore')
Data¶
Configure the Data Module and Choose a Task¶
- Configure the data module for tabular data by setting type_of_data="dataframe". Make sure the column passed as filename_col ("class", the label column of the MNIST 784 dataset) and the target_mode ("categorical" for classification) match your dataset.
In [ ]:
data_module = op.OpenMLDataModule(
    type_of_data="dataframe",    # MNIST 784 is provided as a tabular dataframe
    filename_col="class",        # "class" is the label column of the MNIST 784 dataset
    target_mode="categorical",   # classification targets
)
# Download the OpenML task for the mnist 784 dataset.
task = openml.tasks.get_task(3573)
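To double-check what was downloaded, you can inspect the task object. This is a small sketch using openml-python's task API; the exact dataset name and labels come from the server.
In [ ]:
# Inspect the downloaded task: the underlying dataset and the class labels
# the model will be asked to predict.
dataset = task.get_dataset()
print(dataset.name)        # the MNIST 784 dataset
print(task.class_labels)   # the ten digit classes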
Model¶
In [ ]:
############################################################################
# Define a sequential network that performs the initial image reshaping
# and normalization.
processing_net = torch.nn.Sequential(
    op.layers.Functional(function=torch.Tensor.reshape,
                         shape=(-1, 1, 28, 28)),
    torch.nn.BatchNorm2d(num_features=1)
)
############################################################################
############################################################################
# Define a sequential network that extracts the features from the image.
features_net = torch.nn.Sequential(
    torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5),
    torch.nn.LeakyReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),
    torch.nn.LeakyReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
)
############################################################################
############################################################################
# Define a sequential network that flattens the features and maps them to
# one score (logit) per digit class.
results_net = torch.nn.Sequential(
    op.layers.Functional(function=torch.Tensor.reshape,
                         shape=(-1, 4 * 4 * 64)),
    torch.nn.Linear(in_features=4 * 4 * 64, out_features=256),
    torch.nn.LeakyReLU(),
    torch.nn.Dropout(),
    torch.nn.Linear(in_features=256, out_features=10),
)
############################################################################
############################################################################
# The main network, composed of the networks specified above.
model = torch.nn.Sequential(
    processing_net,
    features_net,
    results_net
)
############################################################################
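The 4 * 4 * 64 flattening size follows from the feature-map arithmetic: 28 → 24 after the first 5×5 convolution, 12 after pooling, 8 after the second convolution, and 4 after the final pooling, with 64 channels. A quick sanity check of these shapes (a minimal sketch, assuming the networks above have been defined) looks like this:
In [ ]:
# Optional shape check: feed a batch of flattened 28 x 28 images through the
# sub-networks and confirm the sizes assumed by results_net.
dummy = torch.zeros(2, 784)                           # two flattened MNIST images
print(features_net(processing_net(dummy)).shape)      # expected: torch.Size([2, 64, 4, 4])
print(model(dummy).shape)                             # expected: torch.Size([2, 10]) class logits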
Train your model on the data¶
- Note that, by default, OpenML runs 10-fold cross-validation on the data. You cannot change this for now.
In [ ]:
trainer = op.OpenMLTrainerModule(
    experiment_name="MNIST",
    data_module=data_module,
    verbose=True,
    epoch_count=1,
    metrics=[accuracy],
    # TestCallback makes the pipeline run for a very short time, which is useful
    # while testing; remove it once your pipeline works.
    callbacks=[
        TestCallback,
    ],
    opt=torch.optim.Adam,
)
op.config.trainer = trainer
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
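Because the task uses 10-fold cross-validation, the returned run stores predictions for every fold. As a small sketch (assuming scikit-learn is installed and the run finished successfully), openml-python's get_metric_fn helper can compute a score per fold from those predictions:
In [ ]:
import sklearn.metrics
# Per-fold accuracy computed locally from the predictions stored in the run.
fold_accuracies = run.get_metric_fn(sklearn.metrics.accuracy_score)
print(fold_accuracies)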
View information about your run¶
Learning rate and loss plot¶
In [ ]:
trainer.plot_loss()
In [ ]:
trainer.plot_lr()
Class labels¶
In [ ]:
trainer.model_classes
Model Visualization¶
- Sometimes you may want to visualize the model. You can use either Netron or TensorBoard for this purpose.
Netron¶
In [ ]:
trainer.export_to_netron()
TensorBoard¶
- By default, the TensorBoard logs are written to the tensorboard_logs directory. You can view them by running tensorboard --logdir tensorboard_logs in the terminal.
Publish your model to OpenML¶
- This is optional, but publishing your run to OpenML allows you to track your experiments and compare them with others.
- Make sure to set your API key first.
- You can find your API key on your OpenML account page.
In [ ]:
openml.config.apikey = ''
run = op.add_experiment_info_to_run(run=run, trainer=trainer)
run.publish()
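After publishing, the server assigns the run an id. As a small convenience (assuming the default openml.org server), you can print the URL of the run's page:
In [ ]:
# run_id is assigned by the server once the run has been published.
print(f"View the run at: https://www.openml.org/r/{run.run_id}")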