Pretrained Transformer Image Classification Task¶
In [ ]:
Copied!
# openml imports
import openml
import openml_pytorch as op
from openml_pytorch.callbacks import TestCallback
from openml_pytorch.metrics import accuracy
from openml_pytorch.trainer import convert_to_rgb
# pytorch imports
from torch.utils.tensorboard.writer import SummaryWriter
from torchvision.transforms import Compose, Resize, ToPILImage, ToTensor, Lambda
import torchvision
import torch
# other imports
import logging
import warnings
# Set up logging: emit debug-level messages from both openml and openml_pytorch.
openml.config.logger.setLevel(logging.DEBUG)
# The package is imported as `op` above, so the bare name `openml_pytorch`
# is not bound here; go through the alias to avoid a NameError.
op.config.logger.setLevel(logging.DEBUG)
# Silence all warnings to keep the notebook output readable.
warnings.simplefilter(action='ignore')
# openml imports
import openml
import openml_pytorch as op
from openml_pytorch.callbacks import TestCallback
from openml_pytorch.metrics import accuracy
from openml_pytorch.trainer import convert_to_rgb
# pytorch imports
from torch.utils.tensorboard.writer import SummaryWriter
from torchvision.transforms import Compose, Resize, ToPILImage, ToTensor, Lambda
import torchvision
import torch
# other imports
import logging
import warnings
# Set up logging: emit debug-level messages from both openml and openml_pytorch.
openml.config.logger.setLevel(logging.DEBUG)
# The package is imported as `op` above, so the bare name `openml_pytorch`
# is not bound here; go through the alias to avoid a NameError.
op.config.logger.setLevel(logging.DEBUG)
# Silence all warnings to keep the notebook output readable.
warnings.simplefilter(action='ignore')
Data¶
Define image transformations¶
In [ ]:
Copied!
# Preprocessing steps applied, in order, to every image.
preprocessing_steps = [
    # Go through a PIL Image first so the PIL-based operations below apply.
    ToPILImage(),
    # Convert the PIL Image to RGB if it's not already.
    Lambda(convert_to_rgb),
    # Bring every image to the same 64x64 size.
    Resize((64, 64)),
    # Hand a tensor back at the end of the pipeline.
    ToTensor(),
]
transform = Compose(preprocessing_steps)
# Preprocessing steps applied, in order, to every image.
preprocessing_steps = [
    # Go through a PIL Image first so the PIL-based operations below apply.
    ToPILImage(),
    # Convert the PIL Image to RGB if it's not already.
    Lambda(convert_to_rgb),
    # Bring every image to the same 64x64 size.
    Resize((64, 64)),
    # Hand a tensor back at the end of the pipeline.
    ToTensor(),
]
transform = Compose(preprocessing_steps)
Configure the Data Module and Choose a Task¶
- Make sure the data is present in the `file_dir` directory, and that `filename_col` is set correctly, with that column pointing to where your data is stored.
In [ ]:
Copied!
# Options describing the image data: where the files live, which columns hold
# the file name and the label, the batch size, and the per-image transform.
data_module_options = {
    "type_of_data": "image",
    "file_dir": "datasets",
    "filename_col": "image_path",
    "target_mode": "categorical",
    "target_column": "label",
    "batch_size": 64,
    "transform": transform,
}
data_module = op.OpenMLDataModule(**data_module_options)

# Download the OpenML task for tiniest imagenet
tiniest_imagenet_task_id = 363295
task = openml.tasks.get_task(tiniest_imagenet_task_id)
# Options describing the image data: where the files live, which columns hold
# the file name and the label, the batch size, and the per-image transform.
data_module_options = {
    "type_of_data": "image",
    "file_dir": "datasets",
    "filename_col": "image_path",
    "target_mode": "categorical",
    "target_column": "label",
    "batch_size": 64,
    "transform": transform,
}
data_module = op.OpenMLDataModule(**data_module_options)

# Download the OpenML task for tiniest imagenet
tiniest_imagenet_task_id = 363295
task = openml.tasks.get_task(tiniest_imagenet_task_id)
Model¶
In [ ]:
Copied!
# Example model. You can do better :)
import torchvision.models as models
import torch.nn as nn

# Load EfficientNet-B0 with its ImageNet-pretrained weights. The `weights`
# argument replaces `pretrained=True`, which torchvision deprecated in 0.13;
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` used to select.
model = models.efficientnet_b0(
    weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1
)

# Replace the classification head so it outputs the required number of classes
num_classes = 200
in_features = model.classifier[-1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features, num_classes),
)

# Optional: If you're fine-tuning, you may want to freeze the pre-trained layers
# for param in model.parameters():
#     param.requires_grad = False
# # If you want to train the last layer only (the newly added layer).
# # EfficientNet exposes its head as `model.classifier`; it has no `fc` attribute.
# for param in model.classifier.parameters():
#     param.requires_grad = True
# Example model. You can do better :)
import torchvision.models as models
import torch.nn as nn

# Load EfficientNet-B0 with its ImageNet-pretrained weights. The `weights`
# argument replaces `pretrained=True`, which torchvision deprecated in 0.13;
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` used to select.
model = models.efficientnet_b0(
    weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1
)

# Replace the classification head so it outputs the required number of classes
num_classes = 200
in_features = model.classifier[-1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features, num_classes),
)

# Optional: If you're fine-tuning, you may want to freeze the pre-trained layers
# for param in model.parameters():
#     param.requires_grad = False
# # If you want to train the last layer only (the newly added layer).
# # EfficientNet exposes its head as `model.classifier`; it has no `fc` attribute.
# for param in model.classifier.parameters():
#     param.requires_grad = True
Train your model on the data¶
- Note that by default, OpenML runs a 10-fold cross-validation on the data. You cannot change this for now.
In [ ]:
Copied!
# Gather the training options in one place before building the trainer.
trainer_options = dict(
    experiment_name="Tiny ImageNet",
    data_module=data_module,
    verbose=True,
    epoch_count=1,
    metrics=[accuracy],
    # Remove the TestCallback when you are done testing your pipeline.
    # Having it here will make the pipeline run for a very short time.
    callbacks=[TestCallback],
    opt=torch.optim.Adam,
)
trainer = op.OpenMLTrainerModule(**trainer_options)

# Register the trainer on the openml_pytorch config before starting the run
# (presumably this is where the run picks it up; confirm against the library).
op.config.trainer = trainer
run = openml.runs.run_model_on_task(
    model,
    task,
    avoid_duplicate_runs=False,
)
# Gather the training options in one place before building the trainer.
trainer_options = dict(
    experiment_name="Tiny ImageNet",
    data_module=data_module,
    verbose=True,
    epoch_count=1,
    metrics=[accuracy],
    # Remove the TestCallback when you are done testing your pipeline.
    # Having it here will make the pipeline run for a very short time.
    callbacks=[TestCallback],
    opt=torch.optim.Adam,
)
trainer = op.OpenMLTrainerModule(**trainer_options)

# Register the trainer on the openml_pytorch config before starting the run
# (presumably this is where the run picks it up; confirm against the library).
op.config.trainer = trainer
run = openml.runs.run_model_on_task(
    model,
    task,
    avoid_duplicate_runs=False,
)
View information about your run¶
Learning rate and loss plot¶
In [ ]:
Copied!
# Show the trainer's loss plot for the run above.
trainer.plot_loss()
trainer.plot_loss()
In [ ]:
Copied!
# Show the trainer's learning-rate plot for the run above.
trainer.plot_lr()
trainer.plot_lr()
Class labels¶
In [ ]:
Copied!
# Display the class labels held by the trainer (shown as the cell's output).
trainer.model_classes
trainer.model_classes
Model Visualization¶
- Sometimes you may want to visualize the model. You can either use netron or tensorboard for this purpose.
Netron¶
In [ ]:
Copied!
# Export the model so it can be visualized with netron.
trainer.export_to_netron()
trainer.export_to_netron()
Tensorboard¶
- By default, openml will log the tensorboard logs in the `tensorboard_logs` directory. You can view the logs by running `tensorboard --logdir tensorboard_logs` in the terminal.
Publish your model to OpenML¶
- This is Optional, but publishing your model to OpenML will allow you to track your experiments and compare them with others.
- Make sure to set your apikey first.
- You can find your apikey on your OpenML account page.
In [ ]:
Copied!
# Fill in your own OpenML API key here; it is left blank on purpose.
openml.config.apikey = ''
# Attach the trainer's experiment information to the run before uploading.
run = op.add_experiment_info_to_run(run=run, trainer=trainer)
# Publish the run to OpenML.
run.publish()
# Fill in your own OpenML API key here; it is left blank on purpose.
openml.config.apikey = ''
# Attach the trainer's experiment information to the run before uploading.
run = op.add_experiment_info_to_run(run=run, trainer=trainer)
# Publish the run to OpenML.
run.publish()