Use a Custom Loss Function¶
In [ ]:
Copied!
# Standard-library imports
import logging
import warnings

# OpenML and its PyTorch extension
import openml
import openml_pytorch as op
from openml_pytorch.callbacks import TestCallback
from openml_pytorch.metrics import accuracy
from openml_pytorch.trainer import convert_to_rgb

# PyTorch ecosystem
import torchvision
from torch.utils.tensorboard.writer import SummaryWriter
from torchvision.transforms import Compose, Lambda, Resize, ToPILImage, ToTensor

# Set up logging: debug-level output from both openml and openml_pytorch,
# and ignore all Python warnings.
openml.config.logger.setLevel(logging.DEBUG)
op.config.logger.setLevel(logging.DEBUG)
warnings.simplefilter(action='ignore')
Data¶
Define image transformations¶
In [ ]:
Copied!
# Preprocessing pipeline handed to the data module as its `transform`.
transform = Compose(
    [
        ToPILImage(),  # Convert tensor to PIL Image so PIL Image operations can be applied.
        Lambda(convert_to_rgb),  # Convert PIL Image to RGB if it's not already.
        Resize((64, 64)),  # Resize the image to 64x64.
        ToTensor(),  # Convert the PIL Image back to a tensor.
    ]
)
Configure the Data Module and Choose a Task¶
- Make sure the data is present in the `file_dir` directory, and that `filename_col` is set correctly, with that column pointing to where your data is stored.
In [ ]:
Copied!
# Image data module: files are looked up under `file_dir` via the
# `filename_col` column, labels come from `target_column`, and every image
# goes through `transform`.
data_module = op.OpenMLDataModule(
    type_of_data="image",
    file_dir="datasets",
    filename_col="image_path",
    target_mode="categorical",
    target_column="label",
    batch_size=64,
    transform=transform,
)

# Download the OpenML task for tiniest imagenet
task = openml.tasks.get_task(363295)
Model¶
In [ ]:
Copied!
# ResNet-18 with a 200-class output layer; no pretrained weights are requested.
model = torchvision.models.resnet18(num_classes=200)
Train your model on the data¶
- Note that by default, OpenML runs a 10-fold cross-validation on the data. You cannot change this for now.
- The default loss function is `CrossEntropyLoss`, which is used for classification tasks. You can change this to any other loss function that is available in PyTorch. Note that you might get errors if the loss function is not compatible with the task you are trying to solve.
- If you can't solve it yourself, feel free to create a GitHub issue.
In [ ]:
Copied!
import torch

trainer = op.OpenMLTrainerModule(
    experiment_name="Tiny ImageNet",
    data_module=data_module,
    verbose=True,
    epoch_count=1,
    metrics=[accuracy],
    # Uncomment TestCallback while testing your pipeline: it makes the
    # pipeline run for a very short time. Leave it out for a real run.
    callbacks=[
        # TestCallback,
    ],
    # NOTE: pass the loss class itself, not an instance -- do not call it
    # like torch.nn.CrossEntropyLoss().
    loss_fn=torch.nn.CrossEntropyLoss,
)

# Register the trainer on the openml_pytorch config (presumably this is what
# the extension reads while the run executes -- confirm against the library).
op.config.trainer = trainer

# Train and evaluate the model on the task. This statement appears once on
# purpose: repeating it would launch a second full training run.
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
In [ ]:
Copied!
# Placeholder: set this to your own OpenML API key before publishing
# (intentionally left blank here so that no credential is stored in the file).
openml.config.apikey = ''

# Attach the trainer's experiment information to the run, then upload it.
# Publish only once; a repeated call would attempt a second upload.
run = op.add_experiment_info_to_run(run=run, trainer=trainer)
run.publish()