Tabular classification¶
- Supervised credit-g classification
In [ ]:
Copied!
# openml imports
import openml
import openml_pytorch as op
from openml_pytorch.callbacks import TestCallback
from openml_pytorch.metrics import accuracy
# pytorch imports
import torch
# other imports
import logging
import warnings
# Turn on debug-level logging for both OpenML and openml-pytorch.
for _pkg_logger in (openml.config.logger, op.config.logger):
    _pkg_logger.setLevel(logging.DEBUG)
# Suppress every Python warning so the notebook output stays readable.
warnings.simplefilter("ignore")
# openml imports
import openml
import openml_pytorch as op
from openml_pytorch.callbacks import TestCallback
from openml_pytorch.metrics import accuracy
# pytorch imports
import torch
# other imports
import logging
import warnings
# Turn on debug-level logging for both OpenML and openml-pytorch.
for _pkg_logger in (openml.config.logger, op.config.logger):
    _pkg_logger.setLevel(logging.DEBUG)
# Suppress every Python warning so the notebook output stays readable.
warnings.simplefilter("ignore")
Data¶
Define image transformations¶
Configure the Data Module and Choose a Task¶
- Make sure the data is present in the `file_dir` directory, and that `filename_col` is set correctly so that this column points to where your data is stored.
In [ ]:
Copied!
# Data module for dataframe input: the "class" column is the target and is
# treated as a categorical label.
data_module = op.OpenMLDataModule(
    target_column="class",
    target_mode="categorical",
    type_of_data="dataframe",
)

# OpenML task 31: supervised credit-g classification.
task = openml.tasks.get_task(31)
# Data module for dataframe input: the "class" column is the target and is
# treated as a categorical label.
data_module = op.OpenMLDataModule(
    target_column="class",
    target_mode="categorical",
    type_of_data="dataframe",
)

# OpenML task 31: supervised credit-g classification.
task = openml.tasks.get_task(31)
Model¶
In [ ]:
Copied!
class TabularClassificationmodel(torch.nn.Module):
    """Small fully connected network for tabular classification.

    Maps ``input_size`` features through two hidden layers (128 and 64
    units, each followed by ReLU) to ``output_size`` raw class scores.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of target classes.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_size, 128)
        self.fc2 = torch.nn.Linear(128, 64)
        self.fc3 = torch.nn.Linear(64, output_size)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return the raw class scores (logits) for the batch ``x``."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        # Softmax is intentionally NOT applied here. Logit-based losses such
        # as torch.nn.CrossEntropyLoss normalise internally, so a softmax in
        # the model would be applied twice and flatten the gradients.
        # argmax predictions are unaffected by this change.
        # NOTE(review): assumes the trainer's loss / predict_proba operate on
        # logits (CrossEntropyLoss-style) — confirm against openml_pytorch.
        return self.fc3(x)


# 20 inputs and 2 outputs — presumably the credit-g feature and class counts.
model = TabularClassificationmodel(20, 2)
class TabularClassificationmodel(torch.nn.Module):
    """Small fully connected network for tabular classification.

    Maps ``input_size`` features through two hidden layers (128 and 64
    units, each followed by ReLU) to ``output_size`` raw class scores.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of target classes.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_size, 128)
        self.fc2 = torch.nn.Linear(128, 64)
        self.fc3 = torch.nn.Linear(64, output_size)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return the raw class scores (logits) for the batch ``x``."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        # Softmax is intentionally NOT applied here. Logit-based losses such
        # as torch.nn.CrossEntropyLoss normalise internally, so a softmax in
        # the model would be applied twice and flatten the gradients.
        # argmax predictions are unaffected by this change.
        # NOTE(review): assumes the trainer's loss / predict_proba operate on
        # logits (CrossEntropyLoss-style) — confirm against openml_pytorch.
        return self.fc3(x)


# 20 inputs and 2 outputs — presumably the credit-g feature and class counts.
model = TabularClassificationmodel(20, 2)
Train your model on the data¶
- Note that by default, OpenML runs a 10-fold cross-validation on the data. You cannot change this for now.
In [ ]:
Copied!
# Build the trainer for the "Credit-G" experiment: two epochs, Adam as the
# optimizer, accuracy as the tracked metric.
trainer = op.OpenMLTrainerModule(
    experiment_name="Credit-G",
    data_module=data_module,
    verbose=True,
    epoch_count=2,
    metrics=[accuracy],
    # Add TestCallback to this list only while testing your pipeline; having
    # it here makes the pipeline run for a very short time.
    callbacks=[],
    opt=torch.optim.Adam,
)

# Register the trainer with openml-pytorch, then run the model on the task.
op.config.trainer = trainer
run = openml.runs.run_model_on_task(
    model,
    task,
    avoid_duplicate_runs=False,
)
# Build the trainer for the "Credit-G" experiment: two epochs, Adam as the
# optimizer, accuracy as the tracked metric.
trainer = op.OpenMLTrainerModule(
    experiment_name="Credit-G",
    data_module=data_module,
    verbose=True,
    epoch_count=2,
    metrics=[accuracy],
    # Add TestCallback to this list only while testing your pipeline; having
    # it here makes the pipeline run for a very short time.
    callbacks=[],
    opt=torch.optim.Adam,
)

# Register the trainer with openml-pytorch, then run the model on the task.
op.config.trainer = trainer
run = openml.runs.run_model_on_task(
    model,
    task,
    avoid_duplicate_runs=False,
)
View information about your run¶
Learning rate and loss plot¶
In [ ]:
Copied!
# Plot the loss recorded by the trainer during the run above.
trainer.plot_loss()
trainer.plot_loss()
In [ ]:
Copied!
# Plot the learning rate recorded by the trainer during the run above.
trainer.plot_lr()
trainer.plot_lr()
Class labels¶
In [ ]:
Copied!
# Bare expression: in a notebook cell this displays the class labels held by
# the trainer.
trainer.model_classes
trainer.model_classes
Model Visualization¶
- Sometimes you may want to visualize the model. You can either use netron or tensorboard for this purpose.
Netron¶
In [ ]:
Copied!
# Export the model so it can be visualized with Netron.
trainer.export_to_netron()
trainer.export_to_netron()
Tensorboard¶
- By default, openml will write the TensorBoard logs to the `tensorboard_logs` directory. You can view the logs by running `tensorboard --logdir tensorboard_logs` in the terminal.
Publish your model to OpenML¶
- This is optional, but publishing your model to OpenML will allow you to track your experiments and compare them with others.
- Make sure to set your apikey first.
- You can find your apikey on your OpenML account page.
In [ ]:
Copied!
# Placeholder: put your OpenML API key here before publishing (it is listed
# on your OpenML account page).
openml.config.apikey = ''
# Attach the trainer's experiment information to the run before uploading.
run = op.add_experiment_info_to_run(run=run, trainer=trainer)
# Publish the run to OpenML.
run.publish()
# Placeholder: put your OpenML API key here before publishing (it is listed
# on your OpenML account page).
openml.config.apikey = ''
# Attach the trainer's experiment information to the run before uploading.
run = op.add_experiment_info_to_run(run=run, trainer=trainer)
# Publish the run to OpenML.
run.publish()