# Import libraries
import openml
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from typing import Any
from tqdm import tqdm

from openml_pytorch import GenericDataset
# NOTE(review): BasicTrainer is used below but was never imported in the
# original snippet. It is assumed to be exported from the package top level
# like GenericDataset -- confirm against the installed openml_pytorch version.
from openml_pytorch import BasicTrainer

# Get dataset by ID and split into train and test
dataset = openml.datasets.get_dataset(20)
X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)
X = X.to_numpy(dtype=np.float32)
y = y.to_numpy(dtype=np.int64)
# Stratify on the labels so both splits keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y
)

# Dataloaders
ds_train = GenericDataset(X_train, y_train)
ds_test = GenericDataset(X_test, y_test)
dataloader_train = torch.utils.data.DataLoader(
    ds_train, batch_size=64, shuffle=True
)
dataloader_test = torch.utils.data.DataLoader(
    ds_test, batch_size=64, shuffle=False
)


# Model Definition
class TabularClassificationModel(torch.nn.Module):
    """Fully connected classifier: input -> 128 -> 64 -> output_size.

    ``forward`` returns raw, unnormalised logits. The script trains with
    ``torch.nn.CrossEntropyLoss``, which applies log-softmax internally, so
    applying softmax inside ``forward`` as well would normalise twice and
    flatten the gradients. Use ``self.softmax(logits)`` when class
    probabilities are needed at inference time.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of target classes.
    """

    def __init__(self, input_size: int, output_size: int) -> None:
        super().__init__()
        self.fc1 = torch.nn.Linear(input_size, 128)
        self.fc2 = torch.nn.Linear(128, 64)
        self.fc3 = torch.nn.Linear(64, output_size)
        self.relu = torch.nn.ReLU()
        # Kept as an attribute for callers that want probabilities; it is
        # deliberately not applied in forward() (see class docstring).
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of feature rows."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)


# Pick the best available device instead of hard-coding "mps", which raises
# on machines without Apple Metal support.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Train the model. Feel free to replace this with your own training pipeline.
trainer = BasicTrainer(
    model=TabularClassificationModel(
        X_train.shape[1], len(np.unique(y_train))
    ),
    loss_fn=torch.nn.CrossEntropyLoss(),
    opt=torch.optim.Adam,
    dataloader_train=dataloader_train,
    dataloader_test=dataloader_test,
    device=device,
)
trainer.fit(10)
# openml imports
import openml
import openml_pytorch as op
from openml_pytorch.callbacks import TestCallback
from openml_pytorch.metrics import accuracy
from openml_pytorch.trainer import convert_to_rgb

# pytorch imports
import torch
from torch.utils.tensorboard.writer import SummaryWriter
from torchvision.transforms import Compose, Resize, ToPILImage, ToTensor, Lambda
import torchvision

# other imports
import logging
import warnings

# set up logging
openml.config.logger.setLevel(logging.DEBUG)
op.config.logger.setLevel(logging.DEBUG)
warnings.simplefilter(action='ignore')

## Data
### Define image transformations
transform = Compose(
    [
        ToPILImage(),  # Convert tensor to PIL Image to ensure PIL Image operations can be applied.
        Lambda(convert_to_rgb),  # Convert PIL Image to RGB if it's not already.
        Resize((64, 64)),  # Resize the image.
        ToTensor(),  # Convert the PIL Image back to a tensor.
    ]
)

### Configure the Data Module and Choose a Task
# - Make sure the data is present in the `file_dir` directory, and the
#   `filename_col` is correctly set along with this column correctly pointing
#   to where your data is stored.
data_module = op.OpenMLDataModule(
    type_of_data="image",
    file_dir="datasets",
    filename_col="image_path",
    target_mode="categorical",
    target_column="label",
    batch_size=64,
    transform=transform,
)

# Download the OpenML task for tiniest imagenet
task = openml.tasks.get_task(363295)

## Model
model = torchvision.models.resnet18(num_classes=200)

## Train your model on the data
# - Note that by default, OpenML runs a 10 fold cross validation on the data.
#   You cannot change this for now.
trainer = op.OpenMLTrainerModule(
    experiment_name="Tiny ImageNet",
    data_module=data_module,
    verbose=True,
    epoch_count=2,
    metrics=[accuracy],
    # remove the TestCallback when you are done testing your pipeline.
    # Having it here will make the pipeline run for a very short time.
    callbacks=[
        # TestCallback,
    ],
    opt=torch.optim.Adam,
)
# The trainer is handed to the run through the openml_pytorch config, not
# as an argument to run_model_on_task.
op.config.trainer = trainer
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)

## View information about your run
### Learning rate and loss plot
trainer.plot_loss()
trainer.plot_lr()
trainer.plot_all_metrics()

### Class labels
# A bare `trainer.model_classes` expression only displays in a notebook;
# print it so the labels are also shown when this runs as a script.
print(trainer.model_classes)

## Model Visualization
# - Sometimes you may want to visualize the model. You can either use netron
#   or tensorboard for this purpose.
### Netron
trainer.export_to_netron()

### Tensorboard
# - By default, openml will log the tensorboard logs in the
#   `tensorboard_logs` directory. You can view the logs by running
#   `tensorboard --logdir tensorboard_logs` in the terminal.

## Publish your model to OpenML
# - This is Optional, but publishing your model to OpenML will allow you to
#   track your experiments and compare them with others.
# - Make sure to set your apikey first.
# - You can find your apikey on your OpenML account page.
trainer.plot_all_metrics()

# Paste your key here. When left empty the assignment is skipped, so a key
# already present in the OpenML configuration is not overwritten with ''.
OPENML_APIKEY = ''
if OPENML_APIKEY:
    openml.config.apikey = OPENML_APIKEY

run = op.add_experiment_info_to_run(run=run, trainer=trainer)
run.publish()