Basic tutorial

from IPython.display import display, HTML, Markdown
import os
import yaml
# Render "Open in Colab" and "Launch Binder" badges for this notebook.
# Repository settings are read from the documentation site's mkdocs.yml.
# The explicit encoding keeps the read independent of the platform default.
with open("../../../mkdocs.yml", "r", encoding="utf-8") as f:
    load_config = yaml.safe_load(f)
# Drop the GitHub host prefix; presumably this leaves "<owner>/<repo>"
# (depends on how repo_url is written in mkdocs.yml -- TODO confirm).
repo_url = load_config["repo_url"].replace("https://github.com/", "")
# NOTE(review): binder_url is loaded but not used below; the Binder link is
# hard-coded. Confirm the format of binder_url before deriving the link from it.
binder_url = load_config["binder_url"]
# Location of this notebook, appended to the repository slug in the Colab link.
relative_file_path = "integrations/Scikit-learn/basic_tutorial.ipynb"
# NOTE(review): Colab GitHub links usually contain "blob/<branch>/" before the
# file path -- confirm that this URL resolves for the configured repository.
display(HTML(f"""<a href="https://colab.research.google.com/github/{repo_url}/{relative_file_path}" target="_blank">
<img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg"/>
</a>"""))
# The labpath names this notebook (basic_tutorial), matching relative_file_path;
# it previously named datasets_tutorial, a different notebook.
display(Markdown("[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/SubhadityaMukherjee/openml_docs/HEAD?labpath=Scikit-learn%2Fbasic_tutorial)"))
!pip install openml
import openml
from sklearn import impute, tree, pipeline
openml.config.start_using_configuration_for_example()
/Users/eragon/.pyenv/versions/3.9.19/envs/openml/lib/python3.9/site-packages/openml/config.py:184: UserWarning: Switching to the test server https://test.openml.org/api/v1/xml to not upload results to the live server. Using the test server may result in reduced performance of the API!
  warnings.warn(

# Model under test: a two-step scikit-learn pipeline that imputes missing
# values and then fits a decision tree classifier.
clf = pipeline.Pipeline(steps=[
    ("imputer", impute.SimpleImputer()),
    ("estimator", tree.DecisionTreeClassifier()),
])

# Fetch OpenML task 32 (described by this tutorial as the pendigits dataset
# evaluated with 10-fold cross-validation).
task = openml.tasks.get_task(32)
# A bare expression at the end of a notebook cell displays the task summary.
task
OpenML Classification Task
==========================
Task Type Description: https://test.openml.org/tt/TaskType.SUPERVISED_CLASSIFICATION
Task ID..............: 32
Task URL.............: https://test.openml.org/t/32
Estimation Procedure.: crossvalidation
Target Feature.......: class
# of Classes.........: 10
Cost Matrix..........: Available
# Evaluate the pipeline on the task; the outcome is collected in a run object.
run = openml.runs.run_model_on_task(clf, task)

# Optional: upload the run to OpenML. This requires an API key, which you can
# obtain by signing up at OpenML.org.
run.publish()

# Show where the published run can be inspected.
print("View the run online: {}".format(run.openml_url))