
openml.tasks.task #

OpenMLClassificationTask #

OpenMLClassificationTask(task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, estimation_procedure_id: int = 1, estimation_procedure_type: str | None = None, estimation_parameters: dict[str, str] | None = None, evaluation_measure: str | None = None, data_splits_url: str | None = None, task_id: int | None = None, class_labels: list[str] | None = None, cost_matrix: ndarray | None = None)

Bases: OpenMLSupervisedTask

OpenML Classification object.

Parameters#

task_type_id : TaskType
    ID of the Classification task type.
task_type : str
    Name of the Classification task type.
data_set_id : int
    ID of the OpenML dataset associated with the Classification task.
target_name : str
    Name of the target variable.
estimation_procedure_id : int, default=1
    ID of the estimation procedure for the Classification task.
estimation_procedure_type : str, default=None
    Type of the estimation procedure.
estimation_parameters : dict, default=None
    Estimation parameters for the Classification task.
evaluation_measure : str, default=None
    Name of the evaluation measure.
data_splits_url : str, default=None
    URL of the data splits for the Classification task.
task_id : Union[int, None]
    ID of the Classification task (if it already exists on OpenML).
class_labels : list of str, default=None
    A list of class labels (for classification tasks).
cost_matrix : array, default=None
    A cost matrix (for classification tasks).

Source code in openml/tasks/task.py
def __init__(  # noqa: PLR0913
    self,
    task_type_id: TaskType,
    task_type: str,
    data_set_id: int,
    target_name: str,
    estimation_procedure_id: int = 1,
    estimation_procedure_type: str | None = None,
    estimation_parameters: dict[str, str] | None = None,
    evaluation_measure: str | None = None,
    data_splits_url: str | None = None,
    task_id: int | None = None,
    class_labels: list[str] | None = None,
    cost_matrix: np.ndarray | None = None,
):
    super().__init__(
        task_id=task_id,
        task_type_id=task_type_id,
        task_type=task_type,
        data_set_id=data_set_id,
        estimation_procedure_id=estimation_procedure_id,
        estimation_procedure_type=estimation_procedure_type,
        estimation_parameters=estimation_parameters,
        evaluation_measure=evaluation_measure,
        target_name=target_name,
        data_splits_url=data_splits_url,
    )
    self.class_labels = class_labels
    self.cost_matrix = cost_matrix

    if cost_matrix is not None:
        raise NotImplementedError("Costmatrix")
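
In practice you rarely construct an `OpenMLClassificationTask` by hand; you usually fetch an existing one from the server with `openml.tasks.get_task`. A minimal sketch (task ID 31 is only an illustrative classification task ID, substitute any valid one):

```python
import openml

# Fetch an existing classification task from the OpenML server.
# Task 31 is used purely as an illustrative example ID.
task = openml.tasks.get_task(31)

print(task.task_type)    # e.g. "Supervised Classification"
print(task.target_name)  # name of the class attribute
print(task.class_labels) # list of class labels, or None
print(task.dataset_id)   # ID of the associated dataset
```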

estimation_parameters property writable #

estimation_parameters: dict[str, str] | None

Return the estimation parameters for the task.

id property #

id: int | None

Return the OpenML ID of this task.

openml_url property #

openml_url: str | None

The URL of the object on the server, if it was uploaded, else None.

download_split #

download_split() -> OpenMLSplit

Download the OpenML split for a given task.

Source code in openml/tasks/task.py
def download_split(self) -> OpenMLSplit:
    """Download the OpenML split for a given task."""
    # TODO(eddiebergman): Can this ever be `None`?
    assert self.task_id is not None
    cache_dir = _create_cache_directory_for_id("tasks", self.task_id)
    cached_split_file = cache_dir / "datasplits.arff"

    try:
        split = OpenMLSplit._from_arff_file(cached_split_file)
    except OSError:
        # Next, download and cache the associated split file
        self._download_split(cached_split_file)
        split = OpenMLSplit._from_arff_file(cached_split_file)

    return split

get_X_and_y #

get_X_and_y() -> tuple[DataFrame, Series | DataFrame | None]

Get data associated with the current task.

Returns#

tuple - X and y

Source code in openml/tasks/task.py
def get_X_and_y(self) -> tuple[pd.DataFrame, pd.Series | pd.DataFrame | None]:
    """Get data associated with the current task.

    Returns
    -------
    tuple - X and y

    """
    dataset = self.get_dataset()
    if self.task_type_id not in (
        TaskType.SUPERVISED_CLASSIFICATION,
        TaskType.SUPERVISED_REGRESSION,
        TaskType.LEARNING_CURVE,
    ):
        raise NotImplementedError(self.task_type)

    X, y, _, _ = dataset.get_data(target=self.target_name)
    return X, y
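
A short sketch of how `get_X_and_y` is typically used, again assuming an illustrative classification task ID:

```python
import openml

task = openml.tasks.get_task(31)  # illustrative classification task ID

# X is a pandas DataFrame of features; y is a Series (or DataFrame) holding
# the column named by task.target_name.
X, y = task.get_X_and_y()
print(X.shape, y.shape)
print(y.value_counts())  # class distribution of the target
```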

get_dataset #

get_dataset(**kwargs: Any) -> OpenMLDataset

Download dataset associated with task.

Accepts the same keyword arguments as openml.datasets.get_dataset.

Source code in openml/tasks/task.py
def get_dataset(self, **kwargs: Any) -> datasets.OpenMLDataset:
    """Download dataset associated with task.

    Accepts the same keyword arguments as the `openml.datasets.get_dataset`.
    """
    return datasets.get_dataset(self.dataset_id, **kwargs)
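
The keyword arguments are forwarded to `openml.datasets.get_dataset`; for example, dataset metadata can be fetched without downloading the data itself. A sketch, assuming the `download_data` flag is supported by your openml-python version:

```python
import openml

task = openml.tasks.get_task(31)  # illustrative task ID

# Forwarded to openml.datasets.get_dataset; download_data=False fetches only
# the dataset description/metadata (flag availability depends on the version).
dataset = task.get_dataset(download_data=False)
print(dataset.name, dataset.default_target_attribute)
```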

get_split_dimensions #

get_split_dimensions() -> tuple[int, int, int]

Get the (repeats, folds, samples) of the split for a given task.

Source code in openml/tasks/task.py
def get_split_dimensions(self) -> tuple[int, int, int]:
    """Get the (repeats, folds, samples) of the split for a given task."""
    if self.split is None:
        self.split = self.download_split()

    return self.split.repeats, self.split.folds, self.split.samples
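
A sketch of iterating over the full split structure of a task; for a plain cross-validation task the sample dimension is typically 1:

```python
import openml

task = openml.tasks.get_task(31)  # illustrative task ID
n_repeats, n_folds, n_samples = task.get_split_dimensions()

for repeat in range(n_repeats):
    for fold in range(n_folds):
        for sample in range(n_samples):
            train_idx, test_idx = task.get_train_test_split_indices(
                fold=fold, repeat=repeat, sample=sample
            )
            print(repeat, fold, sample, len(train_idx), len(test_idx))
```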

get_train_test_split_indices #

get_train_test_split_indices(fold: int = 0, repeat: int = 0, sample: int = 0) -> tuple[ndarray, ndarray]

Get the indices of the train and test splits for a given task.

Source code in openml/tasks/task.py
def get_train_test_split_indices(
    self,
    fold: int = 0,
    repeat: int = 0,
    sample: int = 0,
) -> tuple[np.ndarray, np.ndarray]:
    """Get the indices of the train and test splits for a given task."""
    # Replace with retrieve from cache
    if self.split is None:
        self.split = self.download_split()

    return self.split.get(repeat=repeat, fold=fold, sample=sample)
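
The returned arrays are row indices into the data returned by `get_X_and_y`, so a single fold can be materialised like this (sketch, illustrative task ID):

```python
import openml

task = openml.tasks.get_task(31)  # illustrative task ID
X, y = task.get_X_and_y()
train_idx, test_idx = task.get_train_test_split_indices(fold=0, repeat=0)

# Positional indexing with the split indices.
X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]
print(X_train.shape, X_test.shape)
```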

open_in_browser #

open_in_browser() -> None

Opens the OpenML web page corresponding to this object in your default browser.

Source code in openml/base.py
def open_in_browser(self) -> None:
    """Opens the OpenML web page corresponding to this object in your default browser."""
    if self.openml_url is None:
        raise ValueError(
            "Cannot open element on OpenML.org when attribute `openml_url` is `None`",
        )

    webbrowser.open(self.openml_url)

publish #

publish() -> OpenMLBase

Publish the object on the OpenML server.

Source code in openml/base.py
def publish(self) -> OpenMLBase:
    """Publish the object on the OpenML server."""
    file_elements = self._get_file_elements()

    if "description" not in file_elements:
        file_elements["description"] = self._to_xml()

    call = f"{_get_rest_api_type_alias(self)}/"
    response_text = openml._api_calls._perform_api_call(
        call,
        "post",
        file_elements=file_elements,
    )
    xml_response = xmltodict.parse(response_text)

    self._parse_publish_response(xml_response)
    return self
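
New tasks are usually built with the helper `openml.tasks.create_task` and then published. The sketch below assumes a configured API key; the dataset ID, target name, and estimation procedure ID (1, i.e. 10-fold cross-validation) are illustrative values, and the call may fail if an identical task already exists on the server:

```python
import openml
from openml.tasks import TaskType

openml.config.apikey = "..."  # your OpenML API key

# create_task builds an unsaved task object; publish() uploads it.
# dataset_id=31, target_name="class", and estimation_procedure_id=1 are
# illustrative values for this sketch.
new_task = openml.tasks.create_task(
    task_type=TaskType.SUPERVISED_CLASSIFICATION,
    dataset_id=31,
    target_name="class",
    estimation_procedure_id=1,
)
new_task.publish()
print(new_task.id, new_task.openml_url)
```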

push_tag #

push_tag(tag: str) -> None

Annotates this entity with a tag on the server.

Parameters#

tag : str
    Tag to attach to the entity.

Source code in openml/base.py
def push_tag(self, tag: str) -> None:
    """Annotates this entity with a tag on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag)

remove_tag #

remove_tag(tag: str) -> None

Removes a tag from this entity on the server.

Parameters#

tag : str
    Tag to remove from the entity.

Source code in openml/base.py
def remove_tag(self, tag: str) -> None:
    """Removes a tag from this entity on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag, untag=True)
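
A brief sketch of tagging and untagging a task on the server (requires an API key; the tag name and task ID are arbitrary illustrative values):

```python
import openml

openml.config.apikey = "..."      # your OpenML API key
task = openml.tasks.get_task(31)  # illustrative task ID

task.push_tag("my_study_tag")     # attach a tag on the server
task.remove_tag("my_study_tag")   # and remove it again
```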

url_for_id classmethod #

url_for_id(id_: int) -> str

Return the OpenML URL for the object of the class entity with the given id.

Source code in openml/base.py
@classmethod
def url_for_id(cls, id_: int) -> str:
    """Return the OpenML URL for the object of the class entity with the given id."""
    # Sample url for a flow: openml.org/f/123
    return f"{openml.config.get_server_base_url()}/{cls._entity_letter()}/{id_}"

OpenMLClusteringTask #

OpenMLClusteringTask(task_type_id: TaskType, task_type: str, data_set_id: int, estimation_procedure_id: int = 17, task_id: int | None = None, estimation_procedure_type: str | None = None, estimation_parameters: dict[str, str] | None = None, data_splits_url: str | None = None, evaluation_measure: str | None = None, target_name: str | None = None)

Bases: OpenMLTask

OpenML Clustering object.

Parameters#

task_type_id : TaskType
    Task type ID of the OpenML clustering task.
task_type : str
    Task type of the OpenML clustering task.
data_set_id : int
    ID of the OpenML dataset used in the clustering task.
estimation_procedure_id : int, default=17
    ID of the OpenML estimation procedure.
task_id : Union[int, None]
    ID of the OpenML clustering task.
estimation_procedure_type : str, default=None
    Type of the OpenML estimation procedure used in the clustering task.
estimation_parameters : dict, default=None
    Parameters used by the OpenML estimation procedure.
data_splits_url : str, default=None
    URL of the OpenML data splits for the clustering task.
evaluation_measure : str, default=None
    Evaluation measure used in the clustering task.
target_name : str, default=None
    Name of the target feature (class) that is not part of the feature set for the clustering task.

Source code in openml/tasks/task.py
def __init__(  # noqa: PLR0913
    self,
    task_type_id: TaskType,
    task_type: str,
    data_set_id: int,
    estimation_procedure_id: int = 17,
    task_id: int | None = None,
    estimation_procedure_type: str | None = None,
    estimation_parameters: dict[str, str] | None = None,
    data_splits_url: str | None = None,
    evaluation_measure: str | None = None,
    target_name: str | None = None,
):
    super().__init__(
        task_id=task_id,
        task_type_id=task_type_id,
        task_type=task_type,
        data_set_id=data_set_id,
        evaluation_measure=evaluation_measure,
        estimation_procedure_id=estimation_procedure_id,
        estimation_procedure_type=estimation_procedure_type,
        estimation_parameters=estimation_parameters,
        data_splits_url=data_splits_url,
    )

    self.target_name = target_name
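
Clustering tasks have no target, so the data is retrieved with `get_X` rather than `get_X_and_y`. A sketch; the task ID below is hypothetical, so substitute a real clustering task ID (e.g. one found via `openml.tasks.list_tasks`):

```python
import openml

clustering_task_id = 12345  # hypothetical clustering task ID
task = openml.tasks.get_task(clustering_task_id)

X = task.get_X()  # full feature table as a pandas DataFrame
print(X.shape)
```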

id property #

id: int | None

Return the OpenML ID of this task.

openml_url property #

openml_url: str | None

The URL of the object on the server, if it was uploaded, else None.

download_split #

download_split() -> OpenMLSplit

Download the OpenML split for a given task.

Source code in openml/tasks/task.py
def download_split(self) -> OpenMLSplit:
    """Download the OpenML split for a given task."""
    # TODO(eddiebergman): Can this ever be `None`?
    assert self.task_id is not None
    cache_dir = _create_cache_directory_for_id("tasks", self.task_id)
    cached_split_file = cache_dir / "datasplits.arff"

    try:
        split = OpenMLSplit._from_arff_file(cached_split_file)
    except OSError:
        # Next, download and cache the associated split file
        self._download_split(cached_split_file)
        split = OpenMLSplit._from_arff_file(cached_split_file)

    return split

get_X #

get_X() -> DataFrame

Get data associated with the current task.

Returns#

The X data as a dataframe

Source code in openml/tasks/task.py
def get_X(self) -> pd.DataFrame:
    """Get data associated with the current task.

    Returns
    -------
    The X data as a dataframe
    """
    dataset = self.get_dataset()
    data, *_ = dataset.get_data(target=None)
    return data

get_dataset #

get_dataset(**kwargs: Any) -> OpenMLDataset

Download dataset associated with task.

Accepts the same keyword arguments as openml.datasets.get_dataset.

Source code in openml/tasks/task.py
def get_dataset(self, **kwargs: Any) -> datasets.OpenMLDataset:
    """Download dataset associated with task.

    Accepts the same keyword arguments as the `openml.datasets.get_dataset`.
    """
    return datasets.get_dataset(self.dataset_id, **kwargs)

get_split_dimensions #

get_split_dimensions() -> tuple[int, int, int]

Get the (repeats, folds, samples) of the split for a given task.

Source code in openml/tasks/task.py
def get_split_dimensions(self) -> tuple[int, int, int]:
    """Get the (repeats, folds, samples) of the split for a given task."""
    if self.split is None:
        self.split = self.download_split()

    return self.split.repeats, self.split.folds, self.split.samples

get_train_test_split_indices #

get_train_test_split_indices(fold: int = 0, repeat: int = 0, sample: int = 0) -> tuple[ndarray, ndarray]

Get the indices of the train and test splits for a given task.

Source code in openml/tasks/task.py
def get_train_test_split_indices(
    self,
    fold: int = 0,
    repeat: int = 0,
    sample: int = 0,
) -> tuple[np.ndarray, np.ndarray]:
    """Get the indices of the train and test splits for a given task."""
    # Replace with retrieve from cache
    if self.split is None:
        self.split = self.download_split()

    return self.split.get(repeat=repeat, fold=fold, sample=sample)

open_in_browser #

open_in_browser() -> None

Opens the OpenML web page corresponding to this object in your default browser.

Source code in openml/base.py
def open_in_browser(self) -> None:
    """Opens the OpenML web page corresponding to this object in your default browser."""
    if self.openml_url is None:
        raise ValueError(
            "Cannot open element on OpenML.org when attribute `openml_url` is `None`",
        )

    webbrowser.open(self.openml_url)

publish #

publish() -> OpenMLBase

Publish the object on the OpenML server.

Source code in openml/base.py
def publish(self) -> OpenMLBase:
    """Publish the object on the OpenML server."""
    file_elements = self._get_file_elements()

    if "description" not in file_elements:
        file_elements["description"] = self._to_xml()

    call = f"{_get_rest_api_type_alias(self)}/"
    response_text = openml._api_calls._perform_api_call(
        call,
        "post",
        file_elements=file_elements,
    )
    xml_response = xmltodict.parse(response_text)

    self._parse_publish_response(xml_response)
    return self

push_tag #

push_tag(tag: str) -> None

Annotates this entity with a tag on the server.

Parameters#

tag : str
    Tag to attach to the entity.

Source code in openml/base.py
def push_tag(self, tag: str) -> None:
    """Annotates this entity with a tag on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag)

remove_tag #

remove_tag(tag: str) -> None

Removes a tag from this entity on the server.

Parameters#

tag : str
    Tag to remove from the entity.

Source code in openml/base.py
def remove_tag(self, tag: str) -> None:
    """Removes a tag from this entity on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag, untag=True)

url_for_id classmethod #

url_for_id(id_: int) -> str

Return the OpenML URL for the object of the class entity with the given id.

Source code in openml/base.py
@classmethod
def url_for_id(cls, id_: int) -> str:
    """Return the OpenML URL for the object of the class entity with the given id."""
    # Sample url for a flow: openml.org/f/123
    return f"{openml.config.get_server_base_url()}/{cls._entity_letter()}/{id_}"

OpenMLLearningCurveTask #

OpenMLLearningCurveTask(task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, estimation_procedure_id: int = 13, estimation_procedure_type: str | None = None, estimation_parameters: dict[str, str] | None = None, data_splits_url: str | None = None, task_id: int | None = None, evaluation_measure: str | None = None, class_labels: list[str] | None = None, cost_matrix: ndarray | None = None)

Bases: OpenMLClassificationTask

OpenML Learning Curve object.

Parameters#

task_type_id : TaskType
    ID of the Learning Curve task type.
task_type : str
    Name of the Learning Curve task type.
data_set_id : int
    ID of the dataset that this task is associated with.
target_name : str
    Name of the target feature in the dataset.
estimation_procedure_id : int, default=13
    ID of the estimation procedure to use for evaluating models.
estimation_procedure_type : str, default=None
    Type of the estimation procedure.
estimation_parameters : dict, default=None
    Additional parameters for the estimation procedure.
data_splits_url : str, default=None
    URL of the file containing the data splits for the Learning Curve task.
task_id : Union[int, None]
    ID of the Learning Curve task.
evaluation_measure : str, default=None
    Name of the evaluation measure to use for evaluating models.
class_labels : list of str, default=None
    Class labels for Learning Curve tasks.
cost_matrix : numpy array, default=None
    Cost matrix for Learning Curve tasks.

Source code in openml/tasks/task.py
def __init__(  # noqa: PLR0913
    self,
    task_type_id: TaskType,
    task_type: str,
    data_set_id: int,
    target_name: str,
    estimation_procedure_id: int = 13,
    estimation_procedure_type: str | None = None,
    estimation_parameters: dict[str, str] | None = None,
    data_splits_url: str | None = None,
    task_id: int | None = None,
    evaluation_measure: str | None = None,
    class_labels: list[str] | None = None,
    cost_matrix: np.ndarray | None = None,
):
    super().__init__(
        task_id=task_id,
        task_type_id=task_type_id,
        task_type=task_type,
        data_set_id=data_set_id,
        estimation_procedure_id=estimation_procedure_id,
        estimation_procedure_type=estimation_procedure_type,
        estimation_parameters=estimation_parameters,
        evaluation_measure=evaluation_measure,
        target_name=target_name,
        data_splits_url=data_splits_url,
        class_labels=class_labels,
        cost_matrix=cost_matrix,
    )
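
Learning-curve tasks behave like classification tasks but add a non-trivial sample dimension: each (repeat, fold, sample) combination selects a training set of increasing size while the test fold stays fixed. A sketch, with a hypothetical learning-curve task ID:

```python
import openml

lc_task = openml.tasks.get_task(1777)  # hypothetical learning-curve task ID
n_repeats, n_folds, n_samples = lc_task.get_split_dimensions()

# Training set size grows with the sample index; the test fold is unchanged.
for sample in range(n_samples):
    train_idx, test_idx = lc_task.get_train_test_split_indices(
        fold=0, repeat=0, sample=sample
    )
    print(sample, len(train_idx), len(test_idx))
```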

estimation_parameters property writable #

estimation_parameters: dict[str, str] | None

Return the estimation parameters for the task.

id property #

id: int | None

Return the OpenML ID of this task.

openml_url property #

openml_url: str | None

The URL of the object on the server, if it was uploaded, else None.

download_split #

download_split() -> OpenMLSplit

Download the OpenML split for a given task.

Source code in openml/tasks/task.py
def download_split(self) -> OpenMLSplit:
    """Download the OpenML split for a given task."""
    # TODO(eddiebergman): Can this ever be `None`?
    assert self.task_id is not None
    cache_dir = _create_cache_directory_for_id("tasks", self.task_id)
    cached_split_file = cache_dir / "datasplits.arff"

    try:
        split = OpenMLSplit._from_arff_file(cached_split_file)
    except OSError:
        # Next, download and cache the associated split file
        self._download_split(cached_split_file)
        split = OpenMLSplit._from_arff_file(cached_split_file)

    return split

get_X_and_y #

get_X_and_y() -> tuple[DataFrame, Series | DataFrame | None]

Get data associated with the current task.

Returns#

tuple - X and y

Source code in openml/tasks/task.py
def get_X_and_y(self) -> tuple[pd.DataFrame, pd.Series | pd.DataFrame | None]:
    """Get data associated with the current task.

    Returns
    -------
    tuple - X and y

    """
    dataset = self.get_dataset()
    if self.task_type_id not in (
        TaskType.SUPERVISED_CLASSIFICATION,
        TaskType.SUPERVISED_REGRESSION,
        TaskType.LEARNING_CURVE,
    ):
        raise NotImplementedError(self.task_type)

    X, y, _, _ = dataset.get_data(target=self.target_name)
    return X, y

get_dataset #

get_dataset(**kwargs: Any) -> OpenMLDataset

Download dataset associated with task.

Accepts the same keyword arguments as openml.datasets.get_dataset.

Source code in openml/tasks/task.py
def get_dataset(self, **kwargs: Any) -> datasets.OpenMLDataset:
    """Download dataset associated with task.

    Accepts the same keyword arguments as the `openml.datasets.get_dataset`.
    """
    return datasets.get_dataset(self.dataset_id, **kwargs)

get_split_dimensions #

get_split_dimensions() -> tuple[int, int, int]

Get the (repeats, folds, samples) of the split for a given task.

Source code in openml/tasks/task.py
def get_split_dimensions(self) -> tuple[int, int, int]:
    """Get the (repeats, folds, samples) of the split for a given task."""
    if self.split is None:
        self.split = self.download_split()

    return self.split.repeats, self.split.folds, self.split.samples

get_train_test_split_indices #

get_train_test_split_indices(fold: int = 0, repeat: int = 0, sample: int = 0) -> tuple[ndarray, ndarray]

Get the indices of the train and test splits for a given task.

Source code in openml/tasks/task.py
def get_train_test_split_indices(
    self,
    fold: int = 0,
    repeat: int = 0,
    sample: int = 0,
) -> tuple[np.ndarray, np.ndarray]:
    """Get the indices of the train and test splits for a given task."""
    # Replace with retrieve from cache
    if self.split is None:
        self.split = self.download_split()

    return self.split.get(repeat=repeat, fold=fold, sample=sample)

open_in_browser #

open_in_browser() -> None

Opens the OpenML web page corresponding to this object in your default browser.

Source code in openml/base.py
def open_in_browser(self) -> None:
    """Opens the OpenML web page corresponding to this object in your default browser."""
    if self.openml_url is None:
        raise ValueError(
            "Cannot open element on OpenML.org when attribute `openml_url` is `None`",
        )

    webbrowser.open(self.openml_url)

publish #

publish() -> OpenMLBase

Publish the object on the OpenML server.

Source code in openml/base.py
def publish(self) -> OpenMLBase:
    """Publish the object on the OpenML server."""
    file_elements = self._get_file_elements()

    if "description" not in file_elements:
        file_elements["description"] = self._to_xml()

    call = f"{_get_rest_api_type_alias(self)}/"
    response_text = openml._api_calls._perform_api_call(
        call,
        "post",
        file_elements=file_elements,
    )
    xml_response = xmltodict.parse(response_text)

    self._parse_publish_response(xml_response)
    return self

push_tag #

push_tag(tag: str) -> None

Annotates this entity with a tag on the server.

Parameters#

tag : str
    Tag to attach to the entity.

Source code in openml/base.py
def push_tag(self, tag: str) -> None:
    """Annotates this entity with a tag on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag)

remove_tag #

remove_tag(tag: str) -> None

Removes a tag from this entity on the server.

Parameters#

tag : str
    Tag to remove from the entity.

Source code in openml/base.py
def remove_tag(self, tag: str) -> None:
    """Removes a tag from this entity on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag, untag=True)

url_for_id classmethod #

url_for_id(id_: int) -> str

Return the OpenML URL for the object of the class entity with the given id.

Source code in openml/base.py
@classmethod
def url_for_id(cls, id_: int) -> str:
    """Return the OpenML URL for the object of the class entity with the given id."""
    # Sample url for a flow: openml.org/f/123
    return f"{openml.config.get_server_base_url()}/{cls._entity_letter()}/{id_}"

OpenMLRegressionTask #

OpenMLRegressionTask(task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, estimation_procedure_id: int = 7, estimation_procedure_type: str | None = None, estimation_parameters: dict[str, str] | None = None, data_splits_url: str | None = None, task_id: int | None = None, evaluation_measure: str | None = None)

Bases: OpenMLSupervisedTask

OpenML Regression object.

Parameters#

task_type_id : TaskType
    Task type ID of the OpenML Regression task.
task_type : str
    Task type of the OpenML Regression task.
data_set_id : int
    ID of the OpenML dataset.
target_name : str
    Name of the target feature used in the Regression task.
estimation_procedure_id : int, default=7
    ID of the OpenML estimation procedure.
estimation_procedure_type : str, default=None
    Type of the OpenML estimation procedure.
estimation_parameters : dict, default=None
    Parameters used by the OpenML estimation procedure.
data_splits_url : str, default=None
    URL of the OpenML data splits for the Regression task.
task_id : Union[int, None]
    ID of the OpenML Regression task.
evaluation_measure : str, default=None
    Evaluation measure used in the Regression task.

Source code in openml/tasks/task.py
def __init__(  # noqa: PLR0913
    self,
    task_type_id: TaskType,
    task_type: str,
    data_set_id: int,
    target_name: str,
    estimation_procedure_id: int = 7,
    estimation_procedure_type: str | None = None,
    estimation_parameters: dict[str, str] | None = None,
    data_splits_url: str | None = None,
    task_id: int | None = None,
    evaluation_measure: str | None = None,
):
    super().__init__(
        task_id=task_id,
        task_type_id=task_type_id,
        task_type=task_type,
        data_set_id=data_set_id,
        estimation_procedure_id=estimation_procedure_id,
        estimation_procedure_type=estimation_procedure_type,
        estimation_parameters=estimation_parameters,
        evaluation_measure=evaluation_measure,
        target_name=target_name,
        data_splits_url=data_splits_url,
    )
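
Regression tasks are used in the same way as classification tasks, except that the target is numeric and `class_labels` is not available. A sketch, with a hypothetical regression task ID:

```python
import openml

reg_task = openml.tasks.get_task(2295)  # hypothetical regression task ID
X, y = reg_task.get_X_and_y()
print(reg_task.target_name, y.dtype)    # numeric target column
```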

estimation_parameters property writable #

estimation_parameters: dict[str, str] | None

Return the estimation parameters for the task.

id property #

id: int | None

Return the OpenML ID of this task.

openml_url property #

openml_url: str | None

The URL of the object on the server, if it was uploaded, else None.

download_split #

download_split() -> OpenMLSplit

Download the OpenML split for a given task.

Source code in openml/tasks/task.py
def download_split(self) -> OpenMLSplit:
    """Download the OpenML split for a given task."""
    # TODO(eddiebergman): Can this ever be `None`?
    assert self.task_id is not None
    cache_dir = _create_cache_directory_for_id("tasks", self.task_id)
    cached_split_file = cache_dir / "datasplits.arff"

    try:
        split = OpenMLSplit._from_arff_file(cached_split_file)
    except OSError:
        # Next, download and cache the associated split file
        self._download_split(cached_split_file)
        split = OpenMLSplit._from_arff_file(cached_split_file)

    return split

get_X_and_y #

get_X_and_y() -> tuple[DataFrame, Series | DataFrame | None]

Get data associated with the current task.

Returns#

tuple - X and y

Source code in openml/tasks/task.py
def get_X_and_y(self) -> tuple[pd.DataFrame, pd.Series | pd.DataFrame | None]:
    """Get data associated with the current task.

    Returns
    -------
    tuple - X and y

    """
    dataset = self.get_dataset()
    if self.task_type_id not in (
        TaskType.SUPERVISED_CLASSIFICATION,
        TaskType.SUPERVISED_REGRESSION,
        TaskType.LEARNING_CURVE,
    ):
        raise NotImplementedError(self.task_type)

    X, y, _, _ = dataset.get_data(target=self.target_name)
    return X, y

get_dataset #

get_dataset(**kwargs: Any) -> OpenMLDataset

Download dataset associated with task.

Accepts the same keyword arguments as openml.datasets.get_dataset.

Source code in openml/tasks/task.py
def get_dataset(self, **kwargs: Any) -> datasets.OpenMLDataset:
    """Download dataset associated with task.

    Accepts the same keyword arguments as the `openml.datasets.get_dataset`.
    """
    return datasets.get_dataset(self.dataset_id, **kwargs)

get_split_dimensions #

get_split_dimensions() -> tuple[int, int, int]

Get the (repeats, folds, samples) of the split for a given task.

Source code in openml/tasks/task.py
def get_split_dimensions(self) -> tuple[int, int, int]:
    """Get the (repeats, folds, samples) of the split for a given task."""
    if self.split is None:
        self.split = self.download_split()

    return self.split.repeats, self.split.folds, self.split.samples

get_train_test_split_indices #

get_train_test_split_indices(fold: int = 0, repeat: int = 0, sample: int = 0) -> tuple[ndarray, ndarray]

Get the indices of the train and test splits for a given task.

Source code in openml/tasks/task.py
def get_train_test_split_indices(
    self,
    fold: int = 0,
    repeat: int = 0,
    sample: int = 0,
) -> tuple[np.ndarray, np.ndarray]:
    """Get the indices of the train and test splits for a given task."""
    # Replace with retrieve from cache
    if self.split is None:
        self.split = self.download_split()

    return self.split.get(repeat=repeat, fold=fold, sample=sample)

open_in_browser #

open_in_browser() -> None

Opens the OpenML web page corresponding to this object in your default browser.

Source code in openml/base.py
def open_in_browser(self) -> None:
    """Opens the OpenML web page corresponding to this object in your default browser."""
    if self.openml_url is None:
        raise ValueError(
            "Cannot open element on OpenML.org when attribute `openml_url` is `None`",
        )

    webbrowser.open(self.openml_url)

publish #

publish() -> OpenMLBase

Publish the object on the OpenML server.

Source code in openml/base.py
def publish(self) -> OpenMLBase:
    """Publish the object on the OpenML server."""
    file_elements = self._get_file_elements()

    if "description" not in file_elements:
        file_elements["description"] = self._to_xml()

    call = f"{_get_rest_api_type_alias(self)}/"
    response_text = openml._api_calls._perform_api_call(
        call,
        "post",
        file_elements=file_elements,
    )
    xml_response = xmltodict.parse(response_text)

    self._parse_publish_response(xml_response)
    return self

push_tag #

push_tag(tag: str) -> None

Annotates this entity with a tag on the server.

Parameters#

tag : str
    Tag to attach to the entity.

Source code in openml/base.py
def push_tag(self, tag: str) -> None:
    """Annotates this entity with a tag on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag)

remove_tag #

remove_tag(tag: str) -> None

Removes a tag from this entity on the server.

Parameters#

tag : str
    Tag to remove from the entity.

Source code in openml/base.py
def remove_tag(self, tag: str) -> None:
    """Removes a tag from this entity on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag, untag=True)

url_for_id classmethod #

url_for_id(id_: int) -> str

Return the OpenML URL for the object of the class entity with the given id.

Source code in openml/base.py
@classmethod
def url_for_id(cls, id_: int) -> str:
    """Return the OpenML URL for the object of the class entity with the given id."""
    # Sample url for a flow: openml.org/f/123
    return f"{openml.config.get_server_base_url()}/{cls._entity_letter()}/{id_}"

OpenMLSupervisedTask #

OpenMLSupervisedTask(task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, estimation_procedure_id: int = 1, estimation_procedure_type: str | None = None, estimation_parameters: dict[str, str] | None = None, evaluation_measure: str | None = None, data_splits_url: str | None = None, task_id: int | None = None)

Bases: OpenMLTask, ABC

OpenML Supervised Task object.

Parameters#

task_type_id : TaskType
    ID of the task type.
task_type : str
    Name of the task type.
data_set_id : int
    ID of the OpenML dataset associated with the task.
target_name : str
    Name of the target feature (the class variable).
estimation_procedure_id : int, default=1
    ID of the estimation procedure for the task.
estimation_procedure_type : str, default=None
    Type of the estimation procedure for the task.
estimation_parameters : dict, default=None
    Estimation parameters for the task.
evaluation_measure : str, default=None
    Name of the evaluation measure for the task.
data_splits_url : str, default=None
    URL of the data splits for the task.
task_id : Union[int, None]
    Unique identifier of the task (if it already exists on OpenML).

Source code in openml/tasks/task.py
def __init__(  # noqa: PLR0913
    self,
    task_type_id: TaskType,
    task_type: str,
    data_set_id: int,
    target_name: str,
    estimation_procedure_id: int = 1,
    estimation_procedure_type: str | None = None,
    estimation_parameters: dict[str, str] | None = None,
    evaluation_measure: str | None = None,
    data_splits_url: str | None = None,
    task_id: int | None = None,
):
    super().__init__(
        task_id=task_id,
        task_type_id=task_type_id,
        task_type=task_type,
        data_set_id=data_set_id,
        estimation_procedure_id=estimation_procedure_id,
        estimation_procedure_type=estimation_procedure_type,
        estimation_parameters=estimation_parameters,
        evaluation_measure=evaluation_measure,
        data_splits_url=data_splits_url,
    )

    self.target_name = target_name
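
OpenMLSupervisedTask is not instantiated directly; it is the shared base of the classification, regression, and learning-curve task classes, which makes it convenient for type checks. A sketch, assuming `OpenMLSupervisedTask` is importable from `openml.tasks` as in current versions:

```python
import openml
from openml.tasks import OpenMLSupervisedTask

task = openml.tasks.get_task(31)  # illustrative task ID
if isinstance(task, OpenMLSupervisedTask):
    # Only supervised tasks expose a target and get_X_and_y().
    X, y = task.get_X_and_y()
    print(task.target_name, X.shape)
```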

estimation_parameters property writable #

estimation_parameters: dict[str, str] | None

Return the estimation parameters for the task.

id property #

id: int | None

Return the OpenML ID of this task.

openml_url property #

openml_url: str | None

The URL of the object on the server, if it was uploaded, else None.

download_split #

download_split() -> OpenMLSplit

Download the OpenML split for a given task.

Source code in openml/tasks/task.py
def download_split(self) -> OpenMLSplit:
    """Download the OpenML split for a given task."""
    # TODO(eddiebergman): Can this ever be `None`?
    assert self.task_id is not None
    cache_dir = _create_cache_directory_for_id("tasks", self.task_id)
    cached_split_file = cache_dir / "datasplits.arff"

    try:
        split = OpenMLSplit._from_arff_file(cached_split_file)
    except OSError:
        # Next, download and cache the associated split file
        self._download_split(cached_split_file)
        split = OpenMLSplit._from_arff_file(cached_split_file)

    return split

get_X_and_y #

get_X_and_y() -> tuple[DataFrame, Series | DataFrame | None]

Get data associated with the current task.

Returns#

tuple - X and y

Source code in openml/tasks/task.py
def get_X_and_y(self) -> tuple[pd.DataFrame, pd.Series | pd.DataFrame | None]:
    """Get data associated with the current task.

    Returns
    -------
    tuple - X and y

    """
    dataset = self.get_dataset()
    if self.task_type_id not in (
        TaskType.SUPERVISED_CLASSIFICATION,
        TaskType.SUPERVISED_REGRESSION,
        TaskType.LEARNING_CURVE,
    ):
        raise NotImplementedError(self.task_type)

    X, y, _, _ = dataset.get_data(target=self.target_name)
    return X, y

get_dataset #

get_dataset(**kwargs: Any) -> OpenMLDataset

Download dataset associated with task.

Accepts the same keyword arguments as openml.datasets.get_dataset.

Source code in openml/tasks/task.py
def get_dataset(self, **kwargs: Any) -> datasets.OpenMLDataset:
    """Download dataset associated with task.

    Accepts the same keyword arguments as the `openml.datasets.get_dataset`.
    """
    return datasets.get_dataset(self.dataset_id, **kwargs)

get_split_dimensions #

get_split_dimensions() -> tuple[int, int, int]

Get the (repeats, folds, samples) of the split for a given task.

Source code in openml/tasks/task.py
def get_split_dimensions(self) -> tuple[int, int, int]:
    """Get the (repeats, folds, samples) of the split for a given task."""
    if self.split is None:
        self.split = self.download_split()

    return self.split.repeats, self.split.folds, self.split.samples

get_train_test_split_indices #

get_train_test_split_indices(fold: int = 0, repeat: int = 0, sample: int = 0) -> tuple[ndarray, ndarray]

Get the indices of the train and test splits for a given task.

Source code in openml/tasks/task.py
def get_train_test_split_indices(
    self,
    fold: int = 0,
    repeat: int = 0,
    sample: int = 0,
) -> tuple[np.ndarray, np.ndarray]:
    """Get the indices of the train and test splits for a given task."""
    # Replace with retrieve from cache
    if self.split is None:
        self.split = self.download_split()

    return self.split.get(repeat=repeat, fold=fold, sample=sample)

open_in_browser #

open_in_browser() -> None

Opens the OpenML web page corresponding to this object in your default browser.

Source code in openml/base.py
def open_in_browser(self) -> None:
    """Opens the OpenML web page corresponding to this object in your default browser."""
    if self.openml_url is None:
        raise ValueError(
            "Cannot open element on OpenML.org when attribute `openml_url` is `None`",
        )

    webbrowser.open(self.openml_url)

publish #

publish() -> OpenMLBase

Publish the object on the OpenML server.

Source code in openml/base.py
def publish(self) -> OpenMLBase:
    """Publish the object on the OpenML server."""
    file_elements = self._get_file_elements()

    if "description" not in file_elements:
        file_elements["description"] = self._to_xml()

    call = f"{_get_rest_api_type_alias(self)}/"
    response_text = openml._api_calls._perform_api_call(
        call,
        "post",
        file_elements=file_elements,
    )
    xml_response = xmltodict.parse(response_text)

    self._parse_publish_response(xml_response)
    return self

push_tag #

push_tag(tag: str) -> None

Annotates this entity with a tag on the server.

Parameters#

tag : str
    Tag to attach to the entity.

Source code in openml/base.py
def push_tag(self, tag: str) -> None:
    """Annotates this entity with a tag on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag)

remove_tag #

remove_tag(tag: str) -> None

Removes a tag from this entity on the server.

Parameters#

tag : str
    Tag to remove from the entity.

Source code in openml/base.py
def remove_tag(self, tag: str) -> None:
    """Removes a tag from this entity on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag, untag=True)

url_for_id classmethod #

url_for_id(id_: int) -> str

Return the OpenML URL for the object of the class entity with the given id.

Source code in openml/base.py
@classmethod
def url_for_id(cls, id_: int) -> str:
    """Return the OpenML URL for the object of the class entity with the given id."""
    # Sample url for a flow: openml.org/f/123
    return f"{openml.config.get_server_base_url()}/{cls._entity_letter()}/{id_}"

OpenMLTask #

OpenMLTask(task_id: int | None, task_type_id: TaskType, task_type: str, data_set_id: int, estimation_procedure_id: int = 1, estimation_procedure_type: str | None = None, estimation_parameters: dict[str, str] | None = None, evaluation_measure: str | None = None, data_splits_url: str | None = None)

Bases: OpenMLBase

OpenML Task object.

Parameters#

task_id : Union[int, None]
    Unique identifier of the OpenML task.
task_type_id : TaskType
    ID of the OpenML task type.
task_type : str
    Name of the OpenML task type.
data_set_id : int
    ID of the OpenML dataset associated with the task.
estimation_procedure_id : int, default=1
    ID of the estimation procedure used for the task.
estimation_procedure_type : str, default=None
    Type of the estimation procedure used for the OpenML task.
estimation_parameters : dict[str, str], default=None
    Estimation parameters used for the OpenML task.
evaluation_measure : str, default=None
    Name of the evaluation measure.
data_splits_url : str, default=None
    URL of the data splits used for the OpenML task.

Source code in openml/tasks/task.py
def __init__(  # noqa: PLR0913
    self,
    task_id: int | None,
    task_type_id: TaskType,
    task_type: str,
    data_set_id: int,
    estimation_procedure_id: int = 1,
    estimation_procedure_type: str | None = None,
    estimation_parameters: dict[str, str] | None = None,
    evaluation_measure: str | None = None,
    data_splits_url: str | None = None,
):
    self.task_id = int(task_id) if task_id is not None else None
    self.task_type_id = task_type_id
    self.task_type = task_type
    self.dataset_id = int(data_set_id)
    self.evaluation_measure = evaluation_measure
    self.estimation_procedure: _EstimationProcedure = {
        "type": estimation_procedure_type,
        "parameters": estimation_parameters,
        "data_splits_url": data_splits_url,
    }
    self.estimation_procedure_id = estimation_procedure_id
    self.split: OpenMLSplit | None = None
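
Every task stores its estimation procedure as a small dictionary whose fields map directly onto the constructor arguments above. A sketch of inspecting it on a fetched task (illustrative task ID):

```python
import openml

task = openml.tasks.get_task(31)  # illustrative task ID

# The estimation procedure is stored as a dict with the keys
# "type", "parameters", and "data_splits_url".
print(task.estimation_procedure["type"])        # e.g. "crossvalidation"
print(task.estimation_procedure["parameters"])  # e.g. number of folds/repeats
print(task.estimation_procedure_id)
```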

id property #

id: int | None

Return the OpenML ID of this task.

openml_url property #

openml_url: str | None

The URL of the object on the server, if it was uploaded, else None.

download_split #

download_split() -> OpenMLSplit

Download the OpenML split for a given task.

Source code in openml/tasks/task.py
def download_split(self) -> OpenMLSplit:
    """Download the OpenML split for a given task."""
    # TODO(eddiebergman): Can this ever be `None`?
    assert self.task_id is not None
    cache_dir = _create_cache_directory_for_id("tasks", self.task_id)
    cached_split_file = cache_dir / "datasplits.arff"

    try:
        split = OpenMLSplit._from_arff_file(cached_split_file)
    except OSError:
        # Next, download and cache the associated split file
        self._download_split(cached_split_file)
        split = OpenMLSplit._from_arff_file(cached_split_file)

    return split

get_dataset #

get_dataset(**kwargs: Any) -> OpenMLDataset

Download dataset associated with task.

Accepts the same keyword arguments as openml.datasets.get_dataset.

Source code in openml/tasks/task.py
def get_dataset(self, **kwargs: Any) -> datasets.OpenMLDataset:
    """Download dataset associated with task.

    Accepts the same keyword arguments as the `openml.datasets.get_dataset`.
    """
    return datasets.get_dataset(self.dataset_id, **kwargs)

get_split_dimensions #

get_split_dimensions() -> tuple[int, int, int]

Get the (repeats, folds, samples) of the split for a given task.

Source code in openml/tasks/task.py
def get_split_dimensions(self) -> tuple[int, int, int]:
    """Get the (repeats, folds, samples) of the split for a given task."""
    if self.split is None:
        self.split = self.download_split()

    return self.split.repeats, self.split.folds, self.split.samples

get_train_test_split_indices #

get_train_test_split_indices(fold: int = 0, repeat: int = 0, sample: int = 0) -> tuple[ndarray, ndarray]

Get the indices of the train and test splits for a given task.

Source code in openml/tasks/task.py
def get_train_test_split_indices(
    self,
    fold: int = 0,
    repeat: int = 0,
    sample: int = 0,
) -> tuple[np.ndarray, np.ndarray]:
    """Get the indices of the train and test splits for a given task."""
    # Replace with retrieve from cache
    if self.split is None:
        self.split = self.download_split()

    return self.split.get(repeat=repeat, fold=fold, sample=sample)

open_in_browser #

open_in_browser() -> None

Opens the OpenML web page corresponding to this object in your default browser.

Source code in openml/base.py
def open_in_browser(self) -> None:
    """Opens the OpenML web page corresponding to this object in your default browser."""
    if self.openml_url is None:
        raise ValueError(
            "Cannot open element on OpenML.org when attribute `openml_url` is `None`",
        )

    webbrowser.open(self.openml_url)

publish #

publish() -> OpenMLBase

Publish the object on the OpenML server.

Source code in openml/base.py
def publish(self) -> OpenMLBase:
    """Publish the object on the OpenML server."""
    file_elements = self._get_file_elements()

    if "description" not in file_elements:
        file_elements["description"] = self._to_xml()

    call = f"{_get_rest_api_type_alias(self)}/"
    response_text = openml._api_calls._perform_api_call(
        call,
        "post",
        file_elements=file_elements,
    )
    xml_response = xmltodict.parse(response_text)

    self._parse_publish_response(xml_response)
    return self

push_tag #

push_tag(tag: str) -> None

Annotates this entity with a tag on the server.

Parameters#

tag : str
    Tag to attach to the entity.

Source code in openml/base.py
def push_tag(self, tag: str) -> None:
    """Annotates this entity with a tag on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag)

remove_tag #

remove_tag(tag: str) -> None

Removes a tag from this entity on the server.

Parameters#

tag : str
    Tag to remove from the entity.

Source code in openml/base.py
def remove_tag(self, tag: str) -> None:
    """Removes a tag from this entity on the server.

    Parameters
    ----------
    tag : str
        Tag to attach to the flow.
    """
    _tag_openml_base(self, tag, untag=True)

url_for_id classmethod #

url_for_id(id_: int) -> str

Return the OpenML URL for the object of the class entity with the given id.

Source code in openml/base.py
@classmethod
def url_for_id(cls, id_: int) -> str:
    """Return the OpenML URL for the object of the class entity with the given id."""
    # Sample url for a flow: openml.org/f/123
    return f"{openml.config.get_server_base_url()}/{cls._entity_letter()}/{id_}"

TaskType #

Bases: Enum

Possible task types as defined in OpenML.
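
The enum values are used when filtering or creating tasks. A sketch that lists a few supervised classification tasks; the `output_format="dataframe"` argument is assumed to be supported by your openml-python version:

```python
import openml
from openml.tasks import TaskType

# List a handful of supervised classification tasks as a pandas DataFrame.
tasks = openml.tasks.list_tasks(
    task_type=TaskType.SUPERVISED_CLASSIFICATION,
    size=5,
    output_format="dataframe",
)
print(len(tasks))
print(tasks.head())
```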