Skip to content

data_feature

openml.datasets.data_feature #

OpenMLDataFeature #

OpenMLDataFeature(index: int, name: str, data_type: str, nominal_values: list[str], number_missing_values: int, ontologies: list[str] | None = None)

Data Feature (a.k.a. Attribute) object.

Parameters#

index : int
    The index of this feature.
name : str
    Name of the feature.
data_type : str
    Can be nominal, numeric, string or date (corresponds to ARFF).
nominal_values : list(str)
    List of the possible values in the case of a nominal attribute; must be None for any other data type.
number_missing_values : int
    Number of rows that have a missing value for this feature.
ontologies : list(str)
    List of ontologies attached to this feature. An ontology describes the concepts that are represented by a feature. An ontology is defined by a URL where the information is provided.

Source code in openml/datasets/data_feature.py
def __init__(  # noqa: PLR0913
    self,
    index: int,
    name: str,
    data_type: str,
    nominal_values: list[str],
    number_missing_values: int,
    ontologies: list[str] | None = None,
):
    if not isinstance(index, int):
        raise TypeError(f"Index must be `int` but is {type(index)}")

    if data_type not in self.LEGAL_DATA_TYPES:
        raise ValueError(
            f"data type should be in {self.LEGAL_DATA_TYPES!s}, found: {data_type}",
        )

    if data_type == "nominal":
        if nominal_values is None:
            raise TypeError(
                "Dataset features require attribute `nominal_values` for nominal "
                "feature type.",
            )

        if not isinstance(nominal_values, list):
            raise TypeError(
                "Argument `nominal_values` is of wrong datatype, should be list, "
                f"but is {type(nominal_values)}",
            )
    elif nominal_values is not None:
        raise TypeError("Argument `nominal_values` must be None for non-nominal feature.")

    if not isinstance(number_missing_values, int):
        msg = f"number_missing_values must be int but is {type(number_missing_values)}"
        raise TypeError(msg)

    self.index = index
    self.name = str(name)
    self.data_type = str(data_type)
    self.nominal_values = nominal_values
    self.number_missing_values = number_missing_values
    self.ontologies = ontologies