OpenMLDataFeature(index: int, name: str, data_type: str, nominal_values: list[str], number_missing_values: int, ontologies: list[str] | None = None)
Data Feature (a.k.a. Attribute) object.
Parameters
index : int
The index of this feature
name : str
Name of the feature
data_type : str
Type of the feature; one of nominal, numeric, string, or date (corresponds to the ARFF attribute types).
nominal_values : list(str)
List of the possible values when the feature is nominal. Must be None for non-nominal features; passing anything else raises a TypeError.
number_missing_values : int
Number of rows that have a missing value for this feature.
ontologies : list(str)
List of ontologies attached to this feature. An ontology describes the
concepts that are represented by a feature. An ontology is identified by a
URL where the information is provided.
Source code in openml/datasets/data_feature.py
def __init__(  # noqa: PLR0913
    self,
    index: int,
    name: str,
    data_type: str,
    nominal_values: list[str] | None,
    number_missing_values: int,
    ontologies: list[str] | None = None,
) -> None:
    """Validate the arguments and store them as instance attributes.

    Parameters
    ----------
    index : int
        The index of this feature.
    name : str
        Name of the feature; stored as ``str(name)``.
    data_type : str
        Must be a member of ``self.LEGAL_DATA_TYPES``; stored as
        ``str(data_type)``.
    nominal_values : list of str or None
        The possible values when ``data_type`` is ``"nominal"`` (a list is
        required in that case). Must be ``None`` for every other data type.
    number_missing_values : int
        Number of rows that have a missing value for this feature.
    ontologies : list of str or None, optional
        Ontologies attached to this feature. Stored as given, without
        validation.

    Raises
    ------
    TypeError
        If ``index`` or ``number_missing_values`` is not an ``int``, if a
        nominal feature is given ``nominal_values`` that is ``None`` or not
        a list, or if a non-nominal feature is given ``nominal_values``
        other than ``None``.
    ValueError
        If ``data_type`` is not in ``self.LEGAL_DATA_TYPES``.
    """
    if not isinstance(index, int):
        raise TypeError(f"Index must be `int` but is {type(index)}")

    if data_type not in self.LEGAL_DATA_TYPES:
        raise ValueError(
            f"data type should be in {self.LEGAL_DATA_TYPES!s}, found: {data_type}",
        )

    if data_type == "nominal":
        # Nominal features must come with an explicit list of values.
        if nominal_values is None:
            raise TypeError(
                "Dataset features require attribute `nominal_values` for nominal "
                "feature type.",
            )

        if not isinstance(nominal_values, list):
            raise TypeError(
                "Argument `nominal_values` is of wrong datatype, should be list, "
                f"but is {type(nominal_values)}",
            )
    elif nominal_values is not None:
        # Any other data type has no enumerable value set, so only None is
        # accepted here; this is why the parameter is annotated as optional.
        raise TypeError("Argument `nominal_values` must be None for non-nominal feature.")

    if not isinstance(number_missing_values, int):
        msg = f"number_missing_values must be int but is {type(number_missing_values)}"
        raise TypeError(msg)

    self.index = index
    self.name = str(name)
    self.data_type = str(data_type)
    self.nominal_values = nominal_values
    self.number_missing_values = number_missing_values
    self.ontologies = ontologies
|