Skip to content

setups

openml.setups #

OpenMLParameter #

OpenMLParameter(input_id: int, flow_id: int, flow_name: str, full_name: str, parameter_name: str, data_type: str, default_value: str, value: str)

Parameter object (used in setup).

PARAMETER DESCRIPTION
input_id

The input id from the openml database

TYPE: int

flow_id

The id of the flow to which this parameter is associated

TYPE: int

flow_name

The name of the flow (no version number) to which this parameter is associated

TYPE: str

full_name

The name of the flow and parameter combined

TYPE: str

parameter_name

The name of the parameter

TYPE: str

data_type

The datatype of the parameter. Generally unused for sklearn flows.

TYPE: str

default_value

The default value. For sklearn parameters, this is unknown and a default value is selected arbitrarily

TYPE: str

value

If the parameter was set, the value that it was set to.

TYPE: str

Source code in openml/setups/setup.py
def __init__(  # noqa: PLR0913
    self,
    input_id: int,
    flow_id: int,
    flow_name: str,
    full_name: str,
    parameter_name: str,
    data_type: str,
    default_value: str,
    value: str,
):
    """Store the description of a single parameter of a setup.

    Every argument is kept unchanged on the instance under an attribute of
    the same name, except ``input_id``, which is exposed as ``id``.

    Parameters
    ----------
    input_id : int
        Stored as ``self.id``.
    flow_id : int
        Stored as ``self.flow_id``.
    flow_name : str
        Stored as ``self.flow_name``.
    full_name : str
        Stored as ``self.full_name``.
    parameter_name : str
        Stored as ``self.parameter_name``.
    data_type : str
        Stored as ``self.data_type``.
    default_value : str
        Stored as ``self.default_value``.
    value : str
        Stored as ``self.value``.
    """
    # Identification of the parameter and of the flow it belongs to.
    self.id = input_id
    self.flow_id, self.flow_name = flow_id, flow_name
    self.full_name, self.parameter_name = full_name, parameter_name

    # Type information and the values attached to the parameter.
    self.data_type = data_type
    self.default_value, self.value = default_value, value

OpenMLSetup #

OpenMLSetup(setup_id: int, flow_id: int, parameters: dict[int, Any] | None)

Setup object (a.k.a. Configuration).

PARAMETER DESCRIPTION
setup_id

The OpenML setup id

TYPE: int

flow_id

The flow that it is built upon

TYPE: int

parameters

The setting of the parameters

TYPE: dict

Source code in openml/setups/setup.py
def __init__(self, setup_id: int, flow_id: int, parameters: dict[int, Any] | None):
    """Validate the argument types and store them on the instance.

    Parameters
    ----------
    setup_id : int
        Stored as ``self.setup_id``.
    flow_id : int
        Stored as ``self.flow_id``.
    parameters : dict or None
        Stored as ``self.parameters``; ``None`` is accepted as-is.

    Raises
    ------
    ValueError
        If ``setup_id`` or ``flow_id`` is not an ``int``, or if
        ``parameters`` is neither ``None`` nor a ``dict``.
    """
    # Checked in this order so the first offending argument is the one reported.
    int_checks = (
        (setup_id, "setup id should be int"),
        (flow_id, "flow id should be int"),
    )
    for candidate, message in int_checks:
        if not isinstance(candidate, int):
            raise ValueError(message)

    if not (parameters is None or isinstance(parameters, dict)):
        raise ValueError("parameters should be dict")

    self.setup_id, self.flow_id = setup_id, flow_id
    self.parameters = parameters

get_setup #

get_setup(setup_id: int) -> OpenMLSetup

Downloads the setup (configuration) description from OpenML and returns a structured object

PARAMETER DESCRIPTION
setup_id

The Openml setup_id

TYPE: int

RETURNS DESCRIPTION
OpenMLSetup (an initialized openml setup object)
Source code in openml/setups/functions.py
def get_setup(setup_id: int) -> OpenMLSetup:
    """
    Return the setup (configuration) with the given id as a structured object.

    The cached copy is returned when the cache lookup succeeds. Otherwise
    the description is requested from the server, written to
    ``description.xml`` in the setup's cache directory and then parsed.

    Parameters
    ----------
    setup_id : int
        The OpenML setup_id

    Returns
    -------
    OpenMLSetup (an initialized openml setup object)
    """
    # The cache directory is created up front, before the cache is consulted.
    cache_root = Path(config.get_cache_directory())
    setup_dir = cache_root / "setups" / str(setup_id)
    setup_dir.mkdir(parents=True, exist_ok=True)

    try:
        return _get_cached_setup(setup_id)
    except openml.exceptions.OpenMLCacheException:
        # Cache miss: fetch the XML description and persist it for next time.
        setup_xml = openml._api_calls._perform_api_call(f"/setup/{setup_id}", "get")
        description_path = setup_dir / "description.xml"
        with description_path.open("w", encoding="utf8") as fh:
            fh.write(setup_xml)

    return _create_setup_from_xml(xmltodict.parse(setup_xml))

initialize_model #

initialize_model(setup_id: int, *, strict_version: bool = True) -> Any

Initialize a model based on a setup_id (i.e., using the exact same parameter settings).

PARAMETER DESCRIPTION
setup_id

The Openml setup_id

TYPE: int

strict_version

See flow_to_model strict_version.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
model
Source code in openml/setups/functions.py
def initialize_model(setup_id: int, *, strict_version: bool = True) -> Any:
    """
    Initialize a model based on a setup_id (i.e., using the exact
    same parameter settings)

    The setup and its flow are fetched, every parameter value recorded in the
    setup is written into the matching (sub)flow's ``parameters`` mapping, and
    the flow's extension then converts the flow into a model.

    Parameters
    ----------
    setup_id : int
        The OpenML setup_id
    strict_version: bool (default=True)
        See `flow_to_model` strict_version.

    Returns
    -------
    model
    """
    setup = get_setup(setup_id)
    flow = openml.flows.get_flow(setup.flow_id)

    # instead of using scikit-learns or any other library's "set_params" function, we override the
    # OpenMLFlow objects default parameter value so we can utilize the
    # Extension.flow_to_model() function to reinitialize the flow with the set defaults.
    if setup.parameters:
        # Looked up once instead of once per hyperparameter: the loop below only
        # rewrites parameter values, it does not add or remove (sub)flows.
        structure = flow.get_structure("flow_id")
        for hyperparameter in setup.parameters.values():
            subflow_path = structure[hyperparameter.flow_id]
            # An empty path means the parameter belongs to the top-level flow itself.
            subflow = flow.get_subflow(subflow_path) if len(subflow_path) > 0 else flow
            subflow.parameters[hyperparameter.parameter_name] = hyperparameter.value

    return flow.extension.flow_to_model(flow, strict_version=strict_version)

list_setups #

list_setups(offset: int | None = None, size: int | None = None, flow: int | None = None, tag: str | None = None, setup: Iterable[int] | None = None, output_format: Literal['object', 'dataframe'] = 'object') -> dict[int, OpenMLSetup] | DataFrame

List all setups matching all of the given filters.

PARAMETER DESCRIPTION
offset

TYPE: int DEFAULT: None

size

TYPE: int DEFAULT: None

flow

TYPE: int DEFAULT: None

tag

TYPE: str DEFAULT: None

setup

TYPE: Iterable[int] DEFAULT: None

output_format

The parameter decides the format of the output. - If 'dataframe' the output is a pandas DataFrame - If 'object' the output is a dictionary of OpenMLSetup objects

TYPE: Literal['object', 'dataframe'] DEFAULT: 'object'

RETURNS DESCRIPTION
dict or dataframe
Source code in openml/setups/functions.py
def list_setups(  # noqa: PLR0913
    offset: int | None = None,
    size: int | None = None,
    flow: int | None = None,
    tag: str | None = None,
    setup: Iterable[int] | None = None,
    output_format: Literal["object", "dataframe"] = "object",
) -> dict[int, OpenMLSetup] | pd.DataFrame:
    """
    List all setups matching all of the given filters.

    Parameters
    ----------
    offset : int, optional
        Forwarded as ``offset`` to the batched listing helper.
    size : int, optional
        Forwarded as ``limit`` to the batched listing helper.
    flow : int, optional
        Forwarded as a filter to the listing call.
    tag : str, optional
        Forwarded as a filter to the listing call.
    setup : Iterable[int], optional
        Forwarded as a filter to the listing call.
    output_format: str, optional (default='object')
        The parameter decides the format of the output.
        - If 'dataframe' the output is a pandas DataFrame
        - If 'object' the output is a dictionary of OpenMLSetup objects

    Returns
    -------
    dict or dataframe
        A dict keyed by setup id, or a DataFrame indexed by ``setup_id``.

    Raises
    ------
    ValueError
        If ``output_format`` is neither 'object' nor 'dataframe'.
    """
    if output_format not in ("object", "dataframe"):
        raise ValueError(
            "Invalid output format selected. Only 'object', or 'dataframe' applicable.",
        )

    fetch_batch = partial(_list_setups, flow=flow, tag=tag, setup=setup)
    all_setups = list(
        chain.from_iterable(
            openml.utils._list_all(
                fetch_batch,
                batch_size=1_000,  # batch size for setups is lower
                offset=offset,
                limit=size,
            ),
        ),
    )

    if output_format == "dataframe":
        rows = [entry._to_dict() for entry in all_setups]
        return pd.DataFrame.from_records(rows, index="setup_id")

    return {entry.setup_id: entry for entry in all_setups}

setup_exists #

setup_exists(flow: OpenMLFlow) -> int

Checks whether a hyperparameter configuration already exists on the server.

PARAMETER DESCRIPTION
flow

The openml flow object. Should have flow id present for the main flow and all subflows (i.e., it should be downloaded from the server by means of flow.get, and not instantiated locally)

TYPE: OpenMLFlow

RETURNS DESCRIPTION
setup_id

The setup id if the setup exists on the server, False otherwise

TYPE: int

Source code in openml/setups/functions.py
def setup_exists(flow: OpenMLFlow) -> int:
    """
    Checks whether a hyperparameter configuration already exists on the server.

    The flow's parameter values are obtained through its extension,
    serialized to XML and posted to the ``/setup/exists/`` endpoint.

    Parameters
    ----------
    flow : OpenMLFlow
        The openml flow object. Should have flow id present for the main flow
        and all subflows (i.e., it should be downloaded from the server by
        means of flow.get, and not instantiated locally)

    Returns
    -------
    setup_id : int
        The id reported by the server when it is positive, False otherwise

    Raises
    ------
    ValueError
        If the flow has no model or no extension, or if the flow id known to
        the server differs from the flow's own ``flow_id``.
    """
    openml.flows.functions._check_flow_for_server_id(flow)
    if flow.model is None:
        raise ValueError("Flow should have model field set with the actual model.")
    if flow.extension is None:
        raise ValueError("Flow should have model field set with the correct extension.")

    # The flow must be known to the server under the same id as the local copy.
    server_flow_id = flow_exists(flow.name, flow.external_version)
    if server_flow_id != flow.flow_id:
        raise ValueError(
            f"Local flow id ({flow.id}) differs from server id ({server_flow_id}). "
            "If this issue persists, please contact the developers.",
        )

    param_settings = flow.extension.obtain_parameter_values(flow)
    setup_description = xmltodict.unparse(
        _to_dict(flow.flow_id, param_settings),
        pretty=True,
    )
    response = openml._api_calls._perform_api_call(
        "/setup/exists/",
        "post",
        file_elements={"description": ("description.arff", setup_description)},
    )

    setup_id = int(xmltodict.parse(response)["oml:setup_exists"]["oml:id"])
    if setup_id > 0:
        return setup_id
    return False