Skip to content

flow

openml.flows.flow #

OpenMLFlow #

OpenMLFlow(name: str, description: str, model: object, components: dict, parameters: dict, parameters_meta_info: dict, external_version: str, tags: list, language: str, dependencies: str, class_name: str | None = None, custom_name: str | None = None, binary_url: str | None = None, binary_format: str | None = None, binary_md5: str | None = None, uploader: str | None = None, upload_date: str | None = None, flow_id: int | None = None, extension: Extension | None = None, version: str | None = None)

Bases: OpenMLBase

OpenML Flow. Stores machine learning models.

Flows should not be generated manually, but by the function `openml.flows.create_flow_from_model`. Using this helper function ensures that all relevant fields are filled in.

Implements [openml.implementation.upload.xsd](https://github.com/openml/openml/blob/master/openml_OS/views/pages/api_new/v1/xsd/openml.implementation.upload.xsd).

Parameters#

name : str Name of the flow. Is used together with the attribute external_version as a unique identifier of the flow. description : str Human-readable description of the flow (free text). model : object ML model which is described by this flow. components : OrderedDict Mapping from component identifier to an OpenMLFlow object. Components are usually subfunctions of an algorithm (e.g. kernels), base learners in ensemble algorithms (decision tree in adaboost) or building blocks of a machine learning pipeline. Components are modeled as independent flows and can be shared between flows (different pipelines can use the same components). parameters : OrderedDict Mapping from parameter name to the parameter default value. The parameter default value must be of type str, so that the respective toolbox plugin can take care of casting the parameter default value to the correct type. parameters_meta_info : OrderedDict Mapping from parameter name to dict. Stores additional information for each parameter. Required keys are data_type and description. external_version : str Version number of the software the flow is implemented in. Is used together with the attribute name as a unique identifier of the flow. tags : list List of tags. Created on the server by other API calls. language : str Natural language the flow is described in (not the programming language). dependencies : str A list of dependencies necessary to run the flow. This field should contain all libraries the flow depends on. To allow reproducibility it should also specify the exact version numbers. class_name : str, optional The development language name of the class which is described by this flow. custom_name : str, optional Custom name of the flow given by the owner. binary_url : str, optional URL from which the binary can be downloaded. Added by the server. Ignored when uploaded manually. Will not be used by the python API because binaries aren't compatible across machines. 
binary_format : str, optional Format in which the binary code was uploaded. Will not be used by the python API because binaries aren't compatible across machines. binary_md5 : str, optional MD5 checksum to check if the binary code was correctly downloaded. Will not be used by the python API because binaries aren't compatible across machines. uploader : str, optional OpenML user ID of the uploader. Filled in by the server. upload_date : str, optional Date the flow was uploaded. Filled in by the server. flow_id : int, optional Flow ID. Assigned by the server. extension : Extension, optional The extension for a flow (e.g., sklearn). version : str, optional OpenML version of the flow. Assigned by the server.

Source code in openml/flows/flow.py
def __init__(  # noqa: PLR0913
    self,
    name: str,
    description: str,
    model: object,
    components: dict,
    parameters: dict,
    parameters_meta_info: dict,
    external_version: str,
    tags: list,
    language: str,
    dependencies: str,
    class_name: str | None = None,
    custom_name: str | None = None,
    binary_url: str | None = None,
    binary_format: str | None = None,
    binary_md5: str | None = None,
    uploader: str | None = None,
    upload_date: str | None = None,
    flow_id: int | None = None,
    extension: Extension | None = None,
    version: str | None = None,
):
    """Store the flow description and validate its mapping arguments.

    Raises
    ------
    TypeError
        If ``components``, ``parameters`` or ``parameters_meta_info`` is
        not a dict (``OrderedDict`` is accepted, being a dict subclass).
    ValueError
        If ``parameters`` and ``parameters_meta_info`` do not have exactly
        the same keys.
    """
    self.name = name
    self.description = description
    self.model = model

    for variable, variable_name in (
        (components, "components"),
        (parameters, "parameters"),
        (parameters_meta_info, "parameters_meta_info"),
    ):
        # OrderedDict is a subclass of dict, so one check covers both.
        if not isinstance(variable, dict):
            raise TypeError(
                f"{variable_name} must be of type OrderedDict or dict, "
                f"but is {type(variable)}.",
            )

    self.components = components
    self.parameters = parameters
    self.parameters_meta_info = parameters_meta_info
    self.class_name = class_name

    # Every parameter needs meta information and vice versa.
    keys_parameters = set(parameters)
    keys_parameters_meta_info = set(parameters_meta_info)
    only_in_parameters = keys_parameters - keys_parameters_meta_info
    if only_in_parameters:
        raise ValueError(
            f"Parameter {only_in_parameters!s} only in "
            "parameters, but not in parameters_meta_info.",
        )
    only_in_meta_info = keys_parameters_meta_info - keys_parameters
    if only_in_meta_info:
        raise ValueError(
            f"Parameter {only_in_meta_info!s} only in "
            "parameters_meta_info, but not in parameters.",
        )

    self.external_version = external_version
    self.uploader = uploader

    self.custom_name = custom_name
    # Fall back to a fresh list when no tags are supplied.
    self.tags = tags if tags is not None else []
    self.binary_url = binary_url
    self.binary_format = binary_format
    self.binary_md5 = binary_md5
    self.version = version
    self.upload_date = upload_date
    self.language = language
    self.dependencies = dependencies
    self.flow_id = flow_id
    self._extension = extension

extension property #

extension: Extension

The extension of the flow (e.g., sklearn).

id property #

id: int | None

The ID of the flow.

openml_url property #

openml_url: str | None

The URL of the object on the server, if it was uploaded, else None.

from_filesystem classmethod #

from_filesystem(input_directory: str | Path) -> OpenMLFlow

Read a flow from an XML in input_directory on the filesystem.

Source code in openml/flows/flow.py
@classmethod
def from_filesystem(cls, input_directory: str | Path) -> OpenMLFlow:
    """Load the flow stored as ``flow.xml`` inside ``input_directory``."""
    xml_path = Path(input_directory) / "flow.xml"
    with xml_path.open() as handle:
        xml_text = handle.read()
    flow_dict = xmltodict.parse(xml_text)
    return OpenMLFlow._from_dict(flow_dict)

get_structure #

get_structure(key_item: str) -> dict[str, list[str]]

Returns, for each sub-component of the flow, the path of identifiers that must be traversed to reach that component. The resulting dict maps a key (identifying a flow by either its flow_id or its name) to the parameter prefix.

Parameters#

key_item: str The flow attribute that will be used to identify flows in the structure. Allowed values {flow_id, name}

Returns#

dict[str, List[str]] The flow structure

Source code in openml/flows/flow.py
def get_structure(self, key_item: str) -> dict[str, list[str]]:
    """
    Map every flow in this flow's component tree to its component path.

    The path of a flow is the list of component identifiers that has to be
    followed, starting at this flow, to reach it. This flow itself is
    included with an empty path.

    Parameters
    ----------
    key_item: str
        The flow attribute whose value is used as key in the returned
        dict. Allowed values {flow_id, name}

    Returns
    -------
    dict[str, List[str]]
        The flow structure

    Raises
    ------
    ValueError
        If ``key_item`` is neither ``"flow_id"`` nor ``"name"``.
    """
    if key_item not in ("flow_id", "name"):
        raise ValueError("key_item should be in {flow_id, name}")

    paths = {}
    for component_id, component in self.components.items():
        nested_paths = component.get_structure(key_item)
        # Prefix every path reported by the sub-flow with the identifier
        # under which that sub-flow is registered on this flow.
        paths.update(
            {flow_key: [component_id, *tail] for flow_key, tail in nested_paths.items()},
        )
    # Added last, so this flow's own (empty) path wins on a key clash.
    paths[getattr(self, key_item)] = []
    return paths

get_subflow #

get_subflow(structure: list[str]) -> OpenMLFlow

Returns a subflow from the tree of dependencies.

Parameters#

structure: list[str] A list of strings, indicating the location of the subflow

Returns#

OpenMLFlow The OpenMLFlow that corresponds to the structure

Source code in openml/flows/flow.py
def get_subflow(self, structure: list[str]) -> OpenMLFlow:
    """
    Look up a (nested) component of this flow by its component path.

    Parameters
    ----------
    structure: list[str]
        Component identifiers to follow, starting at this flow. The
        caller's list is left unmodified.

    Returns
    -------
    OpenMLFlow
        The OpenMLFlow that corresponds to the structure

    Raises
    ------
    ValueError
        If ``structure`` is empty or its first identifier is not one of
        this flow's components.
    """
    # Work on a private copy so the caller's list is never touched.
    path = list(structure)
    if not path:
        raise ValueError("Please provide a structure list of size >= 1")

    head, *remainder = path
    if head not in self.components:
        raise ValueError(
            f"Flow {self.name} does not contain component with identifier {head}",
        )

    component = self.components[head]
    if remainder:
        # More identifiers left: let the component resolve the rest.
        return component.get_subflow(remainder)  # type: ignore
    return component  # type: ignore

open_in_browser #

open_in_browser() -> None

Opens the OpenML web page corresponding to this object in your default browser.

Source code in openml/base.py
def open_in_browser(self) -> None:
    """Open this object's OpenML web page in the default browser.

    Raises
    ------
    ValueError
        If ``openml_url`` is ``None``.
    """
    url = self.openml_url
    if url is None:
        raise ValueError(
            "Cannot open element on OpenML.org when attribute `openml_url` is `None`",
        )

    webbrowser.open(url)

publish #

publish(raise_error_if_exists: bool = False) -> OpenMLFlow

Publish this flow to OpenML server.

Raises a PyOpenMLError if the flow exists on the server, but self.flow_id does not match the server known flow id.

Parameters#

raise_error_if_exists : bool, optional (default=False) If True, raise PyOpenMLError if the flow exists on the server. If False, update the local flow to match the server flow.

Returns#

self : OpenMLFlow

Source code in openml/flows/flow.py
def publish(self, raise_error_if_exists: bool = False) -> OpenMLFlow:  # noqa: FBT001, FBT002
    """Publish this flow to OpenML server.

    A flow that the server does not know yet is uploaded. Afterwards the
    server's version of the flow is fetched, its server-side fields are
    copied onto this object, and both are compared for consistency.

    Parameters
    ----------
    raise_error_if_exists : bool, optional (default=False)
        If True, raise PyOpenMLError if the flow exists on the server.
        If False, update the local flow to match the server flow.

    Returns
    -------
    self : OpenMLFlow

    Raises
    ------
    PyOpenMLError
        If the flow is unknown to the server although `self.flow_id` is
        set, if it exists and ``raise_error_if_exists`` is True, or if
        `self.flow_id` does not match the server known flow id.
    ValueError
        If the flow on the server is inconsistent with the local flow.
    """
    # Deferred import: flow.py needs functions.py for get_flow(), while
    # functions.py needs flow.py to instantiate an OpenMLFlow, so a
    # module-level import would be cyclic.
    import openml.flows.functions

    flow_functions = openml.flows.functions
    server_flow_id = flow_functions.flow_exists(self.name, self.external_version)

    if server_flow_id:
        if raise_error_if_exists:
            raise openml.exceptions.PyOpenMLError(
                f"This OpenMLFlow already exists with id: {server_flow_id}.",
            )
        if self.flow_id is not None and self.flow_id != server_flow_id:
            raise openml.exceptions.PyOpenMLError(
                "Local flow_id does not match server flow_id: "
                f"'{self.flow_id}' vs '{server_flow_id}'",
            )
    else:
        if self.flow_id:
            raise openml.exceptions.PyOpenMLError(
                "Flow does not exist on the server, but 'flow.flow_id' is not None.",
            )
        super().publish()
        assert self.flow_id is not None  # for mypy
        server_flow_id = self.flow_id

    server_flow = flow_functions.get_flow(server_flow_id)
    _copy_server_fields(server_flow, self)
    try:
        flow_functions.assert_flows_equal(
            self,
            server_flow,
            server_flow.upload_date,
            ignore_parameter_values=True,
            ignore_custom_name_if_none=True,
        )
    except ValueError as err:
        detail = err.args[0]
        raise ValueError(
            "The flow on the server is inconsistent with the local flow. "
            f"The server flow ID is {server_flow_id}. Please check manually and remove "
            f"the flow if necessary! Error is:\n'{detail}'",
        ) from err
    return self

push_tag #

push_tag(tag: str) -> None

Annotates this entity with a tag on the server.

Parameters#

tag : str Tag to attach to the flow.

Source code in openml/base.py
def push_tag(self, tag: str) -> None:
    """Annotates this entity with a tag on the server.

    Delegates to ``_tag_openml_base``.

    Parameters
    ----------
    tag : str
        Tag to attach to the entity.
    """
    _tag_openml_base(self, tag)

remove_tag #

remove_tag(tag: str) -> None

Removes a tag from this entity on the server.

Parameters#

tag : str Tag to remove from the entity.

Source code in openml/base.py
def remove_tag(self, tag: str) -> None:
    """Removes a tag from this entity on the server.

    Delegates to ``_tag_openml_base`` with ``untag=True``.

    Parameters
    ----------
    tag : str
        Tag to remove from the entity.
    """
    _tag_openml_base(self, tag, untag=True)

to_filesystem #

to_filesystem(output_directory: str | Path) -> None

Write a flow to the filesystem as XML to output_directory.

Source code in openml/flows/flow.py
def to_filesystem(self, output_directory: str | Path) -> None:
    """Serialise this flow as ``flow.xml`` inside ``output_directory``.

    The directory (including missing parents) is created when needed.

    Raises
    ------
    ValueError
        If ``output_directory`` already contains a ``flow.xml`` file.
    """
    target_dir = Path(output_directory)
    target_dir.mkdir(parents=True, exist_ok=True)

    xml_file = target_dir / "flow.xml"
    # Refuse to overwrite a previously stored flow.
    if xml_file.exists():
        raise ValueError("Output directory already contains a flow.xml file.")

    flow_xml = self._to_xml()
    with xml_file.open("w") as handle:
        handle.write(flow_xml)

url_for_id classmethod #

url_for_id(id_: int) -> str

Return the OpenML URL for the object of the class entity with the given id.

Source code in openml/base.py
@classmethod
def url_for_id(cls, id_: int) -> str:
    """Build the OpenML URL of the entity of this class that has the given id."""
    server_base_url = openml.config.get_server_base_url()
    entity_letter = cls._entity_letter()
    # Sample url for a flow: openml.org/f/123
    return f"{server_base_url}/{entity_letter}/{id_}"