OpenMLFlow(name: str, description: str, model: object, components: dict, parameters: dict, parameters_meta_info: dict, external_version: str, tags: list, language: str, dependencies: str, class_name: str | None = None, custom_name: str | None = None, binary_url: str | None = None, binary_format: str | None = None, binary_md5: str | None = None, uploader: str | None = None, upload_date: str | None = None, flow_id: int | None = None, extension: Extension | None = None, version: str | None = None)
Bases: OpenMLBase
OpenML Flow. Stores machine learning models.
Flows should not be generated manually, but by the function
`openml.flows.create_flow_from_model`. Using this helper function
ensures that all relevant fields are filled in.
Implements openml.implementation.upload.xsd
(https://github.com/openml/openml/blob/master/openml_OS/views/pages/api_new/v1/xsd/openml.implementation.upload.xsd).
Parameters
name : str
Name of the flow. Is used together with the attribute
`external_version` as a unique identifier of the flow.
description : str
Human-readable description of the flow (free text).
model : object
ML model which is described by this flow.
components : OrderedDict
Mapping from component identifier to an OpenMLFlow object. Components
are usually subfunctions of an algorithm (e.g. kernels), base learners
in ensemble algorithms (decision tree in adaboost) or building blocks
of a machine learning pipeline. Components are modeled as independent
flows and can be shared between flows (different pipelines can use
the same components).
parameters : OrderedDict
Mapping from parameter name to the parameter default value. The
parameter default value must be of type `str`, so that the respective
toolbox plugin can take care of casting the parameter default value to
the correct type.
parameters_meta_info : OrderedDict
Mapping from parameter name to `dict`. Stores additional information
for each parameter. Required keys are `data_type` and `description`.
external_version : str
Version number of the software the flow is implemented in. Is used
together with the attribute `name` as a unique identifier of the flow.
tags : list
List of tags. Created on the server by other API calls.
language : str
Natural language the flow is described in (not the programming
language).
dependencies : str
A list of dependencies necessary to run the flow. This field should
contain all libraries the flow depends on. To allow reproducibility
it should also specify the exact version numbers.
class_name : str, optional
The development language name of the class which is described by this
flow.
custom_name : str, optional
Custom name of the flow given by the owner.
binary_url : str, optional
Url from which the binary can be downloaded. Added by the server.
Ignored when uploaded manually. Will not be used by the python API
because binaries aren't compatible across machines.
binary_format : str, optional
Format in which the binary code was uploaded. Will not be used by the
python API because binaries aren't compatible across machines.
binary_md5 : str, optional
MD5 checksum to check if the binary code was correctly downloaded. Will
not be used by the python API because binaries aren't compatible across
machines.
uploader : str, optional
OpenML user ID of the uploader. Filled in by the server.
upload_date : str, optional
Date the flow was uploaded. Filled in by the server.
flow_id : int, optional
Flow ID. Assigned by the server.
extension : Extension, optional
The extension for a flow (e.g., sklearn).
version : str, optional
OpenML version of the flow. Assigned by the server.
Source code in openml/flows/flow.py
def __init__(  # noqa: PLR0913
    self,
    name: str,
    description: str,
    model: object,
    components: dict,
    parameters: dict,
    parameters_meta_info: dict,
    external_version: str,
    tags: list,
    language: str,
    dependencies: str,
    class_name: str | None = None,
    custom_name: str | None = None,
    binary_url: str | None = None,
    binary_format: str | None = None,
    binary_md5: str | None = None,
    uploader: str | None = None,
    upload_date: str | None = None,
    flow_id: int | None = None,
    extension: Extension | None = None,
    version: str | None = None,
) -> None:
    """Validate the mapping arguments and store all fields on the instance.

    Raises
    ------
    TypeError
        If ``components``, ``parameters`` or ``parameters_meta_info`` is
        neither an ``OrderedDict`` nor a ``dict``.
    ValueError
        If ``parameters`` and ``parameters_meta_info`` do not contain
        exactly the same keys.
    """
    self.name = name
    self.description = description
    self.model = model

    for variable, variable_name in (
        (components, "components"),
        (parameters, "parameters"),
        (parameters_meta_info, "parameters_meta_info"),
    ):
        if not isinstance(variable, (OrderedDict, dict)):
            raise TypeError(
                f"{variable_name} must be of type OrderedDict or dict, "
                f"but is {type(variable)}.",
            )

    self.components = components
    self.parameters = parameters
    self.parameters_meta_info = parameters_meta_info
    self.class_name = class_name

    # Every parameter needs meta info and vice versa; compute each one-sided
    # difference once and reuse it for both the check and the message.
    keys_parameters = set(parameters)
    keys_parameters_meta_info = set(parameters_meta_info)
    only_in_parameters = keys_parameters - keys_parameters_meta_info
    if only_in_parameters:
        raise ValueError(
            f"Parameter {only_in_parameters!s} only in "
            "parameters, but not in parameters_meta_info.",
        )
    only_in_meta_info = keys_parameters_meta_info - keys_parameters
    if only_in_meta_info:
        raise ValueError(
            f"Parameter {only_in_meta_info!s} only in "
            "parameters_meta_info, but not in parameters.",
        )

    self.external_version = external_version
    self.uploader = uploader

    self.custom_name = custom_name
    # ``None`` is tolerated for ``tags`` and normalised to an empty list.
    self.tags = tags if tags is not None else []
    self.binary_url = binary_url
    self.binary_format = binary_format
    self.binary_md5 = binary_md5
    self.version = version
    self.upload_date = upload_date
    self.language = language
    self.dependencies = dependencies
    self.flow_id = flow_id
    self._extension = extension
|
extension
property
The extension of the flow (e.g., sklearn).
openml_url
property
The URL of the object on the server, if it was uploaded, else None.
from_filesystem
classmethod
from_filesystem(input_directory: str | Path) -> OpenMLFlow
Read a flow from an XML in input_directory on the filesystem.
Source code in openml/flows/flow.py
@classmethod
def from_filesystem(cls, input_directory: str | Path) -> OpenMLFlow:
    """Read a flow from the ``flow.xml`` file inside ``input_directory``.

    Parameters
    ----------
    input_directory : str or Path
        Directory that contains a file named ``flow.xml``.

    Returns
    -------
    OpenMLFlow
        The object produced by ``_from_dict`` from the parsed XML.
    """
    # Keep the parameter untouched; the file path gets its own name.
    flow_xml_path = Path(input_directory) / "flow.xml"
    with flow_xml_path.open() as f:
        xml_string = f.read()
    # Dispatch through ``cls`` so that a subclass calling this alternate
    # constructor uses its own ``_from_dict`` instead of the hard-coded one.
    return cls._from_dict(xmltodict.parse(xml_string))
|
get_structure
get_structure(key_item: str) -> dict[str, list[str]]
Returns for each sub-component of the flow the path of identifiers
that should be traversed to reach this component. The resulting dict
maps a key (identifying a flow by either its id, name or fullname) to
the parameter prefix.
Parameters
key_item: str
The flow attribute that will be used to identify flows in the
structure. Allowed values {flow_id, name}
Returns
dict[str, List[str]]
The flow structure
Source code in openml/flows/flow.py
def get_structure(self, key_item: str) -> dict[str, list[str]]:
    """
    Map every flow in this flow's component tree to its component path.

    Each key identifies a flow (this flow or one of its nested components)
    through the attribute named by ``key_item``. Each value is the list of
    component identifiers to follow from this flow to reach it; this flow
    itself maps to an empty list.

    Parameters
    ----------
    key_item: str
        The flow attribute that will be used to identify flows in the
        structure. Allowed values {flow_id, name}

    Returns
    -------
    dict[str, List[str]]
        The flow structure

    Raises
    ------
    ValueError
        If ``key_item`` is not one of the allowed values.
    """
    if key_item not in ("flow_id", "name"):
        raise ValueError("key_item should be in {flow_id, name}")

    # Prefix every path reported by a component with that component's own
    # identifier; components are visited in ``self.components`` order.
    paths = {
        identifier: [component_key, *component_path]
        for component_key, component in self.components.items()
        for identifier, component_path in component.get_structure(key_item).items()
    }
    # This flow's own entry is written last.
    paths[getattr(self, key_item)] = []
    return paths
|
get_subflow
Returns a subflow from the tree of dependencies.
Parameters
structure: list[str]
A list of strings, indicating the location of the subflow
Returns
OpenMLFlow
The OpenMLFlow that corresponds to the structure
Source code in openml/flows/flow.py
def get_subflow(self, structure: list[str]) -> OpenMLFlow:
    """
    Look up a nested component by following a path of component identifiers.

    Parameters
    ----------
    structure: list[str]
        Component identifiers to follow, starting at this flow. Must
        contain at least one element. The argument is not modified.

    Returns
    -------
    OpenMLFlow
        The component found at the end of the path.

    Raises
    ------
    ValueError
        If ``structure`` is empty, or if its first identifier is not a
        component of this flow.
    """
    # Work on a private copy so the caller's list is left as it was.
    path = list(structure)
    if not path:
        raise ValueError("Please provide a structure list of size >= 1")

    first, *remaining = path
    if first not in self.components:
        raise ValueError(
            f"Flow {self.name} does not contain component with identifier {first}",
        )

    child = self.components[first]
    if not remaining:
        return child  # type: ignore
    # Let the child resolve the rest of the path.
    return child.get_subflow(remaining)  # type: ignore
|
open_in_browser
open_in_browser() -> None
Opens the OpenML web page corresponding to this object in your default browser.
Source code in openml/base.py
def open_in_browser(self) -> None:
    """Open this object's OpenML web page via ``webbrowser.open``.

    Raises
    ------
    ValueError
        If the ``openml_url`` attribute is ``None``.
    """
    if self.openml_url is not None:
        webbrowser.open(self.openml_url)
        return
    raise ValueError(
        "Cannot open element on OpenML.org when attribute `openml_url` is `None`",
    )
|
publish
publish(raise_error_if_exists: bool = False) -> OpenMLFlow
Publish this flow to OpenML server.
Raises a PyOpenMLError if the flow exists on the server, but
`self.flow_id` does not match the flow id known to the server.
Parameters
raise_error_if_exists : bool, optional (default=False)
If True, raise PyOpenMLError if the flow exists on the server.
If False, update the local flow to match the server flow.
Returns
self : OpenMLFlow
Source code in openml/flows/flow.py
def publish(self, raise_error_if_exists: bool = False) -> OpenMLFlow:  # noqa: FBT001, FBT002
    """Publish this flow to the OpenML server.

    The server is first asked whether a flow with this ``name`` and
    ``external_version`` exists. If not, publishing is delegated to the
    parent class. In either case the server flow is then fetched, passed to
    ``_copy_server_fields`` together with this object, and compared against
    this object.

    Parameters
    ----------
    raise_error_if_exists : bool, optional (default=False)
        If True, raise PyOpenMLError if the flow exists on the server.
        If False, update the local flow to match the server flow.

    Returns
    -------
    self : OpenMLFlow

    Raises
    ------
    PyOpenMLError
        If the flow is unknown to the server but ``self.flow_id`` is set,
        if the flow exists and ``raise_error_if_exists`` is True, or if the
        flow exists and ``self.flow_id`` differs from the server's flow id.
    ValueError
        If the comparison between this flow and the server flow fails.
    """
    # Import at top not possible because of cyclic dependencies. In
    # particular, flow.py tries to import functions.py in order to call
    # get_flow(), while functions.py tries to import flow.py in order to
    # instantiate an OpenMLFlow.
    import openml.flows.functions

    flow_functions = openml.flows.functions
    server_flow_id = flow_functions.flow_exists(self.name, self.external_version)

    if server_flow_id:
        # The server already knows this flow: nothing is published here.
        if raise_error_if_exists:
            error_message = f"This OpenMLFlow already exists with id: {server_flow_id}."
            raise openml.exceptions.PyOpenMLError(error_message)
        if self.flow_id is not None and self.flow_id != server_flow_id:
            raise openml.exceptions.PyOpenMLError(
                "Local flow_id does not match server flow_id: "
                f"'{self.flow_id}' vs '{server_flow_id}'",
            )
    else:
        # Unknown to the server: a local id would be inconsistent.
        if self.flow_id:
            raise openml.exceptions.PyOpenMLError(
                "Flow does not exist on the server, but 'flow.flow_id' is not None.",
            )
        super().publish()
        assert self.flow_id is not None  # for mypy
        server_flow_id = self.flow_id

    server_flow = flow_functions.get_flow(server_flow_id)
    _copy_server_fields(server_flow, self)
    try:
        flow_functions.assert_flows_equal(
            self,
            server_flow,
            server_flow.upload_date,
            ignore_parameter_values=True,
            ignore_custom_name_if_none=True,
        )
    except ValueError as e:
        message = e.args[0]
        raise ValueError(
            "The flow on the server is inconsistent with the local flow. "
            f"The server flow ID is {server_flow_id}. Please check manually and remove "
            f"the flow if necessary! Error is:\n'{message}'",
        ) from e
    return self
|
push_tag
push_tag(tag: str) -> None
Annotates this entity with a tag on the server.
Parameters
tag : str
Tag to attach to the flow.
Source code in openml/base.py
def push_tag(self, tag: str) -> None:
    """Attach a tag to this entity on the server.

    Parameters
    ----------
    tag : str
        The tag to add to this entity.
    """
    _tag_openml_base(self, tag)
|
remove_tag
remove_tag(tag: str) -> None
Removes a tag from this entity on the server.
Parameters
tag : str
Tag to remove from the flow.
Source code in openml/base.py
def remove_tag(self, tag: str) -> None:
    """Remove a tag from this entity on the server.

    Parameters
    ----------
    tag : str
        The tag to remove from this entity.
    """
    _tag_openml_base(self, tag, untag=True)
|
to_filesystem
to_filesystem(output_directory: str | Path) -> None
Write a flow to the filesystem as XML to output_directory.
Source code in openml/flows/flow.py
def to_filesystem(self, output_directory: str | Path) -> None:
    """Write this flow as XML to ``flow.xml`` inside ``output_directory``.

    The directory, including missing parents, is created when needed.

    Raises
    ------
    ValueError
        If ``output_directory`` already contains a ``flow.xml`` file.
    """
    target_dir = Path(output_directory)
    target_dir.mkdir(parents=True, exist_ok=True)

    flow_file = target_dir / "flow.xml"
    if flow_file.exists():
        # Refuse to overwrite a previously stored flow.
        raise ValueError("Output directory already contains a flow.xml file.")

    flow_xml = self._to_xml()
    with flow_file.open("w") as handle:
        handle.write(flow_xml)
|
url_for_id
classmethod
url_for_id(id_: int) -> str
Return the OpenML URL for the object of the class entity with the given id.
Source code in openml/base.py
@classmethod
def url_for_id(cls, id_: int) -> str:
    """Build the OpenML URL of the entity of this class that has the given id."""
    # Sample url for a flow: openml.org/f/123
    server_base_url = openml.config.get_server_base_url()
    entity_letter = cls._entity_letter()
    return f"{server_base_url}/{entity_letter}/{id_}"
|