openml.runs.functions #

__list_runs #

__list_runs(api_call: str) -> DataFrame

Helper function to parse API calls that return lists of runs

Source code in openml/runs/functions.py
def __list_runs(api_call: str) -> pd.DataFrame:
    """Helper function to parse API calls which are lists of runs"""
    xml_string = openml._api_calls._perform_api_call(api_call, "get")
    runs_dict = xmltodict.parse(xml_string, force_list=("oml:run",))
    # Minimalistic check if the XML is useful
    if "oml:runs" not in runs_dict:
        raise ValueError(f'Error in return XML, does not contain "oml:runs": {runs_dict}')

    if "@xmlns:oml" not in runs_dict["oml:runs"]:
        raise ValueError(
            f'Error in return XML, does not contain "oml:runs"/@xmlns:oml: {runs_dict}'
        )

    if runs_dict["oml:runs"]["@xmlns:oml"] != "http://openml.org/openml":
        raise ValueError(
            "Error in return XML, value of  "
            '"oml:runs"/@xmlns:oml is not '
            f'"http://openml.org/openml": {runs_dict}',
        )

    assert isinstance(runs_dict["oml:runs"]["oml:run"], list), type(runs_dict["oml:runs"])

    runs = {
        int(r["oml:run_id"]): {
            "run_id": int(r["oml:run_id"]),
            "task_id": int(r["oml:task_id"]),
            "setup_id": int(r["oml:setup_id"]),
            "flow_id": int(r["oml:flow_id"]),
            "uploader": int(r["oml:uploader"]),
            "task_type": TaskType(int(r["oml:task_type_id"])),
            "upload_time": str(r["oml:upload_time"]),
            "error_message": str((r["oml:error_message"]) or ""),
        }
        for r in runs_dict["oml:runs"]["oml:run"]
    }
    return pd.DataFrame.from_dict(runs, orient="index")

delete_run #

delete_run(run_id: int) -> bool

Delete run with id run_id from the OpenML server.

You can only delete runs which you uploaded.

PARAMETER DESCRIPTION
run_id

OpenML id of the run

TYPE: int

RETURNS DESCRIPTION
bool

True if the deletion was successful. False otherwise.

Source code in openml/runs/functions.py
def delete_run(run_id: int) -> bool:
    """Delete run with id `run_id` from the OpenML server.

    You can only delete runs which you uploaded.

    Parameters
    ----------
    run_id : int
        OpenML id of the run

    Returns
    -------
    bool
        True if the deletion was successful. False otherwise.
    """
    return openml.utils._delete_entity("run", run_id)
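
Example, a minimal sketch (the run id is illustrative; deletion only succeeds for runs you uploaded yourself):

import openml

# Hypothetical id of a run uploaded with your account.
success = openml.runs.delete_run(10000000)
print(success)  # True if the server accepted the deletion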

format_prediction #

format_prediction(task: OpenMLSupervisedTask, repeat: int, fold: int, index: int, prediction: str | int | float, truth: str | int | float, sample: int | None = None, proba: dict[str, float] | None = None) -> list[str | int | float]

Format the predictions in the specific order as required for the run results.

PARAMETER DESCRIPTION
task

Task for which to format the predictions.

TYPE: OpenMLSupervisedTask

repeat

From which repeat this prediction is made.

TYPE: int

fold

From which fold this prediction is made.

TYPE: int

index

For which index this prediction is made.

TYPE: int

prediction

The predicted class label or value.

TYPE: str | int | float

truth

The true class label or value.

TYPE: str | int | float

sample

From which sample set this prediction is made. Required only for LearningCurve tasks.

TYPE: int | None DEFAULT: None

proba

For classification tasks only. A mapping from each class label to its predicted probability. The dictionary should contain an entry for each of the task.class_labels. E.g.: {"Iris-Setosa": 0.2, "Iris-Versicolor": 0.7, "Iris-Virginica": 0.1}

TYPE: dict[str, float] | None DEFAULT: None

RETURNS DESCRIPTION
A list with elements for the prediction results of a run.
The returned order of the elements is (if available):

[repeat, fold, sample, index, prediction, truth, *probabilities]

This order follows the R Client API.
Source code in openml/runs/functions.py
def format_prediction(  # noqa: PLR0913
    task: OpenMLSupervisedTask,
    repeat: int,
    fold: int,
    index: int,
    prediction: str | int | float,
    truth: str | int | float,
    sample: int | None = None,
    proba: dict[str, float] | None = None,
) -> list[str | int | float]:
    """Format the predictions in the specific order as required for the run results.

    Parameters
    ----------
    task: OpenMLSupervisedTask
        Task for which to format the predictions.
    repeat: int
        From which repeat this prediction is made.
    fold: int
        From which fold this prediction is made.
    index: int
        For which index this prediction is made.
    prediction: str, int or float
        The predicted class label or value.
    truth: str, int or float
        The true class label or value.
    sample: int, optional (default=None)
        From which sample set this prediction is made.
        Required only for LearningCurve tasks.
    proba: Dict[str, float], optional (default=None)
        For classification tasks only.
        A mapping from each class label to their predicted probability.
        The dictionary should contain an entry for each of the `task.class_labels`.
        E.g.: {"Iris-Setosa": 0.2, "Iris-Versicolor": 0.7, "Iris-Virginica": 0.1}

    Returns
    -------
    A list with elements for the prediction results of a run.

    The returned order of the elements is (if available):
        [repeat, fold, sample, index, prediction, truth, *probabilities]

    This order follows the R Client API.
    """
    if isinstance(task, OpenMLClassificationTask):
        if proba is None:
            raise ValueError("`proba` is required for classification task")
        if task.class_labels is None:
            raise ValueError("The classification task must have class labels set")
        if not set(task.class_labels) == set(proba):
            raise ValueError("Each class should have a predicted probability")
        if sample is None:
            if isinstance(task, OpenMLLearningCurveTask):
                raise ValueError("`sample` can not be none for LearningCurveTask")

            sample = 0
        probabilities = [proba[c] for c in task.class_labels]
        return [repeat, fold, sample, index, prediction, truth, *probabilities]

    if isinstance(task, OpenMLRegressionTask):
        return [repeat, fold, index, prediction, truth]

    raise NotImplementedError(
        f"Formatting for {type(task)} is not supported."
        f"Supported task types: OpenMLClassificationTask, OpenMLRegressionTask,"
        f"and OpenMLLearningCurveTask. "
        f"Please ensure your task is one of these types."
    )
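
Example, a hedged sketch of formatting one prediction row for a classification task (the task id is illustrative and assumed to be a supervised classification task; the keys of proba must match task.class_labels exactly):

import openml
from openml.runs.functions import format_prediction

task = openml.tasks.get_task(59)  # illustrative classification task
proba = dict.fromkeys(task.class_labels, 0.0)
proba[task.class_labels[0]] = 1.0  # put all probability mass on the first class
row = format_prediction(
    task=task,
    repeat=0,
    fold=0,
    index=0,
    prediction=task.class_labels[0],
    truth=task.class_labels[0],
    proba=proba,
)
# row == [repeat, fold, sample, index, prediction, truth, *probabilities]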

get_run #

get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun

Gets run corresponding to run_id.

PARAMETER DESCRIPTION
run_id

TYPE: int

ignore_cache

Whether to ignore the cache. If true this will download and overwrite the run xml even if the requested run is already cached.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
run

Run corresponding to ID, fetched from the server.

TYPE: OpenMLRun

Source code in openml/runs/functions.py
@openml.utils.thread_safe_if_oslo_installed
def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT002
    """Gets run corresponding to run_id.

    Parameters
    ----------
    run_id : int

    ignore_cache : bool
        Whether to ignore the cache. If ``true`` this will download and overwrite the run xml
        even if the requested run is already cached.

    Returns
    -------
    run : OpenMLRun
        Run corresponding to ID, fetched from the server.
    """
    run_dir = Path(openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id))
    run_file = run_dir / "description.xml"

    run_dir.mkdir(parents=True, exist_ok=True)

    try:
        if not ignore_cache:
            return _get_cached_run(run_id)

        raise OpenMLCacheException(message="dummy")

    except OpenMLCacheException:
        run_xml = openml._api_calls._perform_api_call(f"run/{run_id}", "get")
        with run_file.open("w", encoding="utf8") as fh:
            fh.write(run_xml)

    return _create_run_from_xml(run_xml)
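
Example, a minimal sketch (the run id is illustrative):

import openml

run = openml.runs.get_run(10)                      # downloads and caches the run
run = openml.runs.get_run(10, ignore_cache=True)   # forces a fresh download
print(run.run_id, run.task_id, run.flow_id)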

get_run_trace #

get_run_trace(run_id: int) -> OpenMLRunTrace

Get the optimization trace object for a given run id.

PARAMETER DESCRIPTION
run_id

TYPE: int

RETURNS DESCRIPTION
OpenMLRunTrace
Source code in openml/runs/functions.py
def get_run_trace(run_id: int) -> OpenMLRunTrace:
    """
    Get the optimization trace object for a given run id.

    Parameters
    ----------
    run_id : int

    Returns
    -------
    openml.runs.OpenMLRunTrace
    """
    trace_xml = openml._api_calls._perform_api_call(f"run/trace/{run_id}", "get")
    return OpenMLRunTrace.trace_from_xml(trace_xml)
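
Example, a minimal sketch (the run id is illustrative and must refer to a run with a trace file, e.g. a hyperparameter search):

import openml

trace = openml.runs.get_run_trace(10)
# Best (selected) iteration of repeat 0, fold 0, per the trace's selection criteria.
best_iteration = trace.get_selected_iteration(0, 0)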

get_runs #

get_runs(run_ids: list[int]) -> list[OpenMLRun]

Gets all runs in run_ids list.

PARAMETER DESCRIPTION
run_ids

TYPE: list of ints

RETURNS DESCRIPTION
runs

List of runs corresponding to IDs, fetched from the server.

TYPE: list of OpenMLRun

Source code in openml/runs/functions.py
def get_runs(run_ids: list[int]) -> list[OpenMLRun]:
    """Gets all runs in run_ids list.

    Parameters
    ----------
    run_ids : list of ints

    Returns
    -------
    runs : list of OpenMLRun
        List of runs corresponding to IDs, fetched from the server.
    """
    runs = []
    for run_id in run_ids:
        runs.append(get_run(run_id))
    return runs
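
Example, a minimal sketch (the run ids are illustrative):

import openml

runs = openml.runs.get_runs([1, 2, 3])
print([run.run_id for run in runs])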

initialize_model_from_run #

initialize_model_from_run(run_id: int, *, strict_version: bool = True) -> Any

Initialize a model based on a run_id (i.e., using the exact same parameter settings)

PARAMETER DESCRIPTION
run_id

The OpenML run_id

TYPE: int

strict_version

See flow_to_model strict_version.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
model
Source code in openml/runs/functions.py
def initialize_model_from_run(run_id: int, *, strict_version: bool = True) -> Any:
    """
    Initialize a model based on a run_id (i.e., using the exact
    same parameter settings)

    Parameters
    ----------
    run_id : int
        The Openml run_id
    strict_version: bool (default=True)
        See `flow_to_model` strict_version.

    Returns
    -------
    model
    """
    run = get_run(run_id)
    # TODO(eddiebergman): I imagine this is None if it's not published,
    # might need to raise an explicit error for that
    assert run.setup_id is not None
    return initialize_model(setup_id=run.setup_id, strict_version=strict_version)
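
Example, a minimal sketch (the run id is illustrative; the matching extension, e.g. the scikit-learn extension, must be available to rebuild the model):

import openml

# Re-create the model of an existing run with the exact same parameter settings.
model = openml.runs.initialize_model_from_run(10)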

initialize_model_from_trace #

initialize_model_from_trace(run_id: int, repeat: int, fold: int, iteration: int | None = None) -> Any

Initialize a model based on the parameters that were set by an optimization procedure (i.e., using the exact same parameter settings)

PARAMETER DESCRIPTION
run_id

The OpenML run_id. Should contain a trace file, otherwise an OpenMLServerException is raised

TYPE: int

repeat

The repeat nr (column in trace file)

TYPE: int

fold

The fold nr (column in trace file)

TYPE: int

iteration

The iteration nr (column in trace file). If None, the best (selected) iteration will be searched (slow), according to the selection criteria implemented in OpenMLRunTrace.get_selected_iteration

TYPE: int DEFAULT: None

RETURNS DESCRIPTION
model
Source code in openml/runs/functions.py
def initialize_model_from_trace(
    run_id: int,
    repeat: int,
    fold: int,
    iteration: int | None = None,
) -> Any:
    """
    Initialize a model based on the parameters that were set
    by an optimization procedure (i.e., using the exact same
    parameter settings)

    Parameters
    ----------
    run_id : int
        The OpenML run_id. Should contain a trace file,
        otherwise an OpenMLServerException is raised

    repeat : int
        The repeat nr (column in trace file)

    fold : int
        The fold nr (column in trace file)

    iteration : int
        The iteration nr (column in trace file). If None, the
        best (selected) iteration will be searched (slow),
        according to the selection criteria implemented in
        OpenMLRunTrace.get_selected_iteration

    Returns
    -------
    model
    """
    run = get_run(run_id)
    # TODO(eddiebergman): I imagine this is None if it's not published,
    # might need to raise an explicit error for that
    assert run.flow_id is not None

    flow = get_flow(run.flow_id)
    run_trace = get_run_trace(run_id)

    if iteration is None:
        iteration = run_trace.get_selected_iteration(repeat, fold)

    request = (repeat, fold, iteration)
    if request not in run_trace.trace_iterations:
        raise ValueError("Combination repeat, fold, iteration not available")
    current = run_trace.trace_iterations[(repeat, fold, iteration)]

    search_model = initialize_model_from_run(run_id)
    return flow.extension.instantiate_model_from_hpo_class(search_model, current)
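
Example, a minimal sketch (the run id is illustrative and must refer to a run with an optimization trace):

import openml

# Rebuild the model with the hyperparameters chosen in repeat 0, fold 0;
# leaving iteration=None selects the best iteration recorded in the trace.
model = openml.runs.initialize_model_from_trace(run_id=10, repeat=0, fold=0)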

list_runs #

list_runs(offset: int | None = None, size: int | None = None, id: list | None = None, task: list[int] | None = None, setup: list | None = None, flow: list | None = None, uploader: list | None = None, tag: str | None = None, study: int | None = None, display_errors: bool = False, task_type: TaskType | int | None = None) -> DataFrame

List all runs matching all of the given filters. (Supports a large number of results.)

PARAMETER DESCRIPTION
offset

the number of runs to skip, starting from the first

TYPE: int DEFAULT: None

size

the maximum number of runs to show

TYPE: int DEFAULT: None

id

TYPE: list DEFAULT: None

task

TYPE: list DEFAULT: None

setup

TYPE: list | None DEFAULT: None

flow

TYPE: list DEFAULT: None

uploader

TYPE: list DEFAULT: None

tag

TYPE: str DEFAULT: None

study

TYPE: int DEFAULT: None

display_errors

Whether to list runs which have an error (for example a missing prediction file).

TYPE: bool DEFAULT: False

task_type

TYPE: TaskType | int DEFAULT: None

RETURNS DESCRIPTION
dataframe
Source code in openml/runs/functions.py
def list_runs(  # noqa: PLR0913
    offset: int | None = None,
    size: int | None = None,
    id: list | None = None,  # noqa: A002
    task: list[int] | None = None,
    setup: list | None = None,
    flow: list | None = None,
    uploader: list | None = None,
    tag: str | None = None,
    study: int | None = None,
    display_errors: bool = False,  # noqa: FBT002
    task_type: TaskType | int | None = None,
) -> pd.DataFrame:
    """
    List all runs matching all of the given filters.
    (Supports a large number of results.)

    Parameters
    ----------
    offset : int, optional
        the number of runs to skip, starting from the first
    size : int, optional
        the maximum number of runs to show

    id : list, optional

    task : list, optional

    setup: list, optional

    flow : list, optional

    uploader : list, optional

    tag : str, optional

    study : int, optional

    display_errors : bool, optional (default=False)
        Whether to list runs which have an error (for example a missing
        prediction file).

    task_type : TaskType or int, optional

    Returns
    -------
    dataframe
    """
    if id is not None and (not isinstance(id, list)):
        raise TypeError("id must be of type list.")
    if task is not None and (not isinstance(task, list)):
        raise TypeError("task must be of type list.")
    if setup is not None and (not isinstance(setup, list)):
        raise TypeError("setup must be of type list.")
    if flow is not None and (not isinstance(flow, list)):
        raise TypeError("flow must be of type list.")
    if uploader is not None and (not isinstance(uploader, list)):
        raise TypeError("uploader must be of type list.")

    listing_call = partial(
        _list_runs,
        id=id,
        task=task,
        setup=setup,
        flow=flow,
        uploader=uploader,
        tag=tag,
        study=study,
        display_errors=display_errors,
        task_type=task_type,
    )
    batches = openml.utils._list_all(listing_call, offset=offset, limit=size)
    if len(batches) == 0:
        return pd.DataFrame()

    return pd.concat(batches)
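
Example, a minimal sketch (the task and uploader ids are illustrative):

import openml

# At most 100 runs of task 59 uploaded by user 1.
runs_df = openml.runs.list_runs(task=[59], uploader=[1], size=100)
print(runs_df[["run_id", "flow_id", "upload_time"]].head())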

run_exists #

run_exists(task_id: int, setup_id: int) -> set[int]

Checks whether a task/setup combination is already present on the server.

PARAMETER DESCRIPTION
task_id

TYPE: int

setup_id

TYPE: int

RETURNS DESCRIPTION
Set of run ids for runs where flow setup_id was run on task_id; empty set if it wasn't run yet.

Source code in openml/runs/functions.py
def run_exists(task_id: int, setup_id: int) -> set[int]:
    """Checks whether a task/setup combination is already present on the
    server.

    Parameters
    ----------
    task_id : int

    setup_id : int

    Returns
    -------
        Set of run ids for runs where flow setup_id was run on task_id. Empty
        set if it wasn't run yet.
    """
    if setup_id <= 0:
        # openml setups are in range 1-inf
        return set()

    try:
        result = list_runs(task=[task_id], setup=[setup_id])
        return set() if result.empty else set(result["run_id"])
    except OpenMLServerException as exception:
        # error code implies no results. The run does not exist yet
        if exception.code != ERROR_CODE:
            raise exception
        return set()
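
Example, a minimal sketch (the task and setup ids are illustrative):

import openml
from openml.runs.functions import run_exists

run_ids = run_exists(task_id=59, setup_id=10)
if run_ids:
    print(f"This setup was already run on the task: {run_ids}")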

run_flow_on_task #

run_flow_on_task(flow: OpenMLFlow, task: OpenMLTask, avoid_duplicate_runs: bool | None = None, flow_tags: list[str] | None = None, seed: int | None = None, add_local_measures: bool = True, upload_flow: bool = False, n_jobs: int | None = None) -> OpenMLRun

Run the model provided by the flow on the dataset defined by task.

Takes the flow and repeat information into account. The Flow may optionally be published.

PARAMETER DESCRIPTION
flow

A flow wraps a machine learning model together with relevant information. The model has functions fit(X, Y) and predict(X); all supervised estimators of scikit-learn follow this definition of a model.

TYPE: OpenMLFlow

task

Task to perform. This may be an OpenMLFlow instead if the first argument is an OpenMLTask.

TYPE: OpenMLTask

avoid_duplicate_runs

If True, the run will throw an error if the setup/task combination is already present on the server. This feature requires an internet connection. If not set, it will use the default from your openml configuration (False if unset).

TYPE: (bool, optional(default=None)) DEFAULT: None

flow_tags

A list of tags that the flow should have at creation.

TYPE: (List[str], optional(default=None)) DEFAULT: None

seed

Models that are not seeded will get this seed.

TYPE: int | None DEFAULT: None

add_local_measures

Determines whether to calculate a set of evaluation measures locally, to later verify server behaviour.

TYPE: (bool, optional(default=True)) DEFAULT: True

upload_flow

If True, upload the flow to OpenML if it does not exist yet. If False, do not upload the flow to OpenML.

TYPE: bool(default=False) DEFAULT: False

n_jobs

The number of processes/threads to distribute the evaluation asynchronously. If None or 1, then the evaluation is treated as synchronous and processed sequentially. If -1, then the job uses as many cores as are available.

TYPE: int(default=None) DEFAULT: None

RETURNS DESCRIPTION
run

Result of the run.

TYPE: OpenMLRun

Source code in openml/runs/functions.py
def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
    flow: OpenMLFlow,
    task: OpenMLTask,
    avoid_duplicate_runs: bool | None = None,
    flow_tags: list[str] | None = None,
    seed: int | None = None,
    add_local_measures: bool = True,  # noqa: FBT002
    upload_flow: bool = False,  # noqa: FBT002
    n_jobs: int | None = None,
) -> OpenMLRun:
    """Run the model provided by the flow on the dataset defined by task.

    Takes the flow and repeat information into account.
    The Flow may optionally be published.

    Parameters
    ----------
    flow : OpenMLFlow
        A flow wraps a machine learning model together with relevant information.
        The model has a function fit(X,Y) and predict(X),
        all supervised estimators of scikit learn follow this definition of a model.
    task : OpenMLTask
        Task to perform. This may be an OpenMLFlow instead if the first argument is an OpenMLTask.
    avoid_duplicate_runs : bool, optional (default=None)
        If True, the run will throw an error if the setup/task combination is already present on
        the server. This feature requires an internet connection.
        If not set, it will use the default from your openml configuration (False if unset).
    flow_tags : List[str], optional (default=None)
        A list of tags that the flow should have at creation.
    seed: int, optional (default=None)
        Models that are not seeded will get this seed.
    add_local_measures : bool, optional (default=True)
        Determines whether to calculate a set of evaluation measures locally,
        to later verify server behaviour.
    upload_flow : bool (default=False)
        If True, upload the flow to OpenML if it does not exist yet.
        If False, do not upload the flow to OpenML.
    n_jobs : int (default=None)
        The number of processes/threads to distribute the evaluation asynchronously.
        If `None` or `1`, then the evaluation is treated as synchronous and processed sequentially.
        If `-1`, then the job uses as many cores as are available.

    Returns
    -------
    run : OpenMLRun
        Result of the run.
    """
    if flow_tags is not None and not isinstance(flow_tags, list):
        raise ValueError("flow_tags should be a list")

    if avoid_duplicate_runs is None:
        avoid_duplicate_runs = openml.config.avoid_duplicate_runs

    # TODO: At some point in the future do not allow for arguments in old order (changed 6-2018).
    # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
    if isinstance(flow, OpenMLTask) and isinstance(task, OpenMLFlow):
        # We want to allow either order of argument (to avoid confusion).
        warnings.warn(
            "The old argument order (Flow, model) is deprecated and "
            "will not be supported in the future. Please use the "
            "order (model, Flow).",
            DeprecationWarning,
            stacklevel=2,
        )
        task, flow = flow, task

    if task.task_id is None:
        raise ValueError("The task should be published at OpenML")

    if flow.model is None:
        flow.model = flow.extension.flow_to_model(flow)

    flow.model = flow.extension.seed_model(flow.model, seed=seed)

    # We only need to sync with the server right now if we want to upload the flow,
    # or ensure no duplicate runs exist. Otherwise it can be synced at upload time.
    flow_id = None
    if upload_flow or avoid_duplicate_runs:
        flow_id = flow_exists(flow.name, flow.external_version)
        if isinstance(flow.flow_id, int) and flow_id != flow.flow_id:
            if flow_id is not False:
                raise PyOpenMLError(
                    f"Local flow_id does not match server flow_id: '{flow.flow_id}' vs '{flow_id}'",
                )
            raise PyOpenMLError(
                "Flow does not exist on the server, but 'flow.flow_id' is not None."
            )
        if upload_flow and flow_id is False:
            flow.publish()
            flow_id = flow.flow_id
        elif flow_id:
            flow_from_server = get_flow(flow_id)
            _copy_server_fields(flow_from_server, flow)
            if avoid_duplicate_runs:
                flow_from_server.model = flow.model
                setup_id = setup_exists(flow_from_server)
                ids = run_exists(task.task_id, setup_id)
                if ids:
                    error_message = (
                        "One or more runs of this setup were already performed on the task."
                    )
                    raise OpenMLRunsExistError(ids, error_message)
        else:
            # Flow does not exist on server and we do not want to upload it.
            # No sync with the server happens.
            flow_id = None

    dataset = task.get_dataset()

    run_environment = flow.extension.get_version_information()
    tags = ["openml-python", run_environment[1]]

    if flow.extension.check_if_model_fitted(flow.model):
        warnings.warn(
            "The model is already fitted! This might cause inconsistency in comparison of results.",
            RuntimeWarning,
            stacklevel=2,
        )

    # execute the run
    res = _run_task_get_arffcontent(
        model=flow.model,
        task=task,
        extension=flow.extension,
        add_local_measures=add_local_measures,
        n_jobs=n_jobs,
    )

    data_content, trace, fold_evaluations, sample_evaluations = res
    fields = [*run_environment, time.strftime("%c"), "Created by run_flow_on_task"]
    generated_description = "\n".join(fields)
    run = OpenMLRun(
        task_id=task.task_id,
        flow_id=flow_id,
        dataset_id=dataset.dataset_id,
        model=flow.model,
        flow_name=flow.name,
        tags=tags,
        trace=trace,
        data_content=data_content,
        flow=flow,
        setup_string=flow.extension.create_setup_string(flow.model),
        description_text=generated_description,
    )

    if (upload_flow or avoid_duplicate_runs) and flow.flow_id is not None:
        # We only extract the parameter settings if a sync happened with the server.
        # I.e. when the flow was uploaded or we found it in the avoid_duplicate check.
        # Otherwise, we will do this at upload time.
        run.parameter_settings = flow.extension.obtain_parameter_values(flow)

    # now we need to attach the detailed evaluations
    if task.task_type_id == TaskType.LEARNING_CURVE:
        run.sample_evaluations = sample_evaluations
    else:
        run.fold_evaluations = fold_evaluations

    if flow_id:
        message = f"Executed Task {task.task_id} with Flow id:{run.flow_id}"
    else:
        message = f"Executed Task {task.task_id} on local Flow with name {flow.name}."
    config.logger.info(message)

    return run
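
Example, a minimal sketch (assumes the scikit-learn extension for openml-python is installed and registered; the task id is illustrative):

import openml
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier()
extension = openml.extensions.get_extension_by_model(model)
flow = extension.model_to_flow(model)
task = openml.tasks.get_task(59)
run = openml.runs.run_flow_on_task(flow=flow, task=task, avoid_duplicate_runs=False)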

run_model_on_task #

run_model_on_task(model: Any, task: int | str | OpenMLTask, avoid_duplicate_runs: bool | None = None, flow_tags: list[str] | None = None, seed: int | None = None, add_local_measures: bool = True, upload_flow: bool = False, return_flow: bool = False, n_jobs: int | None = None) -> OpenMLRun | tuple[OpenMLRun, OpenMLFlow]

Run the model on the dataset defined by the task.

PARAMETER DESCRIPTION
model

A model which has functions fit(X, Y) and predict(X); all supervised estimators of scikit-learn follow this definition of a model.

TYPE: sklearn model

task

Task to perform or Task id. This may be a model instead if the first argument is an OpenMLTask.

TYPE: OpenMLTask or int or str

avoid_duplicate_runs

If True, the run will throw an error if the setup/task combination is already present on the server. This feature requires an internet connection. If not set, it will use the default from your openml configuration (False if unset).

TYPE: (bool, optional(default=None)) DEFAULT: None

flow_tags

A list of tags that the flow should have at creation.

TYPE: (List[str], optional(default=None)) DEFAULT: None

seed

Models that are not seeded will get this seed.

TYPE: int | None DEFAULT: None

add_local_measures

Determines whether to calculate a set of evaluation measures locally, to later verify server behaviour.

TYPE: (bool, optional(default=True)) DEFAULT: True

upload_flow

If True, upload the flow to OpenML if it does not exist yet. If False, do not upload the flow to OpenML.

TYPE: bool(default=False) DEFAULT: False

return_flow

If True, returns the OpenMLFlow generated from the model in addition to the OpenMLRun.

TYPE: bool(default=False) DEFAULT: False

n_jobs

The number of processes/threads to distribute the evaluation asynchronously. If None or 1, then the evaluation is treated as synchronous and processed sequentially. If -1, then the job uses as many cores as are available.

TYPE: int(default=None) DEFAULT: None

RETURNS DESCRIPTION
run

Result of the run.

TYPE: OpenMLRun

flow

Flow generated from the model.

TYPE: OpenMLFlow (optional, only if `return_flow` is True).

Source code in openml/runs/functions.py
def run_model_on_task(  # noqa: PLR0913
    model: Any,
    task: int | str | OpenMLTask,
    avoid_duplicate_runs: bool | None = None,
    flow_tags: list[str] | None = None,
    seed: int | None = None,
    add_local_measures: bool = True,  # noqa: FBT002
    upload_flow: bool = False,  # noqa: FBT002
    return_flow: bool = False,  # noqa: FBT002
    n_jobs: int | None = None,
) -> OpenMLRun | tuple[OpenMLRun, OpenMLFlow]:
    """Run the model on the dataset defined by the task.

    Parameters
    ----------
    model : sklearn model
        A model which has a function fit(X,Y) and predict(X),
        all supervised estimators of scikit learn follow this definition of a model.
    task : OpenMLTask or int or str
        Task to perform or Task id.
        This may be a model instead if the first argument is an OpenMLTask.
    avoid_duplicate_runs : bool, optional (default=None)
        If True, the run will throw an error if the setup/task combination is already present on
        the server. This feature requires an internet connection.
        If not set, it will use the default from your openml configuration (False if unset).
    flow_tags : List[str], optional (default=None)
        A list of tags that the flow should have at creation.
    seed: int, optional (default=None)
        Models that are not seeded will get this seed.
    add_local_measures : bool, optional (default=True)
        Determines whether to calculate a set of evaluation measures locally,
        to later verify server behaviour.
    upload_flow : bool (default=False)
        If True, upload the flow to OpenML if it does not exist yet.
        If False, do not upload the flow to OpenML.
    return_flow : bool (default=False)
        If True, returns the OpenMLFlow generated from the model in addition to the OpenMLRun.
    n_jobs : int (default=None)
        The number of processes/threads to distribute the evaluation asynchronously.
        If `None` or `1`, then the evaluation is treated as synchronous and processed sequentially.
        If `-1`, then the job uses as many cores as are available.

    Returns
    -------
    run : OpenMLRun
        Result of the run.
    flow : OpenMLFlow (optional, only if `return_flow` is True).
        Flow generated from the model.
    """
    if avoid_duplicate_runs is None:
        avoid_duplicate_runs = openml.config.avoid_duplicate_runs
    if avoid_duplicate_runs and not config.apikey:
        warnings.warn(
            "avoid_duplicate_runs is set to True, but no API key is set. "
            "Please set your API key in the OpenML configuration file, see"
            "https://openml.github.io/openml-python/main/examples/20_basic/introduction_tutorial"
            ".html#authentication for more information on authentication.",
            RuntimeWarning,
            stacklevel=2,
        )

    # TODO: At some point in the future do not allow for arguments in old order (6-2018).
    # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
    # When removing this please also remove the method `is_estimator` from the extension
    # interface as it is only used here (MF, 3-2019)
    if isinstance(model, (int, str, OpenMLTask)):
        warnings.warn(
            "The old argument order (task, model) is deprecated and "
            "will not be supported in the future. Please use the "
            "order (model, task).",
            DeprecationWarning,
            stacklevel=2,
        )
        task, model = model, task

    extension = get_extension_by_model(model, raise_if_no_extension=True)
    if extension is None:
        # This should never happen and is only here to please mypy; it will be gone soon
        # once the whole function is removed.
        raise TypeError(extension)

    flow = extension.model_to_flow(model)

    def get_task_and_type_conversion(_task: int | str | OpenMLTask) -> OpenMLTask:
        """Retrieve an OpenMLTask object from either an integer or string ID,
        or directly from an OpenMLTask object.

        Parameters
        ----------
        _task : Union[int, str, OpenMLTask]
            The task ID or the OpenMLTask object.

        Returns
        -------
        OpenMLTask
            The OpenMLTask object.
        """
        if isinstance(_task, (int, str)):
            return get_task(int(_task))  # type: ignore

        return _task

    task = get_task_and_type_conversion(task)

    run = run_flow_on_task(
        task=task,
        flow=flow,
        avoid_duplicate_runs=avoid_duplicate_runs,
        flow_tags=flow_tags,
        seed=seed,
        add_local_measures=add_local_measures,
        upload_flow=upload_flow,
        n_jobs=n_jobs,
    )
    if return_flow:
        return run, flow
    return run
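
Example, a minimal sketch (the task id is illustrative; publishing the run requires an API key to be configured):

import openml
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(max_depth=3)
task = openml.tasks.get_task(59)
run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False)
run.publish()  # upload the run to the OpenML server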