
openml.evaluations.functions #

__list_evaluations #

__list_evaluations(api_call: str) -> list[OpenMLEvaluation]

Helper function to parse API responses that contain lists of run evaluations

Source code in openml/evaluations/functions.py
def __list_evaluations(api_call: str) -> list[OpenMLEvaluation]:
    """Helper function to parse API calls which are lists of runs"""
    xml_string = openml._api_calls._perform_api_call(api_call, "get")
    evals_dict = xmltodict.parse(xml_string, force_list=("oml:evaluation",))
    # Minimalistic check if the XML is useful
    if "oml:evaluations" not in evals_dict:
        raise ValueError(
            f'Error in return XML, does not contain "oml:evaluations": {evals_dict!s}',
        )

    assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type(
        evals_dict["oml:evaluations"]["oml:evaluation"],
    )

    uploader_ids = list(
        {eval_["oml:uploader"] for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]},
    )
    api_users = "user/list/user_id/" + ",".join(uploader_ids)
    xml_string_user = openml._api_calls._perform_api_call(api_users, "get")

    users = xmltodict.parse(xml_string_user, force_list=("oml:user",))
    user_dict = {user["oml:id"]: user["oml:username"] for user in users["oml:users"]["oml:user"]}

    evals = []
    for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]:
        run_id = int(eval_["oml:run_id"])
        value = float(eval_["oml:value"]) if "oml:value" in eval_ else None
        values = json.loads(eval_["oml:values"]) if eval_.get("oml:values", None) else None
        array_data = eval_.get("oml:array_data")

        evals.append(
            OpenMLEvaluation(
                run_id=run_id,
                task_id=int(eval_["oml:task_id"]),
                setup_id=int(eval_["oml:setup_id"]),
                flow_id=int(eval_["oml:flow_id"]),
                flow_name=eval_["oml:flow_name"],
                data_id=int(eval_["oml:data_id"]),
                data_name=eval_["oml:data_name"],
                function=eval_["oml:function"],
                upload_time=eval_["oml:upload_time"],
                uploader=int(eval_["oml:uploader"]),
                uploader_name=user_dict[eval_["oml:uploader"]],
                value=value,
                values=values,
                array_data=array_data,
            )
        )

    return evals
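
The helper relies on xmltodict's force_list option so that a single <oml:evaluation> element still parses as a list. A minimal standalone sketch of that behavior:

import xmltodict

# Without force_list, a lone child element parses as a dict; force_list
# guarantees a list either way, which the assert in the helper relies on.
xml = (
    "<oml:evaluations>"
    "<oml:evaluation><oml:run_id>1</oml:run_id></oml:evaluation>"
    "</oml:evaluations>"
)
parsed = xmltodict.parse(xml, force_list=("oml:evaluation",))
assert isinstance(parsed["oml:evaluations"]["oml:evaluation"], list)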

list_estimation_procedures #

list_estimation_procedures() -> list[str]

Return the list of available estimation procedures.

The function performs an API call to retrieve the names of all estimation procedures that are available.

RETURNS DESCRIPTION
list
the names of the available estimation procedures
Source code in openml/evaluations/functions.py
def list_estimation_procedures() -> list[str]:
    """Return list of evaluation procedures available.

    The function performs an API call to retrieve the entire list of
    evaluation procedures' names that are available.

    Returns
    -------
    list
    """
    api_call = "estimationprocedure/list"
    xml_string = openml._api_calls._perform_api_call(api_call, "get")
    api_results = xmltodict.parse(xml_string)

    # Minimalistic check if the XML is useful
    if "oml:estimationprocedures" not in api_results:
        raise ValueError('Error in return XML, does not contain "oml:estimationprocedures"')

    if "oml:estimationprocedure" not in api_results["oml:estimationprocedures"]:
        raise ValueError('Error in return XML, does not contain "oml:estimationprocedure"')

    if not isinstance(api_results["oml:estimationprocedures"]["oml:estimationprocedure"], list):
        raise TypeError('Error in return XML, does not contain "oml:estimationprocedure" as a list')

    return [
        prod["oml:name"]
        for prod in api_results["oml:estimationprocedures"]["oml:estimationprocedure"]
    ]
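
A minimal usage sketch; the exact names returned depend on the OpenML server:

import openml

procedures = openml.evaluations.list_estimation_procedures()
print(procedures[:3])  # e.g. ['10-fold Crossvalidation', ...] (server-dependent)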

list_evaluation_measures #

list_evaluation_measures() -> list[str]

Return the list of available evaluation measures.

The function performs an API call to retrieve the entire list of evaluation measures that are available.

RETURNS DESCRIPTION
list
the names of the available evaluation measures
Source code in openml/evaluations/functions.py
def list_evaluation_measures() -> list[str]:
    """Return list of evaluation measures available.

    The function performs an API call to retrieve the entire list of
    evaluation measures that are available.

    Returns
    -------
    list

    """
    api_call = "evaluationmeasure/list"
    xml_string = openml._api_calls._perform_api_call(api_call, "get")
    # force_list must be a tuple of tag names; both tags are accessed as lists below
    qualities = xmltodict.parse(xml_string, force_list=("oml:measures", "oml:measure"))
    # Minimalistic check if the XML is useful
    if "oml:evaluation_measures" not in qualities:
        raise ValueError('Error in return XML, does not contain "oml:evaluation_measures"')

    if not isinstance(qualities["oml:evaluation_measures"]["oml:measures"][0]["oml:measure"], list):
        raise TypeError('Error in return XML, does not contain "oml:measure" as a list')

    return qualities["oml:evaluation_measures"]["oml:measures"][0]["oml:measure"]
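
A minimal usage sketch; the available measures depend on the OpenML server:

import openml

measures = openml.evaluations.list_evaluation_measures()
print("predictive_accuracy" in measures)  # typically True on the public server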

list_evaluations #

list_evaluations(function: str, offset: int | None = None, size: int | None = None, tasks: list[str | int] | None = None, setups: list[str | int] | None = None, flows: list[str | int] | None = None, runs: list[str | int] | None = None, uploaders: list[str | int] | None = None, tag: str | None = None, study: int | None = None, per_fold: bool | None = None, sort_order: str | None = None, output_format: Literal['dataframe'] = ...) -> DataFrame
list_evaluations(function: str, offset: int | None = None, size: int | None = None, tasks: list[str | int] | None = None, setups: list[str | int] | None = None, flows: list[str | int] | None = None, runs: list[str | int] | None = None, uploaders: list[str | int] | None = None, tag: str | None = None, study: int | None = None, per_fold: bool | None = None, sort_order: str | None = None, output_format: Literal['object'] = 'object') -> dict[int, OpenMLEvaluation]
list_evaluations(function: str, offset: int | None = None, size: int | None = None, tasks: list[str | int] | None = None, setups: list[str | int] | None = None, flows: list[str | int] | None = None, runs: list[str | int] | None = None, uploaders: list[str | int] | None = None, tag: str | None = None, study: int | None = None, per_fold: bool | None = None, sort_order: str | None = None, output_format: Literal['object', 'dataframe'] = 'object') -> dict[int, OpenMLEvaluation] | DataFrame

List all run-evaluation pairs matching all of the given filters.

(Supports a large number of results.)

PARAMETER DESCRIPTION
function

the evaluation function. e.g., predictive_accuracy

TYPE: str

offset

the number of runs to skip, starting from the first

TYPE: int DEFAULT: None

size

The maximum number of runs to show. If set to None, it returns all the results.

TYPE: int | None DEFAULT: None

tasks

the list of task IDs

TYPE: list[str | int] | None DEFAULT: None

setups

the list of setup IDs

TYPE: list[str | int] | None DEFAULT: None

flows

the list of flow IDs

TYPE: list[str | int] | None DEFAULT: None

runs

the list of run IDs

TYPE: list[str | int] | None DEFAULT: None

uploaders

the list of uploader IDs

TYPE: list[str | int] | None DEFAULT: None

tag

filter evaluation based on given tag

TYPE: str DEFAULT: None

study

the id of a study to filter the evaluations by

TYPE: int DEFAULT: None

per_fold

whether to return evaluations per fold, instead of aggregated over the entire task

TYPE: bool DEFAULT: None

sort_order

order of sorting evaluations, ascending ("asc") or descending ("desc")

TYPE: str DEFAULT: None

output_format

The parameter decides the format of the output.
- If 'object', the output is a dict of OpenMLEvaluation objects
- If 'dataframe', the output is a pandas DataFrame

TYPE: Literal['object', 'dataframe'] DEFAULT: 'object'

RETURNS DESCRIPTION
dict or dataframe
Source code in openml/evaluations/functions.py
def list_evaluations(
    function: str,
    offset: int | None = None,
    size: int | None = None,
    tasks: list[str | int] | None = None,
    setups: list[str | int] | None = None,
    flows: list[str | int] | None = None,
    runs: list[str | int] | None = None,
    uploaders: list[str | int] | None = None,
    tag: str | None = None,
    study: int | None = None,
    per_fold: bool | None = None,
    sort_order: str | None = None,
    output_format: Literal["object", "dataframe"] = "object",
) -> dict[int, OpenMLEvaluation] | pd.DataFrame:
    """List all run-evaluation pairs matching all of the given filters.

    (Supports a large number of results.)

    Parameters
    ----------
    function : str
        the evaluation function. e.g., predictive_accuracy
    offset : int, optional
        the number of runs to skip, starting from the first
    size : int, optional
        The maximum number of runs to show.
        If set to ``None``, it returns all the results.

    tasks : list[str | int], optional
        the list of task IDs
    setups : list[str | int], optional
        the list of setup IDs
    flows : list[str | int], optional
        the list of flow IDs
    runs : list[str | int], optional
        the list of run IDs
    uploaders : list[str | int], optional
        the list of uploader IDs
    tag : str, optional
        filter evaluation based on given tag

    study : int, optional
        the id of a study to filter the evaluations by
    per_fold : bool, optional
        whether to return evaluations per fold, instead of aggregated over the entire task
    sort_order : str, optional
        order of sorting evaluations, ascending ("asc") or descending ("desc")

    output_format: str, optional (default='object')
        The parameter decides the format of the output.
        - If 'object' the output is a dict of OpenMLEvaluation objects
        - If 'dataframe' the output is a pandas DataFrame

    Returns
    -------
    dict or dataframe
    """
    if output_format not in ("dataframe", "object"):
        raise ValueError("Invalid output format. Only 'object', 'dataframe'.")

    per_fold_str = None
    if per_fold is not None:
        per_fold_str = str(per_fold).lower()

    listing_call = partial(
        _list_evaluations,
        function=function,
        tasks=tasks,
        setups=setups,
        flows=flows,
        runs=runs,
        uploaders=uploaders,
        tag=tag,
        study=study,
        sort_order=sort_order,
        per_fold=per_fold_str,
    )
    eval_collection = openml.utils._list_all(listing_call, offset=offset, limit=size)

    flattened = list(chain.from_iterable(eval_collection))
    if output_format == "dataframe":
        records = [item._to_dict() for item in flattened]
        return pd.DataFrame.from_records(records)  # keeps the default integer index

    return {e.run_id: e for e in flattened}
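
A minimal usage sketch, assuming task 31 (the credit-g task on the public OpenML server) is available:

import openml

# Fetch up to 100 predictive-accuracy evaluations for task 31 as a DataFrame.
evals = openml.evaluations.list_evaluations(
    function="predictive_accuracy",
    tasks=[31],
    size=100,
    output_format="dataframe",
)
print(evals[["run_id", "flow_name", "value"]].head())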

list_evaluations_setups #

list_evaluations_setups(function: str, offset: int | None = None, size: int | None = None, tasks: list | None = None, setups: list | None = None, flows: list | None = None, runs: list | None = None, uploaders: list | None = None, tag: str | None = None, per_fold: bool | None = None, sort_order: str | None = None, parameters_in_separate_columns: bool = False) -> DataFrame

List all run-evaluation pairs matching all of the given filters and their hyperparameter settings.

PARAMETER DESCRIPTION
function

the evaluation function. e.g., predictive_accuracy

TYPE: str

offset

the number of runs to skip, starting from the first

TYPE: int DEFAULT: None

size

the maximum number of runs to show

TYPE: int DEFAULT: None

tasks

the list of task IDs

TYPE: list[int] DEFAULT: None

setups

the list of setup IDs

TYPE: list | None DEFAULT: None

flows

the list of flow IDs

TYPE: list[int] DEFAULT: None

runs

the list of run IDs

TYPE: list[int] DEFAULT: None

uploaders

the list of uploader IDs

TYPE: list[int] DEFAULT: None

tag

filter evaluation based on given tag

TYPE: str DEFAULT: None

per_fold

whether to return evaluations per fold, instead of aggregated over the entire task

TYPE: bool DEFAULT: None

sort_order

order of sorting evaluations, ascending ("asc") or descending ("desc")

TYPE: str DEFAULT: None

parameters_in_separate_columns

Returns hyperparameters in separate columns if set to True. Valid only for a single flow.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
dataframe with hyperparameter settings in the 'parameters' column, stored per run as a dict of (hyperparameter, value) pairs.
Source code in openml/evaluations/functions.py
def list_evaluations_setups(
    function: str,
    offset: int | None = None,
    size: int | None = None,
    tasks: list | None = None,
    setups: list | None = None,
    flows: list | None = None,
    runs: list | None = None,
    uploaders: list | None = None,
    tag: str | None = None,
    per_fold: bool | None = None,
    sort_order: str | None = None,
    parameters_in_separate_columns: bool = False,  # noqa: FBT002
) -> pd.DataFrame:
    """List all run-evaluation pairs matching all of the given filters
    and their hyperparameter settings.

    Parameters
    ----------
    function : str
        the evaluation function. e.g., predictive_accuracy
    offset : int, optional
        the number of runs to skip, starting from the first
    size : int, optional
        the maximum number of runs to show
    tasks : list[int], optional
        the list of task IDs
    setups: list[int], optional
        the list of setup IDs
    flows : list[int], optional
        the list of flow IDs
    runs : list[int], optional
        the list of run IDs
    uploaders : list[int], optional
        the list of uploader IDs
    tag : str, optional
        filter evaluation based on given tag
    per_fold : bool, optional
        whether to return evaluations per fold, instead of aggregated over the entire task
    sort_order : str, optional
        order of sorting evaluations, ascending ("asc") or descending ("desc")
    parameters_in_separate_columns : bool, optional (default=False)
        Returns hyperparameters in separate columns if set to True.
        Valid only for a single flow.

    Returns
    -------
    dataframe with hyperparameter settings in the 'parameters' column,
    stored per run as a dict of (hyperparameter, value) pairs.
    """
    if parameters_in_separate_columns and (flows is None or len(flows) != 1):
        raise ValueError("Can set parameters_in_separate_columns to true only for single flow_id")

    # List evaluations
    evals = list_evaluations(
        function=function,
        offset=offset,
        size=size,
        runs=runs,
        tasks=tasks,
        setups=setups,
        flows=flows,
        uploaders=uploaders,
        tag=tag,
        per_fold=per_fold,
        sort_order=sort_order,
        output_format="dataframe",
    )
    # List setups
    # list_setups by setup id does not support large result sets (the request
    # would exceed the URL length limit), so we split the unique setup ids
    # returned by list_evaluations into chunks of at most N ids each.
    _df = pd.DataFrame()
    if len(evals) != 0:
        N = 100  # maximum chunk size
        uniq = np.asarray(evals["setup_id"].unique())
        length = len(uniq)

        # np.array_split allows the number of sections to not evenly divide
        # the array: it yields length % split_size sub-arrays of size
        # length // split_size + 1 and the rest of size length // split_size.
        split_size = ((length - 1) // N) + 1
        setup_chunks = np.array_split(uniq, split_size)

        setup_data = pd.DataFrame()
        for _setups in setup_chunks:
            result = openml.setups.list_setups(setup=_setups, output_format="dataframe")
            assert isinstance(result, pd.DataFrame)
            result = result.drop("flow_id", axis=1)
            # concatenate the resulting setup chunks into a single dataframe
            setup_data = pd.concat([setup_data, result])

        parameters = []
        # Convert parameters of setup into dict of (hyperparameter, value)
        for parameter_dict in setup_data["parameters"]:
            if parameter_dict is not None:
                parameters.append(
                    {param["full_name"]: param["value"] for param in parameter_dict.values()},
                )
            else:
                parameters.append({})
        setup_data["parameters"] = parameters
        # Merge setups with evaluations
        _df = evals.merge(setup_data, on="setup_id", how="left")

    if parameters_in_separate_columns:
        _df = pd.concat(
            [_df.drop("parameters", axis=1), _df["parameters"].apply(pd.Series)],
            axis=1,
        )

    return _df
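
A minimal usage sketch; the flow id below is a hypothetical placeholder, so substitute one that exists on the server:

import openml

# Fetch evaluations together with each run's hyperparameter settings, with
# one column per hyperparameter (valid because exactly one flow is given).
df = openml.evaluations.list_evaluations_setups(
    function="predictive_accuracy",
    flows=[6969],  # hypothetical flow id
    size=50,
    parameters_in_separate_columns=True,
)
print(df.columns.tolist())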