Skip to content

trace

openml.runs.trace #

OpenMLRunTrace #

OpenMLRunTrace(run_id: int | None, trace_iterations: dict[tuple[int, int, int], OpenMLTraceIteration])

OpenML Run Trace: parsed output from Run Trace call

PARAMETER DESCRIPTION
run_id

OpenML run id.

TYPE: int

trace_iterations

Mapping from key (repeat, fold, iteration) to an object of OpenMLTraceIteration.

TYPE: dict

PARAMETER DESCRIPTION
run_id

Id for which the trace content is to be stored.

TYPE: int

trace_iterations

The trace content obtained by running a flow on a task.

TYPE: dict

Source code in openml/runs/trace.py
def __init__(
    self,
    run_id: int | None,
    trace_iterations: dict[tuple[int, int, int], OpenMLTraceIteration],
) -> None:
    """Object to hold the trace content of a run.

    Parameters
    ----------
    run_id : int or None
        Id of the run for which the trace content is stored. The signature
        also accepts ``None``.
    trace_iterations : dict
        Mapping from a ``(repeat, fold, iteration)`` key to the
        ``OpenMLTraceIteration`` stored under that key.
    """
    # Both arguments are stored as given; no copy or validation happens here.
    self.run_id = run_id
    self.trace_iterations = trace_iterations

generate classmethod #

generate(attributes: list[tuple[str, str]], content: list[list[int | float | str]]) -> OpenMLRunTrace

Generates an OpenMLRunTrace.

Generates the trace object from the attributes and content extracted while running the underlying flow.

PARAMETER DESCRIPTION
attributes

List of tuples describing the arff attributes.

TYPE: list

content

List of lists containing information about the individual tuning runs.

TYPE: list

RETURNS DESCRIPTION
OpenMLRunTrace
Source code in openml/runs/trace.py
@classmethod
def generate(
    cls,
    attributes: list[tuple[str, str]],
    content: list[list[int | float | str]],
) -> OpenMLRunTrace:
    """Build an OpenMLRunTrace from run results.

    The attributes and content are first checked for presence and for a
    matching width, then forwarded to ``cls._trace_from_arff_struct``.

    Parameters
    ----------
    attributes : list
        List of tuples describing the arff attributes.

    content : list
        List of lists containing information about the individual tuning
        runs.

    Returns
    -------
    OpenMLRunTrace

    Raises
    ------
    ValueError
        If ``content`` or ``attributes`` is ``None``, if ``content`` is
        empty, or if the number of attributes differs from the length of
        the first content row.
    """
    # Presence checks come first; content is reported before attributes.
    if content is None:
        raise ValueError("Trace content not available.")
    if attributes is None:
        raise ValueError("Trace attributes not available.")

    row_count = len(content)
    if row_count == 0:
        raise ValueError("Trace content is empty.")

    # Only the first row is compared against the attribute list.
    attribute_count = len(attributes)
    first_row = content[0]
    if attribute_count != len(first_row):
        mismatch = (
            f"Trace_attributes and trace_content not compatible: {attributes} vs {first_row}"
        )
        raise ValueError(mismatch)

    setup_string_error = (
        "setup_string not allowed when constructing a trace object from run results."
    )
    return cls._trace_from_arff_struct(
        attributes=attributes,
        content=content,
        error_message=setup_string_error,
    )

get_selected_iteration #

get_selected_iteration(fold: int, repeat: int) -> int

Returns the trace iteration that was marked as selected. In case multiple iterations are marked as selected (which should not happen), the first of these is returned.

PARAMETER DESCRIPTION
fold

TYPE: int

repeat

TYPE: int

RETURNS DESCRIPTION
int

The trace iteration from the given fold and repeat that was selected as the best iteration by the search procedure

Source code in openml/runs/trace.py
def get_selected_iteration(self, fold: int, repeat: int) -> int:
    """Return the iteration number flagged as selected for a repeat/fold.

    The stored trace iterations are scanned in their stored order; the
    first entry of the requested repeat and fold whose ``selected``
    attribute is ``True`` wins.

    Parameters
    ----------
    fold: int

    repeat: int

    Returns
    -------
    int
        The trace iteration from the given fold and repeat that was
        selected as the best iteration by the search procedure

    Raises
    ------
    ValueError
        If no entry of the given repeat and fold is marked as selected.
    """
    for (rep, fld, itr), entry in self.trace_iterations.items():
        # Skip entries that belong to another repeat/fold combination.
        if not (rep == repeat and fld == fold):
            continue
        # Identity check on purpose: only a real ``True`` counts.
        if entry.selected is True:
            return itr

    raise ValueError(f"Could not find the selected iteration for rep/fold {repeat}/{fold}")

merge_traces classmethod #

merge_traces(traces: list[OpenMLRunTrace]) -> OpenMLRunTrace

Merge multiple traces into a single trace.

PARAMETER DESCRIPTION
cls

Type of the trace object to be created.

TYPE: type

traces

List of traces to merge.

TYPE: List[OpenMLRunTrace]

RETURNS DESCRIPTION
OpenMLRunTrace

A trace object representing the merged traces.

RAISES DESCRIPTION
ValueError

If the parameters in the iterations of the traces being merged are not equal. If a key (repeat, fold, iteration) is encountered twice while merging the traces.

Source code in openml/runs/trace.py
@classmethod
def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace:
    """Combine several traces into one trace without a run id.

    Parameters
    ----------
    cls : type
        Type of the trace object to be created.
    traces : List[OpenMLRunTrace]
        List of traces to merge.

    Returns
    -------
    OpenMLRunTrace
        A trace object representing the merged traces.

    Raises
    ------
    ValueError
        If the parameter names of an iteration differ (in content or
        order) from those of the iteration merged right before it.
        If a key (repeat, fold, iteration) is encountered twice while merging the traces.
    """
    combined: dict[tuple[int, int, int], OpenMLTraceIteration] = {}
    # The iteration merged most recently; each new one is compared to it.
    last_merged = None

    for single_trace in traces:
        for current in single_trace:
            key = (current.repeat, current.fold, current.iteration)

            assert current.parameters is not None
            current_names = list(current.parameters.keys())

            if last_merged is not None:
                assert last_merged.parameters is not None
                previous_names = list(last_merged.parameters.keys())

                # List comparison, so the order of the names matters too.
                if current_names != previous_names:
                    raise ValueError(
                        "Cannot merge traces because the parameters are not equal: "
                        f"{previous_names} vs {current_names}",
                    )

            if key in combined:
                raise ValueError(
                    f"Cannot merge traces because key '{key}' was encountered twice",
                )

            combined[key] = current
            last_merged = current

    return cls(None, combined)

trace_from_arff classmethod #

trace_from_arff(arff_obj: dict[str, Any]) -> OpenMLRunTrace

Generate trace from arff trace.

Creates a trace object from an ARFF object (for example, one generated by a local run).

PARAMETER DESCRIPTION
arff_obj

LIAC arff obj, dict containing attributes, relation, data.

TYPE: dict

RETURNS DESCRIPTION
OpenMLRunTrace
Source code in openml/runs/trace.py
@classmethod
def trace_from_arff(cls, arff_obj: dict[str, Any]) -> OpenMLRunTrace:
    """Build a trace from an arff dictionary.

    Reads the ``"attributes"`` and ``"data"`` entries of the arff object
    (for example, one generated by a local run) and hands them to
    ``cls._trace_from_arff_struct``.

    Parameters
    ----------
    arff_obj : dict
        LIAC arff obj, dict containing attributes, relation, data.

    Returns
    -------
    OpenMLRunTrace
    """
    arff_attributes, arff_rows = arff_obj["attributes"], arff_obj["data"]
    build_trace = cls._trace_from_arff_struct
    return build_trace(
        attributes=arff_attributes,
        content=arff_rows,
        error_message="setup_string not supported for arff serialization",
    )

trace_from_xml classmethod #

trace_from_xml(xml: str | Path | IO) -> OpenMLRunTrace

Generate trace from xml.

Creates a trace object from the XML description.

PARAMETER DESCRIPTION
xml

An xml description that can be either a string or a file-like object.

TYPE: string | file-like object

RETURNS DESCRIPTION
run

Object containing the run id and a dict containing the trace iterations.

TYPE: OpenMLRunTrace

Source code in openml/runs/trace.py
@classmethod
def trace_from_xml(cls, xml: str | Path | IO) -> OpenMLRunTrace:
    """Generate trace from xml.

    Creates a trace object from the xml description.

    Parameters
    ----------
    xml : string | Path | file-like object
        An xml description given as a `string` holding the XML markup, as
        a `Path` to a file containing it, or as a file-like object.

    Returns
    -------
    run : OpenMLRunTrace
        Object containing the run id and a dict containing the trace
        iterations.

    Raises
    ------
    ValueError
        If the description holds no ``oml:trace_iteration`` entry, or if
        a ``oml:selected`` field is neither ``"true"`` nor ``"false"``.
    TypeError
        If the parsed ``oml:trace_iteration`` entry is not a list.
    """
    force_list = ("oml:trace_iteration",)
    if isinstance(xml, Path):
        # xmltodict.parse treats a ``str`` as XML markup, never as a file
        # name, so a path has to be opened and handed over as a file
        # object. Binary mode leaves encoding detection to the parser.
        with xml.open("rb") as xml_file:
            parsed = xmltodict.parse(xml_file, force_list=force_list)
    else:
        parsed = xmltodict.parse(xml, force_list=force_list)
    result_dict = parsed["oml:trace"]

    # NOTE(review): the run id is forwarded exactly as parsed (presumably a
    # string), although the constructor is annotated with ``int | None``.
    # Left untouched because callers may rely on it; confirm before casting.
    run_id = result_dict["oml:run_id"]
    trace = OrderedDict()

    if "oml:trace_iteration" not in result_dict:
        raise ValueError("Run does not contain valid trace. ")
    if not isinstance(result_dict["oml:trace_iteration"], list):
        raise TypeError(type(result_dict["oml:trace_iteration"]))

    for itt in result_dict["oml:trace_iteration"]:
        repeat = int(itt["oml:repeat"])
        fold = int(itt["oml:fold"])
        iteration = int(itt["oml:iteration"])
        setup_string = json.loads(itt["oml:setup_string"])
        evaluation = float(itt["oml:evaluation"])

        # Only the exact literals "true" and "false" are accepted.
        selected_value = itt["oml:selected"]
        if selected_value == "true":
            selected = True
        elif selected_value == "false":
            selected = False
        else:
            raise ValueError(
                'expected {"true", "false"} value for '
                f"selected field, received: {selected_value}",
            )

        # A later entry with the same (repeat, fold, iteration) key
        # replaces an earlier one.
        trace[(repeat, fold, iteration)] = OpenMLTraceIteration(
            repeat=repeat,
            fold=fold,
            iteration=iteration,
            setup_string=setup_string,
            evaluation=evaluation,
            selected=selected,
        )

    return cls(run_id, trace)

trace_to_arff #

trace_to_arff() -> dict[str, Any]

Generate the arff dictionary for uploading predictions to the server.

Uses the trace object to generate an arff dictionary representation.

RETURNS DESCRIPTION
arff_dict

Dictionary representation of the ARFF file that will be uploaded. Contains information about the optimization trace.

TYPE: dict

Source code in openml/runs/trace.py
def trace_to_arff(self) -> dict[str, Any]:
    """Generate the arff dictionary for uploading predictions to the server.

    Uses the trace object to generate an arff dictionary representation.
    The parameter columns are taken from the first stored trace iteration;
    every other iteration must provide the same parameters.

    Returns
    -------
    arff_dict : dict
        Dictionary representation of the ARFF file that will be uploaded.
        Contains information about the optimization trace, under the keys
        ``"attributes"``, ``"data"`` and ``"relation"``.

    Raises
    ------
    ValueError
        If ``trace_iterations`` is ``None`` or holds no iterations.
    KeyError
        If an iteration lacks one of the parameters of the first iteration.
    """
    if self.trace_iterations is None:
        raise ValueError("trace_iterations missing from the trace object")
    if len(self.trace_iterations) == 0:
        # Without this guard the lookup of the first iteration below would
        # leak a bare StopIteration to the caller.
        raise ValueError("trace_iterations is empty, cannot generate the trace arff")

    # Fixed columns, read directly from attributes of each trace iteration.
    base_attributes: list[tuple[str, Any]] = [
        ("repeat", "NUMERIC"),
        ("fold", "NUMERIC"),
        ("iteration", "NUMERIC"),
        ("evaluation", "NUMERIC"),
        ("selected", ["true", "false"]),
    ]

    # Parameter columns follow the parameters of the first iteration.
    first_iteration = next(iter(self.trace_iterations.values()))
    parameter_names = list(first_iteration.get_parameters())

    trace_attributes = list(base_attributes)
    trace_attributes.extend((PREFIX + name, "STRING") for name in parameter_names)

    data = []
    for trace_iteration in self.trace_iterations.values():
        row = []
        for column, _ in base_attributes:
            value = getattr(trace_iteration, column)
            # Only the built-in ``selected`` column is rendered as a
            # nominal value; a hyperparameter that happens to be called
            # "selected" keeps its own value.
            if column == "selected":
                value = "true" if value else "false"
            row.append(value)

        if parameter_names:
            # Decode the parameters once per iteration rather than once
            # per parameter column.
            parameters = trace_iteration.get_parameters()
            row.extend(parameters[name] for name in parameter_names)
        data.append(row)

    arff_dict: dict[str, Any] = {}
    arff_dict["attributes"] = trace_attributes
    arff_dict["data"] = data
    # TODO allow to pass a trace description when running a flow
    arff_dict["relation"] = "Trace"
    return arff_dict

OpenMLTraceIteration dataclass #

OpenMLTraceIteration(repeat: int, fold: int, iteration: int, evaluation: float, selected: bool, setup_string: dict[str, str] | None = None, parameters: dict[str, str | int | float] | None = None)

OpenML Trace Iteration: parsed output from Run Trace call. Exactly one of setup_string or parameters must be provided.

PARAMETER DESCRIPTION
repeat

repeat number (in case of no repeats: 0)

TYPE: int

fold

fold number (in case of no folds: 0)

TYPE: int

iteration

iteration number of optimization procedure

TYPE: int

setup_string

Dictionary mapping parameter names to their JSON-encoded values. If not provided, parameters should be set.

TYPE: dict DEFAULT: None

evaluation

The evaluation that was awarded to this trace iteration. The measure is defined by the task.

TYPE: float

selected

Whether this was the best of all iterations, and hence selected for making predictions. Per fold/repeat there should be only one iteration selected.

TYPE: bool

parameters

Dictionary specifying parameter names and their values. If not provided, setup_string should be set.

TYPE: OrderedDict DEFAULT: None

get_parameters #

get_parameters() -> dict[str, Any]

Get the parameters of this trace iteration.

Source code in openml/runs/trace.py
def get_parameters(self) -> dict[str, Any]:
    """Return the parameters of this trace iteration keyed by bare name.

    The first ``len(PREFIX)`` characters are cut from every key. Values
    coming from ``setup_string`` are decoded with ``json.loads``; values
    coming from ``parameters`` are returned unchanged.
    """
    prefix_length = len(PREFIX)

    # A missing or empty setup_string falls back to the parameters dict.
    if not self.setup_string:
        assert self.parameters is not None
        return {name[prefix_length:]: value for name, value in self.parameters.items()}

    decoded = {}
    for name, encoded in self.setup_string.items():
        bare_name = name[prefix_length:]
        decoded[bare_name] = json.loads(encoded)
    return decoded