trace

openml.runs.trace #

OpenMLRunTrace #

OpenMLRunTrace(run_id: int | None, trace_iterations: dict[tuple[int, int, int], OpenMLTraceIteration])

OpenML Run Trace: parsed output from Run Trace call

PARAMETER	DESCRIPTION
`run_id`	OpenML run id. TYPE: `int`
`trace_iterations`	Mapping from key `(repeat, fold, iteration)` to an object of OpenMLTraceIteration. TYPE: `dict`

PARAMETER	DESCRIPTION
`run_id`	Id for which the trace content is to be stored. TYPE: `int`
`trace_iterations`	The trace content obtained by running a flow on a task, keyed by `(repeat, fold, iteration)`. TYPE: `dict`

Source code in openml/runs/trace.py

def __init__(
    self,
    run_id: int | None,
    trace_iterations: dict[tuple[int, int, int], OpenMLTraceIteration],
) -> None:
    """Object to hold the trace content of a run.

    Parameters
    ----------
    run_id : int | None
        Id of the run for which the trace content is to be stored.
    trace_iterations : dict[tuple[int, int, int], OpenMLTraceIteration]
        The trace content obtained by running a flow on a task, stored as a
        mapping from ``(repeat, fold, iteration)`` to the corresponding
        ``OpenMLTraceIteration``.
    """
    # Both arguments are stored as-is; no copy or validation happens here.
    self.run_id = run_id
    self.trace_iterations = trace_iterations

generate `classmethod` #

generate(attributes: list[tuple[str, str]], content: list[list[int | float | str]]) -> OpenMLRunTrace

Generates an OpenMLRunTrace.

Generates the trace object from the attributes and content extracted while running the underlying flow.

PARAMETER	DESCRIPTION
`attributes`	List of tuples describing the arff attributes. TYPE: `list`
`content`	List of lists containing information about the individual tuning runs. TYPE: `list`

RETURNS	DESCRIPTION
`OpenMLRunTrace`

Source code in openml/runs/trace.py

@classmethod
def generate(
    cls,
    attributes: list[tuple[str, str]],
    content: list[list[int | float | str]],
) -> OpenMLRunTrace:
    """Build an OpenMLRunTrace from arff-style attributes and rows.

    The inputs are validated and then handed to
    ``cls._trace_from_arff_struct`` together with the error message to use
    for this construction path.

    Parameters
    ----------
    attributes : list
        List of tuples describing the arff attributes.

    content : list
        List of lists containing information about the individual tuning
        runs.

    Returns
    -------
    OpenMLRunTrace

    Raises
    ------
    ValueError
        If ``content`` or ``attributes`` is None, if ``content`` is empty, or
        if the number of attributes differs from the length of the first
        content row.
    """
    # The checks run in a fixed order so the first violated precondition is
    # the one that gets reported.
    if content is None:
        raise ValueError("Trace content not available.")
    if attributes is None:
        raise ValueError("Trace attributes not available.")
    if not len(content):
        raise ValueError("Trace content is empty.")

    # Only the first row is compared against the attribute list.
    attribute_count = len(attributes)
    first_row = content[0]
    if attribute_count != len(first_row):
        raise ValueError(
            f"Trace_attributes and trace_content not compatible: {attributes} vs {first_row}",
        )

    message = "setup_string not allowed when constructing a trace object from run results."
    return cls._trace_from_arff_struct(
        attributes=attributes,
        content=content,
        error_message=message,
    )

get_selected_iteration #

get_selected_iteration(fold: int, repeat: int) -> int

Returns the trace iteration that was marked as selected. In case multiple are marked as selected (should not happen), the first of these is returned.

PARAMETER	DESCRIPTION
`fold`	TYPE: `int`
`repeat`	TYPE: `int`

RETURNS	DESCRIPTION
`int`	The trace iteration from the given fold and repeat that was selected as the best iteration by the search procedure

Source code in openml/runs/trace.py

def get_selected_iteration(self, fold: int, repeat: int) -> int:
    """
    Look up the iteration flagged as selected for one repeat/fold pair.

    If several iterations of the same repeat/fold are flagged (should not
    happen), the first one found while iterating ``trace_iterations`` is
    returned.

    Parameters
    ----------
    fold: int

    repeat: int

    Returns
    -------
    int
        The trace iteration from the given fold and repeat that was
        selected as the best iteration by the search procedure

    Raises
    ------
    ValueError
        If no iteration of the given repeat/fold is flagged as selected.
    """
    for (rep, fld, iteration_number), entry in self.trace_iterations.items():
        in_requested_split = rep == repeat and fld == fold
        # Identity check on purpose: only a literal ``True`` counts as selected.
        if in_requested_split and entry.selected is True:
            return iteration_number

    raise ValueError(f"Could not find the selected iteration for rep/fold {repeat}/{fold}")

merge_traces `classmethod` #

merge_traces(traces: list[OpenMLRunTrace]) -> OpenMLRunTrace

Merge multiple traces into a single trace.

PARAMETER	DESCRIPTION
`cls`	Type of the trace object to be created. TYPE: `type`
`traces`	List of traces to merge. TYPE: `List[OpenMLRunTrace]`

RETURNS	DESCRIPTION
`OpenMLRunTrace`	A trace object representing the merged traces.

RAISES	DESCRIPTION
`ValueError`	If the parameters in the iterations of the traces being merged are not equal. If a key (repeat, fold, iteration) is encountered twice while merging the traces.

Source code in openml/runs/trace.py

@classmethod
def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace:
    """Combine the iterations of several traces into one trace.

    Iterations are visited in the order given, and each one must use the
    same parameter names, in the same order, as the iteration merged
    immediately before it.

    Parameters
    ----------
    cls : type
        Type of the trace object to be created.
    traces : List[OpenMLRunTrace]
        List of traces to merge.

    Returns
    -------
    OpenMLRunTrace
        A trace object representing the merged traces. It is created with
        ``run_id`` set to None.

    Raises
    ------
    ValueError
        If an iteration has no parameters.
        If the parameters in the iterations of the traces being merged are not equal.
        If a key (repeat, fold, iteration) is encountered twice while merging the traces.
    """
    combined: dict[tuple[int, int, int], OpenMLTraceIteration] = {}
    last_key = None  # key of the most recently merged iteration

    for single_trace in traces:
        for entry in single_trace:
            current_key = (entry.repeat, entry.fold, entry.iteration)

            if entry.parameters is None:
                raise ValueError(
                    f"Iteration parameters cannot be None for repeat {entry.repeat}, "
                    f"fold {entry.fold}, iteration {entry.iteration}"
                )
            current_names = list(entry.parameters.keys())

            if last_key is not None:
                last_entry = combined[last_key]

                if last_entry.parameters is None:
                    raise ValueError(
                        f"Trace iteration parameters cannot be None for iteration {last_key}"
                    )
                last_names = list(last_entry.parameters.keys())

                # List comparison, so the order of parameter names matters too.
                if current_names != last_names:
                    raise ValueError(
                        "Cannot merge traces because the parameters are not equal: "
                        f"{last_names} vs {current_names}",
                    )

            if current_key in combined:
                raise ValueError(
                    f"Cannot merge traces because key '{current_key}' was encountered twice",
                )

            combined[current_key] = entry
            last_key = current_key

    return cls(None, combined)

trace_from_arff `classmethod` #

trace_from_arff(arff_obj: dict[str, Any]) -> OpenMLRunTrace

Generate trace from arff trace.

Creates a trace object from an arff object (for example, generated by a local run).

PARAMETER	DESCRIPTION
`arff_obj`	LIAC arff obj, dict containing attributes, relation, data. TYPE: `dict`

RETURNS	DESCRIPTION
`OpenMLRunTrace`

Source code in openml/runs/trace.py

@classmethod
def trace_from_arff(cls, arff_obj: dict[str, Any]) -> OpenMLRunTrace:
    """Generate trace from arff trace.

    Reads the ``"attributes"`` and ``"data"`` entries of an arff object (for
    example, generated by a local run) and delegates the construction to
    ``cls._trace_from_arff_struct``.

    Parameters
    ----------
    arff_obj : dict
        LIAC arff obj, dict containing attributes, relation, data.

    Returns
    -------
    OpenMLRunTrace
    """
    return cls._trace_from_arff_struct(
        attributes=arff_obj["attributes"],
        content=arff_obj["data"],
        error_message="setup_string not supported for arff serialization",
    )

trace_from_xml `classmethod` #

trace_from_xml(xml: str | Path | IO) -> OpenMLRunTrace

Generate trace from xml.

Creates a trace object from the xml description.

PARAMETER	DESCRIPTION
`xml`	An xml description that can be either a `string` or a file-like object. TYPE: `string \| file-like object`

RETURNS	DESCRIPTION
`run`	Object containing the run id and a dict containing the trace iterations. TYPE: `OpenMLRunTrace`

Source code in openml/runs/trace.py

@classmethod
def trace_from_xml(cls, xml: str | Path | IO) -> OpenMLRunTrace:
    """Generate trace from xml.

    Creates a trace object from the xml description.

    Parameters
    ----------
    xml : string | Path | file-like object
        An xml description that can be either a `string` holding the xml
        content, a file-like object, or a ``Path`` to a file containing the
        xml.

    Returns
    -------
    run : OpenMLRunTrace
        Object containing the run id and a dict containing the trace
        iterations.

    Raises
    ------
    ValueError
        If the xml contains no ``oml:trace_iteration`` entries, or if an
        ``oml:selected`` field is neither ``"true"`` nor ``"false"``.
    TypeError
        If the parsed ``oml:trace_iteration`` entry is not a list.
    """
    force_list = ("oml:trace_iteration",)
    if isinstance(xml, Path):
        # xmltodict.parse() interprets a str as xml *content*, so a path must
        # not be passed as a string; open the file and hand over the handle.
        with xml.open("rb") as xml_file:
            parsed = xmltodict.parse(xml_file, force_list=force_list)
    else:
        parsed = xmltodict.parse(xml, force_list=force_list)
    result_dict = parsed["oml:trace"]

    # NOTE(review): run_id is passed on exactly as parsed (presumably a str,
    # not an int) -- confirm what callers expect before converting it.
    run_id = result_dict["oml:run_id"]
    trace = OrderedDict()

    if "oml:trace_iteration" not in result_dict:
        raise ValueError("Run does not contain valid trace. ")
    if not isinstance(result_dict["oml:trace_iteration"], list):
        raise TypeError(type(result_dict["oml:trace_iteration"]))

    for itt in result_dict["oml:trace_iteration"]:
        repeat = int(itt["oml:repeat"])
        fold = int(itt["oml:fold"])
        iteration = int(itt["oml:iteration"])
        setup_string = json.loads(itt["oml:setup_string"])
        evaluation = float(itt["oml:evaluation"])

        # Only the exact lowercase literals are accepted for the boolean flag.
        selected_value = itt["oml:selected"]
        if selected_value == "true":
            selected = True
        elif selected_value == "false":
            selected = False
        else:
            raise ValueError(
                'expected {"true", "false"} value for '
                f"selected field, received: {selected_value}",
            )

        trace[(repeat, fold, iteration)] = OpenMLTraceIteration(
            repeat=repeat,
            fold=fold,
            iteration=iteration,
            setup_string=setup_string,
            evaluation=evaluation,
            selected=selected,
        )

    return cls(run_id, trace)

trace_to_arff #

trace_to_arff() -> dict[str, Any]

Generate the arff dictionary for uploading predictions to the server.

Uses the trace object to generate an arff dictionary representation.

RETURNS	DESCRIPTION
`arff_dict`	Dictionary representation of the ARFF file that will be uploaded. Contains information about the optimization trace. TYPE: `dict`

Source code in openml/runs/trace.py

def trace_to_arff(self) -> dict[str, Any]:
    """Generate the arff dictionary for uploading predictions to the server.

    Uses the trace object to generate an arff dictionary representation.
    The parameter columns are taken from the first trace iteration; every
    other iteration must provide a value for each of those parameters.

    Returns
    -------
    arff_dict : dict
        Dictionary representation of the ARFF file that will be uploaded.
        Contains information about the optimization trace.

    Raises
    ------
    ValueError
        If ``trace_iterations`` is None or contains no iterations.
    """
    if self.trace_iterations is None:
        raise ValueError("trace_iterations missing from the trace object")

    iterations = list(self.trace_iterations.values())
    if not iterations:
        # Without a first iteration there is nothing to derive the parameter
        # columns from; fail clearly instead of leaking a StopIteration.
        raise ValueError("trace_iterations is empty, cannot generate the trace arff")

    parameter_names = list(iterations[0].get_parameters())

    # attributes that will be in trace arff
    trace_attributes: list[tuple[str, Any]] = [
        ("repeat", "NUMERIC"),
        ("fold", "NUMERIC"),
        ("iteration", "NUMERIC"),
        ("evaluation", "NUMERIC"),
        ("selected", ["true", "false"]),
    ]
    trace_attributes.extend((PREFIX + name, "STRING") for name in parameter_names)

    data = []
    for trace_iteration in iterations:
        row = [
            trace_iteration.repeat,
            trace_iteration.fold,
            trace_iteration.iteration,
            trace_iteration.evaluation,
            # Only the trace's own flag is mapped to the nominal arff values;
            # a hyperparameter that happens to be named "selected" is kept as is.
            "true" if trace_iteration.selected else "false",
        ]
        # Resolve the parameters once per iteration (and not at all when there
        # are no parameter columns) rather than once per column.
        parameters = trace_iteration.get_parameters() if parameter_names else {}
        row.extend(parameters[name] for name in parameter_names)
        data.append(row)

    # TODO allow to pass a trace description when running a flow
    return {
        "attributes": trace_attributes,
        "data": data,
        "relation": "Trace",
    }

OpenMLTraceIteration `dataclass` #

OpenMLTraceIteration(repeat: int, fold: int, iteration: int, evaluation: float, selected: bool, setup_string: dict[str, str] | None = None, parameters: dict[str, str | int | float] | None = None)

OpenML Trace Iteration: parsed output from Run Trace call. Exactly one of `setup_string` or `parameters` must be provided.

PARAMETER	DESCRIPTION
`repeat`	repeat number (in case of no repeats: 0) TYPE: `int`
`fold`	fold number (in case of no folds: 0) TYPE: `int`
`iteration`	iteration number of optimization procedure TYPE: `int`
`setup_string`	Mapping from parameter names to JSON-encoded values representing the parameters. If not provided, `parameters` should be set. TYPE: `dict` DEFAULT: `None`
`evaluation`	The evaluation that was awarded to this trace iteration. Measure is defined by the task TYPE: `double`
`selected`	Whether this was the best of all iterations, and hence selected for making predictions. Per fold/repeat there should be only one iteration selected TYPE: `bool`
`parameters`	Dictionary specifying parameter names and their values. If not provided, `setup_string` should be set. TYPE: `OrderedDict` DEFAULT: `None`

get_parameters #

get_parameters() -> dict[str, Any]

Get the parameters of this trace iteration.

Source code in openml/runs/trace.py

def get_parameters(self) -> dict[str, Any]:
    """Return this iteration's parameters keyed by their un-prefixed names.

    A non-empty ``setup_string`` takes precedence and its values are decoded
    with ``json.loads``; otherwise the values of ``parameters`` are returned
    unchanged.

    Raises
    ------
    ValueError
        If ``setup_string`` is empty or unset and ``parameters`` is None.
    """
    # parameters have prefix 'parameter_'; the first len(PREFIX) characters of
    # every key are dropped without checking that they match the prefix.
    prefix_length = len(PREFIX)

    if self.setup_string:
        decoded: dict[str, Any] = {}
        for name, raw_value in self.setup_string.items():
            decoded[name[prefix_length:]] = json.loads(raw_value)
        return decoded

    if self.parameters is None:
        raise ValueError("Parameters must be set before calling get_parameters().")

    return {name[prefix_length:]: value for name, value in self.parameters.items()}

trace

openml.runs.trace #

OpenMLRunTrace #

generate classmethod #

get_selected_iteration #

merge_traces classmethod #

trace_from_arff classmethod #

trace_from_xml classmethod #

trace_to_arff #

OpenMLTraceIteration dataclass #

get_parameters #

generate `classmethod` #

merge_traces `classmethod` #

trace_from_arff `classmethod` #

trace_from_xml `classmethod` #

OpenMLTraceIteration `dataclass` #