"""Determines how configured metrics depend on each other."""
import json
from typing import Any
import networkx as nx
from flexeval.helpers import generate_hash
from flexeval.schema import eval_schema
def create_metrics_graph(metrics: eval_schema.Metrics) -> list[dict[str, Any]]:
    """Input is the metrics object with attributes 'function' and 'rubric', each of which maps to a list of metrics.
    Output is a list of metric dictionaries (one per node in the dependency graph), in topological order.
    Each entry and each dependency gets an ID so they are easy to match later.
    """
# Create a directed graph
G = nx.DiGraph()
metric_graph_dict = {}
    # make an intermediate data structure that adds IDs to all listed evaluations
user_metrics_with_ids = {}
for evaluation_type in ["function", "rubric"]:
user_metrics_with_ids[evaluation_type] = []
# add a hash to every metric in the list
item_list: list[eval_schema.MetricItem] = getattr(metrics, evaluation_type)
if item_list is not None:
for item in item_list:
metric_with_id = {"id": generate_hash()}
for k, v in item.model_dump().items():
metric_with_id[k] = v
user_metrics_with_ids[evaluation_type].append(metric_with_id)
# now that all potential parents have IDs, find parents for each child
for evaluation_type in ["function", "rubric"]:
for metric_dict in user_metrics_with_ids[evaluation_type]:
parent_metrics, depends_on_with_parent_ids = get_parent_metrics(
all_metrics=user_metrics_with_ids, child=metric_dict
)
metric_dict["depends_on"] = depends_on_with_parent_ids
child_metric_str, evaluation_name = get_metric_info(metric_dict)
            # Now construct the graph.
            # Adding an edge implicitly adds nodes where necessary.
            if len(parent_metrics) > 0:
                for parent_metric_dict in parent_metrics:
                    parent_metric_str, _ = get_metric_info(parent_metric_dict)
                    G.add_edge(parent_metric_str, child_metric_str)
            else:
                # if there is no parent, just add the node by itself
                G.add_node(child_metric_str)
            # make the 'canonical' representation of the child
            metric_graph_dict[child_metric_str] = {
                "evaluation_name": evaluation_name,  # function or rubric name
                "evaluation_type": evaluation_type,
            }
            # copy over the remaining metric details, skipping identifier fields
            for k, v in metric_dict.items():
                if k not in ["function_name", "rubric_name", "type", "name"]:
                    metric_graph_dict[child_metric_str][k] = v
    # Make a string representation of the edges for error reporting
graph_string = "Metric Dependencies:"
for edge in G.edges():
graph_string += f"\n{'' if edge[1] == 'root' else edge[1]} -> {edge[0]}"
if not nx.is_directed_acyclic_graph(G):
raise ValueError(
"The set of metric dependencies must be acyclic! You have cyclical dependencies. {graph_string}"
)
# Set up sequence of evaluations
# Perform topological sort
# This is the order in which metrics will be evaluated
# and the conditions under which they will be evaluated
topological_order = list(nx.topological_sort(G))
metric_graph = [metric_graph_dict[node] for node in topological_order]
return metric_graph
def get_metric_info(single_metric: dict) -> tuple[str, str]:
"""Input will be a single metric dictionary
Output will be
- string representation of metric using json.dumps
- evaluation_name - function_name or rubric_name
"""
return json.dumps(single_metric), single_metric.get("name")
def get_parent_metrics(all_metrics: dict, child: dict) -> tuple[list, list]:
"""metrics_graph_ordered_list will be a list of metrics in order in which they should be run
This function takes the eval represented by "child" and finds ALL evals in "all_metrics"
that quality as the child's immediate parent
An eval can qualify as a parent by having a matching name, type, context_only
At this point, we won't have enough information to decide whether the child should be run
(since the child might have additional requirements on the output of the parent)
but this is enough to tell us that the child should be run AFTER the parent.
"""
    # if we use defaults in "depends_on", we might end up with non-matches accidentally
    # for a dependency, multiple keys might be listed;
    # we should find exactly one parent that matches ALL of those key/value pairs, otherwise raise an error
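    # Example (hypothetical entries, just to illustrate the matching logic):
    #
    #     all_metrics = {
    #         "function": [{"id": "abc123", "name": "string_length", "type": "function"}],
    #         "rubric": [],
    #     }
    #     child = {"id": "def456", "name": "is_helpful", "type": "rubric",
    #              "depends_on": [{"name": "string_length", "type": "function"}]}
    #
    # would return that single "string_length" entry as the parent, with
    # "parent_id": "abc123" added to the depends_on entry.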
parents = []
depends_on_with_id_added = []
for requirement in child.get("depends_on", []):
candidate_parents = []
allowed_types = ["function", "rubric"]
# if requirement has the type narrowed down, then narrow it down here too
if "type" in requirement and requirement["type"] is not None:
allowed_types = [requirement["type"]]
for candidate_type in allowed_types:
for candidate in all_metrics.get(candidate_type, []):
# assume the candidate is a match unless demonstrated otherwise
matches = True
# if it's not the right type, don't match it
if "type" in requirement and candidate_type not in allowed_types:
matches = False
# if the conditionals are listed in the depends_on entry but don't match...
# Only check conditionals that are explicitly specified (not None) in the requirement
conditionals = ["metric_level", "context_only", "name", "kwargs"]
for conditional in conditionals:
if (
conditional in requirement
and requirement.get(conditional) is not None
and requirement.get(conditional) != candidate.get(conditional)
):
matches = False
break
if matches:
candidate_parents.append(candidate)
requirement["parent_id"] = candidate["id"]
depends_on_with_id_added.append(requirement)
if len(candidate_parents) == 0:
raise ValueError(
f"We were unable to locate any match for the `depends_on` entry `{json.dumps(requirement, indent=4)}` in the metric `{json.dumps(child, indent=4)}`. The full set of parent candidates is `{json.dumps(all_metrics, indent=4)}`."
)
if len(candidate_parents) > 1:
raise ValueError(
f"We located more than one match for the `depends_on` entry `{json.dumps(requirement, indent=4)}` in the metric `{json.dumps(child, indent=4)}`. The matches were `{json.dumps(candidate_parents, indent=4)}`. Please add another criterion to disambiguate."
)
parents += candidate_parents
return parents, depends_on_with_id_added
def apply_defaults(schema, data, path=None):
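    """Recursively fill in missing values in `data` using the "default" values
    declared in the corresponding JSON-Schema-style `schema`.
    `path` tracks where we currently are in the schema (e.g. ["metrics", "function"]).
    """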
# Initialize path as an empty list if None. This will store the navigation path in the schema.
if path is None:
path = []
if data is None:
# If data is None and defaults are specified, apply them
return schema.get("default")
if isinstance(data, dict):
# Process dictionaries
if "properties" in schema:
# Loop over each schema property
for key, subschema in schema["properties"].items():
# Update path with current property
new_path = path + [key]
if key in data:
# Recursively apply defaults, pass the path along
data[key] = apply_defaults(subschema, data[key], new_path)
elif "default" in subschema:
# Apply default if the key is not in the data
data[key] = subschema["default"]
# print("setting", path, key, subschema["default"])
elif "items" in schema:
if "properties" in schema["items"]:
# Loop over each schema property
for key, subschema in schema["items"]["properties"].items():
# Update path with current property
new_path = path + [key]
if key in data:
# Recursively apply defaults, pass the path along
data[key] = apply_defaults(subschema, data[key], new_path)
elif "default" in subschema:
# Apply default if the key is not in the data
data[key] = subschema["default"]
if path == ["metrics", "function"]:
data["type"] = "function"
if path == ["metrics", "rubric"]:
data["type"] = "rubric"
return data
if isinstance(data, list) and "items" in schema:
# Process lists by applying defaults to each item
item_schema = schema["items"]
# Apply defaults to each item in the list, passing along the path
return [apply_defaults(item_schema, item, path) for item in data]
return data
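# A minimal, self-contained illustration of apply_defaults (hypothetical schema and
# data, not the real flexeval eval schema):
#
#     schema = {
#         "properties": {
#             "temperature": {"default": 0.0},
#             "notes": {"default": ""},
#         }
#     }
#     apply_defaults(schema, {"temperature": 0.7})
#     # -> {"temperature": 0.7, "notes": ""}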
# for verify installation
# if function_name is defined, rubric
# make sure "function" and "rubric" default to empty lists
# TODO - don't set defaults in "depends_on" to make matching more flexible
# evaluation_name: my_rubric
# evaluation_type: rubric
# metric_name: <