flexeval.schema.eval_schema#

pydantic model flexeval.schema.eval_schema.CompletionLlm[source]#

Bases: BaseModel

Show JSON schema
{
   "title": "CompletionLlm",
   "type": "object",
   "properties": {
      "function_name": {
         "description": "Completion function defined in `completion_functions.py` or available in the global namespace.",
         "title": "Function Name",
         "type": "string"
      },
      "include_system_prompt": {
         "default": true,
         "title": "Include System Prompt",
         "type": "boolean"
      },
      "kwargs": {
         "additionalProperties": true,
         "description": "Additional arguments that will be passed to the completion function. Must correspond to arguments in the named function.",
         "title": "Kwargs",
         "type": "object"
      }
   },
   "additionalProperties": false,
   "required": [
      "function_name"
   ]
}

Config:
  • extra: str = forbid

Fields:
field function_name: str [Required]#

Completion function defined in completion_functions.py or available in the global namespace.

field include_system_prompt: bool = True#
field kwargs: Dict[str, Any] [Optional]#

Additional arguments that will be passed to the completion function. Must correspond to arguments in the named function.

class Config[source]#

Bases: object

extra = 'forbid'#
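
A minimal construction sketch for reference; the function name and kwargs below are hypothetical placeholders for whatever is actually defined in completion_functions.py:

from flexeval.schema.eval_schema import CompletionLlm

# Hypothetical example: assumes a function named "my_completion_fn" exists in
# completion_functions.py (or the global namespace) and accepts the kwargs shown here.
completion_llm = CompletionLlm(
    function_name="my_completion_fn",
    include_system_prompt=True,           # default: include the system prompt when completing
    kwargs={"model": "example-model"},    # forwarded to the function; keys must match its arguments
)

Because the model is configured with extra='forbid', any key other than the three fields above raises a ValidationError.
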
pydantic model flexeval.schema.eval_schema.DependsOnItem[source]#

Bases: BaseModel

Show JSON schema
{
   "title": "DependsOnItem",
   "type": "object",
   "properties": {
      "name": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "Name of the dependency function or rubric.",
         "title": "Name"
      },
      "type": {
         "anyOf": [
            {
               "enum": [
                  "function",
                  "rubric"
               ],
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "One of 'function' or 'rubric' indicating the type of the dependency.",
         "title": "Type"
      },
      "kwargs": {
         "anyOf": [
            {
               "additionalProperties": true,
               "type": "object"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The keyword arguments for the dependency. If provided, used to match which evaluation this dependency is for, so must match the keyword args given for some evaluation.",
         "title": "Kwargs"
      },
      "metric_name": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "Name of the metric dependency. This may be different than function_name if the metric function returns a key/value pair - in which case, this will match the key.",
         "title": "Metric Name"
      },
      "metric_level": {
         "anyOf": [
            {
               "enum": [
                  "Message",
                  "Turn",
                  "Thread",
                  "ToolCall"
               ],
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The level of the metric to depend on, which must be equal to or 'greater' than the dependent metric's level. e.g. a Turn can depend on a Thread metric, but not the reverse.",
         "title": "Metric Level"
      },
      "relative_object_position": {
         "default": 0,
         "description": "The position of the object within the Thread. If 0 (default), this is the metric value for the current object. If -1, this is the metric value for the most recent object before this one.",
         "maximum": 0,
         "title": "Relative Object Position",
         "type": "integer"
      },
      "metric_min_value": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": -1.7976931348623157e+308,
         "description": "Minimum value of the dependency to consider it as satisfied.",
         "title": "Metric Min Value"
      },
      "metric_max_value": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": 1.7976931348623157e+308,
         "description": "Maximum value of the dependency to consider it as satisfied.",
         "title": "Metric Max Value"
      }
   },
   "additionalProperties": false
}

Config:
  • extra: str = forbid

Fields:
field kwargs: Dict[str, Any] | None = None#

The keyword arguments for the dependency. If provided, used to match which evaluation this dependency is for, so must match the keyword args given for some evaluation.

field metric_level: MetricLevel | None = None#

The level of the metric to depend on, which must be equal to or ‘greater’ than the dependent metric’s level; e.g., a Turn metric can depend on a Thread metric, but not the reverse.

field metric_max_value: float | None = 1.7976931348623157e+308#

Maximum value of the dependency to consider it as satisfied.

field metric_min_value: float | None = -1.7976931348623157e+308#

Minimum value of the dependency to consider it as satisfied.

field metric_name: str | None = None#

Name of the metric dependency. This may be different than function_name if the metric function returns a key/value pair - in which case, this will match the key.

field name: str | None = None#

Name of the dependency function or rubric.

field relative_object_position: int = 0#

The position of the object within the Thread. If 0 (default), this is the metric value for the current object. If -1, this is the metric value for the most recent object before this one.

Constraints:
  • strict = True

  • le = 0

field type: Literal['function', 'rubric'] | None = None#

One of ‘function’ or ‘rubric’ indicating the type of the dependency.

class Config[source]#

Bases: object

extra = 'forbid'#
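
A hedged usage sketch; the dependency name below is a placeholder, not something defined by this schema:

from flexeval.schema.eval_schema import DependsOnItem

# Hypothetical example: require that a function metric named "message_length",
# computed at the Turn level for the immediately preceding object in the Thread,
# produced a value of at least 1.0 before the dependent metric is evaluated.
dependency = DependsOnItem(
    name="message_length",
    type="function",
    metric_level="Turn",
    relative_object_position=-1,   # most recent object before the current one (must be <= 0)
    metric_min_value=1.0,          # satisfied only if the dependency's value is >= 1.0
)
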
pydantic model flexeval.schema.eval_schema.Eval[source]#

Bases: BaseModel

Defines the evaluation that should be executed.

The key fields are metrics and grader_llm.

Show JSON schema
{
   "title": "Eval",
   "description": "Defines the evaluation that should be executed.\n\nThe key fields are :attr:`metrics` and :attr:`grader_llm`.",
   "type": "object",
   "properties": {
      "do_completion": {
         "default": false,
         "description": "Flag to determine if completions should be done in each thread. Set to 'true' if you are testing a new API and want to evaluate the API responses. Set to 'false' (default) if you are evaluating past conversations and do not need to generate new completions.",
         "title": "Do Completion",
         "type": "boolean"
      },
      "name": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "Name of the test suite. Used as metadata only. Does not need to match the key of the entry in the evals.yaml file.",
         "title": "Name"
      },
      "notes": {
         "default": "",
         "description": "Additional notes regarding the configuration. Used as metadata only.",
         "title": "Notes",
         "type": "string"
      },
      "metrics": {
         "$ref": "#/$defs/Metrics",
         "description": "Metrics to use in the evaluation."
      },
      "completion_llm": {
         "anyOf": [
            {
               "$ref": "#/$defs/CompletionLlm"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "Specification of the LLM or API used to perform new completions. Must be defined if `do_completions: true` is set."
      },
      "grader_llm": {
         "anyOf": [
            {
               "$ref": "#/$defs/GraderLlm"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "Specification of the LLM or API used to grade rubrics. Must be defined if any rubric_metrics are specified."
      }
   },
   "$defs": {
      "CompletionLlm": {
         "additionalProperties": false,
         "properties": {
            "function_name": {
               "description": "Completion function defined in `completion_functions.py` or available in the global namespace.",
               "title": "Function Name",
               "type": "string"
            },
            "include_system_prompt": {
               "default": true,
               "title": "Include System Prompt",
               "type": "boolean"
            },
            "kwargs": {
               "additionalProperties": true,
               "description": "Additional arguments that will be passed to the completion function. Must correspond to arguments in the named function.",
               "title": "Kwargs",
               "type": "object"
            }
         },
         "required": [
            "function_name"
         ],
         "title": "CompletionLlm",
         "type": "object"
      },
      "DependsOnItem": {
         "additionalProperties": false,
         "properties": {
            "name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "Name of the dependency function or rubric.",
               "title": "Name"
            },
            "type": {
               "anyOf": [
                  {
                     "enum": [
                        "function",
                        "rubric"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "One of 'function' or 'rubric' indicating the type of the dependency.",
               "title": "Type"
            },
            "kwargs": {
               "anyOf": [
                  {
                     "additionalProperties": true,
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The keyword arguments for the dependency. If provided, used to match which evaluation this dependency is for, so must match the keyword args given for some evaluation.",
               "title": "Kwargs"
            },
            "metric_name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "Name of the metric dependency. This may be different than function_name if the metric function returns a key/value pair - in which case, this will match the key.",
               "title": "Metric Name"
            },
            "metric_level": {
               "anyOf": [
                  {
                     "enum": [
                        "Message",
                        "Turn",
                        "Thread",
                        "ToolCall"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The level of the metric to depend on, which must be equal to or 'greater' than the dependent metric's level. e.g. a Turn can depend on a Thread metric, but not the reverse.",
               "title": "Metric Level"
            },
            "relative_object_position": {
               "default": 0,
               "description": "The position of the object within the Thread. If 0 (default), this is the metric value for the current object. If -1, this is the metric value for the most recent object before this one.",
               "maximum": 0,
               "title": "Relative Object Position",
               "type": "integer"
            },
            "metric_min_value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": -1.7976931348623157e+308,
               "description": "Minimum value of the dependency to consider it as satisfied.",
               "title": "Metric Min Value"
            },
            "metric_max_value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": 1.7976931348623157e+308,
               "description": "Maximum value of the dependency to consider it as satisfied.",
               "title": "Metric Max Value"
            }
         },
         "title": "DependsOnItem",
         "type": "object"
      },
      "FunctionItem": {
         "properties": {
            "name": {
               "description": "The function to call or name of rubric to use to compute this metric.",
               "title": "Name",
               "type": "string"
            },
            "depends_on": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/DependsOnItem"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "List of dependencies that must be satisfied for this metric to be computed.",
               "title": "Depends On"
            },
            "metric_level": {
               "anyOf": [
                  {
                     "enum": [
                        "Message",
                        "Turn",
                        "Thread",
                        "ToolCall"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": "Turn",
               "description": "What level of granularity (ToolCall, Message, Turn, or Thread) this rubric should be applied to",
               "title": "Metric Level"
            },
            "kwargs": {
               "additionalProperties": true,
               "description": "Keyword arguments for the function. Each key must correspond to an argument in the function. Extra keys will cause an error.",
               "title": "Kwargs",
               "type": "object"
            }
         },
         "required": [
            "name"
         ],
         "title": "FunctionItem",
         "type": "object"
      },
      "GraderLlm": {
         "additionalProperties": false,
         "properties": {
            "function_name": {
               "description": "Function defined in `completion_functions.py`. We're not really completing a conversation, but we ARE asking an LLM to provide a response to an input - in this case, the rubric.",
               "title": "Function Name",
               "type": "string"
            },
            "kwargs": {
               "additionalProperties": true,
               "description": "Additional arguments that will be passed to the completion function. Must correspond to arguments in tne named function.",
               "title": "Kwargs",
               "type": "object"
            }
         },
         "required": [
            "function_name"
         ],
         "title": "GraderLlm",
         "type": "object"
      },
      "Metrics": {
         "description": "Defines the metrics to be evaluated.",
         "properties": {
            "function": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/FunctionItem"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "List of function-based metrics to be evaluated.",
               "title": "Function"
            },
            "rubric": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/RubricItem"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "List of rubrics to be evaluated.",
               "title": "Rubric"
            }
         },
         "title": "Metrics",
         "type": "object"
      },
      "RubricItem": {
         "properties": {
            "name": {
               "description": "The function to call or name of rubric to use to compute this metric.",
               "title": "Name",
               "type": "string"
            },
            "depends_on": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/DependsOnItem"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "List of dependencies that must be satisfied for this metric to be computed.",
               "title": "Depends On"
            },
            "metric_level": {
               "anyOf": [
                  {
                     "enum": [
                        "Message",
                        "Turn",
                        "Thread",
                        "ToolCall"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": "Turn",
               "description": "What level of granularity (ToolCall, Message, Turn, or Thread) this rubric should be applied to",
               "title": "Metric Level"
            },
            "kwargs": {
               "anyOf": [
                  {
                     "additionalProperties": true,
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "Keyword arguments for the rubric evaluation.",
               "title": "Kwargs"
            }
         },
         "required": [
            "name"
         ],
         "title": "RubricItem",
         "type": "object"
      }
   },
   "additionalProperties": true
}

Config:
  • extra: str = allow

Fields:
field completion_llm: CompletionLlm | None = None#

Specification of the LLM or API used to perform new completions. Must be defined if do_completion: true is set.

field do_completion: bool = False#

Flag to determine if completions should be done in each thread. Set to ‘true’ if you are testing a new API and want to evaluate the API responses. Set to ‘false’ (default) if you are evaluating past conversations and do not need to generate new completions.

field grader_llm: GraderLlm | None = None#

Specification of the LLM or API used to grade rubrics. Must be defined if any rubric_metrics are specified.

field metrics: Metrics [Optional]#

Metrics to use in the evaluation.

field name: str | None = None#

Name of the test suite. Used as metadata only. Does not need to match the key of the entry in the evals.yaml file.

field notes: str = ''#

Additional notes regarding the configuration. Used as metadata only.

class Config[source]#

Bases: object

extra = 'allow'#
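
To show how the pieces fit together, a sketch of a complete Eval as it might be declared under one key of evals.yaml and then validated into this model. All metric, rubric, and function names are placeholders:

from flexeval.schema.eval_schema import (
    Eval, FunctionItem, GraderLlm, Metrics, RubricItem,
)

# Hypothetical example: evaluate stored conversations only (no new completions),
# so completion_llm is omitted and do_completion stays False.
my_eval = Eval(
    name="demo_eval",
    notes="Grades past conversations only.",
    do_completion=False,
    metrics=Metrics(
        function=[FunctionItem(name="string_length")],                 # placeholder function metric
        rubric=[RubricItem(name="is_helpful", metric_level="Turn")],   # placeholder rubric
    ),
    grader_llm=GraderLlm(
        function_name="my_grader_fn",          # placeholder grader in completion_functions.py
        kwargs={"model": "example-model"},
    ),
)
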
pydantic model flexeval.schema.eval_schema.FunctionItem[source]#

Bases: MetricItem

Show JSON schema
{
   "title": "FunctionItem",
   "type": "object",
   "properties": {
      "name": {
         "description": "The function to call or name of rubric to use to compute this metric.",
         "title": "Name",
         "type": "string"
      },
      "depends_on": {
         "anyOf": [
            {
               "items": {
                  "$ref": "#/$defs/DependsOnItem"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "description": "List of dependencies that must be satisfied for this metric to be computed.",
         "title": "Depends On"
      },
      "metric_level": {
         "anyOf": [
            {
               "enum": [
                  "Message",
                  "Turn",
                  "Thread",
                  "ToolCall"
               ],
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": "Turn",
         "description": "What level of granularity (ToolCall, Message, Turn, or Thread) this rubric should be applied to",
         "title": "Metric Level"
      },
      "kwargs": {
         "additionalProperties": true,
         "description": "Keyword arguments for the function. Each key must correspond to an argument in the function. Extra keys will cause an error.",
         "title": "Kwargs",
         "type": "object"
      }
   },
   "$defs": {
      "DependsOnItem": {
         "additionalProperties": false,
         "properties": {
            "name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "Name of the dependency function or rubric.",
               "title": "Name"
            },
            "type": {
               "anyOf": [
                  {
                     "enum": [
                        "function",
                        "rubric"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "One of 'function' or 'rubric' indicating the type of the dependency.",
               "title": "Type"
            },
            "kwargs": {
               "anyOf": [
                  {
                     "additionalProperties": true,
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The keyword arguments for the dependency. If provided, used to match which evaluation this dependency is for, so must match the keyword args given for some evaluation.",
               "title": "Kwargs"
            },
            "metric_name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "Name of the metric dependency. This may be different than function_name if the metric function returns a key/value pair - in which case, this will match the key.",
               "title": "Metric Name"
            },
            "metric_level": {
               "anyOf": [
                  {
                     "enum": [
                        "Message",
                        "Turn",
                        "Thread",
                        "ToolCall"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The level of the metric to depend on, which must be equal to or 'greater' than the dependent metric's level. e.g. a Turn can depend on a Thread metric, but not the reverse.",
               "title": "Metric Level"
            },
            "relative_object_position": {
               "default": 0,
               "description": "The position of the object within the Thread. If 0 (default), this is the metric value for the current object. If -1, this is the metric value for the most recent object before this one.",
               "maximum": 0,
               "title": "Relative Object Position",
               "type": "integer"
            },
            "metric_min_value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": -1.7976931348623157e+308,
               "description": "Minimum value of the dependency to consider it as satisfied.",
               "title": "Metric Min Value"
            },
            "metric_max_value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": 1.7976931348623157e+308,
               "description": "Maximum value of the dependency to consider it as satisfied.",
               "title": "Metric Max Value"
            }
         },
         "title": "DependsOnItem",
         "type": "object"
      }
   },
   "required": [
      "name"
   ]
}

Fields:
  • depends_on (List[flexeval.schema.eval_schema.DependsOnItem] | None)

  • kwargs (dict)

  • metric_level (Literal['Message', 'Turn', 'Thread', 'ToolCall'] | None)

  • name (str)

field kwargs: schema_utils.OptionalDict [Optional]#

Keyword arguments for the function. Each key must correspond to an argument in the function. Extra keys will cause an error.

Constraints:
  • func = convert_none_or_empty_string_to_dict

  • json_schema_input_type = PydanticUndefined
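
A hedged sketch of a function metric; the function name and keyword argument are placeholders:

from flexeval.schema.eval_schema import FunctionItem

# Hypothetical example: a Message-level function metric. Every kwargs key must
# correspond to an argument of the named function; extra keys cause an error.
item = FunctionItem(
    name="contains_keyword",
    metric_level="Message",
    kwargs={"keyword": "refund"},
)

Judging by the constraint above, kwargs given as None (or an empty string in YAML) appears to be coerced to an empty dict by convert_none_or_empty_string_to_dict, so the field can simply be omitted.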

pydantic model flexeval.schema.eval_schema.GraderLlm[source]#

Bases: BaseModel

Show JSON schema
{
   "title": "GraderLlm",
   "type": "object",
   "properties": {
      "function_name": {
         "description": "Function defined in `completion_functions.py`. We're not really completing a conversation, but we ARE asking an LLM to provide a response to an input - in this case, the rubric.",
         "title": "Function Name",
         "type": "string"
      },
      "kwargs": {
         "additionalProperties": true,
         "description": "Additional arguments that will be passed to the completion function. Must correspond to arguments in tne named function.",
         "title": "Kwargs",
         "type": "object"
      }
   },
   "additionalProperties": false,
   "required": [
      "function_name"
   ]
}

Config:
  • extra: str = forbid

Fields:
field function_name: str [Required]#

Function defined in completion_functions.py. We’re not really completing a conversation, but we ARE asking an LLM to provide a response to an input - in this case, the rubric.

field kwargs: Dict[str, Any] [Optional]#

Additional arguments that will be passed to the completion function. Must correspond to arguments in the named function.

class Config[source]#

Bases: object

extra = 'forbid'#
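
A brief sketch; the grader function name and kwargs are placeholders for an entry in completion_functions.py:

from flexeval.schema.eval_schema import GraderLlm

# Hypothetical example: the grader is specified like a completion function, but it is
# handed the rendered rubric rather than a conversation to continue.
grader_llm = GraderLlm(
    function_name="my_grader_fn",
    kwargs={"model": "example-model", "temperature": 0.0},
)
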
pydantic model flexeval.schema.eval_schema.MetricItem[source]#

Bases: BaseModel

Show JSON schema
{
   "title": "MetricItem",
   "type": "object",
   "properties": {
      "name": {
         "description": "The function to call or name of rubric to use to compute this metric.",
         "title": "Name",
         "type": "string"
      },
      "depends_on": {
         "anyOf": [
            {
               "items": {
                  "$ref": "#/$defs/DependsOnItem"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "description": "List of dependencies that must be satisfied for this metric to be computed.",
         "title": "Depends On"
      },
      "metric_level": {
         "anyOf": [
            {
               "enum": [
                  "Message",
                  "Turn",
                  "Thread",
                  "ToolCall"
               ],
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": "Turn",
         "description": "What level of granularity (ToolCall, Message, Turn, or Thread) this rubric should be applied to",
         "title": "Metric Level"
      }
   },
   "$defs": {
      "DependsOnItem": {
         "additionalProperties": false,
         "properties": {
            "name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "Name of the dependency function or rubric.",
               "title": "Name"
            },
            "type": {
               "anyOf": [
                  {
                     "enum": [
                        "function",
                        "rubric"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "One of 'function' or 'rubric' indicating the type of the dependency.",
               "title": "Type"
            },
            "kwargs": {
               "anyOf": [
                  {
                     "additionalProperties": true,
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The keyword arguments for the dependency. If provided, used to match which evaluation this dependency is for, so must match the keyword args given for some evaluation.",
               "title": "Kwargs"
            },
            "metric_name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "Name of the metric dependency. This may be different than function_name if the metric function returns a key/value pair - in which case, this will match the key.",
               "title": "Metric Name"
            },
            "metric_level": {
               "anyOf": [
                  {
                     "enum": [
                        "Message",
                        "Turn",
                        "Thread",
                        "ToolCall"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The level of the metric to depend on, which must be equal to or 'greater' than the dependent metric's level. e.g. a Turn can depend on a Thread metric, but not the reverse.",
               "title": "Metric Level"
            },
            "relative_object_position": {
               "default": 0,
               "description": "The position of the object within the Thread. If 0 (default), this is the metric value for the current object. If -1, this is the metric value for the most recent object before this one.",
               "maximum": 0,
               "title": "Relative Object Position",
               "type": "integer"
            },
            "metric_min_value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": -1.7976931348623157e+308,
               "description": "Minimum value of the dependency to consider it as satisfied.",
               "title": "Metric Min Value"
            },
            "metric_max_value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": 1.7976931348623157e+308,
               "description": "Maximum value of the dependency to consider it as satisfied.",
               "title": "Metric Max Value"
            }
         },
         "title": "DependsOnItem",
         "type": "object"
      }
   },
   "required": [
      "name"
   ]
}

Fields:
field depends_on: List[DependsOnItem] | None [Optional]#

List of dependencies that must be satisfied for this metric to be computed.

field metric_level: MetricLevel | None = 'Turn'#

What level of granularity (ToolCall, Message, Turn, or Thread) this rubric should be applied to

field name: str [Required]#

The function to call or name of rubric to use to compute this metric.
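
A sketch using only the shared base fields (FunctionItem and RubricItem add to these); the metric and dependency names are placeholders:

from flexeval.schema.eval_schema import DependsOnItem, MetricItem

# Hypothetical example: a Thread-level metric that is only computed once a function
# metric named "completed" has been computed for the same Thread.
item = MetricItem(
    name="resolution_quality",
    metric_level="Thread",
    depends_on=[DependsOnItem(name="completed", type="function", metric_level="Thread")],
)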

pydantic model flexeval.schema.eval_schema.Metrics[source]#

Bases: BaseModel

Defines the metrics to be evaluated.

Show JSON schema
{
   "title": "Metrics",
   "description": "Defines the metrics to be evaluated.",
   "type": "object",
   "properties": {
      "function": {
         "anyOf": [
            {
               "items": {
                  "$ref": "#/$defs/FunctionItem"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "List of function-based metrics to be evaluated.",
         "title": "Function"
      },
      "rubric": {
         "anyOf": [
            {
               "items": {
                  "$ref": "#/$defs/RubricItem"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "List of rubrics to be evaluated.",
         "title": "Rubric"
      }
   },
   "$defs": {
      "DependsOnItem": {
         "additionalProperties": false,
         "properties": {
            "name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "Name of the dependency function or rubric.",
               "title": "Name"
            },
            "type": {
               "anyOf": [
                  {
                     "enum": [
                        "function",
                        "rubric"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "One of 'function' or 'rubric' indicating the type of the dependency.",
               "title": "Type"
            },
            "kwargs": {
               "anyOf": [
                  {
                     "additionalProperties": true,
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The keyword arguments for the dependency. If provided, used to match which evaluation this dependency is for, so must match the keyword args given for some evaluation.",
               "title": "Kwargs"
            },
            "metric_name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "Name of the metric dependency. This may be different than function_name if the metric function returns a key/value pair - in which case, this will match the key.",
               "title": "Metric Name"
            },
            "metric_level": {
               "anyOf": [
                  {
                     "enum": [
                        "Message",
                        "Turn",
                        "Thread",
                        "ToolCall"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The level of the metric to depend on, which must be equal to or 'greater' than the dependent metric's level. e.g. a Turn can depend on a Thread metric, but not the reverse.",
               "title": "Metric Level"
            },
            "relative_object_position": {
               "default": 0,
               "description": "The position of the object within the Thread. If 0 (default), this is the metric value for the current object. If -1, this is the metric value for the most recent object before this one.",
               "maximum": 0,
               "title": "Relative Object Position",
               "type": "integer"
            },
            "metric_min_value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": -1.7976931348623157e+308,
               "description": "Minimum value of the dependency to consider it as satisfied.",
               "title": "Metric Min Value"
            },
            "metric_max_value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": 1.7976931348623157e+308,
               "description": "Maximum value of the dependency to consider it as satisfied.",
               "title": "Metric Max Value"
            }
         },
         "title": "DependsOnItem",
         "type": "object"
      },
      "FunctionItem": {
         "properties": {
            "name": {
               "description": "The function to call or name of rubric to use to compute this metric.",
               "title": "Name",
               "type": "string"
            },
            "depends_on": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/DependsOnItem"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "List of dependencies that must be satisfied for this metric to be computed.",
               "title": "Depends On"
            },
            "metric_level": {
               "anyOf": [
                  {
                     "enum": [
                        "Message",
                        "Turn",
                        "Thread",
                        "ToolCall"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": "Turn",
               "description": "What level of granularity (ToolCall, Message, Turn, or Thread) this rubric should be applied to",
               "title": "Metric Level"
            },
            "kwargs": {
               "additionalProperties": true,
               "description": "Keyword arguments for the function. Each key must correspond to an argument in the function. Extra keys will cause an error.",
               "title": "Kwargs",
               "type": "object"
            }
         },
         "required": [
            "name"
         ],
         "title": "FunctionItem",
         "type": "object"
      },
      "RubricItem": {
         "properties": {
            "name": {
               "description": "The function to call or name of rubric to use to compute this metric.",
               "title": "Name",
               "type": "string"
            },
            "depends_on": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/DependsOnItem"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "List of dependencies that must be satisfied for this metric to be computed.",
               "title": "Depends On"
            },
            "metric_level": {
               "anyOf": [
                  {
                     "enum": [
                        "Message",
                        "Turn",
                        "Thread",
                        "ToolCall"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": "Turn",
               "description": "What level of granularity (ToolCall, Message, Turn, or Thread) this rubric should be applied to",
               "title": "Metric Level"
            },
            "kwargs": {
               "anyOf": [
                  {
                     "additionalProperties": true,
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "Keyword arguments for the rubric evaluation.",
               "title": "Kwargs"
            }
         },
         "required": [
            "name"
         ],
         "title": "RubricItem",
         "type": "object"
      }
   }
}

Fields:
field function: List[FunctionItem] | None = None#

List of function-based metrics to be evaluated.

field rubric: List[RubricItem] | None = None#

List of rubrics to be evaluated.
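
A short sketch; either list may be omitted, and the metric names are placeholders:

from flexeval.schema.eval_schema import FunctionItem, Metrics, RubricItem

# Hypothetical example: two function metrics and one rubric in a single Metrics block.
metrics = Metrics(
    function=[
        FunctionItem(name="string_length"),
        FunctionItem(name="contains_keyword", kwargs={"keyword": "refund"}),
    ],
    rubric=[RubricItem(name="is_helpful")],
)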

pydantic model flexeval.schema.eval_schema.RubricItem[source]#

Bases: MetricItem

Show JSON schema
{
   "title": "RubricItem",
   "type": "object",
   "properties": {
      "name": {
         "description": "The function to call or name of rubric to use to compute this metric.",
         "title": "Name",
         "type": "string"
      },
      "depends_on": {
         "anyOf": [
            {
               "items": {
                  "$ref": "#/$defs/DependsOnItem"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "description": "List of dependencies that must be satisfied for this metric to be computed.",
         "title": "Depends On"
      },
      "metric_level": {
         "anyOf": [
            {
               "enum": [
                  "Message",
                  "Turn",
                  "Thread",
                  "ToolCall"
               ],
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": "Turn",
         "description": "What level of granularity (ToolCall, Message, Turn, or Thread) this rubric should be applied to",
         "title": "Metric Level"
      },
      "kwargs": {
         "anyOf": [
            {
               "additionalProperties": true,
               "type": "object"
            },
            {
               "type": "null"
            }
         ],
         "description": "Keyword arguments for the rubric evaluation.",
         "title": "Kwargs"
      }
   },
   "$defs": {
      "DependsOnItem": {
         "additionalProperties": false,
         "properties": {
            "name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "Name of the dependency function or rubric.",
               "title": "Name"
            },
            "type": {
               "anyOf": [
                  {
                     "enum": [
                        "function",
                        "rubric"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "One of 'function' or 'rubric' indicating the type of the dependency.",
               "title": "Type"
            },
            "kwargs": {
               "anyOf": [
                  {
                     "additionalProperties": true,
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The keyword arguments for the dependency. If provided, used to match which evaluation this dependency is for, so must match the keyword args given for some evaluation.",
               "title": "Kwargs"
            },
            "metric_name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "Name of the metric dependency. This may be different than function_name if the metric function returns a key/value pair - in which case, this will match the key.",
               "title": "Metric Name"
            },
            "metric_level": {
               "anyOf": [
                  {
                     "enum": [
                        "Message",
                        "Turn",
                        "Thread",
                        "ToolCall"
                     ],
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The level of the metric to depend on, which must be equal to or 'greater' than the dependent metric's level. e.g. a Turn can depend on a Thread metric, but not the reverse.",
               "title": "Metric Level"
            },
            "relative_object_position": {
               "default": 0,
               "description": "The position of the object within the Thread. If 0 (default), this is the metric value for the current object. If -1, this is the metric value for the most recent object before this one.",
               "maximum": 0,
               "title": "Relative Object Position",
               "type": "integer"
            },
            "metric_min_value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": -1.7976931348623157e+308,
               "description": "Minimum value of the dependency to consider it as satisfied.",
               "title": "Metric Min Value"
            },
            "metric_max_value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": 1.7976931348623157e+308,
               "description": "Maximum value of the dependency to consider it as satisfied.",
               "title": "Metric Max Value"
            }
         },
         "title": "DependsOnItem",
         "type": "object"
      }
   },
   "required": [
      "name"
   ]
}

Fields:
field kwargs: Dict[str, Any] | None [Optional]#

Keyword arguments for the rubric evaluation.
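
A closing sketch; the rubric name is a placeholder for an entry in the configured rubric collection:

from flexeval.schema.eval_schema import RubricItem

# Hypothetical example: a rubric graded at the default Turn level; kwargs is optional
# and is forwarded to the rubric evaluation.
rubric = RubricItem(
    name="is_helpful",
    metric_level="Turn",
)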