# Source code for flexeval.schema.evalrun_schema

"""The top-level :class:`~flexeval.schema.evalrun_schema.EvalRun` schema and associated sub-schema."""

import enum
from pathlib import Path
from typing import Annotated, Callable, Iterable, Literal, Union

from annotated_types import Len
from pydantic import BaseModel, Discriminator, Field, FilePath, Tag

from flexeval.configuration import function_metrics
from flexeval.schema import config_schema, eval_schema, rubric_schema, schema_utils


class DataSource(BaseModel):
    """Represents a source of data that can be used in evaluations."""

    # Both fields are optional, free-form metadata; subclasses add the fields
    # that actually locate or carry the data.
    name: str | None = Field(
        None, description="Used as metadata. No uniqueness requirement."
    )
    notes: str | None = Field(
        None, description="Used as metadata; put whatever you want here."
    )
class NamedDataSource(DataSource):
    """Look up a previously loaded DataSource by name. Must have a unique name."""

    # Literal tag that distinguishes this variant from the other data sources.
    type: Literal["named"] = "named"
    # Overrides the optional ``name`` inherited from DataSource: no default is
    # given here, so a name must be supplied.
    name: str = Field(description="The name to match on.")
class IterableDataSource(DataSource):
    """Iterable of data items."""

    # Literal tag that distinguishes this variant from the other data sources.
    type: Literal["iterable"] = "iterable"
    # Defaults to a fresh empty list per instance (default_factory), so
    # instances never share a mutable default.
    contents: Iterable = Field(
        default_factory=list,
        description="Iterable of data items. For now, each item must be a dictionary with role and content keys.",
    )
class FileFormatEnum(str, enum.Enum):
    """File formats selectable for a file-backed data source.

    Members also subclass ``str``, so each member compares equal to its
    plain string value.
    """

    jsonl = "jsonl"
    langgraph_sqlite = "langgraph_sqlite"
class FileDataSource(DataSource):
    """File to be used as a data source."""

    # Literal tag that distinguishes this variant from the other data sources.
    type: Literal["file"] = "file"
    # TODO in the future, we could use cloudpathlib to support cloud paths
    # The description defers to ``format`` rather than claiming JSONL only,
    # since ``format`` also accepts ``langgraph_sqlite``.
    path: FilePath = Field(
        description="Absolute or relative path to data file. The file must be in the format given by `format`; for jsonl, one conversation per line."
    )
    format: FileFormatEnum = Field(
        FileFormatEnum.jsonl, description="Format of the data file. Default: JSONL"
    )
# Union of every concrete data-source model. ``Discriminator("type")`` names
# the ``type`` field as the discriminator; each member declares a distinct
# ``Literal`` value for that field, mirrored here by its ``Tag``.
DataSourceType = Annotated[
    Union[
        Annotated[NamedDataSource, Tag("named")],
        Annotated[FileDataSource, Tag("file")],
        Annotated[IterableDataSource, Tag("iterable")],
    ],
    Discriminator("type"),
]
class FunctionsCollection(BaseModel):
    """Collection of functions that can be used as :class:`~flexeval.schema.eval_schema.FunctionItem` s."""

    # Defaults to a fresh empty list per instance (default_factory).
    functions: list[Callable] = Field(
        default_factory=list,
        description="Callables that can be used as functions for evaluation.",
    )
def get_default_rubrics() -> list[Path | rubric_schema.RubricsCollection]:
    """Utility function to retrieve the default rubric collection.

    Returns:
        A new single-element list wrapping the result of
        ``flexeval.rubric.get_default_rubric_collection()``.
    """
    # Imported at call time rather than at module import.
    # NOTE(review): presumably this avoids a circular import — confirm.
    from flexeval import rubric

    return [rubric.get_default_rubric_collection()]
def get_default_function_metrics() -> list[
    Path | FunctionsCollection | schema_utils.ModuleType
]:
    """Utility function to retrieve the default function collection.

    Returns:
        A new single-element list containing the
        ``flexeval.configuration.function_metrics`` module.
    """
    return [function_metrics]
class EvalRun(BaseModel):
    """EvalRun defines the schema that FlexEval expects.

    At a minimum, you need to provide a set of input data sources and an :class:`~flexeval.schema.eval_schema.Eval`.

    You can evaluate an EvalRun using :func:`~flexeval.runner.run`.

    Read more in the :ref:`user_guide`."""

    # Required; ``Len(min_length=1)`` rejects an empty list.
    data_sources: Annotated[list[DataSourceType], Len(min_length=1)] = Field(
        description="List of data sources.",
    )
    database_path: Path = Field(
        Path("flexeval/results/results.db"),
        description="Output database path.",
    )
    # Required; no default is provided.
    eval: eval_schema.Eval = Field(
        description="The evaluation to apply to the data sources."
    )
    config: config_schema.Config = Field(
        default_factory=config_schema.Config, description="Configuration details."
    )
    # Defaults to the list produced by get_default_rubrics().
    rubric_paths: list[Path | rubric_schema.RubricsCollection] = Field(
        default_factory=get_default_rubrics,
        description="Additional sources for rubrics. If a Path, should be a YAML file in the expected format.",
    )
    # Defaults to the list produced by get_default_function_metrics().
    function_modules: list[FilePath | FunctionsCollection | schema_utils.ModuleType] = (
        Field(
            default_factory=get_default_function_metrics,
            description="Additional sources for functions.",
        )
    )
    add_default_functions: bool = Field(
        True,
        description="If the default functions at :mod:`flexeval.configuration.function_metrics` should be made available.",
    )