Source code for flexeval.schema.evalrun_schema

"""The top-level :class:`~flexeval.schema.evalrun_schema.EvalRun` schema and associated sub-schema."""

from pathlib import Path
from typing import Annotated, Callable, Iterable, Literal

from annotated_types import Len
from pydantic import BaseModel, Field, FilePath

from flexeval.configuration import function_metrics
from flexeval.schema import config_schema, eval_schema, rubric_schema, schema_utils


class DataSource(BaseModel):
    # TODO support a more generic DataSource interface
    # For now, we need to use FileDataSource because we pass the JSONL paths along.
    name: str | None = Field(None, description="Optional human-readable name for the data source.")
    notes: str | None = Field(None, description="Optional free-text notes about the data source.")

class IterableDataSource(DataSource):
    """Not yet implemented."""

    contents: Iterable = Field(
        default_factory=list,
        description="Iterable of data items, presumably in the jsonl format (for now).",
    )

class FileDataSource(DataSource):
    """File to be used as a data source."""

    # TODO in the future, we could use cloudpathlib to support cloud paths
    path: FilePath = Field(
        description="Absolute or relative path to data file. Each file must be in jsonl format, with one conversation per line."
    )
    format: Literal["jsonl"] = Field("jsonl", description="Format of the data file.")

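# A minimal usage sketch (illustrative, not part of the module): constructing a
# FileDataSource for a local JSONL file. The path "data/conversations.jsonl" is
# hypothetical; note that pydantic's FilePath validation requires the file to
# exist at construction time.
#
#     source = FileDataSource(
#         name="demo conversations",
#         path="data/conversations.jsonl",
#     )
#     assert source.format == "jsonl"
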
class FunctionsCollection(BaseModel):
    r"""Collection of functions that can be used as :class:`~flexeval.schema.eval_schema.FunctionItem`\s."""

    functions: list[Callable] = Field(
        default_factory=list,
        description="Callables that can be used as functions for evaluation.",
    )

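# A minimal sketch (illustrative; the metric below is hypothetical): wrapping a
# plain callable in a FunctionsCollection so it can be supplied as a function
# source on an EvalRun.
#
#     def matches_target(response: str, target: str) -> bool:
#         """Hypothetical metric: exact string match against a target."""
#         return response == target
#
#     collection = FunctionsCollection(functions=[matches_target])
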
def get_default_rubrics() -> list[Path | rubric_schema.RubricsCollection]:
    """Utility function to retrieve the default rubric collection."""
    from flexeval import rubric

    return [rubric.get_default_rubric_collection()]

def get_default_function_metrics() -> list[
    Path | FunctionsCollection | schema_utils.ModuleType
]:
    """Utility function to retrieve the default function collection."""
    return [function_metrics]

class EvalRun(BaseModel):
    """EvalRun defines the schema that FlexEval expects.

    At a minimum, you need to provide a set of input data sources and an
    :class:`~flexeval.schema.eval_schema.Eval`. You can evaluate an EvalRun
    using :func:`~flexeval.runner.run`.

    Read more in the :ref:`user_guide`.
    """

    data_sources: Annotated[list[FileDataSource], Len(min_length=1)] = Field(
        description="List of data sources.",
    )
    database_path: Path = Field(
        Path("flexeval/results/results.db"),
        description="Output database path.",
    )
    eval: eval_schema.Eval = Field(
        description="The evaluation to apply to the data sources."
    )
    config: config_schema.Config = Field(
        default_factory=config_schema.Config,
        description="Configuration details.",
    )
    rubric_paths: list[Path | rubric_schema.RubricsCollection] = Field(
        default_factory=get_default_rubrics,
        description="Additional sources for rubrics. If a Path, should be a YAML file in the expected format.",
    )
    function_modules: list[FilePath | FunctionsCollection | schema_utils.ModuleType] = Field(
        default_factory=get_default_function_metrics,
        description="Additional sources for functions.",
    )
    add_default_functions: bool = Field(
        True,
        description="Whether the default functions at :mod:`flexeval.configuration.function_metrics` should be made available.",
    )
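

# A minimal sketch of assembling an EvalRun (hypothetical values throughout;
# the data path is illustrative, and the Eval construction is elided because
# its fields are defined in eval_schema.Eval, not here):
#
#     run = EvalRun(
#         data_sources=[FileDataSource(path="data/conversations.jsonl")],
#         eval=eval_schema.Eval(...),  # fill in per eval_schema.Eval
#         database_path=Path("results/demo.db"),
#     )
#
# The run can then be evaluated with :func:`~flexeval.runner.run`.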