Source code for flexeval.schema.evalrun_schema
"""The top-level :class:`~flexeval.schema.evalrun_schema.EvalRun` schema and associated sub-schema."""
from pathlib import Path
from typing import Annotated, Callable, Iterable, Literal
from annotated_types import Len
from pydantic import BaseModel, Field, FilePath
from flexeval.configuration import function_metrics
from flexeval.schema import config_schema, eval_schema, rubric_schema, schema_utils


class DataSource(BaseModel):
    """Base class for data sources used by an :class:`EvalRun`."""

    # TODO support more generic DataSource interface
    # for now, we need to use FileDataSource because we pass the JSONL paths along
    name: str | None = Field(None, description="Optional name for the data source.")
    notes: str | None = Field(None, description="Optional notes about the data source.")


class IterableDataSource(DataSource):
    """Not yet implemented."""

    contents: Iterable = Field(
        default_factory=list,
        description="Iterable of data items, presumably in the jsonl format (for now).",
    )


class FileDataSource(DataSource):
    """File to be used as a data source."""

    # TODO in the future, we could use cloudpathlib to support cloud paths
    path: FilePath = Field(
        description="Absolute or relative path to data file. Each file must be in jsonl format, with one conversation per line."
    )
    format: Literal["jsonl"] = Field("jsonl", description="Format of the data file.")
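

# Example (illustrative sketch, not part of the module): constructing a
# FileDataSource for a local JSONL file. The FilePath type requires the file
# to exist when the model is validated; "data/conversations.jsonl" is a
# hypothetical path used only for this example.
#
#     source = FileDataSource(path="data/conversations.jsonl", format="jsonl")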


class FunctionsCollection(BaseModel):
    """Collection of functions that can be used as :class:`~flexeval.schema.eval_schema.FunctionItem`\s."""

    functions: list[Callable] = Field(
        default_factory=list,
        description="Callables that can be used as functions for evaluation.",
    )


def get_default_rubrics() -> list[Path | rubric_schema.RubricsCollection]:
    """Utility function to retrieve the default rubric collection."""
    from flexeval import rubric

    return [rubric.get_default_rubric_collection()]


def get_default_function_metrics() -> list[
    Path | FunctionsCollection | schema_utils.ModuleType
]:
    """Utility function to retrieve the default function collection."""
    return [function_metrics]


class EvalRun(BaseModel):
    """EvalRun defines the schema that FlexEval expects.

    At a minimum, you need to provide a set of input data sources and an
    :class:`~flexeval.schema.eval_schema.Eval`. You can evaluate an EvalRun
    using :func:`~flexeval.runner.run`. Read more in the :ref:`user_guide`.
    """

    data_sources: Annotated[list[FileDataSource], Len(min_length=1)] = Field(
        description="List of data sources.",
    )
    database_path: Path = Field(
        Path("flexeval/results/results.db"),
        description="Output database path.",
    )
    eval: eval_schema.Eval = Field(
        description="The evaluation to apply to the data sources."
    )
    config: config_schema.Config = Field(
        default_factory=config_schema.Config, description="Configuration details."
    )
    rubric_paths: list[Path | rubric_schema.RubricsCollection] = Field(
        default_factory=get_default_rubrics,
        description="Additional sources for rubrics. If a Path, should be a YAML file in the expected format.",
    )
    function_modules: list[FilePath | FunctionsCollection | schema_utils.ModuleType] = Field(
        default_factory=get_default_function_metrics,
        description="Additional sources for functions.",
    )
    add_default_functions: bool = Field(
        True,
        description="Whether the default functions in :mod:`flexeval.configuration.function_metrics` should be made available.",
    )