# Source code for flexeval.classes.eval_runner

import logging
from datetime import datetime
from pathlib import Path

import dotenv
from peewee import SqliteDatabase

from flexeval import db_utils, dependency_graph
from flexeval.schema import EvalRun

# Module-level logger; used in add_file_logger before/instead of the
# instance-level "FlexEval" logger (e.g. when no logs_path is configured).
logger = logging.getLogger(__name__)


class EvalRunner:
    """Class for maintaining database connection, logs, and run state

    Does not need to write anything to database itself.
    """

    # Class-level annotation only; no assignment is visible in this class,
    # so presumably it is populated elsewhere — TODO confirm.
    database: SqliteDatabase

    def __init__(
        self,
        evalrun: EvalRun,
    ):
        """Store the run spec and perform all setup steps in order.

        Order matters: logging is configured first so every later step can
        log, then .env loading, then database init, then settings parsing.

        Args:
            evalrun (EvalRun): The evaluation-run specification driving setup.
        """
        self.evalrun: EvalRun = evalrun
        self.initialize_logger()
        self.add_file_logger()
        self.load_env()
        self.initialize_database()
        self.load_evaluation_settings()

    def initialize_logger(self, add_stream_handler: bool = False) -> None:
        """Configure the logger for this class.

        Args:
            add_stream_handler (bool, optional): If True, will add a stream
                handler at the INFO level. Defaults to False.
        """
        # NOTE(review): this grabs the shared "FlexEval" logger, so the
        # configuration below is process-global, not per-instance.
        self.logger = logging.getLogger("FlexEval")
        self.logger.setLevel(logging.DEBUG)
        if add_stream_handler:
            # TODO this stream handler logic should probably be removed
            # Create a console handler for lower level messages to output to console
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            # Create a formatter and set it for the handlers
            formatter = logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            )
            ch.setFormatter(formatter)
            # Add the handlers to the logger
            self.logger.addHandler(ch)

    def add_file_logger(self) -> None:
        """Attach a DEBUG-level file handler writing to a date-stamped file
        inside config.logs_path; no-op when no logs_path is configured.

        Raises:
            ValueError: If config.logs_path points at an existing regular file.
        """
        if self.evalrun.config.logs_path is None:
            logger.info("No log path specified, so not doing any file logging.")
            return
        logs_path = self.evalrun.config.logs_path
        if logs_path.is_file():
            raise ValueError(
                f"Config logs_path expects a directory, but was set to existing file '{logs_path}'."
            )
        elif not logs_path.exists():
            # A path suffix suggests the user meant a file; warn, but still
            # create the path as a directory.
            if logs_path.suffix != "":
                logger.warning(
                    f"Creating logs_path '{logs_path}' as a directory, despite apparent suffix '{logs_path.suffix}'."
                )
            logs_path.mkdir(parents=True, exist_ok=True)
        # Get the current date to use in the filename
        current_date = datetime.now().strftime("%Y-%m-%d")
        # Create a file handler that logs debug and higher level messages to a date-based file
        log_filepath = logs_path / f"{current_date}_{self.evalrun.eval.name}.log"
        fh = logging.FileHandler(log_filepath)
        fh.setLevel(logging.DEBUG)
        # Create a formatter and set it for the handlers
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)
        self.logger.info(f"Started logging to log file '{log_filepath}'.")

    def load_env(self) -> None:
        """Load environment variables from the configured .env file, if any.

        Raises:
            ValueError: If env_filepath is set but no file exists there.
        """
        env_filepath = self.evalrun.config.env_filepath
        # Treat both None and a whitespace-only path as "no .env configured".
        if env_filepath is not None and str(env_filepath).strip() != "":
            if not env_filepath.exists():
                raise ValueError(
                    f"Environment file not present at configured path '{env_filepath}'."
                )
            dotenv.load_dotenv(env_filepath, verbose=True)
            self.logger.debug(f"Finished loading .env file from '{env_filepath}'.")
        else:
            self.logger.debug(
                f"Skipping .env file loading as config env_filepath is '{env_filepath}'."
            )

    def get_database_path(self) -> Path:
        """Return the path of this run's database file."""
        return self.evalrun.database_path

    def initialize_database(self) -> None:
        """Initializes database and tables.

        If config.clear_tables, then current contents of tables are dropped."""
        db_utils.initialize_database(
            self.evalrun.database_path, clear_tables=self.evalrun.config.clear_tables
        )

    def load_evaluation_settings(self) -> None:
        """This function parses our eval suite and puts it in the data structure
        we'll need for easy use at run-time
        """
        # if the current eval has a 'config' entry, overwrite configuration options with its entries
        if (
            self.evalrun.eval.model_extra is not None
            and len(self.evalrun.eval.model_extra) > 0
        ):
            model_extra = self.evalrun.eval.model_extra
            self.logger.debug(
                f"Extra configuration keys provided in eval: {list(model_extra.keys())}"
            )
            for field_name in model_extra.keys():
                # Only keys that already exist on the config object are
                # applied; unknown keys are logged and dropped.
                if hasattr(self.evalrun.config, field_name):
                    old_value = getattr(self.evalrun.config, field_name)
                    new_value = model_extra[field_name]
                    self.logger.info(
                        f"Updating configuration setting: {field_name}={new_value} (old={old_value})"
                    )
                    setattr(self.evalrun.config, field_name, new_value)
                else:
                    self.logger.warning(
                        f"Unknown configuration field {field_name} was ignored."
                    )
        # TODO verify that applying defaults is done solely by pydantic and this step is no longer necessary
        # apply defaults to the schema
        # self.eval = apply_defaults(schema=target_schema, data=self.eval)

        # convert into graph structure
        self.metrics_graph_ordered_list = dependency_graph.create_metrics_graph(
            self.evalrun.eval.metrics
        )

        # validate: completion function defined
        if len(self.metrics_graph_ordered_list) > 0:
            if self.evalrun.eval.grader_llm is None:
                self.logger.warning(
                    f"{len(self.metrics_graph_ordered_list)} metrics defined, but no grader LLM defined."
                )

    def shutdown_logging(self) -> None:
        """Close and detach every handler on this runner's logger."""
        # remove logging handler so we don't get repeat logs if we call run() twice
        # (iterate over a copy since removeHandler mutates the handler list)
        handlers = self.logger.handlers[:]
        for handler in handlers:
            handler.close()
            self.logger.removeHandler(handler)