# Source code for flexeval.classes.eval_runner

import logging
from datetime import datetime
from pathlib import Path

import dotenv
from peewee import SqliteDatabase

from flexeval import db_utils, dependency_graph
from flexeval.schema import EvalRun

# Module-level logger; used in add_file_logger before/instead of the
# instance-level "FlexEval" logger (e.g. when no logs_path is configured).
logger = logging.getLogger(__name__)


class EvalRunner:
    """Class for maintaining database connection, logs, and run state

    Does not need to write anything to database itself.
    """

    # Class-level annotation only; no assignment is visible in this class,
    # so presumably it is populated elsewhere — TODO confirm.
    database: SqliteDatabase

    def __init__(
        self,
        evalrun: EvalRun,
    ):
        """Store the run spec and perform all setup steps in order.

        Order matters: logging is configured first so every later step can
        log, then .env loading, then database init, then settings parsing.

        Args:
            evalrun (EvalRun): The evaluation-run specification driving setup.
        """
        self.evalrun: EvalRun = evalrun
        self.initialize_logger()
        self.add_file_logger()
        self.load_env()
        self.initialize_database()
        self.load_evaluation_settings()

    def initialize_logger(self, add_stream_handler: bool = False) -> None:
        """Configure the logger for this class.

        Args:
            add_stream_handler (bool, optional): If True, will add a stream
                handler at the INFO level. Defaults to False.
        """
        # NOTE(review): this grabs the shared "FlexEval" logger, so the
        # configuration below is process-global, not per-instance.
        self.logger = logging.getLogger("FlexEval")
        self.logger.setLevel(logging.DEBUG)
        if add_stream_handler:
            # TODO this stream handler logic should probably be removed
            # Create a console handler for lower level messages to output to console
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            # Create a formatter and set it for the handlers
            formatter = logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            )
            ch.setFormatter(formatter)
            # Add the handlers to the logger
            self.logger.addHandler(ch)

    def add_file_logger(self) -> None:
        """Attach a DEBUG-level file handler writing to a date-stamped file
        inside config.logs_path; no-op when no logs_path is configured.

        Raises:
            ValueError: If config.logs_path points at an existing regular file.
        """
        if self.evalrun.config.logs_path is None:
            logger.info("No log path specified, so not doing any file logging.")
            return
        logs_path = self.evalrun.config.logs_path
        if logs_path.is_file():
            raise ValueError(
                f"Config logs_path expects a directory, but was set to existing file '{logs_path}'."
            )
        elif not logs_path.exists():
            # A path suffix suggests the user meant a file; warn, but still
            # create the path as a directory.
            if logs_path.suffix != "":
                logger.warning(
                    f"Creating logs_path '{logs_path}' as a directory, despite apparent suffix '{logs_path.suffix}'."
                )
            logs_path.mkdir(parents=True, exist_ok=True)
        # Get the current date to use in the filename
        current_date = datetime.now().strftime("%Y-%m-%d")
        # Create a file handler that logs debug and higher level messages to a date-based file
        log_filepath = logs_path / f"{current_date}_{self.evalrun.eval.name}.log"
        fh = logging.FileHandler(log_filepath)
        fh.setLevel(logging.DEBUG)
        # Create a formatter and set it for the handlers
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)
        self.logger.info(f"Started logging to log file '{log_filepath}'.")

    def load_env(self) -> None:
        """Load environment variables from the configured .env file, if any.

        Raises:
            ValueError: If env_filepath is set but no file exists there.
        """
        env_filepath = self.evalrun.config.env_filepath
        # Treat both None and a whitespace-only path as "no .env configured".
        if env_filepath is not None and str(env_filepath).strip() != "":
            if not env_filepath.exists():
                raise ValueError(
                    f"Environment file not present at configured path '{env_filepath}'."
                )
            dotenv.load_dotenv(env_filepath, verbose=True)
            self.logger.debug(f"Finished loading .env file from '{env_filepath}'.")
        else:
            self.logger.debug(
                f"Skipping .env file loading as config env_filepath is '{env_filepath}'."
            )

    def get_database_path(self) -> Path:
        """Return the path of this run's database file."""
        return self.evalrun.database_path

    def initialize_database(self) -> None:
        """Initializes database and tables.

        If config.clear_tables, then current contents of tables are dropped."""
        db_utils.initialize_database(
            self.evalrun.database_path, clear_tables=self.evalrun.config.clear_tables
        )

    def load_evaluation_settings(self) -> None:
        """This function parses our eval suite and puts it in the data structure
        we'll need for easy use at run-time
        """
        # if the current eval has a 'config' entry, overwrite configuration options with its entries
        if (
            self.evalrun.eval.model_extra is not None
            and len(self.evalrun.eval.model_extra) > 0
        ):
            model_extra = self.evalrun.eval.model_extra
            self.logger.debug(
                f"Extra configuration keys provided in eval: {list(model_extra.keys())}"
            )
            for field_name in model_extra.keys():
                # Only keys that already exist on the config object are
                # applied; unknown keys are logged and dropped.
                if hasattr(self.evalrun.config, field_name):
                    old_value = getattr(self.evalrun.config, field_name)
                    new_value = model_extra[field_name]
                    self.logger.info(
                        f"Updating configuration setting: {field_name}={new_value} (old={old_value})"
                    )
                    setattr(self.evalrun.config, field_name, new_value)
                else:
                    self.logger.warning(
                        f"Unknown configuration field {field_name} was ignored."
                    )
        # TODO verify that applying defaults is done solely by pydantic and this step is no longer necessary
        # apply defaults to the schema
        # self.eval = apply_defaults(schema=target_schema, data=self.eval)

        # convert into graph structure
        self.metrics_graph_ordered_list = dependency_graph.create_metrics_graph(
            self.evalrun.eval.metrics
        )

        # validate: completion function defined
        if len(self.metrics_graph_ordered_list) > 0:
            if self.evalrun.eval.grader_llm is None:
                self.logger.warning(
                    f"{len(self.metrics_graph_ordered_list)} metrics defined, but no grader LLM defined."
                )

    def shutdown_logging(self) -> None:
        """Close and detach every handler on this runner's logger."""
        # remove logging handler so we don't get repeat logs if we call run() twice
        # (iterate over a copy since removeHandler mutates the handler list)
        handlers = self.logger.handlers[:]
        for handler in handlers:
            handler.close()
            self.logger.removeHandler(handler)