Basic rubric usage

This vignette includes a rubric and uses a “grader function”. Rubrics are defined with a prompt template and a map of string outputs to numeric choice scores.

Python source: basic_rubric.py

 1import flexeval
 2from flexeval.metrics import access
 3from flexeval.schema import (
 4    Config,
 5    DependsOnItem,
 6    Eval,
 7    EvalRun,
 8    FileDataSource,
 9    FunctionItem,
10    GraderLlm,
11    Metrics,
12    Rubric,
13    RubricItem,
14    RubricsCollection,
15)
16
17data_sources = [FileDataSource(path="vignettes/conversations.jsonl")]
18rubric = Rubric(
19    prompt="Answer YES if the response is helpful, NO otherwise.",
20    choice_scores={"YES": 1, "NO": 2},
21)
22# using a placeholder grader, but you can use any supported completion function
23grader_llm = GraderLlm(function_name="echo_completion", kwargs={"response": "YES"})
24is_assistant_dependency = DependsOnItem(
25    name="is_role", kwargs={"role": "assistant"}, metric_min_value=1
26)
27eval = Eval(
28    name="basic_eval",
29    metrics=Metrics(
30        function=[
31            FunctionItem(name="is_role", kwargs={"role": "assistant"}),
32            FunctionItem(
33                name="flesch_reading_ease",
34                depends_on=[is_assistant_dependency],
35            ),
36        ],
37        rubric=[RubricItem(name="is_helpful", depends_on=[is_assistant_dependency])],
38    ),
39    grader_llm=grader_llm,
40)
41config = Config(clear_tables=True, logs_path="tmp")
42eval_run = EvalRun(
43    data_sources=data_sources,
44    database_path="eval_results.db",
45    eval=eval,
46    config=config,
47    rubric_paths=[RubricsCollection(rubrics={"is_helpful": rubric})],
48)
49flexeval.run(eval_run)
50for metric in access.get_all_metrics():
51    print(
52        f"{metric['thread']} {metric['turn']} {metric['evaluation_name']} {metric['metric_value']:.1f}"
53    )

conversations.jsonl contents:

{"input": [{"role": "system", "content": "Be friendly and helpful."}, {"role": "user", "content": "I need help."}, {"role": "assistant", "content": "Help with what?"}, {"role": "user", "content": "My homework."}]}