# Source code for flexeval.classes.thread

import peewee as pw

from flexeval.classes.base import BaseModel
from flexeval.classes.dataset import Dataset
from flexeval.classes.eval_set_run import EvalSetRun


class Thread(BaseModel):
    """Class for holding a single thread / conversation.

    This corresponds to a single row in a jsonl file or a single 'thread_id'
    in a langgraph checkpoint database.
    """

    id = pw.IntegerField(primary_key=True)
    dataset = pw.ForeignKeyField(Dataset, backref="threads")
    evalsetrun = pw.ForeignKeyField(EvalSetRun, backref="threads")
    # Source-specific identifiers; each is nullable.
    langgraph_thread_id = pw.TextField(null=True)
    eval_run_thread_id = pw.TextField(null=True)
    jsonl_thread_id = pw.TextField(null=True)
    system_prompt = pw.TextField(null=True)

    def __init__(self, **kwargs):
        """Initialise the model row and start with no metrics to evaluate."""
        super().__init__(**kwargs)
        # Plain instance attribute, not a peewee field, so it is not persisted.
        self.metrics_to_evaluate = []

    # TODO - test this!
    def format_input_for_rubric(self):
        """Render the thread as text fragments for filling in a rubric.

        Every entry returned by ``get_formatted_prompt()`` is rendered as
        ``"<role>: <content>\\n"``.

        Returns:
            A 4-tuple ``(output, output_minus_completion, completion,
            tool_call_text)`` where:

            * ``output`` - all turns
            * ``output_minus_completion`` - all turns except the last
            * ``completion`` - last turn
            * ``tool_call_text`` - all tool calls

            When the thread has no turns, the first three are empty strings
            rather than raising ``IndexError``.
        """
        turns = [
            f"{entry['role']}: {entry['content']}\n"
            for entry in self.get_formatted_prompt()
        ]
        output_minus_completion = "".join(turns[:-1])
        # Guard the empty thread: there is no last turn to index.
        completion = turns[-1] if turns else ""
        output = output_minus_completion + completion

        # NOTE(review): template whitespace is reproduced exactly as found;
        # confirm the intended line breaks between the three fields.
        tool_call_template = """ Function name: {function_name} Input arguments: {args} Function output: {response_content} """
        # `self.toolcalls` is not declared in this class - presumably a
        # backref from the tool-call model; verify against that model.
        tool_call_text = "".join(
            tool_call_template.format(
                function_name=tc.function_name,
                args=tc.args,
                response_content=tc.response_content,
            )
            for tc in self.toolcalls
        )

        return output, output_minus_completion, completion, tool_call_text

    def get_formatted_prompt(self, include_system_prompt=False):
        """Return the thread content as a list of role/content dictionaries.

        Args:
            include_system_prompt: When true, a
                ``{"role": "system", "content": self.system_prompt}`` entry is
                placed first. ``system_prompt`` is a nullable field, so its
                content may be ``None``.

        Returns:
            A new list: the optional system entry followed by everything
            from ``get_content()``.
        """
        formatted_prompt = []
        if include_system_prompt:
            formatted_prompt.append({"role": "system", "content": self.system_prompt})
        formatted_prompt += self.get_content()
        return formatted_prompt

    def get_content(self, include_toolcalls=True):
        """Return the messages, and optionally tool calls, of this thread.

        Content is a list of dictionaries where each dictionary contains the
        role and content of messages and tool calls in the turn. Each tool
        call appears after the message it's associated with.
        If toolcalls are not desired, pass False to include_toolcalls.
        """
        content = []
        # `self.messages` is not declared in this class - presumably a
        # backref from the message model; verify against that model.
        for message in self.messages:
            content.append({"role": message.role, "content": message.content})
            if include_toolcalls:
                content.extend(
                    toolcall.get_dict_representation()
                    for toolcall in message.toolcalls
                )
        return content