kiln_ai.datamodel

See our docs for details about our datamodel classes and hierarchy:

Developer docs: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html

User docs: https://docs.getkiln.ai/developers/kiln-datamodel

 1"""
 2See our docs for details about our datamodel classes and hierarchy:
 3
 4Developer docs: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html
 5
 6User docs: https://docs.getkiln.ai/developers/kiln-datamodel
 7"""
 8
 9# This component uses "flat" imports so we don't have too much internal structure exposed in the API.
10# for example you can just `from datamodel import Task, Project` instead of `from datamodel.task import Task; from datamodel.project import Project`
11
12from __future__ import annotations
13
14from kiln_ai.datamodel import dataset_split, eval, strict_mode
15from kiln_ai.datamodel.datamodel_enums import (
16    FineTuneStatusType,
17    Priority,
18    StructuredOutputMode,
19    TaskOutputRatingType,
20)
21from kiln_ai.datamodel.dataset_split import (
22    DatasetSplit,
23    DatasetSplitDefinition,
24)
25from kiln_ai.datamodel.finetune import (
26    Finetune,
27)
28from kiln_ai.datamodel.project import Project
29from kiln_ai.datamodel.prompt import BasePrompt, Prompt
30from kiln_ai.datamodel.prompt_id import (
31    PromptGenerators,
32    PromptId,
33    prompt_generator_values,
34)
35from kiln_ai.datamodel.task import Task, TaskRequirement
36from kiln_ai.datamodel.task_output import (
37    DataSource,
38    DataSourceProperty,
39    DataSourceType,
40    RequirementRating,
41    TaskOutput,
42    TaskOutputRating,
43)
44from kiln_ai.datamodel.task_run import (
45    TaskRun,
46    Usage,
47)
48
49__all__ = [
50    "strict_mode",
51    "dataset_split",
52    "eval",
53    "Task",
54    "Project",
55    "TaskRun",
56    "TaskOutput",
57    "Priority",
58    "DataSource",
59    "DataSourceType",
60    "DataSourceProperty",
61    "Finetune",
62    "FineTuneStatusType",
63    "TaskOutputRatingType",
64    "TaskRequirement",
65    "DatasetSplitDefinition",
66    "DatasetSplit",
67    "RequirementRating",
68    "TaskRequirement",
69    "BasePrompt",
70    "Prompt",
71    "TaskOutputRating",
72    "StructuredOutputMode",
73    "PromptId",
74    "PromptGenerators",
75    "prompt_generator_values",
76    "Usage",
77]
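
Thanks to these flat exports, typical usage imports everything from the package root. A minimal sketch (names and values are invented for illustration; the `parent=` wiring mirrors what `DatasetSplit.from_task` does below):

from kiln_ai.datamodel import Project, Task

project = Project(name="movie_reviews", description="Sentiment analysis experiments")
task = Task(
    name="classify_sentiment",
    instruction="Classify the sentiment of the review as positive, negative or neutral.",
    parent=project,
)
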
class Task(kiln_ai.datamodel.basemodel.KilnParentedModel, kiln_ai.datamodel.basemodel.KilnParentModel):
class Task(
    KilnParentedModel,
    KilnParentModel,
    parent_of={
        "runs": TaskRun,
        "dataset_splits": DatasetSplit,
        "finetunes": Finetune,
        "prompts": Prompt,
        "evals": Eval,
        "run_configs": TaskRunConfig,
    },
):
    """
    Represents a specific task to be performed, with associated requirements and validation rules.

    Contains the task definition, requirements, input/output schemas, and maintains
    a collection of task runs.
    """

    name: FilenameString = Field(description="The name of the task.")
    description: str | None = Field(
        default=None,
        description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
    )
    instruction: str = Field(
        min_length=1,
        description="The instructions for the task. Will be used in prompts/training/validation.",
    )
    requirements: List[TaskRequirement] = Field(default=[])
    output_json_schema: JsonObjectSchema | None = None
    input_json_schema: JsonObjectSchema | None = None
    thinking_instruction: str | None = Field(
        default=None,
        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting.",
    )

    def output_schema(self) -> Dict | None:
        if self.output_json_schema is None:
            return None
        return schema_from_json_str(self.output_json_schema)

    def input_schema(self) -> Dict | None:
        if self.input_json_schema is None:
            return None
        return schema_from_json_str(self.input_json_schema)

    # These wrappers help with typechecking. We should fix this in KilnParentModel
    def runs(self, readonly: bool = False) -> list[TaskRun]:
        return super().runs(readonly=readonly)  # type: ignore

    def dataset_splits(self, readonly: bool = False) -> list[DatasetSplit]:
        return super().dataset_splits(readonly=readonly)  # type: ignore

    def finetunes(self, readonly: bool = False) -> list[Finetune]:
        return super().finetunes(readonly=readonly)  # type: ignore

    def prompts(self, readonly: bool = False) -> list[Prompt]:
        return super().prompts(readonly=readonly)  # type: ignore

    def evals(self, readonly: bool = False) -> list[Eval]:
        return super().evals(readonly=readonly)  # type: ignore

    def run_configs(self, readonly: bool = False) -> list[TaskRunConfig]:
        return super().run_configs(readonly=readonly)  # type: ignore

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore

Represents a specific task to be performed, with associated requirements and validation rules.

Contains the task definition, requirements, input/output schemas, and maintains a collection of task runs.

name: FilenameString
description: str | None
instruction: str
requirements: List[TaskRequirement]
output_json_schema: JsonObjectSchema | None
input_json_schema: JsonObjectSchema | None
thinking_instruction: str | None
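
To illustrate the schema fields above: output_json_schema and input_json_schema are stored as JSON strings, and output_schema()/input_schema() parse them into dicts. A hedged sketch (the schema content is invented):

import json

from kiln_ai.datamodel import Task

task = Task(
    name="extract_rating",
    instruction="Extract the star rating mentioned in the review.",
    output_json_schema=json.dumps(
        {
            "type": "object",
            "properties": {"rating": {"type": "integer"}},
            "required": ["rating"],
        }
    ),
)
print(task.output_schema())  # parsed dict, or None for unstructured tasks
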
def output_schema(self) -> Optional[Dict]:

Returns the task's output JSON schema parsed into a dict, or None if the task has unstructured output.

def input_schema(self) -> Optional[Dict]:

Returns the task's input JSON schema parsed into a dict, or None if the task has unstructured input.
def runs(self, readonly=False) -> List[TaskRun]:

Returns the TaskRun children of this task.

def dataset_splits(self, readonly=False) -> List[DatasetSplit]:

Returns the DatasetSplit children of this task.

def finetunes(self, readonly=False) -> List[Finetune]:

Returns the Finetune children of this task.

def prompts(self, readonly=False) -> List[Prompt]:

Returns the Prompt children of this task.

def evals(self, readonly=False) -> List[kiln_ai.datamodel.eval.Eval]:

Returns the Eval children of this task.

def run_configs(self, readonly=False) -> List[kiln_ai.datamodel.task.TaskRunConfig]:

Returns the TaskRunConfig children of this task.

def parent_project(self) -> Optional[Project]:

Returns the parent Project, or None if this task is not attached to a project.
def relationship_name() -> str:

Returns the name of this model's relationship to its parent ("tasks" for Task under Project).

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:

Returns the parent class for this model type (Project).

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].


class Project(kiln_ai.datamodel.basemodel.KilnParentModel):
class Project(KilnParentModel, parent_of={"tasks": Task}):
    """
    A collection of related tasks.

    Projects organize tasks into logical groups and provide high-level descriptions
    of the overall goals.
    """

    name: FilenameString = Field(description="The name of the project.")
    description: str | None = Field(
        default=None,
        description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
    )

    # Needed for typechecking. We should fix this in KilnParentModel
    def tasks(self) -> list[Task]:
        return super().tasks()  # type: ignore

A collection of related tasks.

Projects organize tasks into logical groups and provide high-level descriptions of the overall goals.

name: FilenameString
description: str | None
def tasks(self, readonly=False) -> List[Task]:

Returns the Task children of this project.

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].


class TaskRun(kiln_ai.datamodel.basemodel.KilnParentedModel):
class TaskRun(KilnParentedModel):
    """
    Represents a single execution of a Task.

    Contains the input used, its source, the output produced, and optional
    repair information if the output needed correction.
    """

    input: str = Field(
        description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
    )
    input_source: DataSource | None = Field(
        default=None, description="The source of the input: human or synthetic."
    )

    output: TaskOutput = Field(description="The output of the task run.")
    repair_instructions: str | None = Field(
        default=None,
        description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.",
    )
    repaired_output: TaskOutput | None = Field(
        default=None,
        description="A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
    )
    intermediate_outputs: Dict[str, str] | None = Field(
        default=None,
        description="Intermediate outputs from the task run. Keys are the names of the intermediate output steps (cot=chain of thought, etc), values are the output data.",
    )
    tags: List[str] = Field(
        default=[],
        description="Tags for the task run. Tags are used to categorize task runs for filtering and reporting.",
    )
    usage: Usage | None = Field(
        default=None,
        description="Usage information for the task run. This includes the number of input tokens, output tokens, and total tokens used.",
    )

    def thinking_training_data(self) -> str | None:
        """
        Get the thinking training data from the task run.
        """
        if self.intermediate_outputs is None:
            return None
        return self.intermediate_outputs.get(
            "reasoning"
        ) or self.intermediate_outputs.get("chain_of_thought")

    def has_thinking_training_data(self) -> bool:
        """
        Does this run have thinking data that we can use to train a thinking model?
        """
        return self.thinking_training_data() is not None

    # Workaround to return typed parent without importing Task
    def parent_task(self) -> Union["Task", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Task":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="after")
    def validate_input_format(self, info: ValidationInfo) -> Self:
        # Don't validate if loading from file (not new). Too slow.
        # We don't allow changing task schema, so this is redundant validation.
        # Note: we still validate if editing a loaded model
        if self.loading_from_file(info):
            # Consider loading an existing model as validated.
            self._last_validated_input = self.input
            return self

        # Don't validate if input has not changed. Too slow to run this every time.
        if (
            hasattr(self, "_last_validated_input")
            and self.input == self._last_validated_input
        ):
            return self

        task = self.parent_task()
        if task is None:
            # don't validate this relationship until we have a path or parent. Give them time to build it (but will catch it before saving)
            return self

        # validate input
        if task.input_json_schema is not None:
            try:
                input_parsed = json.loads(self.input)
            except json.JSONDecodeError:
                raise ValueError("Input is not a valid JSON object")

            validate_schema_with_value_error(
                input_parsed,
                task.input_json_schema,
                "Input does not match task input schema.",
            )

        self._last_validated_input = self.input
        return self

    @model_validator(mode="after")
    def validate_output_format(self, info: ValidationInfo) -> Self:
        # Don't validate if loading from file (not new). Too slow.
        # Note: we still validate if editing a loaded model's output.
        if self.loading_from_file(info):
            # Consider loading an existing model as validated.
            self._last_validated_output = self.output.output if self.output else None
            return self

        # Don't validate unless output has changed since last validation.
        # The validator is slow and costly, don't want it running when setting other fields.
        if (
            hasattr(self, "_last_validated_output")
            and self.output is not None
            and self.output.output == self._last_validated_output
        ):
            return self

        task = self.parent_task()
        if task is None:
            return self

        self.output.validate_output_format(task)
        self._last_validated_output = self.output.output if self.output else None
        return self

    @model_validator(mode="after")
    def validate_repaired_output(self) -> Self:
        if self.repaired_output is not None:
            if self.repaired_output.rating is not None:
                raise ValueError(
                    "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
                )

            task = self.parent_task()
            if (
                task is not None
                and self.repaired_output.output is not None
                and task.output_json_schema is not None
            ):
                try:
                    output_parsed = json.loads(self.repaired_output.output)
                except json.JSONDecodeError:
                    raise ValueError("Repaired output is not a valid JSON object")

                validate_schema_with_value_error(
                    output_parsed,
                    task.output_json_schema,
                    "Repaired output does not match task output schema.",
                )

        if self.repair_instructions is None and self.repaired_output is not None:
            raise ValueError(
                "Repair instructions are required if providing a repaired output."
            )
        if self.repair_instructions is not None and self.repaired_output is None:
            raise ValueError(
                "A repaired output is required if providing repair instructions."
            )

        return self

    @model_validator(mode="after")
    def validate_input_source(self, info: ValidationInfo) -> Self:
        # On strict mode and not loaded from file, we validate input_source is not None.
        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
        if not strict_mode():
            return self
        if self.loaded_from_file(info):
            return self
        if self.input_source is None:
            raise ValueError("input_source is required when strict mode is enabled")
        return self

    @model_validator(mode="after")
    def validate_tags(self) -> Self:
        for tag in self.tags:
            if not tag:
                raise ValueError("Tags cannot be empty strings")
            if " " in tag:
                raise ValueError("Tags cannot contain spaces. Try underscores.")

        return self

Represents a single execution of a Task.

Contains the input used, its source, the output produced, and optional repair information if the output needed correction.

input: str
input_source: DataSource | None
output: TaskOutput
repair_instructions: str | None
repaired_output: TaskOutput | None
intermediate_outputs: Optional[Dict[str, str]]
tags: List[str]
usage: Usage | None
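
For orientation, a hedged sketch of building a TaskRun by hand (adapters normally create these for you; all values are invented, and strict mode is assumed to be off):

from kiln_ai.datamodel import (
    DataSource,
    DataSourceType,
    Task,
    TaskOutput,
    TaskRun,
)

task = Task(name="classify_sentiment", instruction="Classify the sentiment of the review.")

run = TaskRun(
    parent=task,
    input="The movie was a delight from start to finish.",
    input_source=DataSource(type=DataSourceType.human, properties={"created_by": "jane"}),
    output=TaskOutput(
        output="positive",
        source=DataSource(
            type=DataSourceType.synthetic,
            properties={
                "model_name": "example_model",
                "model_provider": "example_provider",
                "adapter_name": "example_adapter",
            },
        ),
    ),
    tags=["golden_set"],
)
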
def thinking_training_data(self) -> str | None:

Get the thinking training data from the task run.

def has_thinking_training_data(self) -> bool:

Does this run have thinking data that we can use to train a thinking model?
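
For example, a run whose intermediate_outputs include a "reasoning" (or "chain_of_thought") entry exposes it as thinking training data. A hedged sketch, reusing the imports above (values invented; no parent task or sources attached, assuming strict mode is off):

run = TaskRun(
    input="2 + 2?",
    output=TaskOutput(output="4"),
    intermediate_outputs={"reasoning": "Two plus two equals four."},
)
assert run.thinking_training_data() == "Two plus two equals four."
assert run.has_thinking_training_data()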

def parent_task(self) -> Optional[Task]:

Returns the parent Task, or None if this run is not attached to a task.

@model_validator(mode='after')
def validate_input_format(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:

Validates that the input is valid JSON matching the parent task's input schema. Skipped when loading from file, or when the input has not changed since the last validation.

@model_validator(mode='after')
def validate_output_format(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:

Validates the output against the parent task's output schema. Skipped when loading from file, or when the output has not changed since the last validation.

@model_validator(mode='after')
def validate_repaired_output(self) -> Self:

Validates that a repaired output carries no rating, matches the task's output schema, and that repair instructions and the repaired output are always provided together.

@model_validator(mode='after')
def validate_input_source(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:

In strict mode, validates that input_source is set on newly created runs (data loaded from file is accepted as-is).

@model_validator(mode='after')
def validate_tags(self) -> Self:

Validates that tags are non-empty strings containing no spaces.
def relationship_name() -> str:

Returns the name of this model's relationship to its parent ("runs" for TaskRun under Task).

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:

Returns the parent class for this model type (Task).

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].


class TaskOutput(kiln_ai.datamodel.basemodel.KilnBaseModel):
class TaskOutput(KilnBaseModel):
    """
    An output for a specific task run.

    Contains the actual output content, its source (human or synthetic),
    and optional rating information.
    """

    output: str = Field(
        description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
    )
    source: DataSource | None = Field(
        description="The source of the output: human or synthetic.",
        default=None,
    )
    rating: TaskOutputRating | None = Field(
        default=None, description="The rating of the output"
    )

    def validate_output_format(self, task: "Task") -> Self:
        # validate output
        if task.output_json_schema is not None:
            try:
                output_parsed = json.loads(self.output)
            except json.JSONDecodeError:
                raise ValueError("Output is not a valid JSON object")

            validate_schema_with_value_error(
                output_parsed,
                task.output_json_schema,
                "This task requires a specific output schema. While the model produced JSON, that JSON didn't meet the schema. Search 'Troubleshooting Structured Data Issues' in our docs for more information.",
            )
        return self

    @model_validator(mode="after")
    def validate_output_source(self, info: ValidationInfo) -> Self:
        # On strict mode and not loaded from file, we validate output_source is not None.
        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
        if not strict_mode():
            return self
        if self.loaded_from_file(info):
            return self
        if self.source is None:
            raise ValueError("Output source is required when strict mode is enabled")
        return self

An output for a specific task run.

Contains the actual output content, its source (human or synthetic), and optional rating information.

output: str
source: DataSource | None
rating: TaskOutputRating | None
def validate_output_format(self, task: Task) -> Self:

Validates that the output is valid JSON matching the task's output schema, when the task defines one.

@model_validator(mode='after')
def validate_output_source(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:

In strict mode, validates that source is set on newly created outputs (data loaded from file is accepted as-is).
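
A hedged sketch of how validate_output_format behaves for a task with structured output (schema invented; reusing the Task and TaskOutput imports from the examples above):

import json

structured_task = Task(
    name="extract_rating",
    instruction="Extract the star rating.",
    output_json_schema=json.dumps(
        {"type": "object", "properties": {"rating": {"type": "integer"}}, "required": ["rating"]}
    ),
)

TaskOutput(output=json.dumps({"rating": 5})).validate_output_format(structured_task)  # passes

try:
    TaskOutput(output="five stars").validate_output_format(structured_task)
except ValueError as err:
    print(err)  # the output is not valid JSON, so validation raises
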
model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].


class Priority(enum.IntEnum):
class Priority(IntEnum):
    """Defines priority levels for tasks and requirements, where P0 is highest priority."""

    p0 = 0
    p1 = 1
    p2 = 2
    p3 = 3

Defines priority levels for tasks and requirements, where P0 is highest priority.

p0 = <Priority.p0: 0>
p1 = <Priority.p1: 1>
p2 = <Priority.p2: 2>
p3 = <Priority.p3: 3>
class DataSource(pydantic.main.BaseModel):
class DataSource(BaseModel):
    """
    Represents the origin of data, either human or synthetic, with associated properties.

    Properties vary based on the source type - for synthetic sources this includes
    model information, for human sources this includes creator information.
    """

    type: DataSourceType
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties describing the data source. For synthetic things like model. For human, the human's name.",
    )

    _data_source_properties = [
        DataSourceProperty(
            name="created_by",
            type=str,
            required_for=[DataSourceType.human],
            not_allowed_for=[DataSourceType.synthetic, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="model_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="model_provider",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="adapter_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            # Legacy field -- allow loading from old runs, but we shouldn't be setting it.
            name="prompt_builder_name",
            type=str,
            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            # The PromptId of the prompt. Can be a saved prompt, fine-tune, generator name, etc. See PromptId type for more details.
            name="prompt_id",
            type=str,
            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="file_name",
            type=str,
            required_for=[DataSourceType.file_import],
            not_allowed_for=[DataSourceType.human, DataSourceType.synthetic],
        ),
    ]

    @model_validator(mode="after")
    def validate_type(self) -> "DataSource":
        if self.type not in DataSourceType:
            raise ValueError(f"Invalid data source type: {self.type}")
        return self

    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        for prop in self._data_source_properties:
            # Check the property type is correct
            if prop.name in self.properties:
                if not isinstance(self.properties[prop.name], prop.type):
                    raise ValueError(
                        f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                    )
            # Check the property is required for the data source type
            if self.type in prop.required_for:
                if prop.name not in self.properties:
                    raise ValueError(
                        f"'{prop.name}' is required for {self.type} data source"
                    )
            # Check the property is not allowed for the data source type
            elif self.type in prop.not_allowed_for and prop.name in self.properties:
                raise ValueError(
                    f"'{prop.name}' is not allowed for {self.type} data source"
                )
        return self

    @model_validator(mode="after")
    def validate_no_empty_properties(self) -> Self:
        for prop, value in self.properties.items():
            if isinstance(value, str) and value == "":
                raise ValueError(
                    f"Property '{prop}' must be a non-empty string for {self.type} data source"
                )
        return self

Represents the origin of data, either human or synthetic, with associated properties.

Properties vary based on the source type - for synthetic sources this includes model information, for human sources this includes creator information.

type: DataSourceType
properties: Dict[str, str | int | float]
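
A hedged sketch of the property rules defined above (values invented): synthetic sources must name the model, provider and adapter that produced the data, while human sources must name their creator and may not carry model properties.

from kiln_ai.datamodel import DataSource, DataSourceType

synthetic = DataSource(
    type=DataSourceType.synthetic,
    properties={
        "model_name": "example_model",
        "model_provider": "example_provider",
        "adapter_name": "example_adapter",
    },
)

human = DataSource(type=DataSourceType.human, properties={"created_by": "jane"})

# This would raise a ValueError: 'created_by' is not allowed for a synthetic data source.
# DataSource(type=DataSourceType.synthetic, properties={"created_by": "jane"})
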
@model_validator(mode='after')
def validate_type(self) -> DataSource:

Validates that type is a known DataSourceType.

@model_validator(mode='after')
def validate_properties(self) -> DataSource:

Validates property types, required properties, and disallowed properties for this source type.

@model_validator(mode='after')
def validate_no_empty_properties(self) -> Self:

Validates that no string property is an empty string.
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].


class DataSourceType(builtins.str, enum.Enum):
class DataSourceType(str, Enum):
    """
    The source type of a piece of data.

    Human: a human created the data
    Synthetic: a model created the data
    """

    human = "human"
    synthetic = "synthetic"
    file_import = "file_import"

The source type of a piece of data.

Human: a human created the data
Synthetic: a model created the data

human = <DataSourceType.human: 'human'>
synthetic = <DataSourceType.synthetic: 'synthetic'>
file_import = <DataSourceType.file_import: 'file_import'>
class DataSourceProperty(pydantic.main.BaseModel):
class DataSourceProperty(BaseModel):
    """
    Defines a property that can be associated with a data source.

    Includes validation rules for when properties are required or not allowed
    based on the data source type.
    """

    name: str
    type: Type[Union[str, int, float]]
    required_for: List[DataSourceType] = []
    not_allowed_for: List[DataSourceType] = []

Defines a property that can be associated with a data source.

Includes validation rules for when properties are required or not allowed based on the data source type.

name: str
type: Type[Union[str, int, float]]
required_for: List[DataSourceType]
not_allowed_for: List[DataSourceType]
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class Finetune(kiln_ai.datamodel.basemodel.KilnParentedModel):
class Finetune(KilnParentedModel):
    """
    The Kiln fine-tune datamodel.

    Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.
    """

    name: FilenameString = Field(description="The name of the fine-tune.")
    description: str | None = Field(
        default=None,
        description="A description of the fine-tune for you and your team. Not used in training.",
    )
    structured_output_mode: StructuredOutputMode | None = Field(
        default=None,
        description="The mode to use to train the model for structured output, if it was trained with structured output. Will determine how we call the tuned model, so we call with the matching mode.",
    )
    provider: str = Field(
        description="The provider to use for the fine-tune (e.g. 'openai')."
    )
    base_model_id: str = Field(
        description="The id of the base model to use for the fine-tune. This string relates to the provider's IDs for their own models, not Kiln IDs."
    )
    provider_id: str | None = Field(
        default=None,
        description="The ID of the fine-tune job on the provider's side. May not be the same as the fine_tune_model_id.",
    )
    fine_tune_model_id: str | None = Field(
        default=None,
        description="The ID of the fine-tuned model on the provider's side. May not be the same as the provider_id.",
    )
    dataset_split_id: str = Field(
        description="The ID of the dataset split to use for this fine-tune.",
    )
    train_split_name: str = Field(
        default="train",
        description="The name of the training split to use for this fine-tune.",
    )
    validation_split_name: str | None = Field(
        default=None,
        description="The name of the validation split to use for this fine-tune. Optional.",
    )
    parameters: dict[str, str | int | float | bool] = Field(
        default={},
        description="The parameters to use for this fine-tune. These are provider-specific.",
    )
    # These two fields are saved exactly as used for training. Even if they map exactly to a custom prompt or generator, those can change, so we want to keep a record of the training prompt.
    system_message: str = Field(
        description="The system message to use for this fine-tune.",
    )
    thinking_instructions: str | None = Field(
        default=None,
        description="The thinking instructions to use for this fine-tune. Only used when data_strategy is final_and_intermediate.",
    )
    latest_status: FineTuneStatusType = Field(
        default=FineTuneStatusType.unknown,
        description="The latest known status of this fine-tune. Not updated in real time.",
    )
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties of the fine-tune. Different providers may use different properties.",
    )
    data_strategy: ChatStrategy = Field(
        default=ChatStrategy.single_turn,
        description="The strategy to use for training the model. 'final_only' will only train on the final response. 'final_and_intermediate' will train on the final response and intermediate outputs (chain of thought or reasoning).",
    )

    # Workaround to return typed parent without importing Task
    def parent_task(self) -> Union["Task", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Task":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="after")
    def validate_thinking_instructions(self) -> Self:
        if (
            self.thinking_instructions is not None
            and self.data_strategy not in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
        ):
            raise ValueError(
                f"Thinking instructions can only be used when data_strategy is one of the following: {DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS}"
            )
        if (
            self.thinking_instructions is None
            and self.data_strategy in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
        ):
            raise ValueError(
                f"Thinking instructions are required when data_strategy is one of the following: {DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS}"
            )
        return self

The Kiln fine-tune datamodel.

Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.

name: FilenameString
description: str | None
structured_output_mode: StructuredOutputMode | None
provider: str
base_model_id: str
provider_id: str | None
fine_tune_model_id: str | None
dataset_split_id: str
train_split_name: str
validation_split_name: str | None
parameters: dict[str, str | int | float | bool]
system_message: str
thinking_instructions: str | None
latest_status: FineTuneStatusType
properties: Dict[str, str | int | float]
data_strategy: kiln_ai.datamodel.datamodel_enums.ChatStrategy
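
A hedged sketch of recording a fine-tune job (all identifiers are invented; the parent task and dataset split ID would come from your own project):

from kiln_ai.datamodel import Finetune

ft = Finetune(
    name="reviews_tune_v1",
    provider="openai",
    base_model_id="gpt-4o-mini-2024-07-18",
    dataset_split_id="example_split_id",
    system_message="Classify the sentiment of the review.",
    parent=task,  # a Task, as in the earlier examples
)
print(ft.latest_status)  # FineTuneStatusType.unknown until a status check updates it
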
def parent_task(self) -> Optional[Task]:

Returns the parent Task, or None if this fine-tune is not attached to a task.

@model_validator(mode='after')
def validate_thinking_instructions(self) -> Self:

Validates that thinking instructions are provided exactly when the data strategy requires them.
def relationship_name() -> str:

Returns the name of this model's relationship to its parent ("finetunes" for Finetune under Task).

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:

Returns the parent class for this model type (Task).

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].


class FineTuneStatusType(builtins.str, enum.Enum):
class FineTuneStatusType(str, Enum):
    """
    The status type of a fine-tune (running, completed, failed, etc).
    """

    unknown = "unknown"  # server error
    pending = "pending"
    running = "running"
    completed = "completed"
    failed = "failed"

The status type of a fine-tune (running, completed, failed, etc).

unknown = <FineTuneStatusType.unknown: 'unknown'>
pending = <FineTuneStatusType.pending: 'pending'>
running = <FineTuneStatusType.running: 'running'>
completed = <FineTuneStatusType.completed: 'completed'>
failed = <FineTuneStatusType.failed: 'failed'>
class TaskOutputRatingType(builtins.str, enum.Enum):
class TaskOutputRatingType(str, Enum):
    """Defines the types of rating systems available for task outputs."""

    five_star = "five_star"
    pass_fail = "pass_fail"
    pass_fail_critical = "pass_fail_critical"
    custom = "custom"

Defines the types of rating systems available for task outputs.

five_star = <TaskOutputRatingType.five_star: 'five_star'>
pass_fail = <TaskOutputRatingType.pass_fail: 'pass_fail'>
pass_fail_critical = <TaskOutputRatingType.pass_fail_critical: 'pass_fail_critical'>
custom = <TaskOutputRatingType.custom: 'custom'>
class TaskRequirement(pydantic.main.BaseModel):
class TaskRequirement(BaseModel):
    """
    Defines a specific requirement that should be met by task outputs.

    Includes an identifier, name, description, instruction for meeting the requirement,
    priority level, and rating type (five_star, pass_fail, pass_fail_critical, custom).
    """

    id: ID_TYPE = ID_FIELD
    name: FilenameStringShort = Field(description="The name of the task requirement.")
    description: str | None = Field(default=None)
    instruction: str = Field(min_length=1)
    priority: Priority = Field(default=Priority.p2)
    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)

Defines a specific requirement that should be met by task outputs.

Includes an identifier, name, description, instruction for meeting the requirement, priority level, and rating type (five_star, pass_fail, pass_fail_critical, custom).

id: Optional[str]
name: FilenameStringShort
description: str | None
instruction: str
priority: Priority
type: TaskOutputRatingType
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
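
A hedged sketch of attaching a requirement to a task (values invented):

from kiln_ai.datamodel import Priority, Task, TaskOutputRatingType, TaskRequirement

task = Task(
    name="classify_sentiment",
    instruction="Classify the sentiment of the review.",
    requirements=[
        TaskRequirement(
            name="concise",
            instruction="Answer with a single word.",
            priority=Priority.p1,
            type=TaskOutputRatingType.pass_fail,
        )
    ],
)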

class DatasetSplitDefinition(pydantic.main.BaseModel):
class DatasetSplitDefinition(BaseModel):
    """
    A definition of a split in a dataset.

    Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)
    """

    name: FilenameString = Field(
        description="The name of the dataset split definition."
    )
    description: str | None = Field(
        default=None,
        description="A description of the dataset for you and your team. Not used in training.",
    )
    percentage: float = Field(
        ge=0.0,
        le=1.0,
        description="The percentage of the dataset that this split represents (between 0 and 1).",
    )

A definition of a split in a dataset.

Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)

name: FilenameString
description: str | None
percentage: float
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class DatasetSplit(kiln_ai.datamodel.basemodel.KilnParentedModel):
class DatasetSplit(KilnParentedModel):
    """
    A collection of task runs, with optional splits (train, test, validation).

    Used to freeze a dataset into train/test/validation splits for repeatable fine-tuning or other tasks.

    Maintains a list of IDs for each split, to avoid data duplication.
    """

    name: FilenameString = Field(description="The name of the dataset split.")
    description: str | None = Field(
        default=None,
        description="A description of the dataset for you and your team. Not used in training.",
    )
    splits: list[DatasetSplitDefinition] = Field(
        default_factory=list,
        description="The splits in the dataset.",
    )
    split_contents: dict[str, list[str]] = Field(
        description="The contents of each split in the dataset. The key is the split name, and the value is a list of task run IDs.",
    )
    filter: DatasetFilterId | None = Field(
        default=None,
        description="The filter used to build the dataset.",
    )

    @model_validator(mode="after")
    def validate_split_percentages(self) -> "DatasetSplit":
        total = sum(split.percentage for split in self.splits)
        if not math.isclose(total, 1.0, rel_tol=1e-9):
            raise ValueError(f"The sum of split percentages must be 1.0 (got {total})")
        return self

    @classmethod
    def from_task(
        cls,
        name: str,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter_id: DatasetFilterId = "all",
        description: str | None = None,
    ):
        """
        Build a dataset split from a task.
        """
        filter = dataset_filter_from_id(filter_id)
        split_contents = cls.build_split_contents(task, splits, filter)
        return cls(
            parent=task,
            name=name,
            description=description,
            splits=splits,
            split_contents=split_contents,
            filter=filter_id,
        )

    @classmethod
    def build_split_contents(
        cls,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter: DatasetFilter,
    ) -> dict[str, list[str]]:
        valid_ids = []
        for task_run in task.runs():
            if filter(task_run):
                valid_ids.append(task_run.id)

        # Shuffle and split by split percentage
        random.shuffle(valid_ids)
        split_contents = {}
        start_idx = 0
        remaining_items = len(valid_ids)

        # Handle all splits except the last one
        for split in splits[:-1]:
            split_size = round(len(valid_ids) * split.percentage)
            split_contents[split.name] = valid_ids[start_idx : start_idx + split_size]
            start_idx += split_size
            remaining_items -= split_size

        # Last split gets all remaining items (for rounding)
        if splits:
            split_contents[splits[-1].name] = valid_ids[start_idx:]

        return split_contents

    def parent_task(self) -> "Task | None":
        # inline import to avoid circular import
        from kiln_ai.datamodel import Task

        if not isinstance(self.parent, Task):
            return None
        return self.parent

    def missing_count(self) -> int:
        """
        Returns:
            int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
        """
        parent = self.parent_task()
        if parent is None:
            raise ValueError("DatasetSplit has no parent task")

        runs = parent.runs(readonly=True)
        all_ids = set(run.id for run in runs)
        all_ids_in_splits = set()
        for ids in self.split_contents.values():
            all_ids_in_splits.update(ids)
        missing = all_ids_in_splits - all_ids
        return len(missing)

A collection of task runs, with optional splits (train, test, validation).

Used to freeze a dataset into train/test/validation splits for repeatable fine-tuning or other tasks.

Maintains a list of IDs for each split, to avoid data duplication.

name: FilenameString
description: str | None
splits: list[DatasetSplitDefinition]
split_contents: dict[str, list[str]]
filter: DatasetFilterId | None
@model_validator(mode='after')
def validate_split_percentages(self) -> DatasetSplit:
93    @model_validator(mode="after")
94    def validate_split_percentages(self) -> "DatasetSplit":
95        total = sum(split.percentage for split in self.splits)
96        if not math.isclose(total, 1.0, rel_tol=1e-9):
97            raise ValueError(f"The sum of split percentages must be 1.0 (got {total})")
98        return self
@classmethod
def from_task(cls, name: str, task: Task, splits: list[DatasetSplitDefinition], filter_id: DatasetFilterId = 'all', description: str | None = None):
100    @classmethod
101    def from_task(
102        cls,
103        name: str,
104        task: "Task",
105        splits: list[DatasetSplitDefinition],
106        filter_id: DatasetFilterId = "all",
107        description: str | None = None,
108    ):
109        """
110        Build a dataset split from a task.
111        """
112        filter = dataset_filter_from_id(filter_id)
113        split_contents = cls.build_split_contents(task, splits, filter)
114        return cls(
115            parent=task,
116            name=name,
117            description=description,
118            splits=splits,
119            split_contents=split_contents,
120            filter=filter_id,
121        )

Build a dataset split from a task.
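A minimal sketch of how this might be called, assuming `task` is an existing Task whose runs have already been saved; the split names and the resulting dataset name are illustrative only:

from kiln_ai.datamodel import DatasetSplit, DatasetSplitDefinition

# `task` is assumed to be an existing Task with persisted runs.
dataset = DatasetSplit.from_task(
    name="frozen_v1",
    task=task,
    splits=[
        DatasetSplitDefinition(name="train", percentage=0.8),
        DatasetSplitDefinition(name="test", percentage=0.2),
    ],
    filter_id="all",  # the default filter: include every task run
)
# dataset.split_contents now maps each split name to a list of task run IDs.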

@classmethod
def build_split_contents( cls, task: Task, splits: list[DatasetSplitDefinition], filter: kiln_ai.datamodel.dataset_filters.DatasetFilter) -> dict[str, list[str]]:
123    @classmethod
124    def build_split_contents(
125        cls,
126        task: "Task",
127        splits: list[DatasetSplitDefinition],
128        filter: DatasetFilter,
129    ) -> dict[str, list[str]]:
130        valid_ids = []
131        for task_run in task.runs():
132            if filter(task_run):
133                valid_ids.append(task_run.id)
134
135        # Shuffle and split by split percentage
136        random.shuffle(valid_ids)
137        split_contents = {}
138        start_idx = 0
139        remaining_items = len(valid_ids)
140
141        # Handle all splits except the last one
142        for split in splits[:-1]:
143            split_size = round(len(valid_ids) * split.percentage)
144            split_contents[split.name] = valid_ids[start_idx : start_idx + split_size]
145            start_idx += split_size
146            remaining_items -= split_size
147
148        # Last split gets all remaining items (for rounding)
149        if splits:
150            split_contents[splits[-1].name] = valid_ids[start_idx:]
151
152        return split_contents
def parent_task(self) -> Task | None:
154    def parent_task(self) -> "Task | None":
155        # inline import to avoid circular import
156        from kiln_ai.datamodel import Task
157
158        if not isinstance(self.parent, Task):
159            return None
160        return self.parent
def missing_count(self) -> int:
162    def missing_count(self) -> int:
163        """
164        Returns:
165            int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
166        """
167        parent = self.parent_task()
168        if parent is None:
169            raise ValueError("DatasetSplit has no parent task")
170
171        runs = parent.runs(readonly=True)
172        all_ids = set(run.id for run in runs)
173        all_ids_in_splits = set()
174        for ids in self.split_contents.values():
175            all_ids_in_splits.update(ids)
176        missing = all_ids_in_splits - all_ids
177        return len(missing)

Returns: int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset

def relationship_name() -> str:
464        def relationship_name_method() -> str:
465            return relationship_name

Returns the relationship name used when parenting this model under a Task ("dataset_splits" for DatasetSplit).

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
457        def parent_class_method() -> Type[KilnParentModel]:
458            return cls

Returns the parent model type for this class (Task for DatasetSplit).

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
122                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
123                        """We need to both initialize private attributes and call the user-defined model_post_init
124                        method.
125                        """
126                        init_private_attributes(self, context)
127                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class RequirementRating(pydantic.main.BaseModel):
19class RequirementRating(BaseModel):
20    """Rating for a specific requirement within a task output."""
21
22    value: float = Field(
23        description="The rating value. Interpretation depends on rating type"
24    )
25    type: TaskOutputRatingType = Field(description="The type of rating")

Rating for a specific requirement within a task output.

value: float
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class BasePrompt(pydantic.main.BaseModel):
 7class BasePrompt(BaseModel):
 8    """
 9    A prompt for a task. This is the basic data storage format which can be used throughout a project.
10
11    The "Prompt" model name is reserved for the custom prompts parented by a task.
12    """
13
14    name: FilenameString = Field(description="The name of the prompt.")
15    description: str | None = Field(
16        default=None,
17        description="A more detailed description of the prompt.",
18    )
19    generator_id: str | None = Field(
20        default=None,
21        description="The id of the generator that created this prompt.",
22    )
23    prompt: str = Field(
24        description="The prompt for the task.",
25        min_length=1,
26    )
27    chain_of_thought_instructions: str | None = Field(
28        default=None,
29        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting. COT will not be used unless this is provided.",
30    )

A prompt for a task. This is the basic data storage format which can be used throughout a project.

The "Prompt" model name is reserved for the custom prompts parented by a task.

name: FilenameString
description: str | None
generator_id: str | None
prompt: str
chain_of_thought_instructions: str | None
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class Prompt(kiln_ai.datamodel.basemodel.KilnParentedModel, kiln_ai.datamodel.BasePrompt):
33class Prompt(KilnParentedModel, BasePrompt):
34    """
35    A prompt for a task. This is the custom prompt parented by a task.
36    """
37
38    pass

A prompt for a task. This is the custom prompt parented by a task.
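A minimal sketch of creating a custom prompt parented by a task; the Task here is built in memory purely for illustration, whereas a real task normally lives inside a saved Project:

from kiln_ai.datamodel import Prompt, Task

task = Task(name="summarizer", instruction="Summarize the provided article.")
prompt = Prompt(
    parent=task,
    name="two_sentence_summary",
    prompt="Summarize the article in exactly two sentences.",
)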

def relationship_name() -> str:
464        def relationship_name_method() -> str:
465            return relationship_name

Returns the relationship name used when parenting this model under a Task ("prompts" for Prompt).

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
457        def parent_class_method() -> Type[KilnParentModel]:
458            return cls

Returns the parent model type for this class (Task for Prompt).

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
122                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
123                        """We need to both initialize private attributes and call the user-defined model_post_init
124                        method.
125                        """
126                        init_private_attributes(self, context)
127                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class TaskOutputRating(kiln_ai.datamodel.basemodel.KilnBaseModel):
 49class TaskOutputRating(KilnBaseModel):
 50    """
 51    A rating for a task output, including an overall rating and ratings for each requirement.
 52
 53    Supports:
 54    - five_star: 1-5 star ratings
 55    - pass_fail: boolean pass/fail (1.0 = pass, 0.0 = fail)
 56    - pass_fail_critical: tri-state (1.0 = pass, 0.0 = fail, -1.0 = critical fail)
 57    """
 58
 59    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
 60    value: float | None = Field(
 61        description="The rating value. Interpretation depends on rating type:\n- five_star: 1-5 stars\n- pass_fail: 1.0 (pass) or 0.0 (fail)\n- pass_fail_critical: 1.0 (pass), 0.0 (fail), or -1.0 (critical fail)",
 62        default=None,
 63    )
 64    requirement_ratings: Dict[ID_TYPE, RequirementRating] = Field(
 65        default={},
 66        description="The ratings of the requirements of the task. The ID can be either a task_requirement_id or a named rating for an eval_output_score name (in format 'named::<name>').",
 67    )
 68
 69    # Previously we stored rating values as a dict of floats, but now we store them as RequirementRating objects.
 70    @model_validator(mode="before")
 71    def upgrade_old_format(cls, data: dict) -> dict:
 72        if not isinstance(data, dict):
 73            return data
 74
 75        # Check if we have the old format (dict of floats)
 76        req_ratings = data.get("requirement_ratings", {})
 77        if req_ratings and all(
 78            isinstance(v, (int, float)) for v in req_ratings.values()
 79        ):
 80            # Convert each float to a RequirementRating object
 81            # all ratings are five star at the point we used this format
 82            data["requirement_ratings"] = {
 83                k: {"value": v, "type": TaskOutputRatingType.five_star}
 84                for k, v in req_ratings.items()
 85            }
 86
 87        return data
 88
 89    # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
 90    def is_high_quality(self) -> bool:
 91        if self.value is None:
 92            return False
 93
 94        if self.type == TaskOutputRatingType.five_star:
 95            return self.value >= 4
 96        elif self.type == TaskOutputRatingType.pass_fail:
 97            return self.value == 1.0
 98        elif self.type == TaskOutputRatingType.pass_fail_critical:
 99            return self.value == 1.0
100        return False
101
102    @model_validator(mode="after")
103    def validate_rating(self) -> Self:
104        if self.type not in TaskOutputRatingType:
105            raise ValueError(f"Invalid rating type: {self.type}")
106
107        # Overall rating is optional
108        if self.value is not None:
109            self._validate_rating(self.type, self.value, "overall rating")
110
111        for req_id, req_rating in self.requirement_ratings.items():
112            self._validate_rating(
113                req_rating.type,
114                req_rating.value,
115                f"requirement rating for req ID: {req_id}",
116            )
117
118        return self
119
120    def _validate_rating(
121        self, type: TaskOutputRatingType, rating: float | None, rating_name: str
122    ) -> None:
123        if type == TaskOutputRatingType.five_star:
124            self._validate_five_star(rating, rating_name)
125        elif type == TaskOutputRatingType.pass_fail:
126            self._validate_pass_fail(rating, rating_name)
127        elif type == TaskOutputRatingType.pass_fail_critical:
128            self._validate_pass_fail_critical(rating, rating_name)
129
130    def _validate_five_star(self, rating: float | None, rating_name: str) -> None:
131        if rating is None or not isinstance(rating, float) or not rating.is_integer():
132            raise ValueError(
133                f"{rating_name.capitalize()} of type five_star must be an integer value (1-5)"
134            )
135        if rating < 1 or rating > 5:
136            raise ValueError(
137                f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
138            )
139
140    def _validate_pass_fail(self, rating: float | None, rating_name: str) -> None:
141        if rating is None or not isinstance(rating, float) or not rating.is_integer():
142            raise ValueError(
143                f"{rating_name.capitalize()} of type pass_fail must be an integer value (0 or 1)"
144            )
145        if rating not in [0, 1]:
146            raise ValueError(
147                f"{rating_name.capitalize()} of type pass_fail must be 0 (fail) or 1 (pass)"
148            )
149
150    def _validate_pass_fail_critical(
151        self, rating: float | None, rating_name: str
152    ) -> None:
153        if rating is None or not isinstance(rating, float) or not rating.is_integer():
154            raise ValueError(
155                f"{rating_name.capitalize()} of type pass_fail_critical must be an integer value (-1, 0, or 1)"
156            )
157        if rating not in [-1, 0, 1]:
158            raise ValueError(
159                f"{rating_name.capitalize()} of type pass_fail_critical must be -1 (critical fail), 0 (fail), or 1 (pass)"
160            )

A rating for a task output, including an overall rating and ratings for each requirement.

Supports:

  • five_star: 1-5 star ratings
  • pass_fail: boolean pass/fail (1.0 = pass, 0.0 = fail)
  • pass_fail_critical: tri-state (1.0 = pass, 0.0 = fail, -1.0 = critical fail)
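For example, a five-star overall rating with a single requirement rating might look like this (a minimal sketch; the requirement ID is hypothetical, and five_star values must be whole-number floats from 1 to 5):

from kiln_ai.datamodel import (
    RequirementRating,
    TaskOutputRating,
    TaskOutputRatingType,
)

rating = TaskOutputRating(
    type=TaskOutputRatingType.five_star,
    value=5.0,  # must be a whole-number value between 1 and 5
    requirement_ratings={
        "req_123": RequirementRating(  # hypothetical requirement ID
            value=4.0, type=TaskOutputRatingType.five_star
        ),
    },
)
assert rating.is_high_quality()  # five_star ratings of 4 or 5 count as high quality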
value: float | None
requirement_ratings: Dict[Optional[str], RequirementRating]
@model_validator(mode='before')
def upgrade_old_format(cls, data: dict) -> dict:
70    @model_validator(mode="before")
71    def upgrade_old_format(cls, data: dict) -> dict:
72        if not isinstance(data, dict):
73            return data
74
75        # Check if we have the old format (dict of floats)
76        req_ratings = data.get("requirement_ratings", {})
77        if req_ratings and all(
78            isinstance(v, (int, float)) for v in req_ratings.values()
79        ):
80            # Convert each float to a RequirementRating object
81            # all ratings are five star at the point we used this format
82            data["requirement_ratings"] = {
83                k: {"value": v, "type": TaskOutputRatingType.five_star}
84                for k, v in req_ratings.items()
85            }
86
87        return data
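A sketch of that upgrade path, assuming the inherited KilnBaseModel bookkeeping fields all have defaults; the requirement ID is hypothetical:

from kiln_ai.datamodel import TaskOutputRating, TaskOutputRatingType

# Legacy records stored requirement_ratings as plain floats.
legacy = {"requirement_ratings": {"req_123": 4.0}}
upgraded = TaskOutputRating.model_validate(legacy)
assert upgraded.requirement_ratings["req_123"].value == 4.0
assert upgraded.requirement_ratings["req_123"].type == TaskOutputRatingType.five_star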
def is_high_quality(self) -> bool:
 90    def is_high_quality(self) -> bool:
 91        if self.value is None:
 92            return False
 93
 94        if self.type == TaskOutputRatingType.five_star:
 95            return self.value >= 4
 96        elif self.type == TaskOutputRatingType.pass_fail:
 97            return self.value == 1.0
 98        elif self.type == TaskOutputRatingType.pass_fail_critical:
 99            return self.value == 1.0
100        return False
@model_validator(mode='after')
def validate_rating(self) -> Self:
102    @model_validator(mode="after")
103    def validate_rating(self) -> Self:
104        if self.type not in TaskOutputRatingType:
105            raise ValueError(f"Invalid rating type: {self.type}")
106
107        # Overall rating is optional
108        if self.value is not None:
109            self._validate_rating(self.type, self.value, "overall rating")
110
111        for req_id, req_rating in self.requirement_ratings.items():
112            self._validate_rating(
113                req_rating.type,
114                req_rating.value,
115                f"requirement rating for req ID: {req_id}",
116            )
117
118        return self
model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
122                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
123                        """We need to both initialize private attributes and call the user-defined model_post_init
124                        method.
125                        """
126                        init_private_attributes(self, context)
127                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class StructuredOutputMode(builtins.str, enum.Enum):
24class StructuredOutputMode(str, Enum):
25    """
26    Enumeration of supported structured output modes.
27
28    - json_schema: request json using API capabilities for json_schema
29    - function_calling: request json using API capabilities for function calling
30    - json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
31    - json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
32    - json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
33    - json_custom_instructions: The model should output JSON, but custom instructions are already included in the system prompt. Don't append additional JSON instructions.
34    - default: let the adapter decide (legacy, do not use for new use cases)
35    - unknown: used for cases where the structured output mode is not known (on old models where it wasn't saved). Should lookup best option at runtime.
36    """
37
38    default = "default"
39    json_schema = "json_schema"
40    function_calling_weak = "function_calling_weak"
41    function_calling = "function_calling"
42    json_mode = "json_mode"
43    json_instructions = "json_instructions"
44    json_instruction_and_object = "json_instruction_and_object"
45    json_custom_instructions = "json_custom_instructions"
46    unknown = "unknown"

Enumeration of supported structured output modes.

  • json_schema: request json using API capabilities for json_schema
  • function_calling: request json using API capabilities for function calling
  • json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
  • json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
  • json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
  • json_custom_instructions: The model should output JSON, but custom instructions are already included in the system prompt. Don't append additional JSON instructions.
  • default: let the adapter decide (legacy, do not use for new use cases)
  • unknown: used for cases where the structured output mode is not known (on old models where it wasn't saved). Should lookup best option at runtime.
default = <StructuredOutputMode.default: 'default'>
json_schema = <StructuredOutputMode.json_schema: 'json_schema'>
function_calling_weak = <StructuredOutputMode.function_calling_weak: 'function_calling_weak'>
function_calling = <StructuredOutputMode.function_calling: 'function_calling'>
json_mode = <StructuredOutputMode.json_mode: 'json_mode'>
json_instructions = <StructuredOutputMode.json_instructions: 'json_instructions'>
json_instruction_and_object = <StructuredOutputMode.json_instruction_and_object: 'json_instruction_and_object'>
json_custom_instructions = <StructuredOutputMode.json_custom_instructions: 'json_custom_instructions'>
unknown = <StructuredOutputMode.unknown: 'unknown'>
PromptId = typing.Annotated[str, AfterValidator(func=<function <lambda>>)]
class PromptGenerators(builtins.str, enum.Enum):
 9class PromptGenerators(str, Enum):
10    SIMPLE = "simple_prompt_builder"
11    MULTI_SHOT = "multi_shot_prompt_builder"
12    FEW_SHOT = "few_shot_prompt_builder"
13    REPAIRS = "repairs_prompt_builder"
14    SIMPLE_CHAIN_OF_THOUGHT = "simple_chain_of_thought_prompt_builder"
15    FEW_SHOT_CHAIN_OF_THOUGHT = "few_shot_chain_of_thought_prompt_builder"
16    MULTI_SHOT_CHAIN_OF_THOUGHT = "multi_shot_chain_of_thought_prompt_builder"
17    SHORT = "short_prompt_builder"

Enumeration of the built-in prompt generator identifiers.

SIMPLE = <PromptGenerators.SIMPLE: 'simple_prompt_builder'>
MULTI_SHOT = <PromptGenerators.MULTI_SHOT: 'multi_shot_prompt_builder'>
FEW_SHOT = <PromptGenerators.FEW_SHOT: 'few_shot_prompt_builder'>
REPAIRS = <PromptGenerators.REPAIRS: 'repairs_prompt_builder'>
SIMPLE_CHAIN_OF_THOUGHT = <PromptGenerators.SIMPLE_CHAIN_OF_THOUGHT: 'simple_chain_of_thought_prompt_builder'>
FEW_SHOT_CHAIN_OF_THOUGHT = <PromptGenerators.FEW_SHOT_CHAIN_OF_THOUGHT: 'few_shot_chain_of_thought_prompt_builder'>
MULTI_SHOT_CHAIN_OF_THOUGHT = <PromptGenerators.MULTI_SHOT_CHAIN_OF_THOUGHT: 'multi_shot_chain_of_thought_prompt_builder'>
SHORT = <PromptGenerators.SHORT: 'short_prompt_builder'>
prompt_generator_values = ['simple_prompt_builder', 'multi_shot_prompt_builder', 'few_shot_prompt_builder', 'repairs_prompt_builder', 'simple_chain_of_thought_prompt_builder', 'few_shot_chain_of_thought_prompt_builder', 'multi_shot_chain_of_thought_prompt_builder', 'short_prompt_builder']
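For illustration, a generator's string value can be used where a PromptId is expected (a minimal sketch, assuming generator names validate as PromptIds, which is what prompt_generator_values suggests):

from kiln_ai.datamodel import PromptGenerators

prompt_id = PromptGenerators.SIMPLE.value  # "simple_prompt_builder"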
class Usage(pydantic.main.BaseModel):
17class Usage(BaseModel):
18    input_tokens: int | None = Field(
19        default=None,
20        description="The number of input tokens used in the task run.",
21        ge=0,
22    )
23    output_tokens: int | None = Field(
24        default=None,
25        description="The number of output tokens used in the task run.",
26        ge=0,
27    )
28    total_tokens: int | None = Field(
29        default=None,
30        description="The total number of tokens used in the task run.",
31        ge=0,
32    )
33    cost: float | None = Field(
34        default=None,
35        description="The cost of the task run in US dollars, saved at runtime (prices can change over time).",
36        ge=0,
37    )

Token usage and cost for a task run: input, output, and total token counts, plus the cost in US dollars captured at runtime.
input_tokens: int | None
output_tokens: int | None
total_tokens: int | None
cost: float | None
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
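A minimal sketch of recording usage for a run; every field is optional and must be non-negative, and the numbers here are illustrative only:

from kiln_ai.datamodel import Usage

usage = Usage(
    input_tokens=1200,
    output_tokens=350,
    total_tokens=1550,
    cost=0.0042,  # US dollars, captured at runtime
)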