kiln_ai.datamodel

See our docs for details about our datamodel classes and hierarchy:

Developer docs: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html

User docs: https://docs.getkiln.ai/developers/kiln-datamodel

 1"""
 2See our docs for details about our datamodel classes and hierarchy:
 3
 4Developer docs: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html
 5
 6User docs: https://docs.getkiln.ai/developers/kiln-datamodel
 7"""

# This component uses "flat" imports so we don't expose too much internal structure in the API.
# For example, you can write `from kiln_ai.datamodel import Task, Project` instead of `from kiln_ai.datamodel.task import Task; from kiln_ai.datamodel.project import Project`.

from __future__ import annotations

from kiln_ai.datamodel import dataset_split, eval, strict_mode
from kiln_ai.datamodel.datamodel_enums import (
    FinetuneDataStrategy,
    FineTuneStatusType,
    Priority,
    StructuredOutputMode,
    TaskOutputRatingType,
)
from kiln_ai.datamodel.dataset_split import (
    DatasetSplit,
    DatasetSplitDefinition,
)
from kiln_ai.datamodel.finetune import (
    Finetune,
)
from kiln_ai.datamodel.project import Project
from kiln_ai.datamodel.prompt import BasePrompt, Prompt
from kiln_ai.datamodel.prompt_id import (
    PromptGenerators,
    PromptId,
    prompt_generator_values,
)
from kiln_ai.datamodel.task import Task, TaskRequirement
from kiln_ai.datamodel.task_output import (
    DataSource,
    DataSourceProperty,
    DataSourceType,
    RequirementRating,
    TaskOutput,
    TaskOutputRating,
)
from kiln_ai.datamodel.task_run import (
    TaskRun,
)

__all__ = [
    "strict_mode",
    "dataset_split",
    "eval",
    "Task",
    "Project",
    "TaskRun",
    "TaskOutput",
    "Priority",
    "DataSource",
    "DataSourceType",
    "DataSourceProperty",
    "Finetune",
    "FineTuneStatusType",
    "TaskOutputRatingType",
    "TaskRequirement",
    "DatasetSplitDefinition",
    "DatasetSplit",
    "RequirementRating",
    "BasePrompt",
    "Prompt",
    "TaskOutputRating",
    "StructuredOutputMode",
    "FinetuneDataStrategy",
    "PromptId",
    "PromptGenerators",
    "prompt_generator_values",
]
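
A minimal sketch of the flat import style described above (the project and task names are illustrative):

# Everything is importable from the top-level kiln_ai.datamodel package.
from kiln_ai.datamodel import Project, Task

project = Project(name="demo_project")
task = Task(
    name="summarize",
    instruction="Summarize the input text in one sentence.",
    parent=project,
)
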
class Task(kiln_ai.datamodel.basemodel.KilnParentedModel, kiln_ai.datamodel.basemodel.KilnParentModel):
class Task(
    KilnParentedModel,
    KilnParentModel,
    parent_of={
        "runs": TaskRun,
        "dataset_splits": DatasetSplit,
        "finetunes": Finetune,
        "prompts": Prompt,
        "evals": Eval,
        "run_configs": TaskRunConfig,
    },
):
    """
    Represents a specific task to be performed, with associated requirements and validation rules.

    Contains the task definition, requirements, input/output schemas, and maintains
    a collection of task runs.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
    )
    instruction: str = Field(
        min_length=1,
        description="The instructions for the task. Will be used in prompts/training/validation.",
    )
    requirements: List[TaskRequirement] = Field(default=[])
    output_json_schema: JsonObjectSchema | None = None
    input_json_schema: JsonObjectSchema | None = None
    thinking_instruction: str | None = Field(
        default=None,
        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting.",
    )

    def output_schema(self) -> Dict | None:
        if self.output_json_schema is None:
            return None
        return schema_from_json_str(self.output_json_schema)

    def input_schema(self) -> Dict | None:
        if self.input_json_schema is None:
            return None
        return schema_from_json_str(self.input_json_schema)

    # These wrappers help for typechecking. TODO P2: fix this in KilnParentModel
    def runs(self, readonly: bool = False) -> list[TaskRun]:
        return super().runs(readonly=readonly)  # type: ignore

    def dataset_splits(self, readonly: bool = False) -> list[DatasetSplit]:
        return super().dataset_splits(readonly=readonly)  # type: ignore

    def finetunes(self, readonly: bool = False) -> list[Finetune]:
        return super().finetunes(readonly=readonly)  # type: ignore

    def prompts(self, readonly: bool = False) -> list[Prompt]:
        return super().prompts(readonly=readonly)  # type: ignore

    def evals(self, readonly: bool = False) -> list[Eval]:
        return super().evals(readonly=readonly)  # type: ignore

    def run_configs(self, readonly: bool = False) -> list[TaskRunConfig]:
        return super().run_configs(readonly=readonly)  # type: ignore

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore

Represents a specific task to be performed, with associated requirements and validation rules.

Contains the task definition, requirements, input/output schemas, and maintains a collection of task runs.

name: str
description: str | None
instruction: str
requirements: List[TaskRequirement]
output_json_schema: JsonObjectSchema | None
input_json_schema: JsonObjectSchema | None
thinking_instruction: str | None
def output_schema(self) -> Optional[Dict]:
    def output_schema(self) -> Dict | None:
        if self.output_json_schema is None:
            return None
        return schema_from_json_str(self.output_json_schema)
def input_schema(self) -> Optional[Dict]:
    def input_schema(self) -> Dict | None:
        if self.input_json_schema is None:
            return None
        return schema_from_json_str(self.input_json_schema)
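
A short sketch of a structured task: `output_json_schema` is stored as a JSON string and parsed back to a dict by `output_schema()`. The schema content here is illustrative.

import json

from kiln_ai.datamodel import Task

task = Task(
    name="extract_people",
    instruction="Extract the people mentioned in the input text.",
    output_json_schema=json.dumps(
        {
            "type": "object",
            "properties": {"people": {"type": "array", "items": {"type": "string"}}},
            "required": ["people"],
        }
    ),
)
assert task.output_schema() is not None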
def runs(self, readonly=False) -> List[TaskRun]:
        def child_method(self, readonly: bool = False) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path, readonly=readonly)

def dataset_splits(self, readonly=False) -> List[DatasetSplit]:
        def child_method(self, readonly: bool = False) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path, readonly=readonly)

def finetunes(self, readonly=False) -> List[Finetune]:
        def child_method(self, readonly: bool = False) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path, readonly=readonly)

def prompts(self, readonly=False) -> List[Prompt]:
        def child_method(self, readonly: bool = False) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path, readonly=readonly)

def evals(self, readonly=False) -> List[kiln_ai.datamodel.eval.Eval]:
        def child_method(self, readonly: bool = False) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path, readonly=readonly)

def run_configs(self, readonly=False) -> List[kiln_ai.datamodel.task.TaskRunConfig]:
        def child_method(self, readonly: bool = False) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path, readonly=readonly)

def parent_project(self) -> Optional[Project]:
    def parent_project(self) -> Union["Project", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore
def relationship_name() -> str:
        def relationship_name_method() -> str:
            return relationship_name

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
        def parent_class_method() -> Type[KilnParentModel]:
            return cls

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
                        """We need to both initialize private attributes and call the user-defined model_post_init
                        method.
                        """
                        init_private_attributes(self, context)
                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class Project(kiln_ai.datamodel.basemodel.KilnParentModel):
class Project(KilnParentModel, parent_of={"tasks": Task}):
    """
    A collection of related tasks.

    Projects organize tasks into logical groups and provide high-level descriptions
    of the overall goals.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
    )

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def tasks(self) -> list[Task]:
        return super().tasks()  # type: ignore

A collection of related tasks.

Projects organize tasks into logical groups and provide high-level descriptions of the overall goals.

name: str
description: str | None
def tasks(self, readonly=False) -> List[Task]:
        def child_method(self, readonly: bool = False) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path, readonly=readonly)
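
A sketch of the parent/child flow (assumes `save_to_file()` from `KilnBaseModel` persists each object to disk, and that `tasks()` then loads children from under the project's path; the path is hypothetical):

project = Project(name="demo_project", path="/tmp/demo_project/project.kiln")
project.save_to_file()

task = Task(name="summarize", instruction="Summarize the input.", parent=project)
task.save_to_file()

print([t.name for t in project.tasks()])  # ["summarize"]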

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
                        """We need to both initialize private attributes and call the user-defined model_post_init
                        method.
                        """
                        init_private_attributes(self, context)
                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class TaskRun(kiln_ai.datamodel.basemodel.KilnParentedModel):
class TaskRun(KilnParentedModel):
    """
    Represents a single execution of a Task.

    Contains the input used, its source, the output produced, and optional
    repair information if the output needed correction.
    """

    input: str = Field(
        description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
    )
    input_source: DataSource | None = Field(
        default=None, description="The source of the input: human or synthetic."
    )

    output: TaskOutput = Field(description="The output of the task run.")
    repair_instructions: str | None = Field(
        default=None,
        description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.",
    )
    repaired_output: TaskOutput | None = Field(
        default=None,
        description="A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
    )
    intermediate_outputs: Dict[str, str] | None = Field(
        default=None,
        description="Intermediate outputs from the task run. Keys are the names of the intermediate output steps (cot=chain of thought, etc), values are the output data.",
    )
    tags: List[str] = Field(
        default=[],
        description="Tags for the task run. Tags are used to categorize task runs for filtering and reporting.",
    )

    def has_thinking_training_data(self) -> bool:
        """
        Does this run have thinking data that we can use to train a thinking model?
        """
        if self.intermediate_outputs is None:
            return False
        return (
            "chain_of_thought" in self.intermediate_outputs
            or "reasoning" in self.intermediate_outputs
        )

    # Workaround to return typed parent without importing Task
    def parent_task(self) -> Union["Task", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Task":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="after")
    def validate_input_format(self, info: ValidationInfo) -> Self:
        # Don't validate if loading from file (not new). Too slow.
        # We don't allow changing task schema, so this is redundant validation.
        # Note: we still validate if editing a loaded model
        if self.loading_from_file(info):
            # Consider loading an existing model as validated.
            self._last_validated_input = self.input
            return self

        # Don't validate if input has not changed. Too slow to run this every time.
        if (
            hasattr(self, "_last_validated_input")
            and self.input == self._last_validated_input
        ):
            return self

        task = self.parent_task()
        if task is None:
            # don't validate this relationship until we have a path or parent. Give them time to build it (but will catch it before saving)
            return self

        # validate input
        if task.input_json_schema is not None:
            try:
                validate_schema(json.loads(self.input), task.input_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Input is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Input does not match task input schema: {e}")
        self._last_validated_input = self.input
        return self

    @model_validator(mode="after")
    def validate_output_format(self, info: ValidationInfo) -> Self:
        # Don't validate if loading from file (not new). Too slow.
        # Note: we still validate if editing a loaded model's output.
        if self.loading_from_file(info):
            # Consider loading an existing model as validated.
            self._last_validated_output = self.output.output if self.output else None
            return self

        # Don't validate unless output has changed since last validation.
        # The validator is slow and costly, don't want it running when setting other fields.
        if (
            hasattr(self, "_last_validated_output")
            and self.output is not None
            and self.output.output == self._last_validated_output
        ):
            return self

        task = self.parent_task()
        if task is None:
            return self

        self.output.validate_output_format(task)
        self._last_validated_output = self.output.output if self.output else None
        return self

    @model_validator(mode="after")
    def validate_repaired_output(self) -> Self:
        if self.repaired_output is not None:
            if self.repaired_output.rating is not None:
                raise ValueError(
                    "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
                )
        if self.repair_instructions is None and self.repaired_output is not None:
            raise ValueError(
                "Repair instructions are required if providing a repaired output."
            )
        if self.repair_instructions is not None and self.repaired_output is None:
            raise ValueError(
                "A repaired output is required if providing repair instructions."
            )
        return self

    @model_validator(mode="after")
    def validate_input_source(self, info: ValidationInfo) -> Self:
        # On strict mode and not loaded from file, we validate input_source is not None.
        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
        if not strict_mode():
            return self
        if self.loaded_from_file(info):
            return self
        if self.input_source is None:
            raise ValueError("input_source is required when strict mode is enabled")
        return self

    @model_validator(mode="after")
    def validate_tags(self) -> Self:
        for tag in self.tags:
            if not tag:
                raise ValueError("Tags cannot be empty strings")
            if " " in tag:
                raise ValueError("Tags cannot contain spaces. Try underscores.")

        return self
Represents a single execution of a Task.

Contains the input used, its source, the output produced, and optional repair information if the output needed correction.

input: str
input_source: DataSource | None
output: TaskOutput
repair_instructions: str | None
repaired_output: TaskOutput | None
intermediate_outputs: Optional[Dict[str, str]]
tags: List[str]
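
A minimal sketch of recording a run (assumes strict mode is disabled, its default for library use, so `input_source` may be omitted; `task` is a hypothetical existing Task):

from kiln_ai.datamodel import TaskOutput, TaskRun

run = TaskRun(
    input="What is 2 + 2?",
    output=TaskOutput(output="4"),
    tags=["smoke_test"],
    parent=task,
)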
def has_thinking_training_data(self) -> bool:
    def has_thinking_training_data(self) -> bool:
        """
        Does this run have thinking data that we can use to train a thinking model?
        """
        if self.intermediate_outputs is None:
            return False
        return (
            "chain_of_thought" in self.intermediate_outputs
            or "reasoning" in self.intermediate_outputs
        )

Does this run have thinking data that we can use to train a thinking model?
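
For example, a run whose intermediate outputs contain a "reasoning" or "chain_of_thought" entry qualifies (strict mode assumed off, so sources may be omitted):

run = TaskRun(
    input="What is 2 + 2?",
    output=TaskOutput(output="4"),
    intermediate_outputs={"reasoning": "Two plus two equals four."},
)
assert run.has_thinking_training_data()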

def parent_task(self) -> Optional[Task]:
    def parent_task(self) -> Union["Task", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Task":
            return None
        return self.parent  # type: ignore
@model_validator(mode='after')
def validate_input_format(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:
    @model_validator(mode="after")
    def validate_input_format(self, info: ValidationInfo) -> Self:
        # Don't validate if loading from file (not new). Too slow.
        # We don't allow changing task schema, so this is redundant validation.
        # Note: we still validate if editing a loaded model
        if self.loading_from_file(info):
            # Consider loading an existing model as validated.
            self._last_validated_input = self.input
            return self

        # Don't validate if input has not changed. Too slow to run this every time.
        if (
            hasattr(self, "_last_validated_input")
            and self.input == self._last_validated_input
        ):
            return self

        task = self.parent_task()
        if task is None:
            # don't validate this relationship until we have a path or parent. Give them time to build it (but will catch it before saving)
            return self

        # validate input
        if task.input_json_schema is not None:
            try:
                validate_schema(json.loads(self.input), task.input_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Input is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Input does not match task input schema: {e}")
        self._last_validated_input = self.input
        return self
@model_validator(mode='after')
def validate_output_format(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:
    @model_validator(mode="after")
    def validate_output_format(self, info: ValidationInfo) -> Self:
        # Don't validate if loading from file (not new). Too slow.
        # Note: we still validate if editing a loaded model's output.
        if self.loading_from_file(info):
            # Consider loading an existing model as validated.
            self._last_validated_output = self.output.output if self.output else None
            return self

        # Don't validate unless output has changed since last validation.
        # The validator is slow and costly, don't want it running when setting other fields.
        if (
            hasattr(self, "_last_validated_output")
            and self.output is not None
            and self.output.output == self._last_validated_output
        ):
            return self

        task = self.parent_task()
        if task is None:
            return self

        self.output.validate_output_format(task)
        self._last_validated_output = self.output.output if self.output else None
        return self
@model_validator(mode='after')
def validate_repaired_output(self) -> Self:
    @model_validator(mode="after")
    def validate_repaired_output(self) -> Self:
        if self.repaired_output is not None:
            if self.repaired_output.rating is not None:
                raise ValueError(
                    "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
                )
        if self.repair_instructions is None and self.repaired_output is not None:
            raise ValueError(
                "Repair instructions are required if providing a repaired output."
            )
        if self.repair_instructions is not None and self.repaired_output is None:
            raise ValueError(
                "A repaired output is required if providing repair instructions."
            )
        return self
@model_validator(mode='after')
def validate_input_source(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:
    @model_validator(mode="after")
    def validate_input_source(self, info: ValidationInfo) -> Self:
        # On strict mode and not loaded from file, we validate input_source is not None.
        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
        if not strict_mode():
            return self
        if self.loaded_from_file(info):
            return self
        if self.input_source is None:
            raise ValueError("input_source is required when strict mode is enabled")
        return self
@model_validator(mode='after')
def validate_tags(self) -> Self:
    @model_validator(mode="after")
    def validate_tags(self) -> Self:
        for tag in self.tags:
            if not tag:
                raise ValueError("Tags cannot be empty strings")
            if " " in tag:
                raise ValueError("Tags cannot contain spaces. Try underscores.")

        return self
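
Because `model_config` enables `validate_assignment`, this check also runs when tags are reassigned; continuing the run sketch above:

run.tags = ["needs_review"]      # valid: non-empty, no spaces
try:
    run.tags = ["needs review"]  # invalid: contains a space
except ValueError as e:
    print(e)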
def relationship_name() -> str:
        def relationship_name_method() -> str:
            return relationship_name

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
        def parent_class_method() -> Type[KilnParentModel]:
            return cls

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
                        """We need to both initialize private attributes and call the user-defined model_post_init
                        method.
                        """
                        init_private_attributes(self, context)
                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class TaskOutput(kiln_ai.datamodel.basemodel.KilnBaseModel):
class TaskOutput(KilnBaseModel):
    """
    An output for a specific task run.

    Contains the actual output content, its source (human or synthetic),
    and optional rating information.
    """

    output: str = Field(
        description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
    )
    source: DataSource | None = Field(
        description="The source of the output: human or synthetic.",
        default=None,
    )
    rating: TaskOutputRating | None = Field(
        default=None, description="The rating of the output"
    )

    def validate_output_format(self, task: "Task") -> Self:
        # validate output
        if task.output_json_schema is not None:
            try:
                validate_schema(json.loads(self.output), task.output_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Output is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Output does not match task output schema: {e}")
        return self

    @model_validator(mode="after")
    def validate_output_source(self, info: ValidationInfo) -> Self:
        # On strict mode and not loaded from file, we validate output_source is not None.
        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
        if not strict_mode():
            return self
        if self.loaded_from_file(info):
            return self
        if self.source is None:
            raise ValueError("Output source is required when strict mode is enabled")
        return self

An output for a specific task run.

Contains the actual output content, its source (human or synthetic), and optional rating information.

output: str
source: DataSource | None
rating: TaskOutputRating | None
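
A sketch of an output with a human source; per the `DataSource` rules below, `created_by` is required for human sources (the name is illustrative):

output = TaskOutput(
    output="4",
    source=DataSource(
        type=DataSourceType.human,
        properties={"created_by": "alice"},
    ),
)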
def validate_output_format(self, task: Task) -> Self:
    def validate_output_format(self, task: "Task") -> Self:
        # validate output
        if task.output_json_schema is not None:
            try:
                validate_schema(json.loads(self.output), task.output_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Output is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Output does not match task output schema: {e}")
        return self
@model_validator(mode='after')
def validate_output_source(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:
    @model_validator(mode="after")
    def validate_output_source(self, info: ValidationInfo) -> Self:
        # On strict mode and not loaded from file, we validate output_source is not None.
        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
        if not strict_mode():
            return self
        if self.loaded_from_file(info):
            return self
        if self.source is None:
            raise ValueError("Output source is required when strict mode is enabled")
        return self
model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
                        """We need to both initialize private attributes and call the user-defined model_post_init
                        method.
                        """
                        init_private_attributes(self, context)
                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class Priority(enum.IntEnum):
class Priority(IntEnum):
    """Defines priority levels for tasks and requirements, where P0 is highest priority."""

    p0 = 0
    p1 = 1
    p2 = 2
    p3 = 3

Defines priority levels for tasks and requirements, where P0 is highest priority.

p0 = <Priority.p0: 0>
p1 = <Priority.p1: 1>
p2 = <Priority.p2: 2>
p3 = <Priority.p3: 3>
class DataSource(pydantic.main.BaseModel):
class DataSource(BaseModel):
    """
    Represents the origin of data, either human or synthetic, with associated properties.

    Properties vary based on the source type - for synthetic sources this includes
    model information, for human sources this includes creator information.
    """

    type: DataSourceType
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties describing the data source. For synthetic things like model. For human, the human's name.",
    )

    _data_source_properties = [
        DataSourceProperty(
            name="created_by",
            type=str,
            required_for=[DataSourceType.human],
            not_allowed_for=[DataSourceType.synthetic, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="model_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="model_provider",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="adapter_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            # Legacy field -- allow loading from old runs, but we shouldn't be setting it.
            name="prompt_builder_name",
            type=str,
            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            # The PromptId of the prompt. Can be a saved prompt, fine-tune, generator name, etc. See PromptId type for more details.
            name="prompt_id",
            type=str,
            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="file_name",
            type=str,
            required_for=[DataSourceType.file_import],
            not_allowed_for=[DataSourceType.human, DataSourceType.synthetic],
        ),
    ]

    @model_validator(mode="after")
    def validate_type(self) -> "DataSource":
        if self.type not in DataSourceType:
            raise ValueError(f"Invalid data source type: {self.type}")
        return self

    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        for prop in self._data_source_properties:
            # Check the property type is correct
            if prop.name in self.properties:
                if not isinstance(self.properties[prop.name], prop.type):
                    raise ValueError(
                        f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                    )
            # Check the property is required for the data source type
            if self.type in prop.required_for:
                if prop.name not in self.properties:
                    raise ValueError(
                        f"'{prop.name}' is required for {self.type} data source"
                    )
            # Check the property is not allowed for the data source type
            elif self.type in prop.not_allowed_for and prop.name in self.properties:
                raise ValueError(
                    f"'{prop.name}' is not allowed for {self.type} data source"
                )
        return self

    @model_validator(mode="after")
    def validate_no_empty_properties(self) -> Self:
        for prop, value in self.properties.items():
            if isinstance(value, str) and value == "":
                raise ValueError(
                    f"Property '{prop}' must be a non-empty string for {self.type} data source"
                )
        return self

Represents the origin of data, either human or synthetic, with associated properties.

Properties vary based on the source type - for synthetic sources this includes model information, for human sources this includes creator information.

type: DataSourceType
properties: Dict[str, str | int | float]
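
A sketch of a synthetic source. `model_name`, `model_provider`, and `adapter_name` are required for synthetic sources; the values here are illustrative:

source = DataSource(
    type=DataSourceType.synthetic,
    properties={
        "model_name": "gpt_4o",
        "model_provider": "openai",
        "adapter_name": "langchain_adapter",
    },
)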
@model_validator(mode='after')
def validate_type(self) -> DataSource:
    @model_validator(mode="after")
    def validate_type(self) -> "DataSource":
        if self.type not in DataSourceType:
            raise ValueError(f"Invalid data source type: {self.type}")
        return self
@model_validator(mode='after')
def validate_properties(self) -> DataSource:
    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        for prop in self._data_source_properties:
            # Check the property type is correct
            if prop.name in self.properties:
                if not isinstance(self.properties[prop.name], prop.type):
                    raise ValueError(
                        f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                    )
            # Check the property is required for the data source type
            if self.type in prop.required_for:
                if prop.name not in self.properties:
                    raise ValueError(
                        f"'{prop.name}' is required for {self.type} data source"
                    )
            # Check the property is not allowed for the data source type
            elif self.type in prop.not_allowed_for and prop.name in self.properties:
                raise ValueError(
                    f"'{prop.name}' is not allowed for {self.type} data source"
                )
        return self
@model_validator(mode='after')
def validate_no_empty_properties(self) -> Self:
    @model_validator(mode="after")
    def validate_no_empty_properties(self) -> Self:
        for prop, value in self.properties.items():
            if isinstance(value, str) and value == "":
                raise ValueError(
                    f"Property '{prop}' must be a non-empty string for {self.type} data source"
                )
        return self
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """This function is meant to behave like a BaseModel method to initialise private attributes.

    It takes context as an argument since that's what pydantic-core passes when calling it.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is None:
        pydantic_private = {}
        for name, private_attr in self.__private_attributes__.items():
            default = private_attr.get_default()
            if default is not PydanticUndefined:
                pydantic_private[name] = default
        object_setattr(self, '__pydantic_private__', pydantic_private)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args:
    self: The BaseModel instance.
    context: The context.

class DataSourceType(builtins.str, enum.Enum):
class DataSourceType(str, Enum):
    """
    The source type of a piece of data.

    Human: a human created the data
    Synthetic: a model created the data
    """

    human = "human"
    synthetic = "synthetic"
    file_import = "file_import"

The source type of a piece of data.

Human: a human created the data
Synthetic: a model created the data

human = <DataSourceType.human: 'human'>
synthetic = <DataSourceType.synthetic: 'synthetic'>
file_import = <DataSourceType.file_import: 'file_import'>
class DataSourceProperty(pydantic.main.BaseModel):
class DataSourceProperty(BaseModel):
    """
    Defines a property that can be associated with a data source.

    Includes validation rules for when properties are required or not allowed
    based on the data source type.
    """

    name: str
    type: Type[Union[str, int, float]]
    required_for: List[DataSourceType] = []
    not_allowed_for: List[DataSourceType] = []

Defines a property that can be associated with a data source.

Includes validation rules for when properties are required or not allowed based on the data source type.

name: str
type: Type[Union[str, int, float]]
required_for: List[DataSourceType]
not_allowed_for: List[DataSourceType]
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class Finetune(kiln_ai.datamodel.basemodel.KilnParentedModel):
class Finetune(KilnParentedModel):
    """
    The Kiln fine-tune datamodel.

    Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the fine-tune for you and your team. Not used in training.",
    )
    structured_output_mode: StructuredOutputMode | None = Field(
        default=None,
        description="The mode to use to train the model for structured output, if it was trained with structured output. Will determine how we call the tuned model, so we call with the matching mode.",
    )
    provider: str = Field(
        description="The provider to use for the fine-tune (e.g. 'openai')."
    )
    base_model_id: str = Field(
        description="The id of the base model to use for the fine-tune. This string relates to the provider's IDs for their own models, not Kiln IDs."
    )
    provider_id: str | None = Field(
        default=None,
        description="The ID of the fine-tune job on the provider's side. May not be the same as the fine_tune_model_id.",
    )
    fine_tune_model_id: str | None = Field(
        default=None,
        description="The ID of the fine-tuned model on the provider's side. May not be the same as the provider_id.",
    )
    dataset_split_id: str = Field(
        description="The ID of the dataset split to use for this fine-tune.",
    )
    train_split_name: str = Field(
        default="train",
        description="The name of the training split to use for this fine-tune.",
    )
    validation_split_name: str | None = Field(
        default=None,
        description="The name of the validation split to use for this fine-tune. Optional.",
    )
    parameters: dict[str, str | int | float | bool] = Field(
        default={},
        description="The parameters to use for this fine-tune. These are provider-specific.",
    )
    # These two fields are saved exactly as used for training. Even if they map exactly to a custom prompt or generator, those can change, so we want to keep a record of the training prompt.
    system_message: str = Field(
        description="The system message to use for this fine-tune.",
    )
    thinking_instructions: str | None = Field(
        default=None,
        description="The thinking instructions to use for this fine-tune. Only used when data_strategy is final_and_intermediate.",
    )
    latest_status: FineTuneStatusType = Field(
        default=FineTuneStatusType.unknown,
        description="The latest known status of this fine-tune. Not updated in real time.",
    )
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties of the fine-tune. Different providers may use different properties.",
    )
    data_strategy: FinetuneDataStrategy = Field(
        default=FinetuneDataStrategy.final_only,
        description="The strategy to use for training the model. 'final_only' will only train on the final response. 'final_and_intermediate' will train on the final response and intermediate outputs (chain of thought or reasoning).",
    )

    # Workaround to return typed parent without importing Task
    def parent_task(self) -> Union["Task", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Task":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="after")
    def validate_thinking_instructions(self) -> Self:
        if (
            self.thinking_instructions is not None
            and self.data_strategy != FinetuneDataStrategy.final_and_intermediate
        ):
            raise ValueError(
                "Thinking instructions can only be used when data_strategy is final_and_intermediate"
            )
        if (
            self.thinking_instructions is None
            and self.data_strategy == FinetuneDataStrategy.final_and_intermediate
        ):
            raise ValueError(
                "Thinking instructions are required when data_strategy is final_and_intermediate"
            )
        return self

The Kiln fine-tune datamodel.

Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.

name: str
description: str | None
structured_output_mode: StructuredOutputMode | None
provider: str
base_model_id: str
provider_id: str | None
fine_tune_model_id: str | None
dataset_split_id: str
train_split_name: str
validation_split_name: str | None
parameters: dict[str, str | int | float | bool]
system_message: str
thinking_instructions: str | None
latest_status: FineTuneStatusType
properties: Dict[str, str | int | float]
data_strategy: FinetuneDataStrategy
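
A minimal record sketch (the IDs and base model string are illustrative; in practice these records are created by Kiln's fine-tuning flow rather than by hand):

ft = Finetune(
    name="tune_v1",
    provider="openai",
    base_model_id="gpt-4o-mini-2024-07-18",
    dataset_split_id="split_123",
    system_message="You are a concise summarizer.",
)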
def parent_task(self) -> Optional[Task]:
    def parent_task(self) -> Union["Task", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Task":
            return None
        return self.parent  # type: ignore
@model_validator(mode='after')
def validate_thinking_instructions(self) -> Self:
    @model_validator(mode="after")
    def validate_thinking_instructions(self) -> Self:
        if (
            self.thinking_instructions is not None
            and self.data_strategy != FinetuneDataStrategy.final_and_intermediate
        ):
            raise ValueError(
                "Thinking instructions can only be used when data_strategy is final_and_intermediate"
            )
        if (
            self.thinking_instructions is None
            and self.data_strategy == FinetuneDataStrategy.final_and_intermediate
        ):
            raise ValueError(
                "Thinking instructions are required when data_strategy is final_and_intermediate"
            )
        return self
def relationship_name() -> str:
        def relationship_name_method() -> str:
            return relationship_name

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
        def parent_class_method() -> Type[KilnParentModel]:
            return cls

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
                        """We need to both initialize private attributes and call the user-defined model_post_init
                        method.
                        """
                        init_private_attributes(self, context)
                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class FineTuneStatusType(builtins.str, enum.Enum):
class FineTuneStatusType(str, Enum):
    """
    The status type of a fine-tune (running, completed, failed, etc).
    """

    unknown = "unknown"  # server error
    pending = "pending"
    running = "running"
    completed = "completed"
    failed = "failed"

The status type of a fine-tune (running, completed, failed, etc).

unknown = <FineTuneStatusType.unknown: 'unknown'>
pending = <FineTuneStatusType.pending: 'pending'>
running = <FineTuneStatusType.running: 'running'>
completed = <FineTuneStatusType.completed: 'completed'>
failed = <FineTuneStatusType.failed: 'failed'>
class TaskOutputRatingType(builtins.str, enum.Enum):
class TaskOutputRatingType(str, Enum):
    """Defines the types of rating systems available for task outputs."""

    five_star = "five_star"
    pass_fail = "pass_fail"
    pass_fail_critical = "pass_fail_critical"
    custom = "custom"

Defines the types of rating systems available for task outputs.

five_star = <TaskOutputRatingType.five_star: 'five_star'>
pass_fail = <TaskOutputRatingType.pass_fail: 'pass_fail'>
pass_fail_critical = <TaskOutputRatingType.pass_fail_critical: 'pass_fail_critical'>
custom = <TaskOutputRatingType.custom: 'custom'>
class TaskRequirement(pydantic.main.BaseModel):
class TaskRequirement(BaseModel):
    """
    Defines a specific requirement that should be met by task outputs.

    Includes an identifier, name, description, instruction for meeting the requirement,
    priority level, and rating type (five_star, pass_fail, pass_fail_critical, custom).
    """

    id: ID_TYPE = ID_FIELD
    name: str = SHORT_NAME_FIELD
    description: str | None = Field(default=None)
    instruction: str = Field(min_length=1)
    priority: Priority = Field(default=Priority.p2)
    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)

Defines a specific requirement that should be met by task outputs.

Includes an identifier, name, description, instruction for meeting the requirement, priority level, and rating type (five_star, pass_fail, pass_fail_critical, custom).

id: Optional[str]
name: str
description: str | None
instruction: str
priority: Priority
type: TaskOutputRatingType
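
A sketch of a requirement rated pass/fail:

requirement = TaskRequirement(
    name="concise",
    instruction="The answer must be a single sentence.",
    priority=Priority.p1,
    type=TaskOutputRatingType.pass_fail,
)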
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class DatasetSplitDefinition(pydantic.main.BaseModel):
class DatasetSplitDefinition(BaseModel):
    """
    A definition of a split in a dataset.

    Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the dataset for you and your team. Not used in training.",
    )
    percentage: float = Field(
        ge=0.0,
        le=1.0,
        description="The percentage of the dataset that this split represents (between 0 and 1).",
    )

A definition of a split in a dataset.

Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)

name: str
description: str | None
percentage: float
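
For example, an 80/20 train/test pair (DatasetSplit validates that the percentages sum to 1.0):

splits = [
    DatasetSplitDefinition(name="train", percentage=0.8),
    DatasetSplitDefinition(name="test", percentage=0.2),
]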
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class DatasetSplit(kiln_ai.datamodel.basemodel.KilnParentedModel):
class DatasetSplit(KilnParentedModel):
    """
    A collection of task runs, with optional splits (train, test, validation).

    Used to freeze a dataset into train/test/validation splits for repeatable fine-tuning or other tasks.

    Maintains a list of IDs for each split, to avoid data duplication.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the dataset for you and your team. Not used in training.",
    )
    splits: list[DatasetSplitDefinition] = Field(
        default_factory=list,
        description="The splits in the dataset.",
    )
    split_contents: dict[str, list[str]] = Field(
        description="The contents of each split in the dataset. The key is the split name, and the value is a list of task run IDs.",
    )
    filter: DatasetFilterId | None = Field(
        default=None,
        description="The filter used to build the dataset.",
    )

    @model_validator(mode="after")
    def validate_split_percentages(self) -> "DatasetSplit":
        total = sum(split.percentage for split in self.splits)
        if not math.isclose(total, 1.0, rel_tol=1e-9):
            raise ValueError(f"The sum of split percentages must be 1.0 (got {total})")
        return self

    @classmethod
    def from_task(
        cls,
        name: str,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter_id: DatasetFilterId = "all",
        description: str | None = None,
    ):
        """
        Build a dataset split from a task.
        """
        filter = dataset_filter_from_id(filter_id)
        split_contents = cls.build_split_contents(task, splits, filter)
        return cls(
            parent=task,
            name=name,
            description=description,
            splits=splits,
            split_contents=split_contents,
            filter=filter_id,
        )

    @classmethod
    def build_split_contents(
        cls,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter: DatasetFilter,
    ) -> dict[str, list[str]]:
        valid_ids = []
        for task_run in task.runs():
            if filter(task_run):
                valid_ids.append(task_run.id)

        # Shuffle and split by split percentage
        random.shuffle(valid_ids)
        split_contents = {}
        start_idx = 0
        remaining_items = len(valid_ids)

        # Handle all splits except the last one
        for split in splits[:-1]:
            split_size = round(len(valid_ids) * split.percentage)
            split_contents[split.name] = valid_ids[start_idx : start_idx + split_size]
            start_idx += split_size
            remaining_items -= split_size

        # Last split gets all remaining items (for rounding)
        if splits:
            split_contents[splits[-1].name] = valid_ids[start_idx:]

        return split_contents

    def parent_task(self) -> "Task | None":
        # inline import to avoid circular import
        from kiln_ai.datamodel import Task

        if not isinstance(self.parent, Task):
            return None
        return self.parent

    def missing_count(self) -> int:
        """
        Returns:
            int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
        """
        parent = self.parent_task()
        if parent is None:
            raise ValueError("DatasetSplit has no parent task")

        runs = parent.runs(readonly=True)
        all_ids = set(run.id for run in runs)
        all_ids_in_splits = set()
        for ids in self.split_contents.values():
            all_ids_in_splits.update(ids)
        missing = all_ids_in_splits - all_ids
        return len(missing)

A collection of task runs, with optional splits (train, test, validation).

Used to freeze a dataset into train/test/validation splits for repeatable fine-tuning or other tasks.

Maintains a list of IDs for each split, to avoid data duplication.

name: str
description: str | None
splits: list[DatasetSplitDefinition]
split_contents: dict[str, list[str]]
filter: DatasetFilterId | None
@model_validator(mode='after')
def validate_split_percentages(self) -> DatasetSplit:
    @model_validator(mode="after")
    def validate_split_percentages(self) -> "DatasetSplit":
        total = sum(split.percentage for split in self.splits)
        if not math.isclose(total, 1.0, rel_tol=1e-9):
            raise ValueError(f"The sum of split percentages must be 1.0 (got {total})")
        return self
@classmethod
def from_task(cls, name: str, task: Task, splits: list[DatasetSplitDefinition], filter_id: DatasetFilterId = 'all', description: str | None = None):
    @classmethod
    def from_task(
        cls,
        name: str,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter_id: DatasetFilterId = "all",
        description: str | None = None,
    ):
        """
        Build a dataset split from a task.
        """
        filter = dataset_filter_from_id(filter_id)
        split_contents = cls.build_split_contents(task, splits, filter)
        return cls(
            parent=task,
            name=name,
            description=description,
            splits=splits,
            split_contents=split_contents,
            filter=filter_id,
        )

Build a dataset split from a task.
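
A usage sketch (assumes `task` is a saved Task with persisted runs, and `splits` as defined above; `filter_id` defaults to "all"):

dataset = DatasetSplit.from_task(
    name="frozen_v1",
    task=task,
    splits=splits,
)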

@classmethod
def build_split_contents( cls, task: Task, splits: list[DatasetSplitDefinition], filter: kiln_ai.datamodel.dataset_filters.DatasetFilter) -> dict[str, list[str]]:
117    @classmethod
118    def build_split_contents(
119        cls,
120        task: "Task",
121        splits: list[DatasetSplitDefinition],
122        filter: DatasetFilter,
123    ) -> dict[str, list[str]]:
124        valid_ids = []
125        for task_run in task.runs():
126            if filter(task_run):
127                valid_ids.append(task_run.id)
128
129        # Shuffle and split by split percentage
130        random.shuffle(valid_ids)
131        split_contents = {}
132        start_idx = 0
133        remaining_items = len(valid_ids)
134
135        # Handle all splits except the last one
136        for split in splits[:-1]:
137            split_size = round(len(valid_ids) * split.percentage)
138            split_contents[split.name] = valid_ids[start_idx : start_idx + split_size]
139            start_idx += split_size
140            remaining_items -= split_size
141
142        # Last split gets all remaining items (for rounding)
143        if splits:
144            split_contents[splits[-1].name] = valid_ids[start_idx:]
145
146        return split_contents
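
Note the rounding behavior: each split except the last is sized with round(), and the last split absorbs the remainder, so no IDs are lost. A worked sketch with hypothetical counts:

# 10 valid run IDs, splits of 0.8 / 0.1 / 0.1:
#   "train": round(10 * 0.8) = 8 IDs
#   "val":   round(10 * 0.1) = 1 ID
#   "test":  the remaining 10 - 8 - 1 = 1 ID (last split gets the remainder)
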
def parent_task(self) -> Task | None:
148    def parent_task(self) -> "Task | None":
149        # inline import to avoid circular import
150        from kiln_ai.datamodel import Task
151
152        if not isinstance(self.parent, Task):
153            return None
154        return self.parent
def missing_count(self) -> int:
156    def missing_count(self) -> int:
157        """
158        Returns:
159            int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
160        """
161        parent = self.parent_task()
162        if parent is None:
163            raise ValueError("DatasetSplit has no parent task")
164
165        runs = parent.runs(readonly=True)
166        all_ids = set(run.id for run in runs)
167        all_ids_in_splits = set()
168        for ids in self.split_contents.values():
169            all_ids_in_splits.update(ids)
170        missing = all_ids_in_splits - all_ids
171        return len(missing)

Returns: int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
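
For example, if runs are deleted from the task after the split was frozen, their IDs remain in split_contents and are counted here (a sketch, assuming `split` is a DatasetSplit with a parent task):

dangling = split.missing_count()
if dangling > 0:
    print(f"{dangling} run(s) referenced by this split no longer exist")
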

def relationship_name() -> str:
438        def relationship_name_method() -> str:
439            return relationship_name

Returns the name of the relationship this model has to its parent Task; for DatasetSplit this is "dataset_splits" (see Task's parent_of mapping).

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
431        def parent_class_method() -> Type[KilnParentModel]:
432            return cls

Returns the parent model type for this relationship; for DatasetSplit that is Task.

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
122                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
123                        """We need to both initialize private attributes and call the user-defined model_post_init
124                        method.
125                        """
126                        init_private_attributes(self, context)
127                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class RequirementRating(pydantic.main.BaseModel):
21class RequirementRating(BaseModel):
22    """Rating for a specific requirement within a task output."""
23
24    value: float = Field(
25        description="The rating value. Interpretation depends on rating type"
26    )
27    type: TaskOutputRatingType = Field(description="The type of rating")

Rating for a specific requirement within a task output.

value: float
type: TaskOutputRatingType
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
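
A minimal construction sketch; value semantics depend on the rating type, and range checks are enforced by the enclosing TaskOutputRating validator rather than here:

from kiln_ai.datamodel import RequirementRating, TaskOutputRatingType

req_rating = RequirementRating(value=4.0, type=TaskOutputRatingType.five_star)
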

class BasePrompt(pydantic.main.BaseModel):
 7class BasePrompt(BaseModel):
 8    """
 9    A prompt for a task. This is the basic data storage format which can be used throughout a project.
10
11    The "Prompt" model name is reserved for the custom prompts parented by a task.
12    """
13
14    name: str = NAME_FIELD
15    description: str | None = Field(
16        default=None,
17        description="A more detailed description of the prompt.",
18    )
19    generator_id: str | None = Field(
20        default=None,
21        description="The id of the generator that created this prompt.",
22    )
23    prompt: str = Field(
24        description="The prompt for the task.",
25        min_length=1,
26    )
27    chain_of_thought_instructions: str | None = Field(
28        default=None,
29        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting. COT will not be used unless this is provided.",
30    )

A prompt for a task. This is the basic data storage format which can be used throughout a project.

The "Prompt" model name is reserved for the custom prompts parented by a task.

name: str
description: str | None
generator_id: str | None
prompt: str
chain_of_thought_instructions: str | None
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
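
A minimal sketch of constructing a standalone BasePrompt (only name and prompt are required; chain of thought is opt-in via chain_of_thought_instructions):

from kiln_ai.datamodel import BasePrompt

base = BasePrompt(
    name="summarize_v1",
    prompt="Summarize the following text in two sentences.",
    # chain_of_thought_instructions defaults to None: COT is only used when provided
)
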

class Prompt(kiln_ai.datamodel.basemodel.KilnParentedModel, kiln_ai.datamodel.BasePrompt):
33class Prompt(KilnParentedModel, BasePrompt):
34    """
35    A prompt for a task. This is the custom prompt parented by a task.
36    """
37
38    pass

A prompt for a task. This is the custom prompt parented by a task.
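
A hedged sketch of creating a custom prompt under a task — KilnParentedModel accepts a parent at construction, as from_task above demonstrates for DatasetSplit (assumes `task` is an existing Task):

from kiln_ai.datamodel import Prompt

custom = Prompt(
    parent=task,  # hypothetical: an existing Task
    name="my_custom_prompt",
    prompt="You are a concise technical assistant.",
)
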

def relationship_name() -> str:
438        def relationship_name_method() -> str:
439            return relationship_name

Returns the name of the relationship this model has to its parent Task; for Prompt this is "prompts" (see Task's parent_of mapping).

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
431        def parent_class_method() -> Type[KilnParentModel]:
432            return cls

Returns the parent model type for this relationship; for Prompt that is Task.

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
122                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
123                        """We need to both initialize private attributes and call the user-defined model_post_init
124                        method.
125                        """
126                        init_private_attributes(self, context)
127                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class TaskOutputRating(kiln_ai.datamodel.basemodel.KilnBaseModel):
 51class TaskOutputRating(KilnBaseModel):
 52    """
 53    A rating for a task output, including an overall rating and ratings for each requirement.
 54
 55    Supports:
 56    - five_star: 1-5 star ratings
 57    - pass_fail: boolean pass/fail (1.0 = pass, 0.0 = fail)
 58    - pass_fail_critical: tri-state (1.0 = pass, 0.0 = fail, -1.0 = critical fail)
 59    """
 60
 61    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
 62    value: float | None = Field(
 63        description="The rating value. Interpretation depends on rating type:\n- five_star: 1-5 stars\n- pass_fail: 1.0 (pass) or 0.0 (fail)\n- pass_fail_critical: 1.0 (pass), 0.0 (fail), or -1.0 (critical fail)",
 64        default=None,
 65    )
 66    requirement_ratings: Dict[ID_TYPE, RequirementRating] = Field(
 67        default={},
 68        description="The ratings of the requirements of the task.",
 69    )
 70
 71    # Previously we stored rating values as a dict of floats, but now we store them as RequirementRating objects.
 72    @model_validator(mode="before")
 73    def upgrade_old_format(cls, data: dict) -> dict:
 74        if not isinstance(data, dict):
 75            return data
 76
 77        # Check if we have the old format (dict of floats)
 78        req_ratings = data.get("requirement_ratings", {})
 79        if req_ratings and all(
 80            isinstance(v, (int, float)) for v in req_ratings.values()
 81        ):
 82            # Convert each float to a RequirementRating object
 83            # all ratings are five star at the point we used this format
 84            data["requirement_ratings"] = {
 85                k: {"value": v, "type": TaskOutputRatingType.five_star}
 86                for k, v in req_ratings.items()
 87            }
 88
 89        return data
 90
 91    # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
 92    def is_high_quality(self) -> bool:
 93        if self.value is None:
 94            return False
 95
 96        if self.type == TaskOutputRatingType.five_star:
 97            return self.value >= 4
 98        elif self.type == TaskOutputRatingType.pass_fail:
 99            return self.value == 1.0
100        elif self.type == TaskOutputRatingType.pass_fail_critical:
101            return self.value == 1.0
102        return False
103
104    @model_validator(mode="after")
105    def validate_rating(self) -> Self:
106        if self.type not in TaskOutputRatingType:
107            raise ValueError(f"Invalid rating type: {self.type}")
108
109        # Overall rating is optional
110        if self.value is not None:
111            self._validate_rating(self.type, self.value, "overall rating")
112
113        for req_id, req_rating in self.requirement_ratings.items():
114            self._validate_rating(
115                req_rating.type,
116                req_rating.value,
117                f"requirement rating for req ID: {req_id}",
118            )
119
120        return self
121
122    def _validate_rating(
123        self, type: TaskOutputRatingType, rating: float | None, rating_name: str
124    ) -> None:
125        if type == TaskOutputRatingType.five_star:
126            self._validate_five_star(rating, rating_name)
127        elif type == TaskOutputRatingType.pass_fail:
128            self._validate_pass_fail(rating, rating_name)
129        elif type == TaskOutputRatingType.pass_fail_critical:
130            self._validate_pass_fail_critical(rating, rating_name)
131
132    def _validate_five_star(self, rating: float | None, rating_name: str) -> None:
133        if rating is None or not isinstance(rating, float) or not rating.is_integer():
134            raise ValueError(
135                f"{rating_name.capitalize()} of type five_star must be an integer value (1-5)"
136            )
137        if rating < 1 or rating > 5:
138            raise ValueError(
139                f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
140            )
141
142    def _validate_pass_fail(self, rating: float | None, rating_name: str) -> None:
143        if rating is None or not isinstance(rating, float) or not rating.is_integer():
144            raise ValueError(
145                f"{rating_name.capitalize()} of type pass_fail must be an integer value (0 or 1)"
146            )
147        if rating not in [0, 1]:
148            raise ValueError(
149                f"{rating_name.capitalize()} of type pass_fail must be 0 (fail) or 1 (pass)"
150            )
151
152    def _validate_pass_fail_critical(
153        self, rating: float | None, rating_name: str
154    ) -> None:
155        if rating is None or not isinstance(rating, float) or not rating.is_integer():
156            raise ValueError(
157                f"{rating_name.capitalize()} of type pass_fail_critical must be an integer value (-1, 0, or 1)"
158            )
159        if rating not in [-1, 0, 1]:
160            raise ValueError(
161                f"{rating_name.capitalize()} of type pass_fail_critical must be -1 (critical fail), 0 (fail), or 1 (pass)"
162            )

A rating for a task output, including an overall rating and ratings for each requirement.

Supports:

  • five_star: 1-5 star ratings
  • pass_fail: boolean pass/fail (1.0 = pass, 0.0 = fail)
  • pass_fail_critical: tri-state (1.0 = pass, 0.0 = fail, -1.0 = critical fail)
type: TaskOutputRatingType
value: float | None
requirement_ratings: Dict[Optional[str], RequirementRating]
@model_validator(mode='before')
def upgrade_old_format(cls, data: dict) -> dict:
72    @model_validator(mode="before")
73    def upgrade_old_format(cls, data: dict) -> dict:
74        if not isinstance(data, dict):
75            return data
76
77        # Check if we have the old format (dict of floats)
78        req_ratings = data.get("requirement_ratings", {})
79        if req_ratings and all(
80            isinstance(v, (int, float)) for v in req_ratings.values()
81        ):
82            # Convert each float to a RequirementRating object
83            # all ratings are five star at the point we used this format
84            data["requirement_ratings"] = {
85                k: {"value": v, "type": TaskOutputRatingType.five_star}
86                for k, v in req_ratings.items()
87            }
88
89        return data
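
A sketch of the upgrade in action: passing the legacy dict-of-floats form yields RequirementRating objects, all typed five_star:

from kiln_ai.datamodel import TaskOutputRating, TaskOutputRatingType

rating = TaskOutputRating(
    type=TaskOutputRatingType.five_star,
    value=5.0,
    requirement_ratings={"req_1": 4.0},  # legacy float form, upgraded on validation
)
assert rating.requirement_ratings["req_1"].value == 4.0
assert rating.requirement_ratings["req_1"].type == TaskOutputRatingType.five_star
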
def is_high_quality(self) -> bool:
 92    def is_high_quality(self) -> bool:
 93        if self.value is None:
 94            return False
 95
 96        if self.type == TaskOutputRatingType.five_star:
 97            return self.value >= 4
 98        elif self.type == TaskOutputRatingType.pass_fail:
 99            return self.value == 1.0
100        elif self.type == TaskOutputRatingType.pass_fail_critical:
101            return self.value == 1.0
102        return False
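
So an overall rating of 4 or 5 stars counts as high quality, while pass/fail types require an exact pass. A quick sketch:

from kiln_ai.datamodel import TaskOutputRating, TaskOutputRatingType

assert TaskOutputRating(type=TaskOutputRatingType.five_star, value=4.0).is_high_quality()
assert not TaskOutputRating(type=TaskOutputRatingType.pass_fail, value=0.0).is_high_quality()
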
@model_validator(mode='after')
def validate_rating(self) -> Self:
104    @model_validator(mode="after")
105    def validate_rating(self) -> Self:
106        if self.type not in TaskOutputRatingType:
107            raise ValueError(f"Invalid rating type: {self.type}")
108
109        # Overall rating is optional
110        if self.value is not None:
111            self._validate_rating(self.type, self.value, "overall rating")
112
113        for req_id, req_rating in self.requirement_ratings.items():
114            self._validate_rating(
115                req_rating.type,
116                req_rating.value,
117                f"requirement rating for req ID: {req_id}",
118            )
119
120        return self
model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
122                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
123                        """We need to both initialize private attributes and call the user-defined model_post_init
124                        method.
125                        """
126                        init_private_attributes(self, context)
127                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class StructuredOutputMode(builtins.str, enum.Enum):
24class StructuredOutputMode(str, Enum):
25    """
26    Enumeration of supported structured output modes.
27
28    - default: let the adapter decide
29    - json_schema: request json using API capabilities for json_schema
30    - function_calling: request json using API capabilities for function calling
31    - json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
32    - json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
33    - json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
34    - json_custom_instructions: The model should output JSON, but custom instructions are already included in the system prompt. Don't append additional JSON instructions.
35    """
36
37    default = "default"
38    json_schema = "json_schema"
39    function_calling_weak = "function_calling_weak"
40    function_calling = "function_calling"
41    json_mode = "json_mode"
42    json_instructions = "json_instructions"
43    json_instruction_and_object = "json_instruction_and_object"
44    json_custom_instructions = "json_custom_instructions"

Enumeration of supported structured output modes.

  • default: let the adapter decide
  • json_schema: request json using API capabilities for json_schema
  • function_calling: request json using API capabilities for function calling
  • json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
  • json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
  • json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
  • json_custom_instructions: The model should output JSON, but custom instructions are already included in the system prompt. Don't append additional JSON instructions.
default = <StructuredOutputMode.default: 'default'>
json_schema = <StructuredOutputMode.json_schema: 'json_schema'>
function_calling_weak = <StructuredOutputMode.function_calling_weak: 'function_calling_weak'>
function_calling = <StructuredOutputMode.function_calling: 'function_calling'>
json_mode = <StructuredOutputMode.json_mode: 'json_mode'>
json_instructions = <StructuredOutputMode.json_instructions: 'json_instructions'>
json_instruction_and_object = <StructuredOutputMode.json_instruction_and_object: 'json_instruction_and_object'>
json_custom_instructions = <StructuredOutputMode.json_custom_instructions: 'json_custom_instructions'>
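
Because the enum subclasses str, members compare equal to their raw string values — convenient when modes are round-tripped through JSON configs. A quick sketch:

from kiln_ai.datamodel import StructuredOutputMode

mode = StructuredOutputMode("json_schema")
assert mode == "json_schema"
assert mode is StructuredOutputMode.json_schema
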
class FinetuneDataStrategy(builtins.str, enum.Enum):
59class FinetuneDataStrategy(str, Enum):
60    final_only = "final_only"
61    final_and_intermediate = "final_and_intermediate"

Enumeration of fine-tuning data strategies: final_only trains on the final response only, while final_and_intermediate also includes intermediate output such as chain-of-thought reasoning.

final_only = <FinetuneDataStrategy.final_only: 'final_only'>
final_and_intermediate = <FinetuneDataStrategy.final_and_intermediate: 'final_and_intermediate'>
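
As with the other str enums here, members can be looked up from their stored string value. A quick sketch:

from kiln_ai.datamodel import FinetuneDataStrategy

strategy = FinetuneDataStrategy("final_and_intermediate")
assert strategy is FinetuneDataStrategy.final_and_intermediate
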
PromptId = typing.Annotated[str, AfterValidator(func=<function <lambda>>)]
class PromptGenerators(builtins.str, enum.Enum):
 9class PromptGenerators(str, Enum):
10    SIMPLE = "simple_prompt_builder"
11    MULTI_SHOT = "multi_shot_prompt_builder"
12    FEW_SHOT = "few_shot_prompt_builder"
13    REPAIRS = "repairs_prompt_builder"
14    SIMPLE_CHAIN_OF_THOUGHT = "simple_chain_of_thought_prompt_builder"
15    FEW_SHOT_CHAIN_OF_THOUGHT = "few_shot_chain_of_thought_prompt_builder"
16    MULTI_SHOT_CHAIN_OF_THOUGHT = "multi_shot_chain_of_thought_prompt_builder"

Enumeration of the built-in prompt generators. Each member's value is the prompt builder ID, which is a valid PromptId.

SIMPLE = <PromptGenerators.SIMPLE: 'simple_prompt_builder'>
MULTI_SHOT = <PromptGenerators.MULTI_SHOT: 'multi_shot_prompt_builder'>
FEW_SHOT = <PromptGenerators.FEW_SHOT: 'few_shot_prompt_builder'>
REPAIRS = <PromptGenerators.REPAIRS: 'repairs_prompt_builder'>
SIMPLE_CHAIN_OF_THOUGHT = <PromptGenerators.SIMPLE_CHAIN_OF_THOUGHT: 'simple_chain_of_thought_prompt_builder'>
FEW_SHOT_CHAIN_OF_THOUGHT = <PromptGenerators.FEW_SHOT_CHAIN_OF_THOUGHT: 'few_shot_chain_of_thought_prompt_builder'>
MULTI_SHOT_CHAIN_OF_THOUGHT = <PromptGenerators.MULTI_SHOT_CHAIN_OF_THOUGHT: 'multi_shot_chain_of_thought_prompt_builder'>
prompt_generator_values = ['simple_prompt_builder', 'multi_shot_prompt_builder', 'few_shot_prompt_builder', 'repairs_prompt_builder', 'simple_chain_of_thought_prompt_builder', 'few_shot_chain_of_thought_prompt_builder', 'multi_shot_chain_of_thought_prompt_builder']
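
prompt_generator_values mirrors the enum's values, which makes raw-ID membership checks straightforward. A quick sketch:

from kiln_ai.datamodel import PromptGenerators, prompt_generator_values

assert PromptGenerators.SIMPLE.value in prompt_generator_values
assert "simple_prompt_builder" in prompt_generator_values
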