kiln_ai.datamodel.rag

 1from typing import TYPE_CHECKING, Union
 2
 3from pydantic import Field, model_validator
 4
 5from kiln_ai.datamodel.basemodel import ID_TYPE, FilenameString, KilnParentedModel
 6from kiln_ai.utils.validation import ToolNameString
 7
 8if TYPE_CHECKING:
 9    from kiln_ai.datamodel.project import Project
10
11
class RagConfig(KilnParentedModel):
    # One RAG (retrieval) configuration: display metadata, how the search tool
    # is presented to the model, the IDs of the extractor / chunker / embedding /
    # vector-store configs it refers to, and an optional document tag filter.
    #
    # NOTE: documented with comments rather than a class docstring so that the
    # model's `__doc__` stays exactly as it was.

    # --- Display metadata -------------------------------------------------
    name: FilenameString = Field(
        description="A name to identify this RAG configuration for your own reference.",
    )

    is_archived: bool = Field(
        description="Whether the RAG configuration is archived. Archived RAG configurations are not shown in the UI and are not available for use.",
        default=False,
    )

    description: str | None = Field(
        description="A description of the RAG configuration for you and your team. Will not be used in prompts/training/validation.",
        default=None,
    )

    # --- How the search tool is presented to the model ---------------------
    tool_name: ToolNameString = Field(
        description="A name for the model to identify the Search Tool in conversations.",
    )

    tool_description: str = Field(
        max_length=128,
        description="A description of the purpose of the tool. The model will use this description to understand the tool's capabilities.",
    )

    # --- References to the other configs used by this RAG setup ------------
    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the documents.",
    )

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the documents.",
    )

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to embed the documents.",
    )

    vector_store_config_id: ID_TYPE = Field(
        description="The ID of the vector store config used to store the documents.",
    )

    # --- Optional document filter -----------------------------------------
    tags: list[str] | None = Field(
        description="List of document tags to filter by. If None, all documents in the project are used.",
        default=None,
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent when it is a ``Project``, otherwise ``None``.

        The parent is recognised by its class name, so ``Project`` only has to
        be imported for type checking.
        """
        owner = self.parent
        if owner is not None and owner.__class__.__name__ == "Project":
            return owner  # type: ignore
        return None

    @model_validator(mode="after")
    def validate_tags(self):
        """Validate ``tags``, ``tool_name`` and ``tool_description``.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If ``tags`` is an empty list, contains an empty tag or
                a tag with a space, or if ``tool_name`` / ``tool_description``
                is blank after stripping whitespace.
        """
        tag_filter = self.tags
        if tag_filter is not None:
            # `None` means "no filter"; an explicit empty list is rejected.
            if not tag_filter:
                raise ValueError("Tags cannot be an empty list.")
            for entry in tag_filter:
                if not entry:
                    raise ValueError("Tags cannot be empty.")
                if " " in entry:
                    raise ValueError("Tags cannot contain spaces. Try underscores.")

        # Checked in this order so the tool name is reported before the description.
        required_text = (
            (self.tool_name, "Tool name cannot be empty."),
            (self.tool_description, "Tool description cannot be empty."),
        )
        for text, message in required_text:
            if not text.strip():
                raise ValueError(message)

        return self
class RagConfig(kiln_ai.datamodel.basemodel.KilnParentedModel):
class RagConfig(KilnParentedModel):
    # One RAG (retrieval) configuration: display metadata, how the search tool
    # is presented to the model, the IDs of the extractor / chunker / embedding /
    # vector-store configs it refers to, and an optional document tag filter.
    #
    # NOTE: documented with comments rather than a class docstring so that the
    # model's `__doc__` stays exactly as it was.

    # --- Display metadata -------------------------------------------------
    name: FilenameString = Field(
        description="A name to identify this RAG configuration for your own reference.",
    )

    is_archived: bool = Field(
        description="Whether the RAG configuration is archived. Archived RAG configurations are not shown in the UI and are not available for use.",
        default=False,
    )

    description: str | None = Field(
        description="A description of the RAG configuration for you and your team. Will not be used in prompts/training/validation.",
        default=None,
    )

    # --- How the search tool is presented to the model ---------------------
    tool_name: ToolNameString = Field(
        description="A name for the model to identify the Search Tool in conversations.",
    )

    tool_description: str = Field(
        max_length=128,
        description="A description of the purpose of the tool. The model will use this description to understand the tool's capabilities.",
    )

    # --- References to the other configs used by this RAG setup ------------
    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the documents.",
    )

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the documents.",
    )

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to embed the documents.",
    )

    vector_store_config_id: ID_TYPE = Field(
        description="The ID of the vector store config used to store the documents.",
    )

    # --- Optional document filter -----------------------------------------
    tags: list[str] | None = Field(
        description="List of document tags to filter by. If None, all documents in the project are used.",
        default=None,
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent when it is a ``Project``, otherwise ``None``.

        The parent is recognised by its class name, so ``Project`` only has to
        be imported for type checking.
        """
        owner = self.parent
        if owner is not None and owner.__class__.__name__ == "Project":
            return owner  # type: ignore
        return None

    @model_validator(mode="after")
    def validate_tags(self):
        """Validate ``tags``, ``tool_name`` and ``tool_description``.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If ``tags`` is an empty list, contains an empty tag or
                a tag with a space, or if ``tool_name`` / ``tool_description``
                is blank after stripping whitespace.
        """
        tag_filter = self.tags
        if tag_filter is not None:
            # `None` means "no filter"; an explicit empty list is rejected.
            if not tag_filter:
                raise ValueError("Tags cannot be an empty list.")
            for entry in tag_filter:
                if not entry:
                    raise ValueError("Tags cannot be empty.")
                if " " in entry:
                    raise ValueError("Tags cannot contain spaces. Try underscores.")

        # Checked in this order so the tool name is reported before the description.
        required_text = (
            (self.tool_name, "Tool name cannot be empty."),
            (self.tool_description, "Tool description cannot be empty."),
        )
        for text, message in required_text:
            if not text.strip():
                raise ValueError(message)

        return self

Base model for Kiln models that have a parent-child relationship. This base class is for child models.

This class provides functionality for managing hierarchical relationships between models, including parent reference handling and file system organization.

Attributes: parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.

name: Annotated[str, BeforeValidator(func=<function name_validator.<locals>.fn at 0x7fc0765f0900>, json_schema_input_type=PydanticUndefined)]
is_archived: bool
description: str | None
tool_name: Annotated[str, BeforeValidator(func=<function tool_name_validator at 0x7fc07563d8a0>, json_schema_input_type=PydanticUndefined)]
tool_description: str
extractor_config_id: Optional[str]
chunker_config_id: Optional[str]
embedding_config_id: Optional[str]
vector_store_config_id: Optional[str]
tags: list[str] | None
def parent_project(self) -> Optional[kiln_ai.datamodel.Project]:
def parent_project(self) -> Union["Project", None]:
    """Return the parent when it is a ``Project``, otherwise ``None``.

    The parent is recognised by its class name rather than by an
    ``isinstance`` check, so ``Project`` does not need to be imported here.
    """
    owner = self.parent
    if owner is not None and owner.__class__.__name__ == "Project":
        return owner  # type: ignore
    return None
@model_validator(mode='after')
def validate_tags(self):
@model_validator(mode="after")
def validate_tags(self):
    """Validate ``tags``, ``tool_name`` and ``tool_description``.

    Returns:
        The validated model instance.

    Raises:
        ValueError: If ``tags`` is an empty list, contains an empty tag or a
            tag with a space, or if ``tool_name`` / ``tool_description`` is
            blank after stripping whitespace.
    """
    tag_filter = self.tags
    if tag_filter is not None:
        # `None` means "no filter"; an explicit empty list is rejected.
        if not tag_filter:
            raise ValueError("Tags cannot be an empty list.")
        for entry in tag_filter:
            if not entry:
                raise ValueError("Tags cannot be empty.")
            if " " in entry:
                raise ValueError("Tags cannot contain spaces. Try underscores.")

    # Checked in this order so the tool name is reported before the description.
    required_text = (
        (self.tool_name, "Tool name cannot be empty."),
        (self.tool_description, "Tool description cannot be empty."),
    )
    for text, message in required_text:
        if not text.strip():
            raise ValueError(message)

    return self
def relationship_name() -> str:
661        def relationship_name_method() -> str:
               # Closure: returns the `relationship_name` value taken from the
               # enclosing scope, which is not visible in this listing.
662            return relationship_name

Returns the relationship name bound to this model class (the `relationship_name` value captured by the closure shown above).

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
654        def parent_class_method() -> Type[KilnParentModel]:
               # Closure: returns the `cls` value taken from the enclosing scope,
               # which is not visible in this listing.
655            return cls

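Returns the parent model class bound to this model class (the `cls` value, a `KilnParentModel` type, captured by the closure shown above).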

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Populate ``__pydantic_private__`` on ``self`` unless it is already set.

    Written to behave like a BaseModel method. ``context`` is accepted because
    pydantic-core passes it when calling this function; it is not used here.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    # Nothing to do when the private-attribute storage already exists.
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    # Keep only the private attributes that actually have a default value.
    initial_values = {
        attr_name: value
        for attr_name, attr in self.__private_attributes__.items()
        if (value := attr.get_default()) is not PydanticUndefined
    }
    object_setattr(self, '__pydantic_private__', initial_values)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args: self: The BaseModel instance. context: The context.