kiln_ai.datamodel.rag

View Source

 1from typing import TYPE_CHECKING, Union
 2
 3from pydantic import Field, model_validator
 4
 5from kiln_ai.datamodel.basemodel import ID_TYPE, FilenameString, KilnParentedModel
 6from kiln_ai.utils.validation import ToolNameString
 7
 8if TYPE_CHECKING:
 9    from kiln_ai.datamodel.project import Project
10
11
class RagConfig(KilnParentedModel):
    """A named RAG (retrieval) configuration stored under a parent model.

    The config references, by ID, the extractor, chunker, embedding and
    vector store configs (plus an optional reranker config), and carries the
    name and description of the search tool that is presented to the model.
    An optional tag list restricts which documents are used.
    """

    # --- Human-facing metadata -------------------------------------------
    name: FilenameString = Field(
        description="A name to identify this RAG configuration for your own reference.",
    )

    is_archived: bool = Field(
        description="Whether the RAG configuration is archived. Archived RAG configurations are not shown in the UI and are not available for use.",
        default=False,
    )

    description: str | None = Field(
        description="A description of the RAG configuration for you and your team. Will not be used in prompts/training/validation.",
        default=None,
    )

    # --- Tool identity shown to the model ----------------------------------
    tool_name: ToolNameString = Field(
        description="A name for the model to identify the Search Tool in conversations.",
    )

    tool_description: str = Field(
        max_length=128,
        description="A description of the purpose of the tool. The model will use this description to understand the tool's capabilities.",
    )

    # --- References to the pipeline stage configs --------------------------
    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the documents.",
    )

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the documents.",
    )

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to embed the documents.",
    )

    vector_store_config_id: ID_TYPE = Field(
        description="The ID of the vector store config used to store the documents.",
    )

    reranker_config_id: ID_TYPE | None = Field(
        description="The ID of the reranker config used to rerank the documents. If None, no reranking will be performed.",
        default=None,
    )

    # --- Document filtering -------------------------------------------------
    tags: list[str] | None = Field(
        description="List of document tags to filter by. If None, all documents in the project are used.",
        default=None,
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent when it is a ``Project``, otherwise ``None``.

        The parent is recognised by its class name so that ``Project`` only
        has to be imported for type checking.
        """
        if self.parent is not None and self.parent.__class__.__name__ == "Project":
            return self.parent  # type: ignore
        return None

    @model_validator(mode="after")
    def validate_tags(self):
        """Validate ``tags``, ``tool_name`` and ``tool_description``.

        Raises:
            ValueError: If ``tags`` is an empty list, contains an empty tag,
                or contains a tag with a space; or if ``tool_name`` or
                ``tool_description`` is blank after stripping whitespace.

        Returns:
            The validated model instance.
        """
        # ``None`` means "no tag filter" and is accepted without checks.
        if self.tags is not None:
            if not self.tags:
                raise ValueError("Tags cannot be an empty list.")
            for entry in self.tags:
                if not entry:
                    raise ValueError("Tags cannot be empty.")
                if " " in entry:
                    raise ValueError("Tags cannot contain spaces. Try underscores.")

        # Whitespace-only tool metadata is treated the same as empty.
        if not self.tool_name.strip():
            raise ValueError("Tool name cannot be empty.")
        if not self.tool_description.strip():
            raise ValueError("Tool description cannot be empty.")

        return self

class RagConfig(kiln_ai.datamodel.basemodel.KilnParentedModel): View Source

class RagConfig(KilnParentedModel):
    """A named RAG (retrieval) configuration stored under a parent model.

    The config references, by ID, the extractor, chunker, embedding and
    vector store configs (plus an optional reranker config), and carries the
    name and description of the search tool that is presented to the model.
    An optional tag list restricts which documents are used.
    """

    # --- Human-facing metadata -------------------------------------------
    name: FilenameString = Field(
        description="A name to identify this RAG configuration for your own reference.",
    )

    is_archived: bool = Field(
        description="Whether the RAG configuration is archived. Archived RAG configurations are not shown in the UI and are not available for use.",
        default=False,
    )

    description: str | None = Field(
        description="A description of the RAG configuration for you and your team. Will not be used in prompts/training/validation.",
        default=None,
    )

    # --- Tool identity shown to the model ----------------------------------
    tool_name: ToolNameString = Field(
        description="A name for the model to identify the Search Tool in conversations.",
    )

    tool_description: str = Field(
        max_length=128,
        description="A description of the purpose of the tool. The model will use this description to understand the tool's capabilities.",
    )

    # --- References to the pipeline stage configs --------------------------
    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the documents.",
    )

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the documents.",
    )

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to embed the documents.",
    )

    vector_store_config_id: ID_TYPE = Field(
        description="The ID of the vector store config used to store the documents.",
    )

    reranker_config_id: ID_TYPE | None = Field(
        description="The ID of the reranker config used to rerank the documents. If None, no reranking will be performed.",
        default=None,
    )

    # --- Document filtering -------------------------------------------------
    tags: list[str] | None = Field(
        description="List of document tags to filter by. If None, all documents in the project are used.",
        default=None,
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent when it is a ``Project``, otherwise ``None``.

        The parent is recognised by its class name so that ``Project`` only
        has to be imported for type checking.
        """
        if self.parent is not None and self.parent.__class__.__name__ == "Project":
            return self.parent  # type: ignore
        return None

    @model_validator(mode="after")
    def validate_tags(self):
        """Validate ``tags``, ``tool_name`` and ``tool_description``.

        Raises:
            ValueError: If ``tags`` is an empty list, contains an empty tag,
                or contains a tag with a space; or if ``tool_name`` or
                ``tool_description`` is blank after stripping whitespace.

        Returns:
            The validated model instance.
        """
        # ``None`` means "no tag filter" and is accepted without checks.
        if self.tags is not None:
            if not self.tags:
                raise ValueError("Tags cannot be an empty list.")
            for entry in self.tags:
                if not entry:
                    raise ValueError("Tags cannot be empty.")
                if " " in entry:
                    raise ValueError("Tags cannot contain spaces. Try underscores.")

        # Whitespace-only tool metadata is treated the same as empty.
        if not self.tool_name.strip():
            raise ValueError("Tool name cannot be empty.")
        if not self.tool_description.strip():
            raise ValueError("Tool description cannot be empty.")

        return self

Base model for Kiln models that have a parent-child relationship. This base class is for child models.

This class provides functionality for managing hierarchical relationships between models, including parent reference handling and file system organization.

Attributes: parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.

name: Annotated[str, BeforeValidator(func=<function name_validator.<locals>.fn at 0x7f2f1ec0c9a0>, json_schema_input_type=PydanticUndefined)]

is_archived: bool

description: str | None

tool_name: Annotated[str, BeforeValidator(func=<function tool_name_validator at 0x7f2f1dc82f20>, json_schema_input_type=PydanticUndefined)]

tool_description: str

extractor_config_id: Optional[str]

chunker_config_id: Optional[str]

embedding_config_id: Optional[str]

vector_store_config_id: Optional[str]

reranker_config_id: Optional[str]

tags: list[str] | None

def parent_project(self) -> Optional[kiln_ai.datamodel.Project]: View Source

64    def parent_project(self) -> Union["Project", None]:
65        if self.parent is None or self.parent.__class__.__name__ != "Project":
66            return None
67        return self.parent  # type: ignore

@model_validator(mode='after')

def validate_tags(self): View Source

69    @model_validator(mode="after")
70    def validate_tags(self):
71        if self.tags is not None:
72            if len(self.tags) == 0:
73                raise ValueError("Tags cannot be an empty list.")
74            for tag in self.tags:
75                if not tag:
76                    raise ValueError("Tags cannot be empty.")
77                if " " in tag:
78                    raise ValueError("Tags cannot contain spaces. Try underscores.")
79
80        if self.tool_name.strip() == "":
81            raise ValueError("Tool name cannot be empty.")
82        if self.tool_description.strip() == "":
83            raise ValueError("Tool description cannot be empty.")
84
85        return self

def relationship_name() -> str: View Source

713        def relationship_name_method() -> str:
714            return relationship_name

Returns the relationship name (a string) associated with this model class.

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]: View Source

706        def parent_class_method() -> Type[KilnParentModel]:
707            return cls

Returns the parent model class (a `KilnParentModel` type) associated with this model class.

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None: View Source

def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """This function is meant to behave like a BaseModel method to initialise private attributes.

    It takes context as an argument since that's what pydantic-core passes when calling it.
    Nothing is done when ``__pydantic_private__`` is already set to a non-None value.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    # Already initialised: leave the existing private state untouched.
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    private_values = {}
    for attr_name, attr in self.__private_attributes__.items():
        value = attr.get_default()
        # Attributes without a default are simply left out of the mapping.
        if value is PydanticUndefined:
            continue
        private_values[attr_name] = value
    object_setattr(self, '__pydantic_private__', private_values)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args: self: The BaseModel instance. context: The context.