kiln_ai.datamodel.rag

 1from typing import TYPE_CHECKING, Union
 2
 3from pydantic import Field, model_validator
 4
 5from kiln_ai.datamodel.basemodel import ID_TYPE, FilenameString, KilnParentedModel
 6from kiln_ai.utils.validation import ToolNameString
 7
 8if TYPE_CHECKING:
 9    from kiln_ai.datamodel.project import Project
10
11
class RagConfig(KilnParentedModel):
    """Configuration for Retrieval-Augmented Generation (RAG) on a project's documents."""

    # --- Identity and bookkeeping -------------------------------------------
    name: FilenameString = Field(
        description="A name to identify this RAG configuration for your own reference.",
    )

    is_archived: bool = Field(
        default=False,
        description="Whether the RAG configuration is archived. Archived RAG configurations are not shown in the UI and are not available for use.",
    )

    description: str | None = Field(
        default=None,
        description="A description of the RAG configuration for you and your team. Will not be used in prompts/training/validation.",
    )

    # --- How the search tool is presented to the model ----------------------
    tool_name: ToolNameString = Field(
        description="A name for the model to identify the Search Tool in conversations.",
    )

    tool_description: str = Field(
        description="A description of the purpose of the tool. The model will use this description to understand the tool's capabilities.",
        max_length=128,
    )

    # --- IDs of the sub-configs this RAG configuration refers to ------------
    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the documents.",
    )

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the documents.",
    )

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to embed the documents.",
    )

    vector_store_config_id: ID_TYPE = Field(
        description="The ID of the vector store config used to store the documents.",
    )

    reranker_config_id: ID_TYPE | None = Field(
        default=None,
        description="The ID of the reranker config used to rerank the documents. If None, no reranking will be performed.",
    )

    # --- Optional document filter --------------------------------------------
    tags: list[str] | None = Field(
        default=None,
        description="List of document tags to filter by. If None, all documents in the project are used.",
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent when its class is named "Project", otherwise None.

        The parent is recognised by class name rather than isinstance so
        that Project only has to be imported for type checking.
        """
        if self.parent is not None and self.parent.__class__.__name__ == "Project":
            return self.parent  # type: ignore
        return None

    @model_validator(mode="after")
    def validate_tags(self):
        """Validate the tag filter and the tool name/description.

        Despite its name, this validator also rejects a blank tool name
        and a blank tool description.

        Returns:
            The model instance itself when every check passes.

        Raises:
            ValueError: If tags is an empty list, if any tag is empty or
                contains a space, or if tool_name / tool_description is
                empty or whitespace-only.
        """
        tag_filter = self.tags
        if tag_filter is not None:
            # None means "no filter"; an explicit empty list is rejected.
            if not tag_filter:
                raise ValueError("Tags cannot be an empty list.")
            # Checks run tag by tag, so the first offending tag decides the error.
            for entry in tag_filter:
                if not entry:
                    raise ValueError("Tags cannot be empty.")
                if " " in entry:
                    raise ValueError("Tags cannot contain spaces. Try underscores.")

        if not self.tool_name.strip():
            raise ValueError("Tool name cannot be empty.")
        if not self.tool_description.strip():
            raise ValueError("Tool description cannot be empty.")

        return self
class RagConfig(kiln_ai.datamodel.basemodel.KilnParentedModel):
class RagConfig(KilnParentedModel):
    """Configuration for Retrieval-Augmented Generation (RAG) on a project's documents."""

    # --- Identity and bookkeeping -------------------------------------------
    name: FilenameString = Field(
        description="A name to identify this RAG configuration for your own reference.",
    )

    is_archived: bool = Field(
        default=False,
        description="Whether the RAG configuration is archived. Archived RAG configurations are not shown in the UI and are not available for use.",
    )

    description: str | None = Field(
        default=None,
        description="A description of the RAG configuration for you and your team. Will not be used in prompts/training/validation.",
    )

    # --- How the search tool is presented to the model ----------------------
    tool_name: ToolNameString = Field(
        description="A name for the model to identify the Search Tool in conversations.",
    )

    tool_description: str = Field(
        description="A description of the purpose of the tool. The model will use this description to understand the tool's capabilities.",
        max_length=128,
    )

    # --- IDs of the sub-configs this RAG configuration refers to ------------
    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the documents.",
    )

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the documents.",
    )

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to embed the documents.",
    )

    vector_store_config_id: ID_TYPE = Field(
        description="The ID of the vector store config used to store the documents.",
    )

    reranker_config_id: ID_TYPE | None = Field(
        default=None,
        description="The ID of the reranker config used to rerank the documents. If None, no reranking will be performed.",
    )

    # --- Optional document filter --------------------------------------------
    tags: list[str] | None = Field(
        default=None,
        description="List of document tags to filter by. If None, all documents in the project are used.",
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent when its class is named "Project", otherwise None.

        The parent is recognised by class name rather than isinstance so
        that Project only has to be imported for type checking.
        """
        if self.parent is not None and self.parent.__class__.__name__ == "Project":
            return self.parent  # type: ignore
        return None

    @model_validator(mode="after")
    def validate_tags(self):
        """Validate the tag filter and the tool name/description.

        Despite its name, this validator also rejects a blank tool name
        and a blank tool description.

        Returns:
            The model instance itself when every check passes.

        Raises:
            ValueError: If tags is an empty list, if any tag is empty or
                contains a space, or if tool_name / tool_description is
                empty or whitespace-only.
        """
        tag_filter = self.tags
        if tag_filter is not None:
            # None means "no filter"; an explicit empty list is rejected.
            if not tag_filter:
                raise ValueError("Tags cannot be an empty list.")
            # Checks run tag by tag, so the first offending tag decides the error.
            for entry in tag_filter:
                if not entry:
                    raise ValueError("Tags cannot be empty.")
                if " " in entry:
                    raise ValueError("Tags cannot contain spaces. Try underscores.")

        if not self.tool_name.strip():
            raise ValueError("Tool name cannot be empty.")
        if not self.tool_description.strip():
            raise ValueError("Tool description cannot be empty.")

        return self

Configuration for Retrieval-Augmented Generation (RAG) on a project's documents.

name: Annotated[str, BeforeValidator(func=<function name_validator.<locals>.fn at 0x7f90236f9b20>, json_schema_input_type=PydanticUndefined), StringConstraints(strip_whitespace=None, to_upper=None, to_lower=None, strict=None, min_length=1, max_length=120, pattern=None)]
is_archived: bool
description: str | None
tool_name: Annotated[str, BeforeValidator(func=<function tool_name_validator at 0x7f90233e9260>, json_schema_input_type=PydanticUndefined), StringConstraints(strip_whitespace=None, to_upper=None, to_lower=None, strict=None, min_length=1, max_length=64, pattern=None)]
tool_description: str
extractor_config_id: Optional[str]
chunker_config_id: Optional[str]
embedding_config_id: Optional[str]
vector_store_config_id: Optional[str]
reranker_config_id: Optional[str]
tags: list[str] | None
def parent_project(self) -> Optional[kiln_ai.datamodel.Project]:
66    def parent_project(self) -> Union["Project", None]:
67        if self.parent is None or self.parent.__class__.__name__ != "Project":
68            return None
69        return self.parent  # type: ignore
@model_validator(mode='after')
def validate_tags(self):
71    @model_validator(mode="after")
72    def validate_tags(self):
73        if self.tags is not None:
74            if len(self.tags) == 0:
75                raise ValueError("Tags cannot be an empty list.")
76            for tag in self.tags:
77                if not tag:
78                    raise ValueError("Tags cannot be empty.")
79                if " " in tag:
80                    raise ValueError("Tags cannot contain spaces. Try underscores.")
81
82        if self.tool_name.strip() == "":
83            raise ValueError("Tool name cannot be empty.")
84        if self.tool_description.strip() == "":
85            raise ValueError("Tool description cannot be empty.")
86
87        return self
def relationship_name() -> str:
761        def relationship_name_method() -> str:  # zero-argument accessor; `relationship_name` is not defined in these lines - presumably captured from the enclosing scope (confirm in basemodel)
762            return relationship_name

Returns the `relationship_name` value captured when this accessor was defined.

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
754        def parent_class_method() -> Type[KilnParentModel]:  # zero-argument accessor; `cls` is not defined in these lines - presumably the class bound in the enclosing scope (confirm in basemodel)
755            return cls

Returns `cls`, the `KilnParentModel` subclass captured when this accessor was defined.

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Initialise the private-attribute storage of a model instance.

    Written as a free function but meant to be used like a BaseModel
    method. The `context` parameter is accepted only because pydantic-core
    passes it on the call; it is not read here.

    Nothing happens if the instance already has a non-None
    `__pydantic_private__`.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    # Guard clause: storage that is already populated is left untouched.
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    resolved = {}
    for attr_name, attr in self.__private_attributes__.items():
        value = attr.get_default()
        # Attributes with no default stay absent from the storage dict.
        if value is PydanticUndefined:
            continue
        resolved[attr_name] = value
    object_setattr(self, '__pydantic_private__', resolved)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args:
    self: The BaseModel instance.
    context: The context.