kiln_ai.datamodel.rag
from typing import TYPE_CHECKING, Union

from pydantic import Field, model_validator

from kiln_ai.datamodel.basemodel import ID_TYPE, FilenameString, KilnParentedModel
from kiln_ai.utils.validation import ToolNameString

if TYPE_CHECKING:
    from kiln_ai.datamodel.project import Project


# RAG configuration record. It carries display metadata, the name/description
# of the search tool exposed to the model, the IDs of the extractor, chunker,
# embedding, vector store and (optional) reranker configs, and an optional
# document tag filter.
class RagConfig(KilnParentedModel):
    name: FilenameString = Field(
        description="A name to identify this RAG configuration for your own reference.",
    )

    is_archived: bool = Field(
        default=False,
        description="Whether the RAG configuration is archived. Archived RAG configurations are not shown in the UI and are not available for use.",
    )

    description: str | None = Field(
        default=None,
        description="A description of the RAG configuration for you and your team. Will not be used in prompts/training/validation.",
    )

    tool_name: ToolNameString = Field(
        description="A name for the model to identify the Search Tool in conversations.",
    )

    tool_description: str = Field(
        description="A description of the purpose of the tool. The model will use this description to understand the tool's capabilities.",
        max_length=128,
    )

    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the documents.",
    )

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the documents.",
    )

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to embed the documents.",
    )

    vector_store_config_id: ID_TYPE = Field(
        description="The ID of the vector store config used to store the documents.",
    )

    reranker_config_id: ID_TYPE | None = Field(
        default=None,
        description="The ID of the reranker config used to rerank the documents. If None, no reranking will be performed.",
    )

    tags: list[str] | None = Field(
        default=None,
        description="List of document tags to filter by. If None, all documents in the project are used.",
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent when its class is named "Project", else None."""
        owner = self.parent
        # The class is matched by name so Project never has to be imported
        # at runtime (it is only imported under TYPE_CHECKING).
        if owner is not None and owner.__class__.__name__ == "Project":
            return owner  # type: ignore
        return None

    @model_validator(mode="after")
    def validate_tags(self):
        """Validate the tag filter and the tool name/description.

        Raises:
            ValueError: If ``tags`` is an empty list, if any tag is empty or
                contains a space, or if ``tool_name`` / ``tool_description``
                is blank once surrounding whitespace is stripped.
        """
        tag_list = self.tags
        if tag_list is not None:
            if not tag_list:
                raise ValueError("Tags cannot be an empty list.")
            # Both checks run per tag, in order, so the first offending tag
            # decides which message is raised.
            for entry in tag_list:
                if len(entry) == 0:
                    raise ValueError("Tags cannot be empty.")
                if " " in entry:
                    raise ValueError("Tags cannot contain spaces. Try underscores.")

        required_text = (
            (self.tool_name, "Tool name cannot be empty."),
            (self.tool_description, "Tool description cannot be empty."),
        )
        for text, message in required_text:
            if not text.strip():
                raise ValueError(message)

        return self
class
RagConfig(kiln_ai.datamodel.basemodel.KilnParentedModel):
# RAG configuration record. It carries display metadata, the name/description
# of the search tool exposed to the model, the IDs of the extractor, chunker,
# embedding, vector store and (optional) reranker configs, and an optional
# document tag filter.
class RagConfig(KilnParentedModel):
    name: FilenameString = Field(
        description="A name to identify this RAG configuration for your own reference.",
    )

    is_archived: bool = Field(
        default=False,
        description="Whether the RAG configuration is archived. Archived RAG configurations are not shown in the UI and are not available for use.",
    )

    description: str | None = Field(
        default=None,
        description="A description of the RAG configuration for you and your team. Will not be used in prompts/training/validation.",
    )

    tool_name: ToolNameString = Field(
        description="A name for the model to identify the Search Tool in conversations.",
    )

    tool_description: str = Field(
        description="A description of the purpose of the tool. The model will use this description to understand the tool's capabilities.",
        max_length=128,
    )

    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the documents.",
    )

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the documents.",
    )

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to embed the documents.",
    )

    vector_store_config_id: ID_TYPE = Field(
        description="The ID of the vector store config used to store the documents.",
    )

    reranker_config_id: ID_TYPE | None = Field(
        default=None,
        description="The ID of the reranker config used to rerank the documents. If None, no reranking will be performed.",
    )

    tags: list[str] | None = Field(
        default=None,
        description="List of document tags to filter by. If None, all documents in the project are used.",
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent when its class is named "Project", else None."""
        owner = self.parent
        # The class is matched by name so Project never has to be imported
        # at runtime.
        if owner is not None and owner.__class__.__name__ == "Project":
            return owner  # type: ignore
        return None

    @model_validator(mode="after")
    def validate_tags(self):
        """Validate the tag filter and the tool name/description.

        Raises:
            ValueError: If ``tags`` is an empty list, if any tag is empty or
                contains a space, or if ``tool_name`` / ``tool_description``
                is blank once surrounding whitespace is stripped.
        """
        tag_list = self.tags
        if tag_list is not None:
            if not tag_list:
                raise ValueError("Tags cannot be an empty list.")
            # Both checks run per tag, in order, so the first offending tag
            # decides which message is raised.
            for entry in tag_list:
                if len(entry) == 0:
                    raise ValueError("Tags cannot be empty.")
                if " " in entry:
                    raise ValueError("Tags cannot contain spaces. Try underscores.")

        required_text = (
            (self.tool_name, "Tool name cannot be empty."),
            (self.tool_description, "Tool description cannot be empty."),
        )
        for text, message in required_text:
            if not text.strip():
                raise ValueError(message)

        return self
Base model for Kiln models that have a parent-child relationship. This base class is for child models.
This class provides functionality for managing hierarchical relationships between models, including parent reference handling and file system organization.
Attributes: parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
name: Annotated[str, BeforeValidator(func=<function name_validator.<locals>.fn at 0x7f2f1ec0c9a0>, json_schema_input_type=PydanticUndefined)]
tool_name: Annotated[str, BeforeValidator(func=<function tool_name_validator at 0x7f2f1dc82f20>, json_schema_input_type=PydanticUndefined)]
def
parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
Returns the class of this model's parent (a `KilnParentModel` type).
model_config =
{'validate_assignment': True}
Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
def
model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Initialise private attributes; meant to behave like a BaseModel method.

    ``context`` is accepted because pydantic-core passes it when calling this
    function; the body does not read it.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    # Nothing to do when the private-attribute store has already been set.
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    private_values = {}
    for attr_name, attr_spec in self.__private_attributes__.items():
        value = attr_spec.get_default()
        # Attributes with no default are left out of the store.
        if value is PydanticUndefined:
            continue
        private_values[attr_name] = value
    object_setattr(self, '__pydantic_private__', private_values)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.