kiln_ai.datamodel.rag
from typing import TYPE_CHECKING, Union

from pydantic import Field, model_validator

from kiln_ai.datamodel.basemodel import ID_TYPE, FilenameString, KilnParentedModel
from kiln_ai.utils.validation import ToolNameString

if TYPE_CHECKING:
    from kiln_ai.datamodel.project import Project


class RagConfig(KilnParentedModel):
    """Settings for Retrieval-Augmented Generation (RAG) over a project's documents.

    A config carries a human-facing name and description, the name and
    description of the search tool presented to the model, the IDs of the
    extractor / chunker / embedding / vector store configs (plus an optional
    reranker config), and an optional list of document tags to filter by.
    """

    # --- Human-facing metadata -------------------------------------------
    name: FilenameString = Field(
        description="A name to identify this RAG configuration for your own reference.",
    )

    is_archived: bool = Field(
        default=False,
        description="Whether the RAG configuration is archived. Archived RAG configurations are not shown in the UI and are not available for use.",
    )

    description: str | None = Field(
        default=None,
        description="A description of the RAG configuration for you and your team. Will not be used in prompts/training/validation.",
    )

    # --- Search tool presented to the model ------------------------------
    tool_name: ToolNameString = Field(
        description="A name for the model to identify the Search Tool in conversations.",
    )

    tool_description: str = Field(
        description="A description of the purpose of the tool. The model will use this description to understand the tool's capabilities.",
        max_length=128,
    )

    # --- Referenced sub-configs ------------------------------------------
    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the documents.",
    )

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the documents.",
    )

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to embed the documents.",
    )

    vector_store_config_id: ID_TYPE = Field(
        description="The ID of the vector store config used to store the documents.",
    )

    reranker_config_id: ID_TYPE | None = Field(
        default=None,
        description="The ID of the reranker config used to rerank the documents. If None, no reranking will be performed.",
    )

    # --- Document selection ----------------------------------------------
    tags: list[str] | None = Field(
        default=None,
        description="List of document tags to filter by. If None, all documents in the project are used.",
    )

    def parent_project(self) -> Union["Project", None]:
        """Return the parent when it is a ``Project``; otherwise ``None``."""
        owner = self.parent
        # Matching on the class name keeps Project out of the runtime imports
        # (it is only imported under TYPE_CHECKING).
        if owner is not None and owner.__class__.__name__ == "Project":
            return owner  # type: ignore
        return None

    @model_validator(mode="after")
    def validate_tags(self):
        """Check the tag filter and the tool name/description after model validation.

        Raises:
            ValueError: If ``tags`` is an empty list, contains an empty tag or
                a tag with a space, or if ``tool_name`` / ``tool_description``
                is blank once surrounding whitespace is stripped.
        """
        tag_filter = self.tags
        if tag_filter is not None:
            # None means "no filter"; an explicit empty list is rejected.
            if not tag_filter:
                raise ValueError("Tags cannot be an empty list.")
            for entry in tag_filter:
                if not entry:
                    raise ValueError("Tags cannot be empty.")
                if " " in entry:
                    raise ValueError("Tags cannot contain spaces. Try underscores.")

        # Whitespace-only values are treated as empty.
        if not self.tool_name.strip():
            raise ValueError("Tool name cannot be empty.")
        if not self.tool_description.strip():
            raise ValueError("Tool description cannot be empty.")

        return self
class
RagConfig(kiln_ai.datamodel.basemodel.KilnParentedModel):
class RagConfig(KilnParentedModel):
    """Settings for Retrieval-Augmented Generation (RAG) over a project's documents.

    A config carries a human-facing name and description, the name and
    description of the search tool presented to the model, the IDs of the
    extractor / chunker / embedding / vector store configs (plus an optional
    reranker config), and an optional list of document tags to filter by.
    """

    # --- Human-facing metadata -------------------------------------------
    name: FilenameString = Field(
        description="A name to identify this RAG configuration for your own reference.",
    )

    is_archived: bool = Field(
        default=False,
        description="Whether the RAG configuration is archived. Archived RAG configurations are not shown in the UI and are not available for use.",
    )

    description: str | None = Field(
        default=None,
        description="A description of the RAG configuration for you and your team. Will not be used in prompts/training/validation.",
    )

    # --- Search tool presented to the model ------------------------------
    tool_name: ToolNameString = Field(
        description="A name for the model to identify the Search Tool in conversations.",
    )

    tool_description: str = Field(
        description="A description of the purpose of the tool. The model will use this description to understand the tool's capabilities.",
        max_length=128,
    )

    # --- Referenced sub-configs ------------------------------------------
    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the documents.",
    )

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the documents.",
    )

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to embed the documents.",
    )

    vector_store_config_id: ID_TYPE = Field(
        description="The ID of the vector store config used to store the documents.",
    )

    reranker_config_id: ID_TYPE | None = Field(
        default=None,
        description="The ID of the reranker config used to rerank the documents. If None, no reranking will be performed.",
    )

    # --- Document selection ----------------------------------------------
    tags: list[str] | None = Field(
        default=None,
        description="List of document tags to filter by. If None, all documents in the project are used.",
    )

    def parent_project(self) -> Union["Project", None]:
        """Return the parent when it is a ``Project``; otherwise ``None``."""
        owner = self.parent
        # Matching on the class name means Project does not need to be
        # imported at runtime just to perform this check.
        if owner is not None and owner.__class__.__name__ == "Project":
            return owner  # type: ignore
        return None

    @model_validator(mode="after")
    def validate_tags(self):
        """Check the tag filter and the tool name/description after model validation.

        Raises:
            ValueError: If ``tags`` is an empty list, contains an empty tag or
                a tag with a space, or if ``tool_name`` / ``tool_description``
                is blank once surrounding whitespace is stripped.
        """
        tag_filter = self.tags
        if tag_filter is not None:
            # None means "no filter"; an explicit empty list is rejected.
            if not tag_filter:
                raise ValueError("Tags cannot be an empty list.")
            for entry in tag_filter:
                if not entry:
                    raise ValueError("Tags cannot be empty.")
                if " " in entry:
                    raise ValueError("Tags cannot contain spaces. Try underscores.")

        # Whitespace-only values are treated as empty.
        if not self.tool_name.strip():
            raise ValueError("Tool name cannot be empty.")
        if not self.tool_description.strip():
            raise ValueError("Tool description cannot be empty.")

        return self
Configuration for Retrieval-Augmented Generation (RAG) on a project's documents.
name: Annotated[str, BeforeValidator(func=<function name_validator.<locals>.fn at 0x7f90236f9b20>, json_schema_input_type=PydanticUndefined), StringConstraints(strip_whitespace=None, to_upper=None, to_lower=None, strict=None, min_length=1, max_length=120, pattern=None)]
tool_name: Annotated[str, BeforeValidator(func=<function tool_name_validator at 0x7f90233e9260>, json_schema_input_type=PydanticUndefined), StringConstraints(strip_whitespace=None, to_upper=None, to_lower=None, strict=None, min_length=1, max_length=64, pattern=None)]
def
parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
Returns the model class (a `KilnParentModel` subclass) that serves as the parent type of this model.
model_config =
{'validate_assignment': True}
Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
def
model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Populate ``__pydantic_private__`` with the defaults of the private attributes.

    Written to act like a ``BaseModel`` method. ``context`` is accepted
    because pydantic-core passes it when calling this function; it is not
    used in the body.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    # Private storage that is already set is left untouched.
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    # Only attributes that define a default are stored; get_default() is
    # evaluated once per attribute, in declaration order.
    initial_values = {
        attr_name: value
        for attr_name, descriptor in self.__private_attributes__.items()
        if (value := descriptor.get_default()) is not PydanticUndefined
    }
    object_setattr(self, '__pydantic_private__', initial_values)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.