kiln_ai.datamodel.embedding
from typing import TYPE_CHECKING, List, Union

from pydantic import BaseModel, Field, PositiveInt
from typing_extensions import TypedDict

from kiln_ai.datamodel.basemodel import ID_TYPE, FilenameString, KilnParentedModel
from kiln_ai.datamodel.datamodel_enums import ModelProviderName

# Imported for annotations only, so these modules are not loaded at runtime.
if TYPE_CHECKING:
    from kiln_ai.datamodel.chunk import ChunkedDocument
    from kiln_ai.datamodel.project import Project


class EmbeddingProperties(TypedDict, total=False):
    # total=False makes every key optional; when given, the value must
    # satisfy PositiveInt.
    dimensions: PositiveInt


class EmbeddingConfig(KilnParentedModel):
    """Configuration for generating embeddings from document chunks."""

    # The only field with a default is `description`; the others are required.
    name: FilenameString = Field(description="A name to identify the embedding config.")
    description: str | None = Field(
        description=(
            "A description for your reference, "
            "not shared with embedding models."
        ),
        default=None,
    )
    model_provider_name: ModelProviderName = Field(
        description="The provider to use to generate embeddings."
    )
    model_name: str = Field(description="The model to use to generate embeddings.")
    properties: EmbeddingProperties = Field(
        description="Properties to be used to execute the embedding config."
    )

    def parent_project(self) -> Union["Project", None]:
        """Return the parent if its class is named "Project", else None."""
        # Project is only imported under TYPE_CHECKING, so the parent is
        # recognised by class name instead of isinstance().
        candidate = self.parent
        if candidate is not None and candidate.__class__.__name__ == "Project":
            return candidate  # type: ignore
        return None


class Embedding(BaseModel):
    """A single embedding vector."""

    vector: List[float] = Field(
        description="The vector of the embedding.",
    )


class ChunkEmbeddings(KilnParentedModel):
    """Embeddings for the chunks of a chunked document."""

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to generate the embeddings."
    )
    embeddings: List[Embedding] = Field(
        description=(
            "The embeddings of the chunks. "
            "The embedding at index i corresponds to the chunk at index i "
            "in the parent chunked document."
        )
    )

    def parent_chunked_document(self) -> Union["ChunkedDocument", None]:
        """Return the parent if its class is named "ChunkedDocument", else None."""
        # ChunkedDocument is only imported under TYPE_CHECKING, so the parent
        # is recognised by class name instead of isinstance().
        candidate = self.parent
        if candidate is not None and candidate.__class__.__name__ == "ChunkedDocument":
            return candidate  # type: ignore
        return None
class EmbeddingConfig(KilnParentedModel):
    """Configuration for generating embeddings from document chunks."""

    # The only field with a default is `description`; the others are required.
    name: FilenameString = Field(description="A name to identify the embedding config.")
    description: str | None = Field(
        description=(
            "A description for your reference, "
            "not shared with embedding models."
        ),
        default=None,
    )
    model_provider_name: ModelProviderName = Field(
        description="The provider to use to generate embeddings."
    )
    model_name: str = Field(description="The model to use to generate embeddings.")
    properties: EmbeddingProperties = Field(
        description="Properties to be used to execute the embedding config."
    )

    def parent_project(self) -> Union["Project", None]:
        """Return the parent if its class is named "Project", else None."""
        # Workaround: the parent is recognised by class name so that Project
        # does not have to be imported here.
        candidate = self.parent
        if candidate is not None and candidate.__class__.__name__ == "Project":
            return candidate  # type: ignore
        return None
Configuration for generating embeddings from document chunks.
The type of the None singleton.
Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Fill in ``__pydantic_private__`` with the private-attribute defaults.

    This is a free function meant to behave like a BaseModel method. It does
    nothing when ``__pydantic_private__`` already holds a non-None value.

    Args:
        self: The BaseModel instance to initialise.
        context: The context. Accepted because pydantic-core passes it when
            calling this function; it is not used in the body.
    """
    # Already initialised: leave the existing private state untouched.
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    defaults = {}
    for attr_name, attr in self.__private_attributes__.items():
        value = attr.get_default()
        # Attributes without a default are left out of the mapping.
        if value is PydanticUndefined:
            continue
        defaults[attr_name] = value
    object_setattr(self, '__pydantic_private__', defaults)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.
class Embedding(BaseModel):
    """A single embedding vector."""

    # Required field: no default is declared.
    vector: List[float] = Field(
        description="The vector of the embedding.",
    )
A single embedding vector.
class ChunkEmbeddings(KilnParentedModel):
    """Embeddings for the chunks of a chunked document."""

    # Both fields are required: neither declares a default.
    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to generate the embeddings."
    )
    embeddings: List[Embedding] = Field(
        description=(
            "The embeddings of the chunks. "
            "The embedding at index i corresponds to the chunk at index i "
            "in the parent chunked document."
        )
    )

    def parent_chunked_document(self) -> Union["ChunkedDocument", None]:
        """Return the parent if its class is named "ChunkedDocument", else None."""
        # The parent is recognised by class name rather than isinstance(),
        # so ChunkedDocument does not have to be imported here.
        candidate = self.parent
        if candidate is not None and candidate.__class__.__name__ == "ChunkedDocument":
            return candidate  # type: ignore
        return None
Embeddings for the chunks of a chunked document.
The type of the None singleton.
Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Fill in ``__pydantic_private__`` with the private-attribute defaults.

    This is a free function meant to behave like a BaseModel method. It does
    nothing when ``__pydantic_private__`` already holds a non-None value.

    Args:
        self: The BaseModel instance to initialise.
        context: The context. Accepted because pydantic-core passes it when
            calling this function; it is not used in the body.
    """
    # Already initialised: leave the existing private state untouched.
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    defaults = {}
    for attr_name, attr in self.__private_attributes__.items():
        value = attr.get_default()
        # Attributes without a default are left out of the mapping.
        if value is PydanticUndefined:
            continue
        defaults[attr_name] = value
    object_setattr(self, '__pydantic_private__', defaults)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.