kiln_ai.datamodel.embedding

 1from typing import TYPE_CHECKING, List, Union
 2
 3from pydantic import BaseModel, Field, PositiveInt
 4from typing_extensions import TypedDict
 5
 6from kiln_ai.datamodel.basemodel import ID_TYPE, FilenameString, KilnParentedModel
 7from kiln_ai.datamodel.datamodel_enums import ModelProviderName
 8
 9if TYPE_CHECKING:
10    from kiln_ai.datamodel.chunk import ChunkedDocument
11    from kiln_ai.datamodel.project import Project
12
13
# Free-form settings attached to an EmbeddingConfig. `total=False` means every
# key is optional, so an empty dict is an acceptable value.
class EmbeddingProperties(TypedDict, total=False):
    # Must be a strictly positive integer (PositiveInt). Presumably the size
    # of the vectors requested from the model - confirm against the adapter.
    dimensions: PositiveInt
16
17
class EmbeddingConfig(KilnParentedModel):
    """Configuration for generating embeddings from document chunks."""

    # --- Identification -----------------------------------------------------
    name: FilenameString = Field(
        description="A name to identify the embedding config.",
    )
    description: str | None = Field(
        description="A description for your reference, not shared with embedding models.",
        default=None,
    )

    # --- Which provider/model pair generates the embeddings ------------------
    model_provider_name: ModelProviderName = Field(
        description="The provider to use to generate embeddings.",
    )
    model_name: str = Field(
        description="The model to use to generate embeddings.",
    )

    # --- Execution settings ---------------------------------------------------
    # The field itself is required (no default); the keys inside
    # EmbeddingProperties are individually optional.
    properties: EmbeddingProperties = Field(
        description="Properties to be used to execute the embedding config.",
    )

    def parent_project(self) -> Union["Project", None]:
        """Return the parent typed as a ``Project``.

        Returns:
            ``self.parent`` when it is set and its class is named ``"Project"``;
            otherwise ``None``.
        """
        # The class is matched by name rather than with isinstance() so that
        # Project only has to be imported under TYPE_CHECKING.
        if self.parent is not None and self.parent.__class__.__name__ == "Project":
            return self.parent  # type: ignore
        return None
43
44
class Embedding(BaseModel):
    """A single embedding vector."""

    # Plain pydantic model; instances are held in ChunkEmbeddings.embeddings.
    vector: List[float] = Field(
        description="The vector of the embedding.",
    )
49
50
class ChunkEmbeddings(KilnParentedModel):
    """Embeddings for the chunks of a chunked document."""

    # Identifies the EmbeddingConfig that produced these vectors.
    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to generate the embeddings.",
    )
    # Order matters: entry i belongs to chunk i of the parent chunked document
    # (see the field description).
    embeddings: List[Embedding] = Field(
        description="The embeddings of the chunks. The embedding at index i corresponds to the chunk at index i in the parent chunked document.",
    )

    def parent_chunked_document(self) -> Union["ChunkedDocument", None]:
        """Return the parent typed as a ``ChunkedDocument``.

        Returns:
            ``self.parent`` when it is set and its class is named
            ``"ChunkedDocument"``; otherwise ``None``.
        """
        # The class is matched by name rather than with isinstance() so that
        # ChunkedDocument only has to be imported under TYPE_CHECKING.
        if (
            self.parent is not None
            and self.parent.__class__.__name__ == "ChunkedDocument"
        ):
            return self.parent  # type: ignore
        return None
class EmbeddingProperties(typing_extensions.TypedDict):
15class EmbeddingProperties(TypedDict, total=False):
16    dimensions: PositiveInt
dimensions: Annotated[int, Gt(gt=0)]
class EmbeddingConfig(kiln_ai.datamodel.basemodel.KilnParentedModel):
19class EmbeddingConfig(KilnParentedModel):
20    """Configuration for generating embeddings from document chunks."""
21
22    name: FilenameString = Field(
23        description="A name to identify the embedding config.",
24    )
25    description: str | None = Field(
26        default=None,
27        description="A description for your reference, not shared with embedding models.",
28    )
29    model_provider_name: ModelProviderName = Field(
30        description="The provider to use to generate embeddings.",
31    )
32    model_name: str = Field(
33        description="The model to use to generate embeddings.",
34    )
35    properties: EmbeddingProperties = Field(
36        description="Properties to be used to execute the embedding config.",
37    )
38
39    # Workaround to return typed parent without importing Project
40    def parent_project(self) -> Union["Project", None]:
41        if self.parent is None or self.parent.__class__.__name__ != "Project":
42            return None
43        return self.parent  # type: ignore

Configuration for generating embeddings from document chunks.

name: Annotated[str, BeforeValidator(func=<function name_validator.<locals>.fn at 0x7f90236f9b20>, json_schema_input_type=PydanticUndefined), StringConstraints(strip_whitespace=None, to_upper=None, to_lower=None, strict=None, min_length=1, max_length=120, pattern=None)]
description: str | None
model_provider_name: kiln_ai.datamodel.datamodel_enums.ModelProviderName
model_name: str
properties: EmbeddingProperties
def parent_project(self) -> Optional[kiln_ai.datamodel.Project]:
40    def parent_project(self) -> Union["Project", None]:
41        if self.parent is None or self.parent.__class__.__name__ != "Project":
42            return None
43        return self.parent  # type: ignore
def relationship_name() -> str:
761        def relationship_name_method() -> str:
762            return relationship_name

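No docstring is defined for this method. Per the source excerpt above, it returns the `relationship_name` value captured from the enclosing scope.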

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
754        def parent_class_method() -> Type[KilnParentModel]:
755            return cls

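No docstring is defined for this method. Per the source excerpt above, it returns the class `cls` captured from the enclosing scope, annotated as a `KilnParentModel` type.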

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
337def init_private_attributes(self: BaseModel, context: Any, /) -> None:
338    """This function is meant to behave like a BaseModel method to initialise private attributes.
339
340    It takes context as an argument since that's what pydantic-core passes when calling it.
341
342    Args:
343        self: The BaseModel instance.
344        context: The context.
345    """
346    if getattr(self, '__pydantic_private__', None) is None:
347        pydantic_private = {}
348        for name, private_attr in self.__private_attributes__.items():
349            default = private_attr.get_default()
350            if default is not PydanticUndefined:
351                pydantic_private[name] = default
352        object_setattr(self, '__pydantic_private__', pydantic_private)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args:
- self: The BaseModel instance.
- context: The context.

class Embedding(pydantic.main.BaseModel):
46class Embedding(BaseModel):
47    """A single embedding vector."""
48
49    vector: List[float] = Field(description="The vector of the embedding.")

A single embedding vector.

vector: List[float]
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class ChunkEmbeddings(kiln_ai.datamodel.basemodel.KilnParentedModel):
52class ChunkEmbeddings(KilnParentedModel):
53    """Embeddings for the chunks of a chunked document."""
54
55    embedding_config_id: ID_TYPE = Field(
56        description="The ID of the embedding config used to generate the embeddings.",
57    )
58    embeddings: List[Embedding] = Field(
59        description="The embeddings of the chunks. The embedding at index i corresponds to the chunk at index i in the parent chunked document."
60    )
61
62    def parent_chunked_document(self) -> Union["ChunkedDocument", None]:
63        if self.parent is None or self.parent.__class__.__name__ != "ChunkedDocument":
64            return None
65        return self.parent  # type: ignore

Embeddings for the chunks of a chunked document.

embedding_config_id: Optional[str]
embeddings: List[Embedding]
def parent_chunked_document(self) -> Optional[kiln_ai.datamodel.chunk.ChunkedDocument]:
62    def parent_chunked_document(self) -> Union["ChunkedDocument", None]:
63        if self.parent is None or self.parent.__class__.__name__ != "ChunkedDocument":
64            return None
65        return self.parent  # type: ignore
def relationship_name() -> str:
761        def relationship_name_method() -> str:
762            return relationship_name

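No docstring is defined for this method. Per the source excerpt above, it returns the `relationship_name` value captured from the enclosing scope.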

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
754        def parent_class_method() -> Type[KilnParentModel]:
755            return cls

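No docstring is defined for this method. Per the source excerpt above, it returns the class `cls` captured from the enclosing scope, annotated as a `KilnParentModel` type.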

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
337def init_private_attributes(self: BaseModel, context: Any, /) -> None:
338    """This function is meant to behave like a BaseModel method to initialise private attributes.
339
340    It takes context as an argument since that's what pydantic-core passes when calling it.
341
342    Args:
343        self: The BaseModel instance.
344        context: The context.
345    """
346    if getattr(self, '__pydantic_private__', None) is None:
347        pydantic_private = {}
348        for name, private_attr in self.__private_attributes__.items():
349            default = private_attr.get_default()
350            if default is not PydanticUndefined:
351                pydantic_private[name] = default
352        object_setattr(self, '__pydantic_private__', pydantic_private)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args:
- self: The BaseModel instance.
- context: The context.