kiln_ai.datamodel.embedding

 1from typing import TYPE_CHECKING, List, Union
 2
 3from pydantic import BaseModel, Field, model_validator
 4
 5from kiln_ai.datamodel.basemodel import ID_TYPE, FilenameString, KilnParentedModel
 6from kiln_ai.datamodel.datamodel_enums import ModelProviderName
 7
 8if TYPE_CHECKING:
 9    from kiln_ai.datamodel.chunk import ChunkedDocument
10    from kiln_ai.datamodel.project import Project
11
12
class EmbeddingConfig(KilnParentedModel):
    # Configuration for generating embeddings: the provider and model to use,
    # plus a free-form dictionary of execution properties.

    name: FilenameString = Field(
        description="A name to identify the embedding config.",
    )
    description: str | None = Field(
        default=None,
        description="A description for your reference, not shared with embedding models.",
    )
    model_provider_name: ModelProviderName = Field(
        description="The provider to use to generate embeddings.",
    )
    model_name: str = Field(
        description="The model to use to generate embeddings.",
    )
    properties: dict[str, str | int | float | bool] = Field(
        description="Properties to be used to execute the embedding config.",
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent when its class is named ``Project``, else ``None``."""
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="after")
    def validate_properties(self):
        """Validate the optional ``dimensions`` entry of ``properties``.

        When ``properties`` contains a ``"dimensions"`` key, its value must be
        a positive integer. Other keys are not inspected.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If ``dimensions`` is present but is not a positive
                integer (booleans are rejected).
        """
        if "dimensions" not in self.properties:
            return self

        dimensions = self.properties["dimensions"]
        # bool is a subclass of int, so isinstance(True, int) is True; exclude
        # it explicitly so that `True` is not accepted as a dimension of 1.
        is_real_int = isinstance(dimensions, int) and not isinstance(dimensions, bool)
        if not is_real_int or dimensions <= 0:
            raise ValueError("Dimensions must be a positive integer")

        return self
47
48
class Embedding(BaseModel):
    # A single embedding, stored as a list of floats.
    vector: List[float] = Field(
        description="The vector of the embedding.",
    )
51
52
class ChunkEmbeddings(KilnParentedModel):
    # Embeddings for the chunks of a parent chunked document, together with the
    # ID of the embedding config that produced them.

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to generate the embeddings.",
    )
    embeddings: List[Embedding] = Field(
        description="The embeddings of the chunks. The embedding at index i corresponds to the chunk at index i in the parent chunked document."
    )

    def parent_chunked_document(self) -> Union["ChunkedDocument", None]:
        """Return the parent when its class is named ``ChunkedDocument``, else ``None``.

        The parent is matched by class name because ``ChunkedDocument`` is only
        imported for type checking in this module.
        """
        owner = self.parent
        if owner is not None and owner.__class__.__name__ == "ChunkedDocument":
            return owner  # type: ignore
        return None
class EmbeddingConfig(kiln_ai.datamodel.basemodel.KilnParentedModel):
class EmbeddingConfig(KilnParentedModel):
    # Configuration for generating embeddings: the provider and model to use,
    # plus a free-form dictionary of execution properties.

    name: FilenameString = Field(
        description="A name to identify the embedding config.",
    )
    description: str | None = Field(
        default=None,
        description="A description for your reference, not shared with embedding models.",
    )
    model_provider_name: ModelProviderName = Field(
        description="The provider to use to generate embeddings.",
    )
    model_name: str = Field(
        description="The model to use to generate embeddings.",
    )
    properties: dict[str, str | int | float | bool] = Field(
        description="Properties to be used to execute the embedding config.",
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent when its class is named ``Project``, else ``None``."""
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="after")
    def validate_properties(self):
        """Validate the optional ``dimensions`` entry of ``properties``.

        When ``properties`` contains a ``"dimensions"`` key, its value must be
        a positive integer. Other keys are not inspected.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If ``dimensions`` is present but is not a positive
                integer (booleans are rejected).
        """
        if "dimensions" not in self.properties:
            return self

        dimensions = self.properties["dimensions"]
        # bool is a subclass of int, so isinstance(True, int) is True; exclude
        # it explicitly so that `True` is not accepted as a dimension of 1.
        is_real_int = isinstance(dimensions, int) and not isinstance(dimensions, bool)
        if not is_real_int or dimensions <= 0:
            raise ValueError("Dimensions must be a positive integer")

        return self

Base model for Kiln models that have a parent-child relationship. This base class is for child models.

This class provides functionality for managing hierarchical relationships between models, including parent reference handling and file system organization.

Attributes: parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.

name: FilenameString  (Annotated[str, BeforeValidator(func=name_validator.<locals>.fn, json_schema_input_type=PydanticUndefined)])
description: str | None
model_provider_name: kiln_ai.datamodel.datamodel_enums.ModelProviderName
model_name: str
properties: dict[str, str | int | float | bool]
def parent_project(self) -> Optional[kiln_ai.datamodel.Project]:
33    def parent_project(self) -> Union["Project", None]:
34        if self.parent is None or self.parent.__class__.__name__ != "Project":
35            return None
36        return self.parent  # type: ignore
@model_validator(mode='after')
def validate_properties(self):
38    @model_validator(mode="after")
39    def validate_properties(self):
40        if "dimensions" in self.properties:
41            if (
42                not isinstance(self.properties["dimensions"], int)
43                or self.properties["dimensions"] <= 0
44            ):
45                raise ValueError("Dimensions must be a positive integer")
46
47        return self
def relationship_name() -> str:
661        def relationship_name_method() -> str:
662            return relationship_name

Returns the relationship name as a string.

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
654        def parent_class_method() -> Type[KilnParentModel]:
655            return cls

Returns the parent model class (a KilnParentModel type).

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Populate ``__pydantic_private__`` with private-attribute defaults.

    Written to be used like a BaseModel method. Does nothing when
    ``__pydantic_private__`` is already set to a non-None value.

    Args:
        self: The BaseModel instance.
        context: The context passed in by pydantic-core; not used in the body.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    private_values = {}
    for attr_name, attr in self.__private_attributes__.items():
        value = attr.get_default()
        if value is PydanticUndefined:
            # No default for this attribute; leave it out of the mapping.
            continue
        private_values[attr_name] = value
    object_setattr(self, '__pydantic_private__', private_values)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args: self: The BaseModel instance. context: The context.

class Embedding(pydantic.main.BaseModel):
class Embedding(BaseModel):
    # A single embedding, stored as a list of floats.
    vector: List[float] = Field(
        description="The vector of the embedding.",
    )

Usage documentation: see the "Models" guide.

A base class for creating Pydantic models.

Attributes: __class_vars__: The names of the class variables defined on the model. __private_attributes__: Metadata about the private attributes of the model. __signature__: The synthesized __init__ [Signature][inspect.Signature] of the model.

__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
    This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to
    __args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [`RootModel`][pydantic.root_model.RootModel].
__pydantic_serializer__: The `pydantic-core` `SchemaSerializer` used to dump instances of the model.
__pydantic_validator__: The `pydantic-core` `SchemaValidator` used to validate instances of the model.

__pydantic_fields__: A dictionary of field names and their corresponding [`FieldInfo`][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [`ComputedFieldInfo`][pydantic.fields.ComputedFieldInfo] objects.

__pydantic_extra__: A dictionary containing extra values, if [`extra`][pydantic.config.ConfigDict.extra]
    is set to `'allow'`.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.
vector: List[float]
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class ChunkEmbeddings(kiln_ai.datamodel.basemodel.KilnParentedModel):
class ChunkEmbeddings(KilnParentedModel):
    # Embeddings for the chunks of a parent chunked document, together with the
    # ID of the embedding config that produced them.

    embedding_config_id: ID_TYPE = Field(
        description="The ID of the embedding config used to generate the embeddings.",
    )
    embeddings: List[Embedding] = Field(
        description="The embeddings of the chunks. The embedding at index i corresponds to the chunk at index i in the parent chunked document."
    )

    def parent_chunked_document(self) -> Union["ChunkedDocument", None]:
        """Return the parent when its class is named ``ChunkedDocument``, else ``None``."""
        owner = self.parent
        if owner is not None and owner.__class__.__name__ == "ChunkedDocument":
            return owner  # type: ignore
        return None

Base model for Kiln models that have a parent-child relationship. This base class is for child models.

This class provides functionality for managing hierarchical relationships between models, including parent reference handling and file system organization.

Attributes: parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.

embedding_config_id: Optional[str]
embeddings: List[Embedding]
def parent_chunked_document(self) -> Optional[kiln_ai.datamodel.chunk.ChunkedDocument]:
62    def parent_chunked_document(self) -> Union["ChunkedDocument", None]:
63        if self.parent is None or self.parent.__class__.__name__ != "ChunkedDocument":
64            return None
65        return self.parent  # type: ignore
def relationship_name() -> str:
661        def relationship_name_method() -> str:
662            return relationship_name

Returns the relationship name as a string.

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
654        def parent_class_method() -> Type[KilnParentModel]:
655            return cls

Returns the parent model class (a KilnParentModel type).

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Populate ``__pydantic_private__`` with private-attribute defaults.

    Written to be used like a BaseModel method. Does nothing when
    ``__pydantic_private__`` is already set to a non-None value.

    Args:
        self: The BaseModel instance.
        context: The context passed in by pydantic-core; not used in the body.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    private_values = {}
    for attr_name, attr in self.__private_attributes__.items():
        value = attr.get_default()
        if value is PydanticUndefined:
            # No default for this attribute; leave it out of the mapping.
            continue
        private_values[attr_name] = value
    object_setattr(self, '__pydantic_private__', private_values)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args: self: The BaseModel instance. context: The context.