kiln_ai.datamodel.chunk
import logging
from enum import Enum
from typing import TYPE_CHECKING, Annotated, List, Union

import anyio
from pydantic import (
    AfterValidator,
    BaseModel,
    Field,
    NonNegativeInt,
    PositiveInt,
    SerializationInfo,
    ValidationInfo,
    field_serializer,
    model_validator,
)
from typing_extensions import Literal, TypedDict

from kiln_ai.datamodel.basemodel import (
    ID_TYPE,
    FilenameString,
    KilnAttachmentModel,
    KilnParentedModel,
    KilnParentModel,
)
from kiln_ai.datamodel.embedding import ChunkEmbeddings

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    from kiln_ai.datamodel.extraction import Extraction
    from kiln_ai.datamodel.project import Project


class ChunkerType(str, Enum):
    """The type of chunking algorithm to use."""

    FIXED_WINDOW = "fixed_window"
    SEMANTIC = "semantic"


class SemanticChunkerProperties(TypedDict, total=True):
    # chunker_type doubles as the discriminator for the ChunkerConfig.properties union
    chunker_type: Literal[ChunkerType.SEMANTIC]
    embedding_config_id: str
    buffer_size: PositiveInt
    breakpoint_percentile_threshold: NonNegativeInt
    include_metadata: bool
    include_prev_next_rel: bool


class FixedWindowChunkerProperties(TypedDict, total=True):
    # chunker_type doubles as the discriminator for the ChunkerConfig.properties union
    chunker_type: Literal[ChunkerType.FIXED_WINDOW]
    chunk_overlap: NonNegativeInt
    chunk_size: PositiveInt


def validate_fixed_window_chunker_properties(
    properties: FixedWindowChunkerProperties,
) -> FixedWindowChunkerProperties:
    """Validate the properties for the fixed window chunker and set defaults if needed.

    Raises:
        ValueError: If chunk_overlap is not strictly less than chunk_size.
    """
    # the typed dict only validates the shape and types, but not the logic, so we validate here
    if properties["chunk_overlap"] >= properties["chunk_size"]:
        raise ValueError("Chunk overlap must be less than chunk size.")

    return properties


def validate_semantic_chunker_properties(
    properties: SemanticChunkerProperties,
) -> SemanticChunkerProperties:
    """Validate the properties for the semantic chunker.

    Raises:
        ValueError: If buffer_size is < 1, or breakpoint_percentile_threshold
            is outside [0, 100].
    """
    buffer_size = properties["buffer_size"]
    if buffer_size < 1:
        raise ValueError("buffer_size must be greater than or equal to 1.")

    breakpoint_percentile_threshold = properties["breakpoint_percentile_threshold"]
    if not (0 <= breakpoint_percentile_threshold <= 100):
        raise ValueError("breakpoint_percentile_threshold must be between 0 and 100.")

    return properties


# Annotated variants run the logic checks above after pydantic's shape/type validation.
SemanticChunkerPropertiesValidator = Annotated[
    SemanticChunkerProperties,
    AfterValidator(lambda v: validate_semantic_chunker_properties(v)),
]

FixedWindowChunkerPropertiesValidator = Annotated[
    FixedWindowChunkerProperties,
    AfterValidator(lambda v: validate_fixed_window_chunker_properties(v)),
]


class ChunkerConfig(KilnParentedModel):
    """Configuration for chunking extracted documents into smaller pieces."""

    name: FilenameString = Field(
        description="A name to identify the chunker config.",
    )
    description: str | None = Field(
        default=None, description="The description of the chunker config"
    )
    chunker_type: ChunkerType = Field(
        description="This is used to determine the type of chunker to use.",
    )
    properties: (
        SemanticChunkerPropertiesValidator | FixedWindowChunkerPropertiesValidator
    ) = Field(
        description="Properties to be used to execute the chunker config. This is chunker_type specific and should serialize to a json dict.",
        discriminator="chunker_type",
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="before")
    def upgrade_missing_discriminator_properties(
        cls, data: dict, info: ValidationInfo
    ) -> dict:
        """Inject chunker_type into properties when loading legacy files from disk."""
        if not info.context or not info.context.get("loading_from_file", False):
            # Not loading from file, so no need to upgrade
            return data

        if not isinstance(data, dict):
            return data

        # backward compatibility:
        # - we originally did not have the chunker_type in the properties
        # - we later moved chunker_type into the properties to use pydantic's
        #   discriminated union feature, so older files need it added here
        properties = data.get("properties", {})
        if "chunker_type" not in properties:
            # the chunker_type on the parent model is always there, we just need to add it to the properties
            properties["chunker_type"] = data["chunker_type"]
            data["properties"] = properties
        return data

    @model_validator(mode="after")
    def ensure_chunker_type_matches_properties(self):
        # sanity check to ensure the chunker_type matches the properties chunker_type
        if self.chunker_type != self.properties["chunker_type"]:
            raise ValueError(
                f"Chunker type mismatch: {self.chunker_type} != {self.properties['chunker_type']}. This is a bug, please report it."
            )
        return self

    # expose the typed properties based on the chunker_type
    @property
    def semantic_properties(self) -> SemanticChunkerProperties:
        if self.properties["chunker_type"] != ChunkerType.SEMANTIC:
            raise ValueError(
                "Semantic properties are only available for semantic chunker."
            )
        # TypedDict cannot be checked at runtime, so we need to ignore the type check
        # or cast (but it is currently banned in our linting rules). Better solution
        # would be discriminated union, but that requires the discriminator to be part
        # of the properties (not outside on the parent model).
        return self.properties  # type: ignore[return-value]

    @property
    def fixed_window_properties(self) -> FixedWindowChunkerProperties:
        if self.properties["chunker_type"] != ChunkerType.FIXED_WINDOW:
            raise ValueError(
                "Fixed window properties are only available for fixed window chunker."
            )
        # TypedDict cannot be checked at runtime, so we need to ignore the type check
        # or cast (but it is currently banned in our linting rules). Better solution
        # would be discriminated union, but that requires the discriminator to be part
        # of the properties (not outside on the parent model).
        return self.properties  # type: ignore[return-value]


class Chunk(BaseModel):
    """A single chunk of a document, stored as a file attachment."""

    content: KilnAttachmentModel = Field(
        description="The content of the chunk, stored as an attachment."
    )

    @field_serializer("content")
    def serialize_content(
        self, content: KilnAttachmentModel, info: SerializationInfo
    ) -> dict:
        """Serialize the attachment with a fixed filename prefix for chunk content."""
        # Copy the context rather than mutating it in place, so the injected
        # "filename_prefix" key does not leak back into the caller's context
        # dict (the same context object may be shared across serializers).
        context = {**(info.context or {}), "filename_prefix": "content"}
        return content.model_dump(mode="json", context=context)


class ChunkedDocument(
    KilnParentedModel, KilnParentModel, parent_of={"chunk_embeddings": ChunkEmbeddings}
):
    """A document that has been chunked, storing the resulting chunks."""

    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the document.",
    )
    chunks: List[Chunk] = Field(description="The chunks of the document.")

    # Workaround to return typed parent without importing Extraction
    def parent_extraction(self) -> Union["Extraction", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Extraction":
            return None
        return self.parent  # type: ignore

    def chunk_embeddings(self, readonly: bool = False) -> list[ChunkEmbeddings]:
        # Typed wrapper over the parent_of-generated accessor.
        return super().chunk_embeddings(readonly=readonly)  # type: ignore

    async def load_chunks_text(self) -> list[str]:
        """Utility to return a list of text for each chunk, loaded from each chunk's content attachment.

        Raises:
            ValueError: If this document has no path on disk, or a chunk's
                content attachment cannot be read.
        """
        if not self.path:
            raise ValueError(
                "Failed to resolve the path of chunk content attachment because the chunk does not have a path."
            )

        chunks_text: list[str] = []
        for chunk in self.chunks:
            full_path = chunk.content.resolve_path(self.path.parent)

            try:
                chunks_text.append(
                    await anyio.Path(full_path).read_text(encoding="utf-8")
                )
            except Exception as e:
                raise ValueError(
                    f"Failed to read chunk content for {full_path}: {e}"
                ) from e

        return chunks_text
36class ChunkerType(str, Enum): 37 """The type of chunking algorithm to use.""" 38 39 FIXED_WINDOW = "fixed_window" 40 SEMANTIC = "semantic"
The type of chunking algorithm to use.
43class SemanticChunkerProperties(TypedDict, total=True): 44 chunker_type: Literal[ChunkerType.SEMANTIC] 45 embedding_config_id: str 46 buffer_size: PositiveInt 47 breakpoint_percentile_threshold: NonNegativeInt 48 include_metadata: bool 49 include_prev_next_rel: bool
52class FixedWindowChunkerProperties(TypedDict, total=True): 53 chunker_type: Literal[ChunkerType.FIXED_WINDOW] 54 chunk_overlap: NonNegativeInt 55 chunk_size: PositiveInt
58def validate_fixed_window_chunker_properties( 59 properties: FixedWindowChunkerProperties, 60) -> FixedWindowChunkerProperties: 61 """Validate the properties for the fixed window chunker and set defaults if needed.""" 62 # the typed dict only validates the shape and types, but not the logic, so we validate here 63 if properties["chunk_overlap"] >= properties["chunk_size"]: 64 raise ValueError("Chunk overlap must be less than chunk size.") 65 66 return properties
Validate the properties for the fixed window chunker and set defaults if needed.
69def validate_semantic_chunker_properties( 70 properties: SemanticChunkerProperties, 71) -> SemanticChunkerProperties: 72 """Validate the properties for the semantic chunker.""" 73 buffer_size = properties["buffer_size"] 74 if buffer_size < 1: 75 raise ValueError("buffer_size must be greater than or equal to 1.") 76 77 breakpoint_percentile_threshold = properties["breakpoint_percentile_threshold"] 78 if not (0 <= breakpoint_percentile_threshold <= 100): 79 raise ValueError("breakpoint_percentile_threshold must be between 0 and 100.") 80 81 return properties
Validate the properties for the semantic chunker.
95class ChunkerConfig(KilnParentedModel): 96 """Configuration for chunking extracted documents into smaller pieces.""" 97 98 name: FilenameString = Field( 99 description="A name to identify the chunker config.", 100 ) 101 description: str | None = Field( 102 default=None, description="The description of the chunker config" 103 ) 104 chunker_type: ChunkerType = Field( 105 description="This is used to determine the type of chunker to use.", 106 ) 107 properties: ( 108 SemanticChunkerPropertiesValidator | FixedWindowChunkerPropertiesValidator 109 ) = Field( 110 description="Properties to be used to execute the chunker config. This is chunker_type specific and should serialize to a json dict.", 111 discriminator="chunker_type", 112 ) 113 114 # Workaround to return typed parent without importing Project 115 def parent_project(self) -> Union["Project", None]: 116 if self.parent is None or self.parent.__class__.__name__ != "Project": 117 return None 118 return self.parent # type: ignore 119 120 @model_validator(mode="before") 121 def upgrade_missing_discriminator_properties( 122 cls, data: dict, info: ValidationInfo 123 ) -> dict: 124 if not info.context or not info.context.get("loading_from_file", False): 125 # Not loading from file, so no need to upgrade 126 return data 127 128 if not isinstance(data, dict): 129 return data 130 131 # backward compatibility: 132 # - we originally did not have the chunker_type in the properties, so we need to add it here 133 # - we started wanted to have chunker_type in the properties to use pydantic's discriminated union feature 134 properties = data.get("properties", {}) 135 if "chunker_type" not in properties: 136 # the chunker_type on the parent model is always there, we just need to add it to the properties 137 properties["chunker_type"] = data["chunker_type"] 138 data["properties"] = properties 139 return data 140 141 @model_validator(mode="after") 142 def ensure_chunker_type_matches_properties(self): 143 # sanity check to ensure 
the chunker_type matches the properties chunker_type 144 if self.chunker_type != self.properties["chunker_type"]: 145 raise ValueError( 146 f"Chunker type mismatch: {self.chunker_type} != {self.properties['chunker_type']}. This is a bug, please report it." 147 ) 148 return self 149 150 # expose the typed properties based on the chunker_type 151 @property 152 def semantic_properties(self) -> SemanticChunkerProperties: 153 if self.properties["chunker_type"] != ChunkerType.SEMANTIC: 154 raise ValueError( 155 "Semantic properties are only available for semantic chunker." 156 ) 157 # TypedDict cannot be checked at runtime, so we need to ignore the type check 158 # or cast (but it is currently banned in our linting rules). Better solution 159 # would be discriminated union, but that requires the discriminator to be part 160 # of the properties (not outside on the parent model). 161 return self.properties # type: ignore[return-value] 162 163 @property 164 def fixed_window_properties(self) -> FixedWindowChunkerProperties: 165 if self.properties["chunker_type"] != ChunkerType.FIXED_WINDOW: 166 raise ValueError( 167 "Fixed window properties are only available for fixed window chunker." 168 ) 169 # TypedDict cannot be checked at runtime, so we need to ignore the type check 170 # or cast (but it is currently banned in our linting rules). Better solution 171 # would be discriminated union, but that requires the discriminator to be part 172 # of the properties (not outside on the parent model). 173 return self.properties # type: ignore[return-value]
Configuration for chunking extracted documents into smaller pieces.
120 @model_validator(mode="before") 121 def upgrade_missing_discriminator_properties( 122 cls, data: dict, info: ValidationInfo 123 ) -> dict: 124 if not info.context or not info.context.get("loading_from_file", False): 125 # Not loading from file, so no need to upgrade 126 return data 127 128 if not isinstance(data, dict): 129 return data 130 131 # backward compatibility: 132 # - we originally did not have the chunker_type in the properties, so we need to add it here 133 # - we started wanted to have chunker_type in the properties to use pydantic's discriminated union feature 134 properties = data.get("properties", {}) 135 if "chunker_type" not in properties: 136 # the chunker_type on the parent model is always there, we just need to add it to the properties 137 properties["chunker_type"] = data["chunker_type"] 138 data["properties"] = properties 139 return data
141 @model_validator(mode="after") 142 def ensure_chunker_type_matches_properties(self): 143 # sanity check to ensure the chunker_type matches the properties chunker_type 144 if self.chunker_type != self.properties["chunker_type"]: 145 raise ValueError( 146 f"Chunker type mismatch: {self.chunker_type} != {self.properties['chunker_type']}. This is a bug, please report it." 147 ) 148 return self
151 @property 152 def semantic_properties(self) -> SemanticChunkerProperties: 153 if self.properties["chunker_type"] != ChunkerType.SEMANTIC: 154 raise ValueError( 155 "Semantic properties are only available for semantic chunker." 156 ) 157 # TypedDict cannot be checked at runtime, so we need to ignore the type check 158 # or cast (but it is currently banned in our linting rules). Better solution 159 # would be discriminated union, but that requires the discriminator to be part 160 # of the properties (not outside on the parent model). 161 return self.properties # type: ignore[return-value]
163 @property 164 def fixed_window_properties(self) -> FixedWindowChunkerProperties: 165 if self.properties["chunker_type"] != ChunkerType.FIXED_WINDOW: 166 raise ValueError( 167 "Fixed window properties are only available for fixed window chunker." 168 ) 169 # TypedDict cannot be checked at runtime, so we need to ignore the type check 170 # or cast (but it is currently banned in our linting rules). Better solution 171 # would be discriminated union, but that requires the discriminator to be part 172 # of the properties (not outside on the parent model). 173 return self.properties # type: ignore[return-value]
The type of the None singleton.
Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
337def init_private_attributes(self: BaseModel, context: Any, /) -> None: 338 """This function is meant to behave like a BaseModel method to initialise private attributes. 339 340 It takes context as an argument since that's what pydantic-core passes when calling it. 341 342 Args: 343 self: The BaseModel instance. 344 context: The context. 345 """ 346 if getattr(self, '__pydantic_private__', None) is None: 347 pydantic_private = {} 348 for name, private_attr in self.__private_attributes__.items(): 349 default = private_attr.get_default() 350 if default is not PydanticUndefined: 351 pydantic_private[name] = default 352 object_setattr(self, '__pydantic_private__', pydantic_private)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.
176class Chunk(BaseModel): 177 """A single chunk of a document, stored as a file attachment.""" 178 179 content: KilnAttachmentModel = Field( 180 description="The content of the chunk, stored as an attachment." 181 ) 182 183 @field_serializer("content") 184 def serialize_content( 185 self, content: KilnAttachmentModel, info: SerializationInfo 186 ) -> dict: 187 context = info.context or {} 188 context["filename_prefix"] = "content" 189 return content.model_dump(mode="json", context=context)
A single chunk of a document, stored as a file attachment.
192class ChunkedDocument( 193 KilnParentedModel, KilnParentModel, parent_of={"chunk_embeddings": ChunkEmbeddings} 194): 195 """A document that has been chunked, storing the resulting chunks.""" 196 197 chunker_config_id: ID_TYPE = Field( 198 description="The ID of the chunker config used to chunk the document.", 199 ) 200 chunks: List[Chunk] = Field(description="The chunks of the document.") 201 202 def parent_extraction(self) -> Union["Extraction", None]: 203 if self.parent is None or self.parent.__class__.__name__ != "Extraction": 204 return None 205 return self.parent # type: ignore 206 207 def chunk_embeddings(self, readonly: bool = False) -> list[ChunkEmbeddings]: 208 return super().chunk_embeddings(readonly=readonly) # type: ignore 209 210 async def load_chunks_text(self) -> list[str]: 211 """Utility to return a list of text for each chunk, loaded from each chunk's content attachment.""" 212 if not self.path: 213 raise ValueError( 214 "Failed to resolve the path of chunk content attachment because the chunk does not have a path." 215 ) 216 217 chunks_text: list[str] = [] 218 for chunk in self.chunks: 219 full_path = chunk.content.resolve_path(self.path.parent) 220 221 try: 222 chunks_text.append( 223 await anyio.Path(full_path).read_text(encoding="utf-8") 224 ) 225 except Exception as e: 226 raise ValueError( 227 f"Failed to read chunk content for {full_path}: {e}" 228 ) from e 229 230 return chunks_text
A document that has been chunked, storing the resulting chunks.
743 def child_method(self, readonly: bool = False) -> list[child_class]: # type: ignore[invalid-type-form] 744 return child_class.all_children_of_parent_path(self.path, readonly=readonly)
The type of the None singleton.
210 async def load_chunks_text(self) -> list[str]: 211 """Utility to return a list of text for each chunk, loaded from each chunk's content attachment.""" 212 if not self.path: 213 raise ValueError( 214 "Failed to resolve the path of chunk content attachment because the chunk does not have a path." 215 ) 216 217 chunks_text: list[str] = [] 218 for chunk in self.chunks: 219 full_path = chunk.content.resolve_path(self.path.parent) 220 221 try: 222 chunks_text.append( 223 await anyio.Path(full_path).read_text(encoding="utf-8") 224 ) 225 except Exception as e: 226 raise ValueError( 227 f"Failed to read chunk content for {full_path}: {e}" 228 ) from e 229 230 return chunks_text
Utility to return a list of text for each chunk, loaded from each chunk's content attachment.
The type of the None singleton.
Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
337def init_private_attributes(self: BaseModel, context: Any, /) -> None: 338 """This function is meant to behave like a BaseModel method to initialise private attributes. 339 340 It takes context as an argument since that's what pydantic-core passes when calling it. 341 342 Args: 343 self: The BaseModel instance. 344 context: The context. 345 """ 346 if getattr(self, '__pydantic_private__', None) is None: 347 pydantic_private = {} 348 for name, private_attr in self.__private_attributes__.items(): 349 default = private_attr.get_default() 350 if default is not PydanticUndefined: 351 pydantic_private[name] = default 352 object_setattr(self, '__pydantic_private__', pydantic_private)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.