kiln_ai.datamodel.chunk

  1import logging
  2from enum import Enum
  3from typing import TYPE_CHECKING, Annotated, List, Union
  4
  5import anyio
  6from pydantic import (
  7    AfterValidator,
  8    BaseModel,
  9    Field,
 10    NonNegativeInt,
 11    PositiveInt,
 12    SerializationInfo,
 13    ValidationInfo,
 14    field_serializer,
 15    model_validator,
 16)
 17from typing_extensions import Literal, TypedDict
 18
 19from kiln_ai.datamodel.basemodel import (
 20    ID_TYPE,
 21    FilenameString,
 22    KilnAttachmentModel,
 23    KilnParentedModel,
 24    KilnParentModel,
 25)
 26from kiln_ai.datamodel.embedding import ChunkEmbeddings
 27
 28logger = logging.getLogger(__name__)
 29
 30if TYPE_CHECKING:
 31    from kiln_ai.datamodel.extraction import Extraction
 32    from kiln_ai.datamodel.project import Project
 33
 34
class ChunkerType(str, Enum):
    """The type of chunking algorithm to use."""

    # Split text into fixed-size windows (see FixedWindowChunkerProperties).
    FIXED_WINDOW = "fixed_window"
    # Split text at semantic breakpoints using an embedding model
    # (see SemanticChunkerProperties).
    SEMANTIC = "semantic"
 40
 41
class SemanticChunkerProperties(TypedDict, total=True):
    """Properties for the semantic chunker. All keys are required (total=True)."""

    # Discriminator for the pydantic discriminated union on ChunkerConfig.properties.
    chunker_type: Literal[ChunkerType.SEMANTIC]
    # ID of the embedding config used to compute semantic breakpoints.
    embedding_config_id: str
    # Must be >= 1; also enforced by validate_semantic_chunker_properties.
    buffer_size: PositiveInt
    # Percentile in [0, 100]; enforced by validate_semantic_chunker_properties.
    breakpoint_percentile_threshold: NonNegativeInt
    include_metadata: bool
    include_prev_next_rel: bool
 49
 50
class FixedWindowChunkerProperties(TypedDict, total=True):
    """Properties for the fixed window chunker. All keys are required (total=True)."""

    # Discriminator for the pydantic discriminated union on ChunkerConfig.properties.
    chunker_type: Literal[ChunkerType.FIXED_WINDOW]
    # Must be strictly less than chunk_size; enforced by
    # validate_fixed_window_chunker_properties.
    chunk_overlap: NonNegativeInt
    chunk_size: PositiveInt
 55
 56
 57def validate_fixed_window_chunker_properties(
 58    properties: FixedWindowChunkerProperties,
 59) -> FixedWindowChunkerProperties:
 60    """Validate the properties for the fixed window chunker and set defaults if needed."""
 61    # the typed dict only validates the shape and types, but not the logic, so we validate here
 62    if properties["chunk_overlap"] >= properties["chunk_size"]:
 63        raise ValueError("Chunk overlap must be less than chunk size.")
 64
 65    return properties
 66
 67
 68def validate_semantic_chunker_properties(
 69    properties: SemanticChunkerProperties,
 70) -> SemanticChunkerProperties:
 71    """Validate the properties for the semantic chunker."""
 72    buffer_size = properties["buffer_size"]
 73    if buffer_size < 1:
 74        raise ValueError("buffer_size must be greater than or equal to 1.")
 75
 76    breakpoint_percentile_threshold = properties["breakpoint_percentile_threshold"]
 77    if not (0 <= breakpoint_percentile_threshold <= 100):
 78        raise ValueError("breakpoint_percentile_threshold must be between 0 and 100.")
 79
 80    return properties
 81
 82
 83SemanticChunkerPropertiesValidator = Annotated[
 84    SemanticChunkerProperties,
 85    AfterValidator(lambda v: validate_semantic_chunker_properties(v)),
 86]
 87
 88FixedWindowChunkerPropertiesValidator = Annotated[
 89    FixedWindowChunkerProperties,
 90    AfterValidator(lambda v: validate_fixed_window_chunker_properties(v)),
 91]
 92
 93
class ChunkerConfig(KilnParentedModel):
    """Configuration for chunking extracted documents into smaller pieces.

    ``properties`` is a pydantic discriminated union keyed on the
    ``chunker_type`` value inside the dict, so each chunker type gets its own
    validated shape. The top-level ``chunker_type`` field duplicates the
    discriminator and must match it (checked in a model validator).
    """

    # Human-readable identifier for this config.
    name: FilenameString = Field(
        description="A name to identify the chunker config.",
    )
    description: str | None = Field(
        default=None, description="The description of the chunker config"
    )
    # Must equal properties["chunker_type"]; see ensure_chunker_type_matches_properties.
    chunker_type: ChunkerType = Field(
        description="This is used to determine the type of chunker to use.",
    )
    properties: (
        SemanticChunkerPropertiesValidator | FixedWindowChunkerPropertiesValidator
    ) = Field(
        description="Properties to be used to execute the chunker config. This is chunker_type specific and should serialize to a json dict.",
        discriminator="chunker_type",
    )

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        """Return the parent Project, or None if the parent is absent or not a Project."""
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="before")
    def upgrade_missing_discriminator_properties(
        cls, data: dict, info: ValidationInfo
    ) -> dict:
        """Inject chunker_type into properties when loading legacy files from disk.

        Only runs when validation context flags loading_from_file; in-memory
        construction is expected to already include the discriminator.
        """
        if not info.context or not info.context.get("loading_from_file", False):
            # Not loading from file, so no need to upgrade
            return data

        if not isinstance(data, dict):
            return data

        # backward compatibility:
        # - we originally did not have the chunker_type in the properties, so we need to add it here
        # - we later wanted chunker_type inside properties so we could use pydantic's discriminated union feature
        properties = data.get("properties", {})
        if "chunker_type" not in properties:
            # the chunker_type on the parent model is always there, we just need to add it to the properties
            properties["chunker_type"] = data["chunker_type"]
            data["properties"] = properties
        return data

    @model_validator(mode="after")
    def ensure_chunker_type_matches_properties(self):
        """Reject configs where the top-level chunker_type disagrees with the discriminator."""
        # sanity check to ensure the chunker_type matches the properties chunker_type
        if self.chunker_type != self.properties["chunker_type"]:
            raise ValueError(
                f"Chunker type mismatch: {self.chunker_type} != {self.properties['chunker_type']}. This is a bug, please report it."
            )
        return self

    # expose the typed properties based on the chunker_type
    @property
    def semantic_properties(self) -> SemanticChunkerProperties:
        """Return properties typed as SemanticChunkerProperties; raises ValueError for other chunker types."""
        if self.properties["chunker_type"] != ChunkerType.SEMANTIC:
            raise ValueError(
                "Semantic properties are only available for semantic chunker."
            )
        # TypedDict cannot be checked at runtime, so we need to ignore the type check
        # or cast (but it is currently banned in our linting rules). Better solution
        # would be discriminated union, but that requires the discriminator to be part
        # of the properties (not outside on the parent model).
        return self.properties  # type: ignore[return-value]

    @property
    def fixed_window_properties(self) -> FixedWindowChunkerProperties:
        """Return properties typed as FixedWindowChunkerProperties; raises ValueError for other chunker types."""
        if self.properties["chunker_type"] != ChunkerType.FIXED_WINDOW:
            raise ValueError(
                "Fixed window properties are only available for fixed window chunker."
            )
        # TypedDict cannot be checked at runtime, so we need to ignore the type check
        # or cast (but it is currently banned in our linting rules). Better solution
        # would be discriminated union, but that requires the discriminator to be part
        # of the properties (not outside on the parent model).
        return self.properties  # type: ignore[return-value]
173
174
class Chunk(BaseModel):
    """A single chunk of a document, stored as a file attachment."""

    content: KilnAttachmentModel = Field(
        description="The content of the chunk, stored as an attachment."
    )

    @field_serializer("content")
    def serialize_content(
        self, content: KilnAttachmentModel, info: SerializationInfo
    ) -> dict:
        """Serialize the attachment with a "content" filename prefix.

        Builds a fresh context dict instead of mutating ``info.context`` in
        place: the previous in-place assignment leaked "filename_prefix" into
        the caller's shared serialization context, where sibling fields
        serialized in the same dump could observe it.
        """
        context = {**(info.context or {}), "filename_prefix": "content"}
        return content.model_dump(mode="json", context=context)
189
190
class ChunkedDocument(
    KilnParentedModel, KilnParentModel, parent_of={"chunk_embeddings": ChunkEmbeddings}
):
    """A document that has been chunked, storing the resulting chunks.

    Parent of ChunkEmbeddings children (via the ``parent_of`` mapping).
    """

    # ID of the ChunkerConfig used to produce these chunks.
    chunker_config_id: ID_TYPE = Field(
        description="The ID of the chunker config used to chunk the document.",
    )
    chunks: List[Chunk] = Field(description="The chunks of the document.")

    # Workaround to return typed parent without importing Extraction at runtime.
    def parent_extraction(self) -> Union["Extraction", None]:
        """Return the parent Extraction, or None if the parent is absent or not an Extraction."""
        if self.parent is None or self.parent.__class__.__name__ != "Extraction":
            return None
        return self.parent  # type: ignore

    def chunk_embeddings(self, readonly: bool = False) -> list[ChunkEmbeddings]:
        """Typed wrapper over the child accessor generated by KilnParentModel's parent_of mapping."""
        return super().chunk_embeddings(readonly=readonly)  # type: ignore

    async def load_chunks_text(self) -> list[str]:
        """Utility to return a list of text for each chunk, loaded from each chunk's content attachment.

        Raises:
            ValueError: if this document has no path on disk, or a chunk's
                content file cannot be read as UTF-8 text.
        """
        if not self.path:
            raise ValueError(
                "Failed to resolve the path of chunk content attachment because the chunk does not have a path."
            )

        chunks_text: list[str] = []
        for chunk in self.chunks:
            # Attachment paths are resolved relative to this document's directory.
            full_path = chunk.content.resolve_path(self.path.parent)

            try:
                chunks_text.append(
                    await anyio.Path(full_path).read_text(encoding="utf-8")
                )
            except Exception as e:
                # Wrap any read failure (missing file, decode error) with the path for context.
                raise ValueError(
                    f"Failed to read chunk content for {full_path}: {e}"
                ) from e

        return chunks_text
logger = <Logger kiln_ai.datamodel.chunk (WARNING)>
class ChunkerType(builtins.str, enum.Enum):
36class ChunkerType(str, Enum):
37    """The type of chunking algorithm to use."""
38
39    FIXED_WINDOW = "fixed_window"
40    SEMANTIC = "semantic"

The type of chunking algorithm to use.

FIXED_WINDOW = <ChunkerType.FIXED_WINDOW: 'fixed_window'>
SEMANTIC = <ChunkerType.SEMANTIC: 'semantic'>
class SemanticChunkerProperties(typing_extensions.TypedDict):
43class SemanticChunkerProperties(TypedDict, total=True):
44    chunker_type: Literal[ChunkerType.SEMANTIC]
45    embedding_config_id: str
46    buffer_size: PositiveInt
47    breakpoint_percentile_threshold: NonNegativeInt
48    include_metadata: bool
49    include_prev_next_rel: bool
chunker_type: Literal[<ChunkerType.SEMANTIC: 'semantic'>]
embedding_config_id: str
buffer_size: Annotated[int, Gt(gt=0)]
breakpoint_percentile_threshold: Annotated[int, Ge(ge=0)]
include_metadata: bool
include_prev_next_rel: bool
class FixedWindowChunkerProperties(typing_extensions.TypedDict):
52class FixedWindowChunkerProperties(TypedDict, total=True):
53    chunker_type: Literal[ChunkerType.FIXED_WINDOW]
54    chunk_overlap: NonNegativeInt
55    chunk_size: PositiveInt
chunker_type: Literal[<ChunkerType.FIXED_WINDOW: 'fixed_window'>]
chunk_overlap: Annotated[int, Ge(ge=0)]
chunk_size: Annotated[int, Gt(gt=0)]
def validate_fixed_window_chunker_properties( properties: FixedWindowChunkerProperties) -> FixedWindowChunkerProperties:
58def validate_fixed_window_chunker_properties(
59    properties: FixedWindowChunkerProperties,
60) -> FixedWindowChunkerProperties:
61    """Validate the properties for the fixed window chunker and set defaults if needed."""
62    # the typed dict only validates the shape and types, but not the logic, so we validate here
63    if properties["chunk_overlap"] >= properties["chunk_size"]:
64        raise ValueError("Chunk overlap must be less than chunk size.")
65
66    return properties

Validate the properties for the fixed window chunker and set defaults if needed.

def validate_semantic_chunker_properties( properties: SemanticChunkerProperties) -> SemanticChunkerProperties:
69def validate_semantic_chunker_properties(
70    properties: SemanticChunkerProperties,
71) -> SemanticChunkerProperties:
72    """Validate the properties for the semantic chunker."""
73    buffer_size = properties["buffer_size"]
74    if buffer_size < 1:
75        raise ValueError("buffer_size must be greater than or equal to 1.")
76
77    breakpoint_percentile_threshold = properties["breakpoint_percentile_threshold"]
78    if not (0 <= breakpoint_percentile_threshold <= 100):
79        raise ValueError("breakpoint_percentile_threshold must be between 0 and 100.")
80
81    return properties

Validate the properties for the semantic chunker.

SemanticChunkerPropertiesValidator = typing.Annotated[SemanticChunkerProperties, AfterValidator(func=<function <lambda>>)]
FixedWindowChunkerPropertiesValidator = typing.Annotated[FixedWindowChunkerProperties, AfterValidator(func=<function <lambda>>)]
class ChunkerConfig(kiln_ai.datamodel.basemodel.KilnParentedModel):
 95class ChunkerConfig(KilnParentedModel):
 96    """Configuration for chunking extracted documents into smaller pieces."""
 97
 98    name: FilenameString = Field(
 99        description="A name to identify the chunker config.",
100    )
101    description: str | None = Field(
102        default=None, description="The description of the chunker config"
103    )
104    chunker_type: ChunkerType = Field(
105        description="This is used to determine the type of chunker to use.",
106    )
107    properties: (
108        SemanticChunkerPropertiesValidator | FixedWindowChunkerPropertiesValidator
109    ) = Field(
110        description="Properties to be used to execute the chunker config. This is chunker_type specific and should serialize to a json dict.",
111        discriminator="chunker_type",
112    )
113
114    # Workaround to return typed parent without importing Project
115    def parent_project(self) -> Union["Project", None]:
116        if self.parent is None or self.parent.__class__.__name__ != "Project":
117            return None
118        return self.parent  # type: ignore
119
120    @model_validator(mode="before")
121    def upgrade_missing_discriminator_properties(
122        cls, data: dict, info: ValidationInfo
123    ) -> dict:
124        if not info.context or not info.context.get("loading_from_file", False):
125            # Not loading from file, so no need to upgrade
126            return data
127
128        if not isinstance(data, dict):
129            return data
130
131        # backward compatibility:
132        # - we originally did not have the chunker_type in the properties, so we need to add it here
133    # - we later wanted to have chunker_type in the properties to use pydantic's discriminated union feature
134        properties = data.get("properties", {})
135        if "chunker_type" not in properties:
136            # the chunker_type on the parent model is always there, we just need to add it to the properties
137            properties["chunker_type"] = data["chunker_type"]
138            data["properties"] = properties
139        return data
140
141    @model_validator(mode="after")
142    def ensure_chunker_type_matches_properties(self):
143        # sanity check to ensure the chunker_type matches the properties chunker_type
144        if self.chunker_type != self.properties["chunker_type"]:
145            raise ValueError(
146                f"Chunker type mismatch: {self.chunker_type} != {self.properties['chunker_type']}. This is a bug, please report it."
147            )
148        return self
149
150    # expose the typed properties based on the chunker_type
151    @property
152    def semantic_properties(self) -> SemanticChunkerProperties:
153        if self.properties["chunker_type"] != ChunkerType.SEMANTIC:
154            raise ValueError(
155                "Semantic properties are only available for semantic chunker."
156            )
157        # TypedDict cannot be checked at runtime, so we need to ignore the type check
158        # or cast (but it is currently banned in our linting rules). Better solution
159        # would be discriminated union, but that requires the discriminator to be part
160        # of the properties (not outside on the parent model).
161        return self.properties  # type: ignore[return-value]
162
163    @property
164    def fixed_window_properties(self) -> FixedWindowChunkerProperties:
165        if self.properties["chunker_type"] != ChunkerType.FIXED_WINDOW:
166            raise ValueError(
167                "Fixed window properties are only available for fixed window chunker."
168            )
169        # TypedDict cannot be checked at runtime, so we need to ignore the type check
170        # or cast (but it is currently banned in our linting rules). Better solution
171        # would be discriminated union, but that requires the discriminator to be part
172        # of the properties (not outside on the parent model).
173        return self.properties  # type: ignore[return-value]

Configuration for chunking extracted documents into smaller pieces.

name: Annotated[str, BeforeValidator(func=<function name_validator.<locals>.fn at 0x7f90236f9b20>, json_schema_input_type=PydanticUndefined), StringConstraints(strip_whitespace=None, to_upper=None, to_lower=None, strict=None, min_length=1, max_length=120, pattern=None)]
description: str | None
chunker_type: ChunkerType
properties: Union[Annotated[SemanticChunkerProperties, AfterValidator(func=<function <lambda> at 0x7f90238714e0>)], Annotated[FixedWindowChunkerProperties, AfterValidator(func=<function <lambda> at 0x7f9023871800>)]]
def parent_project(self) -> Optional[kiln_ai.datamodel.Project]:
115    def parent_project(self) -> Union["Project", None]:
116        if self.parent is None or self.parent.__class__.__name__ != "Project":
117            return None
118        return self.parent  # type: ignore
@model_validator(mode='before')
def upgrade_missing_discriminator_properties(cls, data: dict, info: pydantic_core.core_schema.ValidationInfo) -> dict:
120    @model_validator(mode="before")
121    def upgrade_missing_discriminator_properties(
122        cls, data: dict, info: ValidationInfo
123    ) -> dict:
124        if not info.context or not info.context.get("loading_from_file", False):
125            # Not loading from file, so no need to upgrade
126            return data
127
128        if not isinstance(data, dict):
129            return data
130
131        # backward compatibility:
132        # - we originally did not have the chunker_type in the properties, so we need to add it here
133        # - we started wanted to have chunker_type in the properties to use pydantic's discriminated union feature
134        properties = data.get("properties", {})
135        if "chunker_type" not in properties:
136            # the chunker_type on the parent model is always there, we just need to add it to the properties
137            properties["chunker_type"] = data["chunker_type"]
138            data["properties"] = properties
139        return data
@model_validator(mode='after')
def ensure_chunker_type_matches_properties(self):
141    @model_validator(mode="after")
142    def ensure_chunker_type_matches_properties(self):
143        # sanity check to ensure the chunker_type matches the properties chunker_type
144        if self.chunker_type != self.properties["chunker_type"]:
145            raise ValueError(
146                f"Chunker type mismatch: {self.chunker_type} != {self.properties['chunker_type']}. This is a bug, please report it."
147            )
148        return self
semantic_properties: SemanticChunkerProperties
151    @property
152    def semantic_properties(self) -> SemanticChunkerProperties:
153        if self.properties["chunker_type"] != ChunkerType.SEMANTIC:
154            raise ValueError(
155                "Semantic properties are only available for semantic chunker."
156            )
157        # TypedDict cannot be checked at runtime, so we need to ignore the type check
158        # or cast (but it is currently banned in our linting rules). Better solution
159        # would be discriminated union, but that requires the discriminator to be part
160        # of the properties (not outside on the parent model).
161        return self.properties  # type: ignore[return-value]
fixed_window_properties: FixedWindowChunkerProperties
163    @property
164    def fixed_window_properties(self) -> FixedWindowChunkerProperties:
165        if self.properties["chunker_type"] != ChunkerType.FIXED_WINDOW:
166            raise ValueError(
167                "Fixed window properties are only available for fixed window chunker."
168            )
169        # TypedDict cannot be checked at runtime, so we need to ignore the type check
170        # or cast (but it is currently banned in our linting rules). Better solution
171        # would be discriminated union, but that requires the discriminator to be part
172        # of the properties (not outside on the parent model).
173        return self.properties  # type: ignore[return-value]
def relationship_name() -> str:
761        def relationship_name_method() -> str:
762            return relationship_name

The type of the None singleton.

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
754        def parent_class_method() -> Type[KilnParentModel]:
755            return cls

The type of the None singleton.

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
337def init_private_attributes(self: BaseModel, context: Any, /) -> None:
338    """This function is meant to behave like a BaseModel method to initialise private attributes.
339
340    It takes context as an argument since that's what pydantic-core passes when calling it.
341
342    Args:
343        self: The BaseModel instance.
344        context: The context.
345    """
346    if getattr(self, '__pydantic_private__', None) is None:
347        pydantic_private = {}
348        for name, private_attr in self.__private_attributes__.items():
349            default = private_attr.get_default()
350            if default is not PydanticUndefined:
351                pydantic_private[name] = default
352        object_setattr(self, '__pydantic_private__', pydantic_private)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args: self: The BaseModel instance. context: The context.

class Chunk(pydantic.main.BaseModel):
176class Chunk(BaseModel):
177    """A single chunk of a document, stored as a file attachment."""
178
179    content: KilnAttachmentModel = Field(
180        description="The content of the chunk, stored as an attachment."
181    )
182
183    @field_serializer("content")
184    def serialize_content(
185        self, content: KilnAttachmentModel, info: SerializationInfo
186    ) -> dict:
187        context = info.context or {}
188        context["filename_prefix"] = "content"
189        return content.model_dump(mode="json", context=context)

A single chunk of a document, stored as a file attachment.

content: kiln_ai.datamodel.basemodel.KilnAttachmentModel
@field_serializer('content')
def serialize_content( self, content: kiln_ai.datamodel.basemodel.KilnAttachmentModel, info: pydantic_core.core_schema.SerializationInfo) -> dict:
183    @field_serializer("content")
184    def serialize_content(
185        self, content: KilnAttachmentModel, info: SerializationInfo
186    ) -> dict:
187        context = info.context or {}
188        context["filename_prefix"] = "content"
189        return content.model_dump(mode="json", context=context)
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class ChunkedDocument(kiln_ai.datamodel.basemodel.KilnParentedModel, kiln_ai.datamodel.basemodel.KilnParentModel):
192class ChunkedDocument(
193    KilnParentedModel, KilnParentModel, parent_of={"chunk_embeddings": ChunkEmbeddings}
194):
195    """A document that has been chunked, storing the resulting chunks."""
196
197    chunker_config_id: ID_TYPE = Field(
198        description="The ID of the chunker config used to chunk the document.",
199    )
200    chunks: List[Chunk] = Field(description="The chunks of the document.")
201
202    def parent_extraction(self) -> Union["Extraction", None]:
203        if self.parent is None or self.parent.__class__.__name__ != "Extraction":
204            return None
205        return self.parent  # type: ignore
206
207    def chunk_embeddings(self, readonly: bool = False) -> list[ChunkEmbeddings]:
208        return super().chunk_embeddings(readonly=readonly)  # type: ignore
209
210    async def load_chunks_text(self) -> list[str]:
211        """Utility to return a list of text for each chunk, loaded from each chunk's content attachment."""
212        if not self.path:
213            raise ValueError(
214                "Failed to resolve the path of chunk content attachment because the chunk does not have a path."
215            )
216
217        chunks_text: list[str] = []
218        for chunk in self.chunks:
219            full_path = chunk.content.resolve_path(self.path.parent)
220
221            try:
222                chunks_text.append(
223                    await anyio.Path(full_path).read_text(encoding="utf-8")
224                )
225            except Exception as e:
226                raise ValueError(
227                    f"Failed to read chunk content for {full_path}: {e}"
228                ) from e
229
230        return chunks_text

A document that has been chunked, storing the resulting chunks.

chunker_config_id: Optional[str]
chunks: List[Chunk]
def parent_extraction(self) -> Optional[kiln_ai.datamodel.extraction.Extraction]:
202    def parent_extraction(self) -> Union["Extraction", None]:
203        if self.parent is None or self.parent.__class__.__name__ != "Extraction":
204            return None
205        return self.parent  # type: ignore
def chunk_embeddings( self, readonly=False) -> List[kiln_ai.datamodel.embedding.ChunkEmbeddings]:
743        def child_method(self, readonly: bool = False) -> list[child_class]:  # type: ignore[invalid-type-form]
744            return child_class.all_children_of_parent_path(self.path, readonly=readonly)

The type of the None singleton.

async def load_chunks_text(self) -> list[str]:
210    async def load_chunks_text(self) -> list[str]:
211        """Utility to return a list of text for each chunk, loaded from each chunk's content attachment."""
212        if not self.path:
213            raise ValueError(
214                "Failed to resolve the path of chunk content attachment because the chunk does not have a path."
215            )
216
217        chunks_text: list[str] = []
218        for chunk in self.chunks:
219            full_path = chunk.content.resolve_path(self.path.parent)
220
221            try:
222                chunks_text.append(
223                    await anyio.Path(full_path).read_text(encoding="utf-8")
224                )
225            except Exception as e:
226                raise ValueError(
227                    f"Failed to read chunk content for {full_path}: {e}"
228                ) from e
229
230        return chunks_text

Utility to return a list of text for each chunk, loaded from each chunk's content attachment.

def relationship_name() -> str:
761        def relationship_name_method() -> str:
762            return relationship_name

The type of the None singleton.

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
754        def parent_class_method() -> Type[KilnParentModel]:
755            return cls

The type of the None singleton.

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
337def init_private_attributes(self: BaseModel, context: Any, /) -> None:
338    """This function is meant to behave like a BaseModel method to initialise private attributes.
339
340    It takes context as an argument since that's what pydantic-core passes when calling it.
341
342    Args:
343        self: The BaseModel instance.
344        context: The context.
345    """
346    if getattr(self, '__pydantic_private__', None) is None:
347        pydantic_private = {}
348        for name, private_attr in self.__private_attributes__.items():
349            default = private_attr.get_default()
350            if default is not PydanticUndefined:
351                pydantic_private[name] = default
352        object_setattr(self, '__pydantic_private__', pydantic_private)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args: self: The BaseModel instance. context: The context.