kiln_ai.datamodel.extraction
import logging
from enum import Enum
from typing import TYPE_CHECKING, Any, List, Union

import anyio
from pydantic import (
    BaseModel,
    Field,
    SerializationInfo,
    ValidationInfo,
    computed_field,
    field_serializer,
    field_validator,
    model_validator,
)
from typing_extensions import Self

from kiln_ai.datamodel.basemodel import (
    ID_TYPE,
    FilenameString,
    KilnAttachmentModel,
    KilnParentedModel,
    KilnParentModel,
)
from kiln_ai.datamodel.chunk import ChunkedDocument

if TYPE_CHECKING:
    from kiln_ai.datamodel.project import Project

# Single module-level logger (the original assigned it twice).
logger = logging.getLogger(__name__)


# Broad family a file belongs to. Members also subclass str, so each one
# compares equal to its plain string value.
class Kind(str, Enum):
    DOCUMENT = "document"
    IMAGE = "image"
    VIDEO = "video"
    AUDIO = "audio"


# Output formats an extractor can produce; the values are MIME type strings.
class OutputFormat(str, Enum):
    TEXT = "text/plain"
    MARKDOWN = "text/markdown"


# Known extractor implementations.
class ExtractorType(str, Enum):
    LITELLM = "litellm"


# MIME types accepted for each Kind. Used by FileInfo.validate_mime_type and
# get_kind_from_mime_type below.
SUPPORTED_MIME_TYPES = {
    Kind.DOCUMENT: {
        "application/pdf",
        "text/plain",
        "text/markdown",
        "text/html",
        "text/md",
    },
    Kind.IMAGE: {
        "image/png",
        "image/jpeg",
    },
    Kind.VIDEO: {
        "video/mp4",
        "video/quicktime",
    },
    Kind.AUDIO: {
        "audio/wav",
        "audio/mpeg",
        "audio/ogg",
    },
}


class ExtractionModel(BaseModel):
    name: str
    label: str


def validate_prompt(prompt: Any, name: str):
    """Raise ValueError unless ``prompt`` is a non-empty string.

    Args:
        prompt: The value to check.
        name: Label used at the start of the error message.

    Raises:
        ValueError: If ``prompt`` is not a ``str`` or is the empty string.
    """
    if not isinstance(prompt, str):
        raise ValueError(f"{name} must be a string.")
    if prompt == "":
        raise ValueError(f"{name} cannot be empty.")


class ExtractionSource(str, Enum):
    PROCESSED = "processed"
    PASSTHROUGH = "passthrough"


class Extraction(
    KilnParentedModel, KilnParentModel, parent_of={"chunked_documents": ChunkedDocument}
):
    source: ExtractionSource = Field(
        description="The source of the extraction.",
    )
    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the data.",
    )
    output: KilnAttachmentModel = Field(
        description="The extraction output.",
    )

    def parent_document(self) -> Union["Document", None]:
        """Return the parent if its class is named ``Document``, otherwise None."""
        # Compared by class name because Document is defined later in this module.
        if self.parent is None or self.parent.__class__.__name__ != "Document":
            return None
        return self.parent  # type: ignore

    async def output_content(self) -> str | None:
        """Read the output attachment as UTF-8 text and return it.

        The attachment path is resolved relative to the directory containing
        this extraction's own file (``self.path.parent``).

        Raises:
            ValueError: If the extraction has no path, or if reading the file
                fails. In the latter case the underlying exception is chained
                as ``__cause__``.
        """
        if not self.path:
            raise ValueError(
                "Failed to resolve the path of extraction output attachment because the extraction does not have a path."
            )

        full_path = self.output.resolve_path(self.path.parent)

        try:
            return await anyio.Path(full_path).read_text(encoding="utf-8")
        except Exception as e:
            logger.error(
                f"Failed to read extraction output for {full_path}: {e}", exc_info=True
            )
            # Chain explicitly so callers can reach the original error.
            raise ValueError(f"Failed to read extraction output: {e}") from e

    def chunked_documents(self, readonly: bool = False) -> list[ChunkedDocument]:
        """Typed pass-through to the parent-class ``chunked_documents`` accessor."""
        return super().chunked_documents(readonly=readonly)  # type: ignore


class ExtractorConfig(KilnParentedModel):
    name: FilenameString = Field(
        description="A name to identify the extractor config.",
    )
    is_archived: bool = Field(
        default=False,
        description="Whether the extractor config is archived. Archived extractor configs are not shown in the UI and are not available for use.",
    )
    description: str | None = Field(
        default=None, description="The description of the extractor config"
    )
    model_provider_name: str = Field(
        description="The name of the model provider to use for the extractor config.",
    )
    model_name: str = Field(
        description="The name of the model to use for the extractor config.",
    )
    output_format: OutputFormat = Field(
        default=OutputFormat.MARKDOWN,
        description="The format to use for the output.",
    )
    passthrough_mimetypes: list[OutputFormat] = Field(
        default_factory=list,
        description="If the mimetype is in this list, the extractor will not be used and the text content of the file will be returned as is.",
    )
    extractor_type: ExtractorType = Field(
        description="This is used to determine the type of extractor to use.",
    )
    properties: dict[str, str | int | float | bool | dict[str, str] | None] = Field(
        default_factory=dict,
        description="Properties to be used to execute the extractor config. This is extractor_type specific and should serialize to a json dict.",
    )

    @field_validator("properties")
    @classmethod
    def validate_properties(
        cls, properties: dict[str, Any], info: ValidationInfo
    ) -> dict[str, Any]:
        """Require a non-empty string prompt for each of the four kinds.

        Returns a new dict containing only the four prompt keys; any other
        keys in ``properties`` are not carried over.

        Raises:
            ValueError: If a prompt is missing, empty, or not a string.
        """

        def get_property(key: str) -> str:
            value = properties.get(key)
            # isinstance rejects None; the equality check rejects "".
            if not isinstance(value, str) or value == "":
                raise ValueError(f"Prompt for {key} must be a string")
            return value

        # Tuple order fixes which missing prompt is reported first.
        return {
            key: get_property(key)
            for key in (
                "prompt_document",
                "prompt_image",
                "prompt_video",
                "prompt_audio",
            )
        }

    def _get_prompt(self, key: str) -> str | None:
        """Return the prompt stored under ``key`` in ``properties``.

        Returns None when the key is absent or maps to None.

        Raises:
            ValueError: If the stored value is not a string.
        """
        prompt = self.properties.get(key)
        if prompt is None:
            return None
        if not isinstance(prompt, str):
            raise ValueError(f"Invalid {key}. {key} must be a string.")
        return prompt

    def prompt_document(self) -> str | None:
        """Return the ``prompt_document`` property, or None if unset."""
        return self._get_prompt("prompt_document")

    def prompt_video(self) -> str | None:
        """Return the ``prompt_video`` property, or None if unset."""
        return self._get_prompt("prompt_video")

    def prompt_audio(self) -> str | None:
        """Return the ``prompt_audio`` property, or None if unset."""
        return self._get_prompt("prompt_audio")

    def prompt_image(self) -> str | None:
        """Return the ``prompt_image`` property, or None if unset."""
        return self._get_prompt("prompt_image")

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore


class FileInfo(BaseModel):
    filename: str = Field(description="The filename of the file")

    size: int = Field(description="The size of the file in bytes")

    mime_type: str = Field(description="The MIME type of the file")

    attachment: KilnAttachmentModel = Field(
        description="The attachment to the file",
    )

    @field_serializer("attachment")
    def serialize_attachment(
        self, attachment: KilnAttachmentModel, info: SerializationInfo
    ) -> dict:
        """Dump the attachment in JSON mode with ``filename_prefix`` set to "attachment".

        The incoming serialization context is copied before the prefix is
        added, so the caller's context dict is never modified.
        """
        context = dict(info.context or {})
        context["filename_prefix"] = "attachment"
        return attachment.model_dump(mode="json", context=context)

    @field_validator("mime_type")
    @classmethod
    def validate_mime_type(cls, mime_type: str, info: ValidationInfo) -> str:
        """Accept ``mime_type`` only if some Kind in SUPPORTED_MIME_TYPES lists it.

        Raises:
            ValueError: If no kind supports the MIME type. The message includes
                the filename when it has already been validated.
        """
        filename = info.data.get("filename") or ""

        if any(mime_type in mime_types for mime_types in SUPPORTED_MIME_TYPES.values()):
            return mime_type
        raise ValueError(f"MIME type is not supported: {mime_type} (for {filename})")


class Document(
    KilnParentedModel, KilnParentModel, parent_of={"extractions": Extraction}
):
    # this field should not be changed after creation
    name: FilenameString = Field(
        description="A name to identify the document.",
    )

    # this field can be changed after creation
    name_override: str | None = Field(
        description="A friendly name to identify the document. This is used for display purposes and can be different from the name.",
        default=None,
    )

    description: str = Field(description="A description for the file")

    original_file: FileInfo = Field(description="The original file")

    kind: Kind = Field(
        description="The kind of document. The kind is a broad family of filetypes that can be handled in a similar way"
    )

    tags: List[str] = Field(
        default_factory=list,
        description="Tags for the document. Tags are used to categorize documents for filtering and reporting.",
    )

    @model_validator(mode="after")
    def validate_tags(self) -> Self:
        """Reject tags that are empty or contain a space character."""
        for tag in self.tags:
            if not tag:
                raise ValueError("Tags cannot be empty strings")
            if " " in tag:
                raise ValueError("Tags cannot contain spaces. Try underscores.")

        return self

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore

    def extractions(self, readonly: bool = False) -> list[Extraction]:
        """Typed pass-through to the parent-class ``extractions`` accessor."""
        return super().extractions(readonly=readonly)  # type: ignore

    @computed_field
    @property
    def friendly_name(self) -> str:
        # backward compatibility: old documents did not have name_override
        return self.name_override or self.name


def get_kind_from_mime_type(mime_type: str) -> Kind | None:
    """Return the first Kind whose supported set contains ``mime_type``, else None."""
    for kind, mime_types in SUPPORTED_MIME_TYPES.items():
        if mime_type in mime_types:
            return kind
    return None
# Broad family a file belongs to. Members also subclass str, so each one
# compares equal to its plain string value.
class Kind(str, Enum):
    DOCUMENT = "document"
    IMAGE = "image"
    VIDEO = "video"
    AUDIO = "audio"
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.
!!! abstract "Usage Documentation" Models
A base class for creating Pydantic models.
Attributes:
__class_vars__: The names of the class variables defined on the model.
__private_attributes__: Metadata about the private attributes of the model.
__signature__: The synthesized `__init__` [`Signature`][inspect.Signature] of the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to
__args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [`RootModel`][pydantic.root_model.RootModel].
__pydantic_serializer__: The `pydantic-core` `SchemaSerializer` used to dump instances of the model.
__pydantic_validator__: The `pydantic-core` `SchemaValidator` used to validate instances of the model.
__pydantic_fields__: A dictionary of field names and their corresponding [`FieldInfo`][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [`ComputedFieldInfo`][pydantic.fields.ComputedFieldInfo] objects.
__pydantic_extra__: A dictionary containing extra values, if [`extra`][pydantic.config.ConfigDict.extra]
is set to `'allow'`.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.
class Extraction(
    KilnParentedModel, KilnParentModel, parent_of={"chunked_documents": ChunkedDocument}
):
    source: ExtractionSource = Field(
        description="The source of the extraction.",
    )
    extractor_config_id: ID_TYPE = Field(
        description="The ID of the extractor config used to extract the data.",
    )
    output: KilnAttachmentModel = Field(
        description="The extraction output.",
    )

    def parent_document(self) -> Union["Document", None]:
        """Return the parent if its class is named ``Document``, otherwise None."""
        # Compared by class name rather than isinstance; Document is referenced
        # only as a forward reference in the annotation.
        if self.parent is None or self.parent.__class__.__name__ != "Document":
            return None
        return self.parent  # type: ignore

    async def output_content(self) -> str | None:
        """Read the output attachment as UTF-8 text and return it.

        The attachment path is resolved relative to the directory containing
        this extraction's own file (``self.path.parent``).

        Raises:
            ValueError: If the extraction has no path, or if reading the file
                fails. In the latter case the underlying exception is chained
                as ``__cause__``.
        """
        if not self.path:
            raise ValueError(
                "Failed to resolve the path of extraction output attachment because the extraction does not have a path."
            )

        full_path = self.output.resolve_path(self.path.parent)

        try:
            return await anyio.Path(full_path).read_text(encoding="utf-8")
        except Exception as e:
            logger.error(
                f"Failed to read extraction output for {full_path}: {e}", exc_info=True
            )
            # Chain explicitly so callers can reach the original error.
            raise ValueError(f"Failed to read extraction output: {e}") from e

    def chunked_documents(self, readonly: bool = False) -> list[ChunkedDocument]:
        """Typed pass-through to the parent-class ``chunked_documents`` accessor."""
        return super().chunked_documents(readonly=readonly)  # type: ignore
Base model for Kiln models that have a parent-child relationship. This base class is for child models.
This class provides functionality for managing hierarchical relationships between models, including parent reference handling and file system organization.
Attributes: parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
async def output_content(self) -> str | None:
    """Read the output attachment as UTF-8 text and return it.

    The attachment path is resolved relative to the directory containing
    this extraction's own file (``self.path.parent``).

    Raises:
        ValueError: If the extraction has no path, or if reading the file
            fails. In the latter case the underlying exception is chained
            as ``__cause__``.
    """
    if not self.path:
        raise ValueError(
            "Failed to resolve the path of extraction output attachment because the extraction does not have a path."
        )

    full_path = self.output.resolve_path(self.path.parent)

    try:
        return await anyio.Path(full_path).read_text(encoding="utf-8")
    except Exception as e:
        logger.error(
            f"Failed to read extraction output for {full_path}: {e}", exc_info=True
        )
        # Chain explicitly so callers can reach the original error.
        raise ValueError(f"Failed to read extraction output: {e}") from e
def child_method(self, readonly: bool = False) -> list[child_class]:
    """Delegate to ``child_class.all_children_of_parent_path`` for this instance.

    Passes this instance's ``path`` and the ``readonly`` flag through unchanged.
    """
    # NOTE(review): child_class is a free variable here, presumably bound by an
    # enclosing scope that this snippet does not show.
    children = child_class.all_children_of_parent_path(self.path, readonly=readonly)
    return children
The type of the None singleton.
The type of the None singleton.
Configuration for the model, should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """This function is meant to behave like a BaseModel method to initialise private attributes.

    It takes context as an argument since that's what pydantic-core passes when calling it.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        # Private attributes are already initialised; leave them untouched.
        return

    # Collect defaults, skipping attributes whose default is PydanticUndefined.
    defaults = {
        attr_name: default
        for attr_name, attr in self.__private_attributes__.items()
        if (default := attr.get_default()) is not PydanticUndefined
    }
    object_setattr(self, '__pydantic_private__', defaults)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.
class ExtractorConfig(KilnParentedModel):
    name: FilenameString = Field(
        description="A name to identify the extractor config.",
    )
    is_archived: bool = Field(
        default=False,
        description="Whether the extractor config is archived. Archived extractor configs are not shown in the UI and are not available for use.",
    )
    description: str | None = Field(
        default=None, description="The description of the extractor config"
    )
    model_provider_name: str = Field(
        description="The name of the model provider to use for the extractor config.",
    )
    model_name: str = Field(
        description="The name of the model to use for the extractor config.",
    )
    output_format: OutputFormat = Field(
        default=OutputFormat.MARKDOWN,
        description="The format to use for the output.",
    )
    passthrough_mimetypes: list[OutputFormat] = Field(
        default_factory=list,
        description="If the mimetype is in this list, the extractor will not be used and the text content of the file will be returned as is.",
    )
    extractor_type: ExtractorType = Field(
        description="This is used to determine the type of extractor to use.",
    )
    properties: dict[str, str | int | float | bool | dict[str, str] | None] = Field(
        default_factory=dict,
        description="Properties to be used to execute the extractor config. This is extractor_type specific and should serialize to a json dict.",
    )

    @field_validator("properties")
    @classmethod
    def validate_properties(
        cls, properties: dict[str, Any], info: ValidationInfo
    ) -> dict[str, Any]:
        """Require a non-empty string prompt for each of the four kinds.

        Returns a new dict containing only the four prompt keys; any other
        keys in ``properties`` are not carried over.

        Raises:
            ValueError: If a prompt is missing, empty, or not a string.
        """

        def get_property(key: str) -> str:
            value = properties.get(key)
            # isinstance rejects None; the equality check rejects "".
            if not isinstance(value, str) or value == "":
                raise ValueError(f"Prompt for {key} must be a string")
            return value

        # Tuple order fixes which missing prompt is reported first.
        return {
            key: get_property(key)
            for key in (
                "prompt_document",
                "prompt_image",
                "prompt_video",
                "prompt_audio",
            )
        }

    def _get_prompt(self, key: str) -> str | None:
        """Return the prompt stored under ``key`` in ``properties``.

        Returns None when the key is absent or maps to None.

        Raises:
            ValueError: If the stored value is not a string.
        """
        prompt = self.properties.get(key)
        if prompt is None:
            return None
        if not isinstance(prompt, str):
            raise ValueError(f"Invalid {key}. {key} must be a string.")
        return prompt

    def prompt_document(self) -> str | None:
        """Return the ``prompt_document`` property, or None if unset."""
        return self._get_prompt("prompt_document")

    def prompt_video(self) -> str | None:
        """Return the ``prompt_video`` property, or None if unset."""
        return self._get_prompt("prompt_video")

    def prompt_audio(self) -> str | None:
        """Return the ``prompt_audio`` property, or None if unset."""
        return self._get_prompt("prompt_audio")

    def prompt_image(self) -> str | None:
        """Return the ``prompt_image`` property, or None if unset."""
        return self._get_prompt("prompt_image")

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore
prompt_image must be a string.") 222 return prompt 223 224 # Workaround to return typed parent without importing Project 225 def parent_project(self) -> Union["Project", None]: 226 if self.parent is None or self.parent.__class__.__name__ != "Project": 227 return None 228 return self.parent # type: ignore
Base model for Kiln models that have a parent-child relationship. This base class is for child models.
This class provides functionality for managing hierarchical relationships between models, including parent reference handling and file system organization.
Attributes: parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
@field_validator("properties")
@classmethod
def validate_properties(
    cls, properties: dict[str, Any], info: ValidationInfo
) -> dict[str, Any]:
    """Require a non-empty string prompt for each of the four kinds.

    Returns a new dict containing only the four prompt keys; any other
    keys in ``properties`` are not carried over.

    Raises:
        ValueError: If a prompt is missing, empty, or not a string.
    """
    # Tuple order fixes which missing prompt is reported first.
    required_keys = (
        "prompt_document",
        "prompt_image",
        "prompt_video",
        "prompt_audio",
    )

    def require_prompt(key: str) -> str:
        candidate = properties.get(key)
        # isinstance rejects None; the equality check rejects "".
        if not isinstance(candidate, str) or candidate == "":
            raise ValueError(f"Prompt for {key} must be a string")
        return candidate

    return {key: require_prompt(key) for key in required_keys}
The type of the None singleton.
Configuration for the model, should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """This function is meant to behave like a BaseModel method to initialise private attributes.

    It takes context as an argument since that's what pydantic-core passes when calling it.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        # Private attributes are already initialised; leave them untouched.
        return

    # Collect defaults, skipping attributes whose default is PydanticUndefined.
    defaults = {
        attr_name: default
        for attr_name, attr in self.__private_attributes__.items()
        if (default := attr.get_default()) is not PydanticUndefined
    }
    object_setattr(self, '__pydantic_private__', defaults)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.
class FileInfo(BaseModel):
    filename: str = Field(description="The filename of the file")

    size: int = Field(description="The size of the file in bytes")

    mime_type: str = Field(description="The MIME type of the file")

    attachment: KilnAttachmentModel = Field(
        description="The attachment to the file",
    )

    @field_serializer("attachment")
    def serialize_attachment(
        self, attachment: KilnAttachmentModel, info: SerializationInfo
    ) -> dict:
        """Dump the attachment in JSON mode with ``filename_prefix`` set to "attachment".

        The incoming serialization context is copied before the prefix is
        added, so the caller's context dict is never modified.
        """
        context = dict(info.context or {})
        context["filename_prefix"] = "attachment"
        return attachment.model_dump(mode="json", context=context)

    @field_validator("mime_type")
    @classmethod
    def validate_mime_type(cls, mime_type: str, info: ValidationInfo) -> str:
        """Accept ``mime_type`` only if some entry of SUPPORTED_MIME_TYPES lists it.

        Raises:
            ValueError: If no entry contains the MIME type. The message includes
                the filename when it has already been validated.
        """
        filename = info.data.get("filename") or ""

        if any(mime_type in mime_types for mime_types in SUPPORTED_MIME_TYPES.values()):
            return mime_type
        raise ValueError(f"MIME type is not supported: {mime_type} (for {filename})")
!!! abstract "Usage Documentation" Models
A base class for creating Pydantic models.
Attributes:
__class_vars__: The names of the class variables defined on the model.
__private_attributes__: Metadata about the private attributes of the model.
__signature__: The synthesized `__init__` [`Signature`][inspect.Signature] of the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to
__args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [`RootModel`][pydantic.root_model.RootModel].
__pydantic_serializer__: The `pydantic-core` `SchemaSerializer` used to dump instances of the model.
__pydantic_validator__: The `pydantic-core` `SchemaValidator` used to validate instances of the model.
__pydantic_fields__: A dictionary of field names and their corresponding [`FieldInfo`][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [`ComputedFieldInfo`][pydantic.fields.ComputedFieldInfo] objects.
__pydantic_extra__: A dictionary containing extra values, if [`extra`][pydantic.config.ConfigDict.extra]
is set to `'allow'`.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.
@field_validator("mime_type")
@classmethod
def validate_mime_type(cls, mime_type: str, info: ValidationInfo) -> str:
    """Accept ``mime_type`` only if some entry of SUPPORTED_MIME_TYPES lists it.

    Raises:
        ValueError: If no entry contains the MIME type. The message includes
            the filename when it has already been validated.
    """
    # Falls back to "" when filename is missing from the validated data.
    filename = info.data.get("filename") or ""

    is_supported = any(
        mime_type in supported for supported in SUPPORTED_MIME_TYPES.values()
    )
    if is_supported:
        return mime_type
    raise ValueError(f"MIME type is not supported: {mime_type} (for {filename})")
class Document(
    KilnParentedModel, KilnParentModel, parent_of={"extractions": Extraction}
):
    # this field should not be changed after creation
    name: FilenameString = Field(
        description="A name to identify the document.",
    )

    # this field can be changed after creation
    name_override: str | None = Field(
        description="A friendly name to identify the document. This is used for display purposes and can be different from the name.",
        default=None,
    )

    description: str = Field(description="A description for the file")

    original_file: FileInfo = Field(description="The original file")

    kind: Kind = Field(
        description="The kind of document. The kind is a broad family of filetypes that can be handled in a similar way"
    )

    tags: List[str] = Field(
        default_factory=list,
        description="Tags for the document. Tags are used to categorize documents for filtering and reporting.",
    )

    @model_validator(mode="after")
    def validate_tags(self) -> Self:
        """Reject tags that are empty or contain a space character."""
        # Checked tag by tag so the first offending tag decides which error is raised.
        for candidate in self.tags:
            if not candidate:
                raise ValueError("Tags cannot be empty strings")
            if " " in candidate:
                raise ValueError("Tags cannot contain spaces. Try underscores.")

        return self

    # Workaround to return typed parent without importing Project
    def parent_project(self) -> Union["Project", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Project":
            return None
        return self.parent  # type: ignore

    def extractions(self, readonly: bool = False) -> list[Extraction]:
        """Typed pass-through to the parent-class ``extractions`` accessor."""
        return super().extractions(readonly=readonly)  # type: ignore

    @computed_field
    @property
    def friendly_name(self) -> str:
        # backward compatibility: old documents did not have name_override,
        # so fall back to the immutable name when no override is set
        if self.name_override:
            return self.name_override
        return self.name
Base model for Kiln models that have a parent-child relationship. This base class is for child models.
This class provides functionality for managing hierarchical relationships between models, including parent reference handling and file system organization.
Attributes: parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
def child_method(self, readonly: bool = False) -> list[child_class]:
    """Delegate to ``child_class.all_children_of_parent_path`` for this instance.

    Passes this instance's ``path`` and the ``readonly`` flag through unchanged.
    """
    # NOTE(review): child_class is a free variable here, presumably bound by an
    # enclosing scope that this snippet does not show.
    children = child_class.all_children_of_parent_path(self.path, readonly=readonly)
    return children
The type of the None singleton.
The type of the None singleton.
Configuration for the model, should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """This function is meant to behave like a BaseModel method to initialise private attributes.

    It takes context as an argument since that's what pydantic-core passes when calling it.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        # Private attributes are already initialised; leave them untouched.
        return

    # Collect defaults, skipping attributes whose default is PydanticUndefined.
    defaults = {
        attr_name: default
        for attr_name, attr in self.__private_attributes__.items()
        if (default := attr.get_default()) is not PydanticUndefined
    }
    object_setattr(self, '__pydantic_private__', defaults)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.