kiln_ai.datamodel.basemodel
1import json 2import os 3import re 4import shutil 5import uuid 6from abc import ABCMeta 7from builtins import classmethod 8from datetime import datetime 9from pathlib import Path 10from typing import ( 11 Any, 12 Dict, 13 List, 14 Optional, 15 Type, 16 TypeVar, 17) 18 19from pydantic import ( 20 BaseModel, 21 ConfigDict, 22 Field, 23 ValidationError, 24 ValidationInfo, 25 computed_field, 26 model_validator, 27) 28from pydantic_core import ErrorDetails 29from typing_extensions import Self 30 31from kiln_ai.datamodel.model_cache import ModelCache 32from kiln_ai.utils.config import Config 33from kiln_ai.utils.formatting import snake_case 34 35# ID is a 12 digit random integer string. 36# Should be unique per item, at least inside the context of a parent/child relationship. 37# Use integers to make it easier to type for a search function. 38# Allow none, even though we generate it, because we clear it in the REST API if the object is ephemeral (not persisted to disk) 39ID_FIELD = Field(default_factory=lambda: str(uuid.uuid4().int)[:12]) 40ID_TYPE = Optional[str] 41T = TypeVar("T", bound="KilnBaseModel") 42PT = TypeVar("PT", bound="KilnParentedModel") 43 44 45# Naming conventions: 46# 1) Names are filename safe as they may be used as file names. They are informational and not to be used in prompts/training/validation. 47# 2) Descrptions are for Kiln users to describe/understanding the purpose of this object. They must never be used in prompts/training/validation. Use "instruction/requirements" instead. 
48 49# Filename compatible names 50NAME_REGEX = r"^[A-Za-z0-9 _-]+$" 51NAME_FIELD = Field( 52 min_length=1, 53 max_length=120, 54 pattern=NAME_REGEX, 55 description="A name for this entity.", 56) 57SHORT_NAME_FIELD = Field( 58 min_length=1, 59 max_length=32, 60 pattern=NAME_REGEX, 61 description="A name for this entity", 62) 63 64 65def string_to_valid_name(name: str) -> str: 66 # Replace any character not allowed by NAME_REGEX with an underscore 67 valid_name = re.sub(r"[^A-Za-z0-9 _-]", "_", name) 68 # Replace consecutive underscores with a single underscore 69 valid_name = re.sub(r"_+", "_", valid_name) 70 # Remove leading and trailing underscores or whitespace 71 return valid_name.strip("_").strip() 72 73 74class KilnBaseModel(BaseModel): 75 """Base model for all Kiln data models with common functionality for persistence and versioning. 76 77 Attributes: 78 v (int): Schema version number for migration support 79 id (str): Unique identifier for the model instance 80 path (Path): File system path where the model is stored 81 created_at (datetime): Timestamp when the model was created 82 created_by (str): User ID of the creator 83 """ 84 85 model_config = ConfigDict(validate_assignment=True) 86 87 v: int = Field(default=1) # schema_version 88 id: ID_TYPE = ID_FIELD 89 path: Optional[Path] = Field(default=None) 90 created_at: datetime = Field(default_factory=datetime.now) 91 created_by: str = Field(default_factory=lambda: Config.shared().user_id) 92 93 _loaded_from_file: bool = False 94 95 @computed_field() 96 def model_type(self) -> str: 97 return self.type_name() 98 99 # if changing the model name, should keep the original name here for parsing old files 100 @classmethod 101 def type_name(cls) -> str: 102 return snake_case(cls.__name__) 103 104 # used as /obj_folder/base_filename.kiln 105 @classmethod 106 def base_filename(cls) -> str: 107 return cls.type_name() + ".kiln" 108 109 @classmethod 110 def load_from_folder(cls: Type[T], folderPath: Path) -> T: 111 
"""Load a model instance from a folder using the default filename. 112 113 Args: 114 folderPath (Path): Directory path containing the model file 115 116 Returns: 117 T: Instance of the model 118 """ 119 path = folderPath / cls.base_filename() 120 return cls.load_from_file(path) 121 122 @classmethod 123 def load_from_file(cls: Type[T], path: Path | str) -> T: 124 """Load a model instance from a specific file path. 125 126 Args: 127 path (Path): Path to the model file 128 129 Returns: 130 T: Instance of the model 131 132 Raises: 133 ValueError: If the loaded model is not of the expected type or version 134 FileNotFoundError: If the file does not exist 135 """ 136 if isinstance(path, str): 137 path = Path(path) 138 cached_model = ModelCache.shared().get_model(path, cls) 139 if cached_model is not None: 140 return cached_model 141 with open(path, "r") as file: 142 # modified time of file for cache invalidation. From file descriptor so it's atomic w read. 143 mtime_ns = os.fstat(file.fileno()).st_mtime_ns 144 file_data = file.read() 145 parsed_json = json.loads(file_data) 146 m = cls.model_validate(parsed_json, context={"loading_from_file": True}) 147 if not isinstance(m, cls): 148 raise ValueError(f"Loaded model is not of type {cls.__name__}") 149 m._loaded_from_file = True 150 file_data = None 151 m.path = path 152 if m.v > m.max_schema_version(): 153 raise ValueError( 154 f"Cannot load from file because the schema version is higher than the current version. Upgrade kiln to the latest version. " 155 f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, " 156 f"version: {m.v}, max version: {m.max_schema_version()}" 157 ) 158 if parsed_json["model_type"] != cls.type_name(): 159 raise ValueError( 160 f"Cannot load from file because the model type is incorrect. Expected {cls.type_name()}, got {parsed_json['model_type']}. 
" 161 f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, " 162 f"version: {m.v}, max version: {m.max_schema_version()}" 163 ) 164 ModelCache.shared().set_model(path, m, mtime_ns) 165 return m 166 167 def loaded_from_file(self, info: ValidationInfo | None = None) -> bool: 168 # Two methods of indicated it's loaded from file: 169 # 1) info.context.get("loading_from_file") -> During actual loading, before we can set _loaded_from_file 170 # 2) self._loaded_from_file -> After loading, set by the loader 171 if ( 172 info is not None 173 and info.context is not None 174 and info.context.get("loading_from_file", False) 175 ): 176 return True 177 return self._loaded_from_file 178 179 def save_to_file(self) -> None: 180 """Save the model instance to a file. 181 182 Raises: 183 ValueError: If the path is not set 184 """ 185 path = self.build_path() 186 if path is None: 187 raise ValueError( 188 f"Cannot save to file because 'path' is not set. Class: {self.__class__.__name__}, " 189 f"id: {getattr(self, 'id', None)}, path: {path}" 190 ) 191 path.parent.mkdir(parents=True, exist_ok=True) 192 json_data = self.model_dump_json(indent=2, exclude={"path"}) 193 with open(path, "w") as file: 194 file.write(json_data) 195 # save the path so even if something like name changes, the file doesn't move 196 self.path = path 197 # We could save, but invalidating will trigger load on next use. 
198 # This ensures everything in cache is loaded from disk, and the cache perfectly reflects what's on disk 199 ModelCache.shared().invalidate(path) 200 201 def delete(self) -> None: 202 if self.path is None: 203 raise ValueError("Cannot delete model because path is not set") 204 dir_path = self.path.parent if self.path.is_file() else self.path 205 if dir_path is None: 206 raise ValueError("Cannot delete model because path is not set") 207 shutil.rmtree(dir_path) 208 ModelCache.shared().invalidate(self.path) 209 self.path = None 210 211 def build_path(self) -> Path | None: 212 if self.path is not None: 213 return self.path 214 return None 215 216 # increment for breaking changes 217 def max_schema_version(self) -> int: 218 return 1 219 220 221class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta): 222 """Base model for Kiln models that have a parent-child relationship. This base class is for child models. 223 224 This class provides functionality for managing hierarchical relationships between models, 225 including parent reference handling and file system organization. 226 227 Attributes: 228 parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory. 229 """ 230 231 # Parent is an in memory only reference to parent. If it's set we use that. If not we'll try to load it from disk based on the path. 232 # We don't persist the parent reference to disk. See the accessors below for how we make it a clean api (parent accessor will lazy load from disk) 233 parent: Optional[KilnBaseModel] = Field(default=None, exclude=True) 234 235 def __getattribute__(self, name: str) -> Any: 236 if name == "parent": 237 return self.load_parent() 238 return super().__getattribute__(name) 239 240 def cached_parent(self) -> Optional[KilnBaseModel]: 241 return object.__getattribute__(self, "parent") 242 243 def load_parent(self) -> Optional[KilnBaseModel]: 244 """Get the parent model instance, loading it from disk if necessary. 
245 246 Returns: 247 Optional[KilnBaseModel]: The parent model instance or None if not set 248 """ 249 cached_parent = self.cached_parent() 250 if cached_parent is not None: 251 return cached_parent 252 253 # lazy load parent from path 254 if self.path is None: 255 return None 256 # Note: this only works with base_filename. If we every support custom names, we need to change this. 257 parent_path = ( 258 self.path.parent.parent.parent 259 / self.__class__.parent_type().base_filename() 260 ) 261 if parent_path is None: 262 return None 263 loaded_parent = self.__class__.parent_type().load_from_file(parent_path) 264 self.parent = loaded_parent 265 return loaded_parent 266 267 # Dynamically implemented by KilnParentModel method injection 268 @classmethod 269 def relationship_name(cls) -> str: 270 raise NotImplementedError("Relationship name must be implemented") 271 272 # Dynamically implemented by KilnParentModel method injection 273 @classmethod 274 def parent_type(cls) -> Type[KilnBaseModel]: 275 raise NotImplementedError("Parent type must be implemented") 276 277 @model_validator(mode="after") 278 def check_parent_type(self) -> Self: 279 cached_parent = self.cached_parent() 280 if cached_parent is not None: 281 expected_parent_type = self.__class__.parent_type() 282 if not isinstance(cached_parent, expected_parent_type): 283 raise ValueError( 284 f"Parent must be of type {expected_parent_type}, but was {type(cached_parent)}" 285 ) 286 return self 287 288 def build_child_dirname(self) -> Path: 289 # Default implementation for readable folder names. 290 # {id} - {name}/{type}.kiln 291 if self.id is None: 292 # consider generating an ID here. But if it's been cleared, we've already used this without one so raise for now. 
293 raise ValueError("ID is not set - can not save or build path") 294 path = self.id 295 name = getattr(self, "name", None) 296 if name is not None: 297 path = f"{path} - {name[:32]}" 298 return Path(path) 299 300 def build_path(self) -> Path | None: 301 # if specifically loaded from an existing path, keep that no matter what 302 # this ensures the file structure is easy to use with git/version control 303 # and that changes to things like name (which impacts default path) don't leave dangling files 304 if self.path is not None: 305 return self.path 306 # Build a path under parent_folder/relationship/file.kiln 307 if self.parent is None: 308 return None 309 parent_path = self.parent.build_path() 310 if parent_path is None: 311 return None 312 parent_folder = parent_path.parent 313 if parent_folder is None: 314 return None 315 return ( 316 parent_folder 317 / self.__class__.relationship_name() 318 / self.build_child_dirname() 319 / self.__class__.base_filename() 320 ) 321 322 @classmethod 323 def iterate_children_paths_of_parent_path(cls: Type[PT], parent_path: Path | None): 324 if parent_path is None: 325 # children are disk based. 
If not saved, they don't exist 326 return [] 327 328 # Determine the parent folder 329 if parent_path.is_file(): 330 parent_folder = parent_path.parent 331 else: 332 parent_folder = parent_path 333 334 parent = cls.parent_type().load_from_file(parent_path) 335 if parent is None: 336 raise ValueError("Parent must be set to load children") 337 338 # Ignore type error: this is abstract base class, but children must implement relationship_name 339 relationship_folder = parent_folder / Path(cls.relationship_name()) # type: ignore 340 341 if not relationship_folder.exists() or not relationship_folder.is_dir(): 342 return [] 343 344 # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder 345 for child_file in relationship_folder.glob(f"**/{cls.base_filename()}"): 346 yield child_file 347 348 @classmethod 349 def all_children_of_parent_path( 350 cls: Type[PT], parent_path: Path | None 351 ) -> list[PT]: 352 children = [] 353 for child_path in cls.iterate_children_paths_of_parent_path(parent_path): 354 children.append(cls.load_from_file(child_path)) 355 return children 356 357 @classmethod 358 def from_id_and_parent_path( 359 cls: Type[PT], id: str, parent_path: Path | None 360 ) -> PT | None: 361 """ 362 Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match. 363 364 Uses cache so still slow on first load. 365 """ 366 if parent_path is None: 367 return None 368 369 # Note: we're using the in-file ID. We could make this faster using the path-ID if this becomes perf bottleneck, but it's better to have 1 source of truth. 
370 for child_path in cls.iterate_children_paths_of_parent_path(parent_path): 371 child_id = ModelCache.shared().get_model_id(child_path, cls) 372 if child_id == id: 373 return cls.load_from_file(child_path) 374 if child_id is None: 375 child = cls.load_from_file(child_path) 376 if child.id == id: 377 return child 378 return None 379 380 381# Parent create methods for all child relationships 382# You must pass in parent_of in the subclass definition, defining the child relationships 383class KilnParentModel(KilnBaseModel, metaclass=ABCMeta): 384 """Base model for Kiln models that can have child models. 385 386 This class provides functionality for managing collections of child models and their persistence. 387 Child relationships must be defined using the parent_of parameter in the class definition. 388 389 Args: 390 parent_of (Dict[str, Type[KilnParentedModel]]): Mapping of relationship names to child model types 391 """ 392 393 @classmethod 394 def _create_child_method( 395 cls, relationship_name: str, child_class: Type[KilnParentedModel] 396 ): 397 def child_method(self) -> list[child_class]: 398 return child_class.all_children_of_parent_path(self.path) 399 400 child_method.__name__ = relationship_name 401 child_method.__annotations__ = {"return": List[child_class]} 402 setattr(cls, relationship_name, child_method) 403 404 @classmethod 405 def _create_parent_methods( 406 cls, targetCls: Type[KilnParentedModel], relationship_name: str 407 ): 408 def parent_class_method() -> Type[KilnParentModel]: 409 return cls 410 411 parent_class_method.__name__ = "parent_type" 412 parent_class_method.__annotations__ = {"return": Type[KilnParentModel]} 413 setattr(targetCls, "parent_type", parent_class_method) 414 415 def relationship_name_method() -> str: 416 return relationship_name 417 418 relationship_name_method.__name__ = "relationship_name" 419 relationship_name_method.__annotations__ = {"return": str} 420 setattr(targetCls, "relationship_name", relationship_name_method) 
421 422 @classmethod 423 def __init_subclass__(cls, parent_of: Dict[str, Type[KilnParentedModel]], **kwargs): 424 super().__init_subclass__(**kwargs) 425 cls._parent_of = parent_of 426 for relationship_name, child_class in parent_of.items(): 427 cls._create_child_method(relationship_name, child_class) 428 cls._create_parent_methods(child_class, relationship_name) 429 430 @classmethod 431 def validate_and_save_with_subrelations( 432 cls, 433 data: Dict[str, Any], 434 path: Path | None = None, 435 parent: KilnBaseModel | None = None, 436 ): 437 """Validate and save a model instance along with all its nested child relationships. 438 439 Args: 440 data (Dict[str, Any]): Model data including child relationships 441 path (Path, optional): Path where the model should be saved 442 parent (KilnBaseModel, optional): Parent model instance for parented models 443 444 Returns: 445 KilnParentModel: The validated and saved model instance 446 447 Raises: 448 ValidationError: If validation fails for the model or any of its children 449 """ 450 # Validate first, then save. Don't want error half way through, and partly persisted 451 # TODO P2: save to tmp dir, then move atomically. But need to merge directories so later. 
452 cls._validate_nested(data, save=False, path=path, parent=parent) 453 instance = cls._validate_nested(data, save=True, path=path, parent=parent) 454 return instance 455 456 @classmethod 457 def _validate_nested( 458 cls, 459 data: Dict[str, Any], 460 save: bool = False, 461 parent: KilnBaseModel | None = None, 462 path: Path | None = None, 463 ): 464 # Collect all validation errors so we can report them all at once 465 validation_errors = [] 466 467 try: 468 instance = cls.model_validate(data) 469 if path is not None: 470 instance.path = path 471 if parent is not None and isinstance(instance, KilnParentedModel): 472 instance.parent = parent 473 if save: 474 instance.save_to_file() 475 except ValidationError as e: 476 instance = None 477 for suberror in e.errors(): 478 validation_errors.append(suberror) 479 480 for key, value_list in data.items(): 481 if key in cls._parent_of: 482 parent_type = cls._parent_of[key] 483 if not isinstance(value_list, list): 484 raise ValueError( 485 f"Expected a list for {key}, but got {type(value_list)}" 486 ) 487 for value_index, value in enumerate(value_list): 488 try: 489 if issubclass(parent_type, KilnParentModel): 490 kwargs = {"data": value, "save": save} 491 if instance is not None: 492 kwargs["parent"] = instance 493 parent_type._validate_nested(**kwargs) 494 elif issubclass(parent_type, KilnParentedModel): 495 # Root node 496 subinstance = parent_type.model_validate(value) 497 if instance is not None: 498 subinstance.parent = instance 499 if save: 500 subinstance.save_to_file() 501 else: 502 raise ValueError( 503 f"Invalid type {parent_type}. Should be KilnBaseModel based." 
504 ) 505 except ValidationError as e: 506 for suberror in e.errors(): 507 cls._append_loc(suberror, key, value_index) 508 validation_errors.append(suberror) 509 510 if len(validation_errors) > 0: 511 raise ValidationError.from_exception_data( 512 title=f"Validation failed for {cls.__name__}", 513 line_errors=validation_errors, 514 input_type="json", 515 ) 516 517 return instance 518 519 @classmethod 520 def _append_loc( 521 cls, error: ErrorDetails, current_loc: str, value_index: int | None = None 522 ): 523 orig_loc = error["loc"] if "loc" in error else None 524 new_loc: list[str | int] = [current_loc] 525 if value_index is not None: 526 new_loc.append(value_index) 527 if isinstance(orig_loc, tuple): 528 new_loc.extend(list(orig_loc)) 529 elif isinstance(orig_loc, list): 530 new_loc.extend(orig_loc) 531 error["loc"] = tuple(new_loc)
def string_to_valid_name(name: str) -> str:
    """Convert an arbitrary string into a file-safe name.

    Every character other than ASCII letters, digits, space, underscore, and
    hyphen is replaced with an underscore, runs of underscores are collapsed
    to a single one, and leading/trailing underscores and spaces are removed.

    Args:
        name: The raw text to sanitize.

    Returns:
        The sanitized name. This is an empty string when ``name`` contains no
        allowed characters.
    """
    # Replace any character not allowed by NAME_REGEX with an underscore
    valid_name = re.sub(r"[^A-Za-z0-9 _-]", "_", name)
    # Replace consecutive underscores with a single underscore
    valid_name = re.sub(r"_+", "_", valid_name)
    # Remove leading and trailing underscores and spaces in a single pass.
    # Stripping them one after the other leaves an underscore behind when the
    # two interleave at an edge (e.g. " _name"). After the first substitution
    # a plain space is the only whitespace that can remain, so "_ " is enough.
    return valid_name.strip("_ ")
class KilnBaseModel(BaseModel):
    """Base model for all Kiln data models with common functionality for persistence and versioning.

    Attributes:
        v (int): Schema version number for migration support
        id (str): Unique identifier for the model instance
        path (Path): File system path where the model is stored
        created_at (datetime): Timestamp when the model was created
        created_by (str): User ID of the creator
    """

    # Re-run field validation whenever an attribute is assigned after construction.
    model_config = ConfigDict(validate_assignment=True)

    v: int = Field(default=1)  # schema_version
    id: ID_TYPE = ID_FIELD
    path: Optional[Path] = Field(default=None)
    created_at: datetime = Field(default_factory=datetime.now)
    created_by: str = Field(default_factory=lambda: Config.shared().user_id)

    # Set to True by load_from_file once an instance has been read from disk.
    _loaded_from_file: bool = False

    @computed_field()
    def model_type(self) -> str:
        """Return the type name of this model; included in serialized output."""
        return self.type_name()

    # if changing the model name, should keep the original name here for parsing old files
    @classmethod
    def type_name(cls) -> str:
        """Return the snake_case form of the class name."""
        return snake_case(cls.__name__)

    # used as /obj_folder/base_filename.kiln
    @classmethod
    def base_filename(cls) -> str:
        """Return the default file name for this model type: ``<type_name>.kiln``."""
        return cls.type_name() + ".kiln"

    @classmethod
    def load_from_folder(cls: Type[T], folderPath: Path) -> T:
        """Load a model instance from a folder using the default filename.

        Args:
            folderPath (Path): Directory path containing the model file

        Returns:
            T: Instance of the model
        """
        path = folderPath / cls.base_filename()
        return cls.load_from_file(path)

    @classmethod
    def load_from_file(cls: Type[T], path: Path | str) -> T:
        """Load a model instance from a specific file path.

        A cached instance is returned when the shared ModelCache holds one for
        this path. Otherwise the file is read as UTF-8 JSON, validated, checked
        for schema version and model type, and stored in the cache.

        Args:
            path (Path | str): Path to the model file. A string is converted to a Path.

        Returns:
            T: Instance of the model

        Raises:
            ValueError: If the loaded model is not of the expected type or version,
                including when the file has no "model_type" entry
            FileNotFoundError: If the file does not exist
        """
        if isinstance(path, str):
            path = Path(path)
        cached_model = ModelCache.shared().get_model(path, cls)
        if cached_model is not None:
            return cached_model
        # JSON text is UTF-8; an explicit encoding keeps reads independent of the
        # platform default and consistent with save_to_file.
        with open(path, "r", encoding="utf-8") as file:
            # modified time of file for cache invalidation. From file descriptor so it's atomic w read.
            mtime_ns = os.fstat(file.fileno()).st_mtime_ns
            parsed_json = json.loads(file.read())
        m = cls.model_validate(parsed_json, context={"loading_from_file": True})
        if not isinstance(m, cls):
            raise ValueError(f"Loaded model is not of type {cls.__name__}")
        m._loaded_from_file = True
        m.path = path
        if m.v > m.max_schema_version():
            raise ValueError(
                f"Cannot load from file because the schema version is higher than the current version. Upgrade kiln to the latest version. "
                f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
                f"version: {m.v}, max version: {m.max_schema_version()}"
            )
        # Use .get so a file without "model_type" produces the documented
        # ValueError rather than a bare KeyError.
        file_model_type = parsed_json.get("model_type")
        if file_model_type != cls.type_name():
            raise ValueError(
                f"Cannot load from file because the model type is incorrect. Expected {cls.type_name()}, got {file_model_type}. "
                f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
                f"version: {m.v}, max version: {m.max_schema_version()}"
            )
        ModelCache.shared().set_model(path, m, mtime_ns)
        return m

    def loaded_from_file(self, info: ValidationInfo | None = None) -> bool:
        """Report whether this instance is being, or has been, loaded from a file.

        Args:
            info: Optional validation info; its context is checked for the
                "loading_from_file" flag that load_from_file passes.

        Returns:
            True when the context flag is truthy, otherwise the value of
            ``_loaded_from_file``.
        """
        # Two methods of indicated it's loaded from file:
        # 1) info.context.get("loading_from_file") -> During actual loading, before we can set _loaded_from_file
        # 2) self._loaded_from_file -> After loading, set by the loader
        if (
            info is not None
            and info.context is not None
            and info.context.get("loading_from_file", False)
        ):
            return True
        return self._loaded_from_file

    def save_to_file(self) -> None:
        """Save the model instance to a file.

        The model is serialized as indented JSON (without the ``path`` field)
        and written as UTF-8 to the path returned by build_path. Missing parent
        directories are created.

        Raises:
            ValueError: If the path is not set
        """
        path = self.build_path()
        if path is None:
            raise ValueError(
                f"Cannot save to file because 'path' is not set. Class: {self.__class__.__name__}, "
                f"id: {getattr(self, 'id', None)}, path: {path}"
            )
        path.parent.mkdir(parents=True, exist_ok=True)
        json_data = self.model_dump_json(indent=2, exclude={"path"})
        # Explicit UTF-8 so non-ASCII content is written the same way on every
        # platform and matches what load_from_file reads.
        with open(path, "w", encoding="utf-8") as file:
            file.write(json_data)
        # save the path so even if something like name changes, the file doesn't move
        self.path = path
        # We could save, but invalidating will trigger load on next use.
        # This ensures everything in cache is loaded from disk, and the cache perfectly reflects what's on disk
        ModelCache.shared().invalidate(path)

    def delete(self) -> None:
        """Delete this model's directory from disk and clear its path.

        When ``path`` is a file its parent directory is removed; otherwise
        ``path`` itself is removed as a directory tree. The cache entry for the
        path is invalidated and ``path`` is reset to None.

        Raises:
            ValueError: If the path is not set
        """
        if self.path is None:
            raise ValueError("Cannot delete model because path is not set")
        # Path.parent always yields a Path, so dir_path needs no None check.
        dir_path = self.path.parent if self.path.is_file() else self.path
        shutil.rmtree(dir_path)
        ModelCache.shared().invalidate(self.path)
        self.path = None

    def build_path(self) -> Path | None:
        """Return the path this model is saved to, or None when no path is set."""
        return self.path

    # increment for breaking changes
    def max_schema_version(self) -> int:
        """Return the highest schema version this class can load."""
        return 1
Base model for all Kiln data models with common functionality for persistence and versioning.
Attributes: v (int): Schema version number for migration support id (str): Unique identifier for the model instance path (Path): File system path where the model is stored created_at (datetime): Timestamp when the model was created created_by (str): User ID of the creator
Configuration for the model; should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
@classmethod
def load_from_folder(cls: Type[T], folderPath: Path) -> T:
    """Load the model stored under its default filename inside a folder.

    Args:
        folderPath (Path): Directory path containing the model file

    Returns:
        T: Instance of the model
    """
    # The file name comes from the class, so only the folder has to be supplied.
    default_file = cls.base_filename()
    return cls.load_from_file(folderPath / default_file)
Load a model instance from a folder using the default filename.
Args: folderPath (Path): Directory path containing the model file
Returns: T: Instance of the model
@classmethod
def load_from_file(cls: Type[T], path: Path | str) -> T:
    """Load a model instance from a specific file path.

    A cached instance is returned when the shared ModelCache holds one for
    this path. Otherwise the file is read, parsed as JSON, validated, checked
    for schema version and model type, and stored in the cache.

    Args:
        path (Path | str): Path to the model file. A string is converted to a Path.

    Returns:
        T: Instance of the model

    Raises:
        ValueError: If the loaded model is not of the expected type or version,
            including when the file has no "model_type" entry
        FileNotFoundError: If the file does not exist
    """
    if isinstance(path, str):
        path = Path(path)
    cached_model = ModelCache.shared().get_model(path, cls)
    if cached_model is not None:
        return cached_model
    # Only the read needs the open handle; validation happens after it is closed.
    with open(path, "r") as file:
        # modified time of file for cache invalidation. From file descriptor so it's atomic w read.
        mtime_ns = os.fstat(file.fileno()).st_mtime_ns
        parsed_json = json.loads(file.read())
    m = cls.model_validate(parsed_json, context={"loading_from_file": True})
    if not isinstance(m, cls):
        raise ValueError(f"Loaded model is not of type {cls.__name__}")
    m._loaded_from_file = True
    m.path = path
    if m.v > m.max_schema_version():
        raise ValueError(
            f"Cannot load from file because the schema version is higher than the current version. Upgrade kiln to the latest version. "
            f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
            f"version: {m.v}, max version: {m.max_schema_version()}"
        )
    # Use .get so a file without "model_type" produces the documented
    # ValueError rather than a bare KeyError.
    file_model_type = parsed_json.get("model_type")
    if file_model_type != cls.type_name():
        raise ValueError(
            f"Cannot load from file because the model type is incorrect. Expected {cls.type_name()}, got {file_model_type}. "
            f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
            f"version: {m.v}, max version: {m.max_schema_version()}"
        )
    ModelCache.shared().set_model(path, m, mtime_ns)
    return m
Load a model instance from a specific file path.
Args: path (Path | str): Path to the model file. A string is converted to a Path.
Returns: T: Instance of the model
Raises: ValueError: If the loaded model is not of the expected type or version FileNotFoundError: If the file does not exist
def loaded_from_file(self, info: ValidationInfo | None = None) -> bool:
    """Report whether this instance is being, or has been, loaded from a file.

    Two signals are consulted, in this order:
    1) the "loading_from_file" flag in the validation context, which is
       available during loading, before _loaded_from_file can be set
    2) self._loaded_from_file, which the loader sets after loading
    """
    context = info.context if info is not None else None
    if context is not None and context.get("loading_from_file", False):
        return True
    return self._loaded_from_file
def save_to_file(self) -> None:
    """Serialize this model to indented JSON and write it to its resolved path.

    The destination comes from build_path(); missing parent directories are
    created, and the "path" field itself is left out of the written JSON.

    Raises:
        ValueError: If the path is not set
    """
    path = self.build_path()
    if path is None:
        class_name = self.__class__.__name__
        model_id = getattr(self, 'id', None)
        raise ValueError(
            f"Cannot save to file because 'path' is not set. Class: {class_name}, "
            f"id: {model_id}, path: {path}"
        )

    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = self.model_dump_json(indent=2, exclude={"path"})
    with open(path, "w") as handle:
        handle.write(serialized)

    # Pin the resolved location so a later rename of the model does not move the file.
    self.path = path
    # Drop the cache entry instead of storing this instance: the next load
    # re-reads the file, so the cache only ever holds what is on disk.
    ModelCache.shared().invalidate(path)
Save the model instance to a file.
Raises: ValueError: If the path is not set
def delete(self) -> None:
    """Delete this model's directory from disk and clear its path.

    When ``path`` is a file its parent directory is removed; otherwise
    ``path`` itself is removed as a directory tree. The cache entry for the
    path is invalidated and ``path`` is reset to None.

    Raises:
        ValueError: If the path is not set
    """
    if self.path is None:
        raise ValueError("Cannot delete model because path is not set")
    # Path.parent always yields a Path, so dir_path needs no None check.
    dir_path = self.path.parent if self.path.is_file() else self.path
    shutil.rmtree(dir_path)
    ModelCache.shared().invalidate(self.path)
    self.path = None
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Fill the instance's private-attribute storage with the declared defaults.

    Written to be used like a BaseModel method. ``context`` is accepted because
    pydantic-core passes it when calling; it is not read here.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        # Storage is already present; leave it untouched.
        return

    defaults = {}
    for attr_name, attr in self.__private_attributes__.items():
        value = attr.get_default()
        if value is PydanticUndefined:
            # No default declared for this attribute, so nothing to store.
            continue
        defaults[attr_name] = value
    object_setattr(self, '__pydantic_private__', defaults)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.
class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
    """Base class for Kiln child models, i.e. models that sit underneath a parent model.

    Handles the parent reference (lazily loaded from disk when only a path is known)
    and derives the on-disk location of a child relative to its parent.

    Attributes:
        parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
    """

    # In-memory-only reference to the parent. When set, it is used directly; otherwise
    # the parent is loaded from disk based on this model's path (see load_parent).
    # The field is excluded from serialisation, so it is never written to disk.
    parent: Optional[KilnBaseModel] = Field(default=None, exclude=True)

    def __getattribute__(self, name: str) -> Any:
        # Reads of "parent" are routed through load_parent so the lazy load is
        # transparent to callers; every other attribute takes the normal path.
        if name != "parent":
            return super().__getattribute__(name)
        return self.load_parent()

    def cached_parent(self) -> Optional[KilnBaseModel]:
        """Return the stored parent reference without triggering a lazy load."""
        return object.__getattribute__(self, "parent")

    def load_parent(self) -> Optional[KilnBaseModel]:
        """Return the parent model, reading it from disk on first access if needed.

        Returns:
            Optional[KilnBaseModel]: The parent instance, or None when no parent is
            stored in memory and this model has no path to locate one from.
        """
        in_memory = self.cached_parent()
        if in_memory is not None:
            return in_memory

        # Without a path there is nothing to lazy load from.
        if self.path is None:
            return None

        # Note: this only works with base_filename. If we ever support custom names, this must change.
        # The parent file sits three directory levels above this model's file.
        parent_file = (
            self.path.parent.parent.parent
            / self.__class__.parent_type().base_filename()
        )
        if parent_file is None:
            return None
        loaded = self.__class__.parent_type().load_from_file(parent_file)
        # Remember the loaded parent so later reads skip the disk.
        self.parent = loaded
        return loaded

    # Dynamically implemented by KilnParentModel method injection
    @classmethod
    def relationship_name(cls) -> str:
        raise NotImplementedError("Relationship name must be implemented")

    # Dynamically implemented by KilnParentModel method injection
    @classmethod
    def parent_type(cls) -> Type[KilnBaseModel]:
        raise NotImplementedError("Parent type must be implemented")

    @model_validator(mode="after")
    def check_parent_type(self) -> Self:
        """Reject an in-memory parent that is not an instance of the expected parent type."""
        in_memory = self.cached_parent()
        if in_memory is None:
            return self
        expected_parent_type = self.__class__.parent_type()
        if isinstance(in_memory, expected_parent_type):
            return self
        raise ValueError(
            f"Parent must be of type {expected_parent_type}, but was {type(in_memory)}"
        )

    def build_child_dirname(self) -> Path:
        """Build the readable folder name for this child: ``{id}`` or ``{id} - {name}``.

        The name part is truncated to 32 characters.

        Raises:
            ValueError: If the ID is not set.
        """
        item_id = self.id
        if item_id is None:
            # Consider generating an ID here. But if it's been cleared, we've already
            # used this without one, so raise for now.
            raise ValueError("ID is not set - can not save or build path")
        name = getattr(self, "name", None)
        dirname = item_id if name is None else f"{item_id} - {name[:32]}"
        return Path(dirname)

    def build_path(self) -> Path | None:
        """Return the file path for this model, or None if it can't be determined.

        An existing path always wins: this keeps the file structure stable for
        git/version control and means changes to things like the name (which affects
        the default path) don't leave dangling files. Otherwise the path is built as
        ``parent_folder/relationship/child_dirname/base_filename``.
        """
        if self.path is not None:
            return self.path

        parent = self.parent
        if parent is None:
            return None
        parent_file = parent.build_path()
        if parent_file is None:
            return None
        parent_dir = parent_file.parent
        if parent_dir is None:
            return None

        model_cls = self.__class__
        return (
            parent_dir
            / model_cls.relationship_name()
            / self.build_child_dirname()
            / model_cls.base_filename()
        )

    @classmethod
    def iterate_children_paths_of_parent_path(cls: Type[PT], parent_path: Path | None):
        """Yield the path of every child file of this type found under the parent.

        Yields nothing when ``parent_path`` is None or the relationship folder is missing.
        """
        if parent_path is None:
            # children are disk based. If not saved, they don't exist
            return []

        # The parent folder is the file's directory, or the path itself otherwise.
        parent_folder = parent_path.parent if parent_path.is_file() else parent_path

        if cls.parent_type().load_from_file(parent_path) is None:
            raise ValueError("Parent must be set to load children")

        # Ignore type error: this is abstract base class, but children must implement relationship_name
        relationship_folder = parent_folder / Path(cls.relationship_name())  # type: ignore

        if not (relationship_folder.exists() and relationship_folder.is_dir()):
            return []

        # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder
        yield from relationship_folder.glob(f"**/{cls.base_filename()}")

    @classmethod
    def all_children_of_parent_path(
        cls: Type[PT], parent_path: Path | None
    ) -> list[PT]:
        """Load and return every child of this type stored under ``parent_path``."""
        return [
            cls.load_from_file(child_path)
            for child_path in cls.iterate_children_paths_of_parent_path(parent_path)
        ]

    @classmethod
    def from_id_and_parent_path(
        cls: Type[PT], id: str, parent_path: Path | None
    ) -> PT | None:
        """
        Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match.

        Uses cache so still slow on first load.
        """
        if parent_path is None:
            return None

        # Note: we're using the in-file ID. We could make this faster using the path-ID
        # if this becomes a perf bottleneck, but it's better to have 1 source of truth.
        for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
            known_id = ModelCache.shared().get_model_id(child_path, cls)
            if known_id == id:
                return cls.load_from_file(child_path)
            if known_id is not None:
                # The cache knows this file and it holds a different ID.
                continue
            # No ID from the cache: load the file and compare its ID instead.
            candidate = cls.load_from_file(child_path)
            if candidate.id == id:
                return candidate
        return None
Base model for Kiln models that have a parent-child relationship. This base class is for child models.
This class provides functionality for managing hierarchical relationships between models, including parent reference handling and file system organization.
Attributes: parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
def load_parent(self) -> Optional[KilnBaseModel]:
    """Return the parent model, reading it from disk on first access if needed.

    Returns:
        Optional[KilnBaseModel]: The parent instance, or None when no parent is
        stored in memory and this model has no path to locate one from.
    """
    in_memory = self.cached_parent()
    if in_memory is not None:
        return in_memory

    # Without a path there is nothing to lazy load from.
    if self.path is None:
        return None

    # Note: this only works with base_filename. If we ever support custom names, this must change.
    # The parent file sits three directory levels above this model's file.
    parent_file = (
        self.path.parent.parent.parent
        / self.__class__.parent_type().base_filename()
    )
    if parent_file is None:
        return None
    loaded = self.__class__.parent_type().load_from_file(parent_file)
    # Remember the loaded parent so later reads skip the disk.
    self.parent = loaded
    return loaded
Get the parent model instance, loading it from disk if necessary.
Returns: Optional[KilnBaseModel]: The parent model instance or None if not set
@model_validator(mode="after")
def check_parent_type(self) -> Self:
    """Reject an in-memory parent that is not an instance of the expected parent type.

    Only the stored parent reference is inspected; no lazy load is triggered.

    Raises:
        ValueError: If a parent is stored and it has the wrong type.
    """
    in_memory = self.cached_parent()
    if in_memory is None:
        return self
    expected_parent_type = self.__class__.parent_type()
    if isinstance(in_memory, expected_parent_type):
        return self
    raise ValueError(
        f"Parent must be of type {expected_parent_type}, but was {type(in_memory)}"
    )
def build_child_dirname(self) -> Path:
    """Build the readable folder name for this child: ``{id}`` or ``{id} - {name}``.

    The name part is truncated to 32 characters. The model's file
    (``{type}.kiln``) lives inside this folder.

    Raises:
        ValueError: If the ID is not set.
    """
    item_id = self.id
    if item_id is None:
        # Consider generating an ID here. But if it's been cleared, we've already
        # used this without one, so raise for now.
        raise ValueError("ID is not set - can not save or build path")
    name = getattr(self, "name", None)
    dirname = item_id if name is None else f"{item_id} - {name[:32]}"
    return Path(dirname)
301 def build_path(self) -> Path | None: 302 # if specifically loaded from an existing path, keep that no matter what 303 # this ensures the file structure is easy to use with git/version control 304 # and that changes to things like name (which impacts default path) don't leave dangling files 305 if self.path is not None: 306 return self.path 307 # Build a path under parent_folder/relationship/file.kiln 308 if self.parent is None: 309 return None 310 parent_path = self.parent.build_path() 311 if parent_path is None: 312 return None 313 parent_folder = parent_path.parent 314 if parent_folder is None: 315 return None 316 return ( 317 parent_folder 318 / self.__class__.relationship_name() 319 / self.build_child_dirname() 320 / self.__class__.base_filename() 321 )
@classmethod
def iterate_children_paths_of_parent_path(cls: Type[PT], parent_path: Path | None):
    """Yield the path of every child file of this type found under the parent.

    Yields nothing when ``parent_path`` is None or the relationship folder is missing.

    Raises:
        ValueError: If loading the parent from ``parent_path`` returns None.
    """
    if parent_path is None:
        # children are disk based. If not saved, they don't exist
        return []

    # The parent folder is the file's directory, or the path itself otherwise.
    parent_folder = parent_path.parent if parent_path.is_file() else parent_path

    if cls.parent_type().load_from_file(parent_path) is None:
        raise ValueError("Parent must be set to load children")

    # Ignore type error: this is abstract base class, but children must implement relationship_name
    relationship_folder = parent_folder / Path(cls.relationship_name())  # type: ignore

    if not (relationship_folder.exists() and relationship_folder.is_dir()):
        return []

    # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder
    yield from relationship_folder.glob(f"**/{cls.base_filename()}")
@classmethod
def from_id_and_parent_path(
    cls: Type[PT], id: str, parent_path: Path | None
) -> PT | None:
    """
    Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match.

    Uses cache so still slow on first load.
    """
    if parent_path is None:
        return None

    # Note: we're using the in-file ID. We could make this faster using the path-ID
    # if this becomes a perf bottleneck, but it's better to have 1 source of truth.
    for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
        known_id = ModelCache.shared().get_model_id(child_path, cls)
        if known_id == id:
            return cls.load_from_file(child_path)
        if known_id is not None:
            # The cache knows this file and it holds a different ID.
            continue
        # No ID from the cache: load the file and compare its ID instead.
        candidate = cls.load_from_file(child_path)
        if candidate.id == id:
            return candidate
    return None
Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match.
Uses cache so still slow on first load.
Configuration for the model; should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
    """Initialise private attributes, then run the user-defined ``model_post_init``.

    Both steps receive the same ``context``.
    """
    # Order matters: private attributes are set up before the user hook is called.
    init_private_attributes(self, context)
    original_model_post_init(self, context)
We need to both initialize private attributes and call the user-defined model_post_init method.
class KilnParentModel(KilnBaseModel, metaclass=ABCMeta):
    """Base class for Kiln models that own child models.

    Subclasses declare their child relationships with the ``parent_of`` class keyword.
    For each relationship an accessor method is added to the parent class, and
    ``parent_type`` / ``relationship_name`` are injected into the child class.

    Args:
        parent_of (Dict[str, Type[KilnParentedModel]]): Mapping of relationship names to child model types
    """

    @classmethod
    def _create_child_method(
        cls, relationship_name: str, child_class: Type[KilnParentedModel]
    ):
        """Attach a ``<relationship_name>()`` method to ``cls`` that loads the children from disk."""

        def child_method(self) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path)

        child_method.__name__ = relationship_name
        child_method.__annotations__ = {"return": List[child_class]}
        setattr(cls, relationship_name, child_method)

    @classmethod
    def _create_parent_methods(
        cls, targetCls: Type[KilnParentedModel], relationship_name: str
    ):
        """Inject ``parent_type`` and ``relationship_name`` into the child class ``targetCls``."""

        def parent_class_method() -> Type[KilnParentModel]:
            return cls

        parent_class_method.__name__ = "parent_type"
        parent_class_method.__annotations__ = {"return": Type[KilnParentModel]}
        setattr(targetCls, "parent_type", parent_class_method)

        def relationship_name_method() -> str:
            return relationship_name

        relationship_name_method.__name__ = "relationship_name"
        relationship_name_method.__annotations__ = {"return": str}
        setattr(targetCls, "relationship_name", relationship_name_method)

    @classmethod
    def __init_subclass__(cls, parent_of: Dict[str, Type[KilnParentedModel]], **kwargs):
        super().__init_subclass__(**kwargs)
        cls._parent_of = parent_of
        # Wire both directions of every declared relationship.
        for relationship_name, child_class in parent_of.items():
            cls._create_child_method(relationship_name, child_class)
            cls._create_parent_methods(child_class, relationship_name)

    @classmethod
    def validate_and_save_with_subrelations(
        cls,
        data: Dict[str, Any],
        path: Path | None = None,
        parent: KilnBaseModel | None = None,
    ):
        """Validate a model and all of its nested child relationships, then save them.

        Args:
            data (Dict[str, Any]): Model data including child relationships
            path (Path, optional): Path where the model should be saved
            parent (KilnBaseModel, optional): Parent model instance for parented models

        Returns:
            KilnParentModel: The validated and saved model instance

        Raises:
            ValidationError: If validation fails for the model or any of its children
        """
        # Validate first, then save. Don't want an error half way through, and partly persisted.
        # TODO P2: save to tmp dir, then move atomically. But need to merge directories so later.
        cls._validate_nested(data, save=False, path=path, parent=parent)
        return cls._validate_nested(data, save=True, path=path, parent=parent)

    @classmethod
    def _validate_nested(
        cls,
        data: Dict[str, Any],
        save: bool = False,
        parent: KilnBaseModel | None = None,
        path: Path | None = None,
    ):
        """Validate ``data`` and its nested relationships, optionally saving each model.

        Validation errors from this model and from all children are gathered and
        raised together as a single ValidationError.
        """
        # Collect all validation errors so we can report them all at once
        collected_errors = []

        try:
            instance = cls.model_validate(data)
            if path is not None:
                instance.path = path
            if parent is not None and isinstance(instance, KilnParentedModel):
                instance.parent = parent
            if save:
                instance.save_to_file()
        except ValidationError as e:
            instance = None
            collected_errors.extend(e.errors())

        for key, value_list in data.items():
            # Only keys that name a declared relationship hold child data.
            if key not in cls._parent_of:
                continue
            child_type = cls._parent_of[key]
            if not isinstance(value_list, list):
                raise ValueError(
                    f"Expected a list for {key}, but got {type(value_list)}"
                )
            for value_index, value in enumerate(value_list):
                try:
                    if issubclass(child_type, KilnParentModel):
                        # The child has children of its own: recurse.
                        kwargs = {"data": value, "save": save}
                        if instance is not None:
                            kwargs["parent"] = instance
                        child_type._validate_nested(**kwargs)
                    elif issubclass(child_type, KilnParentedModel):
                        # Root node
                        subinstance = child_type.model_validate(value)
                        if instance is not None:
                            subinstance.parent = instance
                        if save:
                            subinstance.save_to_file()
                    else:
                        raise ValueError(
                            f"Invalid type {child_type}. Should be KilnBaseModel based."
                        )
                except ValidationError as e:
                    # Prefix each error location with the relationship key and index.
                    for suberror in e.errors():
                        cls._append_loc(suberror, key, value_index)
                        collected_errors.append(suberror)

        if collected_errors:
            raise ValidationError.from_exception_data(
                title=f"Validation failed for {cls.__name__}",
                line_errors=collected_errors,
                input_type="json",
            )

        return instance

    @classmethod
    def _append_loc(
        cls, error: ErrorDetails, current_loc: str, value_index: int | None = None
    ):
        """Prefix ``error["loc"]`` in place with ``current_loc`` and, if given, ``value_index``."""
        prefixed: list[str | int] = [current_loc]
        if value_index is not None:
            prefixed.append(value_index)
        existing = error.get("loc")
        # Any other kind of existing location is dropped rather than appended.
        if isinstance(existing, (tuple, list)):
            prefixed.extend(existing)
        error["loc"] = tuple(prefixed)
Base model for Kiln models that can have child models.
This class provides functionality for managing collections of child models and their persistence. Child relationships must be defined using the parent_of parameter in the class definition.
Args: parent_of (Dict[str, Type[KilnParentedModel]]): Mapping of relationship names to child model types
@classmethod
def validate_and_save_with_subrelations(
    cls,
    data: Dict[str, Any],
    path: Path | None = None,
    parent: KilnBaseModel | None = None,
):
    """Validate a model and all of its nested child relationships, then save them.

    Args:
        data (Dict[str, Any]): Model data including child relationships
        path (Path, optional): Path where the model should be saved
        parent (KilnBaseModel, optional): Parent model instance for parented models

    Returns:
        KilnParentModel: The validated and saved model instance

    Raises:
        ValidationError: If validation fails for the model or any of its children
    """
    # Validate first, then save. Don't want an error half way through, and partly persisted.
    # TODO P2: save to tmp dir, then move atomically. But need to merge directories so later.
    cls._validate_nested(data, save=False, path=path, parent=parent)
    return cls._validate_nested(data, save=True, path=path, parent=parent)
Validate and save a model instance along with all its nested child relationships.
Args: data (Dict[str, Any]): Model data including child relationships path (Path, optional): Path where the model should be saved parent (KilnBaseModel, optional): Parent model instance for parented models
Returns: KilnParentModel: The validated and saved model instance
Raises: ValidationError: If validation fails for the model or any of its children
Configuration for the model; should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
    """Initialise private attributes, then run the user-defined ``model_post_init``.

    Both steps receive the same ``context``.
    """
    # Order matters: private attributes are set up before the user hook is called.
    init_private_attributes(self, context)
    original_model_post_init(self, context)
We need to both initialize private attributes and call the user-defined model_post_init method.