kiln_ai.adapters.chunkers.fixed_window_chunker
from typing import List

from llama_index.core.text_splitter import SentenceSplitter

from kiln_ai.adapters.chunkers.base_chunker import (
    BaseChunker,
    ChunkingResult,
    TextChunk,
)
from kiln_ai.datamodel.chunk import ChunkerConfig, ChunkerType


class FixedWindowChunker(BaseChunker):
    """Chunker that delegates splitting to a llama_index ``SentenceSplitter``.

    The splitter is built once, at construction time, from the chunk size
    and chunk overlap reported by the supplied ``ChunkerConfig``.
    """

    def __init__(self, chunker_config: ChunkerConfig):
        """Validate ``chunker_config`` and build the underlying splitter.

        Raises:
            ValueError: If the config's chunker type is not ``FIXED_WINDOW``,
                or if its chunk size or chunk overlap is ``None``.
        """
        if chunker_config.chunker_type != ChunkerType.FIXED_WINDOW:
            raise ValueError("Chunker type must be FIXED_WINDOW")

        size = chunker_config.chunk_size()
        if size is None:
            raise ValueError("Chunk size must be set")

        overlap = chunker_config.chunk_overlap()
        if overlap is None:
            raise ValueError("Chunk overlap must be set")

        # The base class is initialised only after the config has passed
        # every check above.
        super().__init__(chunker_config)
        self.splitter = SentenceSplitter(chunk_size=size, chunk_overlap=overlap)

    async def _chunk(self, text: str) -> ChunkingResult:
        """Split ``text`` and wrap each resulting piece in a ``TextChunk``."""
        pieces: List[TextChunk] = [
            TextChunk(text=piece) for piece in self.splitter.split_text(text)
        ]
        return ChunkingResult(chunks=pieces)
class FixedWindowChunker(BaseChunker):
    """Chunker backed by a llama_index ``SentenceSplitter``.

    Chunk size and chunk overlap are taken from the ``ChunkerConfig`` passed
    to the constructor and forwarded to the splitter.
    """

    def __init__(self, chunker_config: ChunkerConfig):
        """Check the configuration, then create the splitter.

        Raises:
            ValueError: If the config's chunker type is not ``FIXED_WINDOW``,
                or if its chunk size or chunk overlap is ``None``.
        """
        if chunker_config.chunker_type != ChunkerType.FIXED_WINDOW:
            raise ValueError("Chunker type must be FIXED_WINDOW")

        window_size = chunker_config.chunk_size()
        if window_size is None:
            raise ValueError("Chunk size must be set")

        window_overlap = chunker_config.chunk_overlap()
        if window_overlap is None:
            raise ValueError("Chunk overlap must be set")

        # Only a fully validated config reaches the base-class initialiser.
        super().__init__(chunker_config)
        self.splitter = SentenceSplitter(
            chunk_size=window_size,
            chunk_overlap=window_overlap,
        )

    async def _chunk(self, text: str) -> ChunkingResult:
        """Return a ``ChunkingResult`` with one ``TextChunk`` per split piece."""
        split_pieces = self.splitter.split_text(text)
        wrapped: List[TextChunk] = [TextChunk(text=piece) for piece in split_pieces]
        return ChunkingResult(chunks=wrapped)
Base class for all chunkers.
Should be subclassed by each chunker.
FixedWindowChunker(chunker_config: kiln_ai.datamodel.chunk.ChunkerConfig)
def __init__(self, chunker_config: ChunkerConfig):
    """Validate ``chunker_config`` and build the ``SentenceSplitter``.

    Raises:
        ValueError: If the config's chunker type is not ``FIXED_WINDOW``,
            or if its chunk size or chunk overlap is ``None``.
    """
    if chunker_config.chunker_type != ChunkerType.FIXED_WINDOW:
        raise ValueError("Chunker type must be FIXED_WINDOW")

    configured_size = chunker_config.chunk_size()
    if configured_size is None:
        raise ValueError("Chunk size must be set")

    configured_overlap = chunker_config.chunk_overlap()
    if configured_overlap is None:
        raise ValueError("Chunk overlap must be set")

    # Base-class initialisation happens only once the config is known good.
    super().__init__(chunker_config)
    self.splitter = SentenceSplitter(
        chunk_size=configured_size,
        chunk_overlap=configured_overlap,
    )