kiln_ai.adapters.chunkers.fixed_window_chunker

 1from typing import List
 2
 3from llama_index.core.text_splitter import SentenceSplitter
 4
 5from kiln_ai.adapters.chunkers.base_chunker import (
 6    BaseChunker,
 7    ChunkingResult,
 8    TextChunk,
 9)
10from kiln_ai.datamodel.chunk import ChunkerConfig, ChunkerType
11
12
class FixedWindowChunker(BaseChunker):
    """Chunker that delegates text splitting to a ``SentenceSplitter``.

    The splitter is built once, at construction time, from the
    ``chunk_size`` and ``chunk_overlap`` entries of the config's
    ``fixed_window_properties`` mapping.
    """

    def __init__(self, chunker_config: ChunkerConfig):
        """Validate the config type and build the underlying splitter.

        Args:
            chunker_config: Configuration whose ``chunker_type`` must be
                ``ChunkerType.FIXED_WINDOW`` and whose
                ``fixed_window_properties`` supplies ``chunk_size`` and
                ``chunk_overlap``.

        Raises:
            ValueError: If ``chunker_config.chunker_type`` is not
                ``ChunkerType.FIXED_WINDOW``.
        """
        wrong_type = chunker_config.chunker_type != ChunkerType.FIXED_WINDOW
        if wrong_type:
            raise ValueError("Chunker type must be FIXED_WINDOW")

        super().__init__(chunker_config)

        # Read the properties only after the base initializer has run,
        # keeping the same evaluation order (size first, then overlap).
        window_props = chunker_config.fixed_window_properties
        size = window_props["chunk_size"]
        overlap = window_props["chunk_overlap"]
        self.splitter = SentenceSplitter(chunk_size=size, chunk_overlap=overlap)

    async def _chunk(self, text: str) -> ChunkingResult:
        """Split ``text`` with the configured splitter.

        Every string returned by ``self.splitter.split_text`` is wrapped in a
        ``TextChunk``, preserving the splitter's output order.

        Args:
            text: The text to split.

        Returns:
            A ``ChunkingResult`` holding one ``TextChunk`` per split piece.
        """
        chunks: List[TextChunk] = [
            TextChunk(text=piece) for piece in self.splitter.split_text(text)
        ]
        return ChunkingResult(chunks=chunks)
class FixedWindowChunker(kiln_ai.adapters.chunkers.base_chunker.BaseChunker):
class FixedWindowChunker(BaseChunker):
    """Chunker that delegates text splitting to a ``SentenceSplitter``.

    The splitter is built once, at construction time, from the
    ``chunk_size`` and ``chunk_overlap`` entries of the config's
    ``fixed_window_properties`` mapping.
    """

    def __init__(self, chunker_config: ChunkerConfig):
        """Validate the config type and build the underlying splitter.

        Args:
            chunker_config: Configuration whose ``chunker_type`` must be
                ``ChunkerType.FIXED_WINDOW`` and whose
                ``fixed_window_properties`` supplies ``chunk_size`` and
                ``chunk_overlap``.

        Raises:
            ValueError: If ``chunker_config.chunker_type`` is not
                ``ChunkerType.FIXED_WINDOW``.
        """
        wrong_type = chunker_config.chunker_type != ChunkerType.FIXED_WINDOW
        if wrong_type:
            raise ValueError("Chunker type must be FIXED_WINDOW")

        super().__init__(chunker_config)

        # Read the properties only after the base initializer has run,
        # keeping the same evaluation order (size first, then overlap).
        window_props = chunker_config.fixed_window_properties
        size = window_props["chunk_size"]
        overlap = window_props["chunk_overlap"]
        self.splitter = SentenceSplitter(chunk_size=size, chunk_overlap=overlap)

    async def _chunk(self, text: str) -> ChunkingResult:
        """Split ``text`` with the configured splitter.

        Every string returned by ``self.splitter.split_text`` is wrapped in a
        ``TextChunk``, preserving the splitter's output order.

        Args:
            text: The text to split.

        Returns:
            A ``ChunkingResult`` holding one ``TextChunk`` per split piece.
        """
        chunks: List[TextChunk] = [
            TextChunk(text=piece) for piece in self.splitter.split_text(text)
        ]
        return ChunkingResult(chunks=chunks)

Base class for all chunkers.

Should be subclassed by each chunker.

FixedWindowChunker(chunker_config: kiln_ai.datamodel.chunk.ChunkerConfig)
15    def __init__(self, chunker_config: ChunkerConfig):
16        if chunker_config.chunker_type != ChunkerType.FIXED_WINDOW:
17            raise ValueError("Chunker type must be FIXED_WINDOW")
18
19        super().__init__(chunker_config)
20        self.splitter = SentenceSplitter(
21            chunk_size=chunker_config.fixed_window_properties["chunk_size"],
22            chunk_overlap=chunker_config.fixed_window_properties["chunk_overlap"],
23        )
splitter