Initial commit

This commit is contained in:
2025-03-14 17:28:01 +09:00
commit ba9c1a4a5f
225 changed files with 22467 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
# Chunking configuration: each entry under `modules` names a chunker backend
# (`module_type`) followed by the parameters that belong to that entry.
# NOTE(review): list-valued parameters (e.g. `chunk_method`, `chunk_size`)
# presumably enumerate alternatives to try — confirm against the consumer.
modules:
  # --- llama_index chunkers (all set `add_file_name: en`) ---
  - module_type: llama_index_chunk
    chunk_method: [Token, Sentence]
    chunk_size: [1024, 512]
    chunk_overlap: 24
    add_file_name: en
  - module_type: llama_index_chunk
    chunk_method: [SentenceWindow]
    window_size: 3
    add_file_name: en
  - module_type: llama_index_chunk
    chunk_method: [Semantic_llama_index]
    embed_model: openai
    buffer_size: 1
    breakpoint_percentile_threshold: 95
    add_file_name: en
  - module_type: llama_index_chunk
    chunk_method: [SemanticDoubleMerging]
    add_file_name: en
  - module_type: llama_index_chunk
    chunk_method: [SimpleFile]
    add_file_name: en
  # --- langchain chunkers ---
  - module_type: langchain_chunk
    chunk_method: sentencetransformerstoken
  - module_type: langchain_chunk
    chunk_method: recursivecharacter
    # Double quotes are required so that "\n" is read as a newline escape.
    separators: [" ", "\n"]
  - module_type: langchain_chunk
    chunk_method: character
    # Quoted because the value ends with a significant trailing space.
    separator: ". "
  - module_type: langchain_chunk
    chunk_method: Konlpy

View File

@@ -0,0 +1,19 @@
# Chunking configuration for Korean input: each entry under `modules` names a
# chunker backend (`module_type`) followed by the parameters for that entry.
# NOTE(review): list-valued parameters (e.g. `chunk_method`, `chunk_size`)
# presumably enumerate alternatives to try — confirm against the consumer.
modules:
  # --- llama_index chunkers (all set `add_file_name: ko`) ---
  - module_type: llama_index_chunk
    chunk_method: [Token, Sentence]
    chunk_size: [1024, 512]
    chunk_overlap: 24
    add_file_name: ko
  - module_type: llama_index_chunk
    chunk_method: [SentenceWindow]
    sentence_splitter: kiwi
    add_file_name: ko
  - module_type: llama_index_chunk
    chunk_method: [Semantic_llama_index]
    embed_model: openai
    add_file_name: ko
  - module_type: llama_index_chunk
    chunk_method: [SimpleFile]
    add_file_name: ko
  # --- langchain chunker ---
  # NOTE(review): was `KonlpyTextSplitter`; aligned with the `Konlpy` method
  # name used by the other langchain_chunk configuration — confirm against the
  # chunker's list of supported method names.
  - module_type: langchain_chunk
    chunk_method: Konlpy

View File

@@ -0,0 +1,3 @@
# Minimal chunking configuration: a single llama_index chunker entry.
modules:
  - module_type: llama_index_chunk
    # Nested under the list item so it is a parameter of this module rather
    # than a separate top-level key.
    chunk_method: Token