Initial commit

This commit is contained in:
kyy
2025-03-14 17:33:18 +09:00
parent ba9c1a4a5f
commit 6814230bfb
61 changed files with 2087124 additions and 4 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,13 @@
# Chunking experiment configuration: each entry under `modules` describes one
# chunker. Judging by the accompanying run summary, a list-valued parameter is
# expanded into one run per value (and per combination across parameters).
modules:
  # Token-based chunking; chunk_size x chunk_overlap yields four runs.
  # NOTE(review): the grid includes chunk_size=200 with chunk_overlap=200
  # (overlap equal to the chunk size) — confirm that combination is intended.
  - module_type: llama_index_chunk
    chunk_method: Token
    chunk_size: [200, 1000]
    chunk_overlap: [30, 200]
    add_file_name: ko
  # Semantic chunking driven by the named embedding model (single run).
  - module_type: llama_index_chunk
    chunk_method: Semantic_llama_index
    embed_model: hf_snowflake-arctic-embed-l-v2.0-ko
    add_file_name: ko
  # Recursive character chunking.
  # NOTE(review): the run summary records ' ' and '\n' as two separate runs,
  # i.e. this list is treated as two candidate values rather than one
  # separator list — confirm that is the intended behavior.
  - module_type: langchain_chunk
    chunk_method: recursivecharacter
    separators: [ " ", "\n" ]

View File

@@ -0,0 +1,8 @@
filename,module_name,module_params,execution_time
0.parquet,llama_index_chunk,"{'chunk_method': 'Token', 'chunk_size': 200, 'chunk_overlap': 200, 'add_file_name': 'ko'}",9.001352617045057e-05
1.parquet,llama_index_chunk,"{'chunk_method': 'Token', 'chunk_size': 200, 'chunk_overlap': 30, 'add_file_name': 'ko'}",3.7807608560139556e-05
2.parquet,llama_index_chunk,"{'chunk_method': 'Token', 'chunk_size': 1000, 'chunk_overlap': 200, 'add_file_name': 'ko'}",2.4567047525651943e-05
3.parquet,llama_index_chunk,"{'chunk_method': 'Token', 'chunk_size': 1000, 'chunk_overlap': 30, 'add_file_name': 'ko'}",2.697287288367227e-05
4.parquet,llama_index_chunk,"{'chunk_method': 'Semantic_llama_index', 'embed_model': 'hf_snowflake-arctic-embed-l-v2.0-ko', 'add_file_name': 'ko'}",0.006124294066363658
5.parquet,langchain_chunk,"{'chunk_method': 'recursivecharacter', 'separators': ' '}",1.7906897601052548e-06
6.parquet,langchain_chunk,"{'chunk_method': 'recursivecharacter', 'separators': '\n'}",2.2023862824233143e-06
1 filename module_name module_params execution_time
2 0.parquet llama_index_chunk {'chunk_method': 'Token', 'chunk_size': 200, 'chunk_overlap': 200, 'add_file_name': 'ko'} 9.001352617045057e-05
3 1.parquet llama_index_chunk {'chunk_method': 'Token', 'chunk_size': 200, 'chunk_overlap': 30, 'add_file_name': 'ko'} 3.7807608560139556e-05
4 2.parquet llama_index_chunk {'chunk_method': 'Token', 'chunk_size': 1000, 'chunk_overlap': 200, 'add_file_name': 'ko'} 2.4567047525651943e-05
5 3.parquet llama_index_chunk {'chunk_method': 'Token', 'chunk_size': 1000, 'chunk_overlap': 30, 'add_file_name': 'ko'} 2.697287288367227e-05
6 4.parquet llama_index_chunk {'chunk_method': 'Semantic_llama_index', 'embed_model': 'hf_snowflake-arctic-embed-l-v2.0-ko', 'add_file_name': 'ko'} 0.006124294066363658
7 5.parquet langchain_chunk {'chunk_method': 'recursivecharacter', 'separators': ' '} 1.7906897601052548e-06
8 6.parquet langchain_chunk {'chunk_method': 'recursivecharacter', 'separators': '\n'} 2.2023862824233143e-06