Initial Commit
This commit is contained in:
92
example/0/config.yaml
Executable file
92
example/0/config.yaml
Executable file
@@ -0,0 +1,92 @@
---
# Trial 0 pipeline configuration: one vector store definition plus two node
# lines (retrieval, then prompt building / generation).
# NOTE(review): nesting was rebuilt from a flattened copy of this file; the
# layout follows what the key names imply (AutoRAG-style config) - confirm
# against the original before running.

# Vector stores; retrieval modules select one through its `name`.
vectordb:
  - name: chroma_dragonkue2
    db_type: chroma
    client_type: persistent
    # NOTE(review): spelled "drangonku" here but "dragonkue" in `name` and in
    # the reranker module; left as-is because it presumably has to match an
    # identifier registered elsewhere - confirm.
    embedding_model: huggingface_drangonku-v2-ko
    collection_name: huggingface_drangonku-v2-ko
    path: ${PROJECT_DIR}/resources/chroma

node_lines:
  - node_line_name: retrieve_node_line  # Arbitrary node line name
    nodes:
      - node_type: retrieval
        strategy:
          metrics:
            - retrieval_f1
            - retrieval_recall
            - retrieval_precision
            - retrieval_ndcg
            - retrieval_map
            - retrieval_mrr
          speed_threshold: 10
        top_k: 10
        modules:
          - module_type: bm25
            bm25_tokenizer: [ko_kiwi, ko_okt]
          - module_type: vectordb
            vectordb: chroma_dragonkue2  # chromadb
          - module_type: hybrid_cc
            normalize_method: [mm, tmm, z, dbsf]
            target_modules: ('bm25', 'vectordb')
            # NOTE(review): written as (0.6, 0.4), i.e. larger bound first -
            # confirm the consumer accepts a descending range.
            weight_range: (0.6, 0.4)
            test_weight_size: 101
      - node_type: passage_reranker  # re-ranker
        strategy:
          metrics:
            - retrieval_recall
            - retrieval_precision
            - retrieval_map
        modules:
          - module_type: dragonkue2
            top_k: 5

  - node_line_name: post_retrieve_node_line  # generation node line
    nodes:
      - node_type: prompt_maker
        strategy:
          metrics:
            - metric_name: bleu
            - metric_name: meteor
            - metric_name: rouge
            - metric_name: sem_score
              # Exactly one embedding model may be given here; otherwise:
              # ValueError("Only one embedding model is supported")
              embedding_model: huggingface_drangonku-v2-ko
              # NOTE(review): `lang` followed `embedding_model` directly in
              # the flattened source; assumed to belong to this metric entry.
              lang: ko
          # Generators used to score the candidate prompts.
          generator_modules:
            - module_type: llama_index_llm
              llm: ollama
              # Quoted: the tags contain ':' inside a flow sequence.
              model: ['gemma3:12b', 'phi4', 'deepseek-r1:14b', 'aya-expanse:8b']
              request_timeout: 3000.0
        modules:
          - module_type: fstring
            prompt:
              # `{retrieved_contents}` and `{query}` are the template
              # placeholders used in this prompt.
              - |
                ### Task:
                Respond to the user query using the provided context.

                ### Guidelines:
                - If you don't know the answer, clearly state that.
                - If uncertain, ask the user for clarification.
                - Respond in the same language as the user's query.
                - If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
                - If the answer isn't present in the context but you possess the knowledge, explain this to the user and provide the answer using your own understanding.
                - Do not use XML tags in your response.

                ### Output:
                Provide a clear and direct response to the user's query.

                <context>
                {retrieved_contents}
                </context>

                <user_query>
                {query}
                </user_query>
      - node_type: generator  # Gen-LLM
        strategy:
          metrics:
            - metric_name: bleu
            - metric_name: meteor
            - metric_name: rouge
            - metric_name: sem_score
        modules:
          - module_type: llama_index_llm
            llm: ollama
            # Other candidates: phi4, deepseek-r1:14b, aya-expanse:8b
            model: 'gemma3:12b'
            temperature: 0.0
            request_timeout: 30000.0
            batch: 4
92
example/1/config.yaml
Executable file
92
example/1/config.yaml
Executable file
@@ -0,0 +1,92 @@
---
# Trial 1 pipeline configuration: one vector store definition plus two node
# lines (retrieval, then prompt building / generation).
# NOTE(review): nesting was rebuilt from a flattened copy of this file. The
# module-level parameters agree with the `module_params` recorded in this
# trial's summary.csv files; the remaining layout is inferred - confirm.

# Vector stores; retrieval modules select one through its `name`.
vectordb:
  - name: chroma_dragonkue2
    db_type: chroma
    client_type: persistent
    # NOTE(review): spelled "drangonku" here but "dragonkue" in `name` and in
    # the reranker module; left as-is because it presumably has to match an
    # identifier registered elsewhere - confirm.
    embedding_model: huggingface_drangonku-v2-ko
    collection_name: huggingface_drangonku-v2-ko
    path: ${PROJECT_DIR}/resources/chroma

node_lines:
  - node_line_name: retrieve_node_line  # Arbitrary node line name
    nodes:
      - node_type: retrieval
        strategy:
          metrics:
            - retrieval_f1
            - retrieval_recall
            - retrieval_precision
            - retrieval_ndcg
            - retrieval_map
            - retrieval_mrr
          speed_threshold: 10
        top_k: 10
        modules:
          - module_type: bm25
            bm25_tokenizer: [ko_kiwi]  # options noted: ko_kiwi, ko_okt
          - module_type: vectordb
            vectordb: chroma_dragonkue2  # chromadb
          - module_type: hybrid_cc
            normalize_method: [mm, tmm, z, dbsf]
            target_modules: ('bm25', 'vectordb')
            # NOTE(review): written as (0.6, 0.4), i.e. larger bound first.
            # The weights recorded for this trial (0.454 - 0.516) fall inside
            # the interval, so the descending form appears to be accepted.
            weight_range: (0.6, 0.4)
            test_weight_size: 101
      - node_type: passage_reranker  # re-ranker
        strategy:
          metrics:
            - retrieval_recall
            - retrieval_precision
            - retrieval_map
        modules:
          - module_type: dragonkue2
            top_k: 5

  - node_line_name: post_retrieve_node_line  # generation node line
    nodes:
      - node_type: prompt_maker
        strategy:
          metrics:
            - metric_name: bleu
            - metric_name: meteor
            - metric_name: rouge
            - metric_name: sem_score
              # Exactly one embedding model may be given here; otherwise:
              # ValueError("Only one embedding model is supported")
              embedding_model: huggingface_drangonku-v2-ko
              # NOTE(review): `lang` followed `embedding_model` directly in
              # the flattened source; assumed to belong to this metric entry.
              lang: ko
          # Generator used to score the candidate prompts.
          generator_modules:
            - module_type: llama_index_llm
              llm: ollama
              model: 'gemma3:12b'
              request_timeout: 3000.0
        modules:
          - module_type: fstring
            prompt:
              # Korean-language prompt; `{retrieved_contents}` and `{query}`
              # are the template placeholders.
              # NOTE(review): the prompt recorded in this trial's summary.csv
              # has a trailing space on several lines; none are written here.
              # Confirm whether that whitespace matters.
              - |
                ### 작업:
                지침에 따라 제공된 컨텍스트를 활용하여 사용자 질문에 답변하세요.

                ### 지침:
                - 답을 모를 경우, 모른다고 명확히 말하세요.
                - 확신이 없다면, 사용자에게 추가 설명을 요청하세요.
                - 사용자의 질문과 동일한 언어로 답변하세요.
                - 컨텍스트가 읽기 어렵거나 품질이 낮을 경우, 이를 사용자에게 알리고 최선의 답변을 제공하세요.
                - 컨텍스트에 답이 없지만 알고 있는 내용이라면, 이를 사용자에게 설명하고 자신의 지식을 바탕으로 답변하세요.
                - XML 태그를 사용하지 마세요.

                ### 출력:
                사용자의 질문에 대해 명확하고 직접적인 답변을 제공하세요.

                <context>
                {retrieved_contents}
                </context>

                <user_query>
                {query}
                </user_query>
      - node_type: generator  # Gen-LLM
        strategy:
          metrics:
            - metric_name: bleu
            - metric_name: meteor
            - metric_name: rouge
            - metric_name: sem_score
        modules:
          - module_type: llama_index_llm
            llm: ollama
            # Other candidates: phi4, deepseek-r1:14b, aya-expanse:8b
            model: 'gemma3:12b'
            temperature: 0.0
            request_timeout: 300.0
            batch: 8
BIN
example/1/post_retrieve_node_line/generator/0.parquet
Executable file
BIN
example/1/post_retrieve_node_line/generator/0.parquet
Executable file
Binary file not shown.
BIN
example/1/post_retrieve_node_line/generator/best_0.parquet
Executable file
BIN
example/1/post_retrieve_node_line/generator/best_0.parquet
Executable file
Binary file not shown.
2
example/1/post_retrieve_node_line/generator/summary.csv
Executable file
2
example/1/post_retrieve_node_line/generator/summary.csv
Executable file
@@ -0,0 +1,2 @@
|
||||
filename,module_name,module_params,execution_time,average_output_token,bleu,meteor,rouge,sem_score,is_best
|
||||
0.parquet,LlamaIndexLLM,"{'llm': 'ollama', 'model': 'gemma3:12b', 'temperature': 0.0, 'request_timeout': 300.0, 'batch': 8}",0.8519447922706604,259.05,14.57290077698799,0.47984407229799053,0.4400396825396825,0.8177114641079747,True
|
||||
|
BIN
example/1/post_retrieve_node_line/prompt_maker/0.parquet
Executable file
BIN
example/1/post_retrieve_node_line/prompt_maker/0.parquet
Executable file
Binary file not shown.
BIN
example/1/post_retrieve_node_line/prompt_maker/best_0.parquet
Executable file
BIN
example/1/post_retrieve_node_line/prompt_maker/best_0.parquet
Executable file
Binary file not shown.
2
example/1/post_retrieve_node_line/prompt_maker/summary.csv
Executable file
2
example/1/post_retrieve_node_line/prompt_maker/summary.csv
Executable file
@@ -0,0 +1,2 @@
|
||||
filename,module_name,module_params,execution_time,average_prompt_token,is_best
|
||||
0.parquet,Fstring,"{'prompt': '### 작업: \n지침에 따라 제공된 컨텍스트를 활용하여 사용자 질문에 답변하세요. \n\n### 지침: \n- 답을 모를 경우, 모른다고 명확히 말하세요. \n- 확신이 없다면, 사용자에게 추가 설명을 요청하세요. \n- 사용자의 질문과 동일한 언어로 답변하세요. \n- 컨텍스트가 읽기 어렵거나 품질이 낮을 경우, 이를 사용자에게 알리고 최선의 답변을 제공하세요. \n- 컨텍스트에 답이 없지만 알고 있는 내용이라면, 이를 사용자에게 설명하고 자신의 지식을 바탕으로 답변하세요. \n- XML 태그를 사용하지 마세요. \n\n### 출력: \n사용자의 질문에 대해 명확하고 직접적인 답변을 제공하세요.\n\n<context>\n{retrieved_contents}\n</context>\n\n<user_query>\n{query}\n</user_query>\n'}",0.0003142237663269043,2751.85,True
|
||||
|
3
example/1/post_retrieve_node_line/summary.csv
Executable file
3
example/1/post_retrieve_node_line/summary.csv
Executable file
@@ -0,0 +1,3 @@
|
||||
node_type,best_module_filename,best_module_name,best_module_params,best_execution_time
|
||||
prompt_maker,0.parquet,Fstring,"{'prompt': '### 작업: \n지침에 따라 제공된 컨텍스트를 활용하여 사용자 질문에 답변하세요. \n\n### 지침: \n- 답을 모를 경우, 모른다고 명확히 말하세요. \n- 확신이 없다면, 사용자에게 추가 설명을 요청하세요. \n- 사용자의 질문과 동일한 언어로 답변하세요. \n- 컨텍스트가 읽기 어렵거나 품질이 낮을 경우, 이를 사용자에게 알리고 최선의 답변을 제공하세요. \n- 컨텍스트에 답이 없지만 알고 있는 내용이라면, 이를 사용자에게 설명하고 자신의 지식을 바탕으로 답변하세요. \n- XML 태그를 사용하지 마세요. \n\n### 출력: \n사용자의 질문에 대해 명확하고 직접적인 답변을 제공하세요.\n\n<context>\n{retrieved_contents}\n</context>\n\n<user_query>\n{query}\n</user_query>\n'}",0.0003142237663269
|
||||
generator,0.parquet,LlamaIndexLLM,"{'llm': 'ollama', 'model': 'gemma3:12b', 'temperature': 0.0, 'request_timeout': 300.0, 'batch': 8}",0.8519447922706604
|
||||
|
BIN
example/1/retrieve_node_line/passage_reranker/0.parquet
Executable file
BIN
example/1/retrieve_node_line/passage_reranker/0.parquet
Executable file
Binary file not shown.
BIN
example/1/retrieve_node_line/passage_reranker/best_0.parquet
Executable file
BIN
example/1/retrieve_node_line/passage_reranker/best_0.parquet
Executable file
Binary file not shown.
2
example/1/retrieve_node_line/passage_reranker/summary.csv
Executable file
2
example/1/retrieve_node_line/passage_reranker/summary.csv
Executable file
@@ -0,0 +1,2 @@
|
||||
filename,module_name,module_params,execution_time,passage_reranker_retrieval_recall,passage_reranker_retrieval_precision,passage_reranker_retrieval_map,is_best
|
||||
0.parquet,DragonKue2,{'top_k': 5},0.12188564538955689,0.3,0.06,0.18916666666666665,True
|
||||
|
BIN
example/1/retrieve_node_line/retrieval/0.parquet
Executable file
BIN
example/1/retrieve_node_line/retrieval/0.parquet
Executable file
Binary file not shown.
BIN
example/1/retrieve_node_line/retrieval/1.parquet
Executable file
BIN
example/1/retrieve_node_line/retrieval/1.parquet
Executable file
Binary file not shown.
BIN
example/1/retrieve_node_line/retrieval/2.parquet
Executable file
BIN
example/1/retrieve_node_line/retrieval/2.parquet
Executable file
Binary file not shown.
BIN
example/1/retrieve_node_line/retrieval/3.parquet
Executable file
BIN
example/1/retrieve_node_line/retrieval/3.parquet
Executable file
Binary file not shown.
BIN
example/1/retrieve_node_line/retrieval/4.parquet
Executable file
BIN
example/1/retrieve_node_line/retrieval/4.parquet
Executable file
Binary file not shown.
BIN
example/1/retrieve_node_line/retrieval/5.parquet
Executable file
BIN
example/1/retrieve_node_line/retrieval/5.parquet
Executable file
Binary file not shown.
BIN
example/1/retrieve_node_line/retrieval/best_2.parquet
Executable file
BIN
example/1/retrieve_node_line/retrieval/best_2.parquet
Executable file
Binary file not shown.
7
example/1/retrieve_node_line/retrieval/summary.csv
Executable file
7
example/1/retrieve_node_line/retrieval/summary.csv
Executable file
@@ -0,0 +1,7 @@
|
||||
filename,module_name,module_params,execution_time,retrieval_f1,retrieval_recall,retrieval_precision,retrieval_ndcg,retrieval_map,retrieval_mrr,is_best
|
||||
0.parquet,VectorDB,"{'top_k': 10, 'vectordb': 'chroma_dragonkue2'}",0.10161013603210449,0.045454545454545456,0.25,0.025,0.14013009087326042,0.10625,0.10625,False
|
||||
1.parquet,BM25,"{'top_k': 10, 'bm25_tokenizer': 'ko_kiwi'}",1.9859044432640076,0.03636363636363636,0.2,0.02,0.07248116240107563,0.034999999999999996,0.034999999999999996,False
|
||||
2.parquet,HybridCC,"{'top_k': 10, 'normalize_method': 'dbsf', 'target_modules': ('VectorDB', 'BM25'), 'weight': 0.516, 'target_module_params': ({'top_k': 10, 'vectordb': 'chroma_dragonkue2'}, {'top_k': 10, 'bm25_tokenizer': 'ko_kiwi'})}",2.087514579296112,0.06363636363636363,0.35,0.035,0.20447427813233116,0.16041666666666665,0.16041666666666665,True
|
||||
3.parquet,HybridCC,"{'top_k': 10, 'normalize_method': 'mm', 'target_modules': ('VectorDB', 'BM25'), 'weight': 0.51, 'target_module_params': ({'top_k': 10, 'vectordb': 'chroma_dragonkue2'}, {'top_k': 10, 'bm25_tokenizer': 'ko_kiwi'})}",2.087514579296112,0.06363636363636363,0.35,0.035,0.20447427813233116,0.16041666666666665,0.16041666666666665,False
|
||||
4.parquet,HybridCC,"{'top_k': 10, 'normalize_method': 'tmm', 'target_modules': ('VectorDB', 'BM25'), 'weight': 0.454, 'target_module_params': ({'top_k': 10, 'vectordb': 'chroma_dragonkue2'}, {'top_k': 10, 'bm25_tokenizer': 'ko_kiwi'})}",2.087514579296112,0.05454545454545454,0.3,0.03,0.15007396002669662,0.10499999999999998,0.10499999999999998,False
|
||||
5.parquet,HybridCC,"{'top_k': 10, 'normalize_method': 'z', 'target_modules': ('VectorDB', 'BM25'), 'weight': 0.516, 'target_module_params': ({'top_k': 10, 'vectordb': 'chroma_dragonkue2'}, {'top_k': 10, 'bm25_tokenizer': 'ko_kiwi'})}",2.087514579296112,0.06363636363636363,0.35,0.035,0.20447427813233116,0.16041666666666665,0.16041666666666665,False
|
||||
|
3
example/1/retrieve_node_line/summary.csv
Executable file
3
example/1/retrieve_node_line/summary.csv
Executable file
@@ -0,0 +1,3 @@
|
||||
node_type,best_module_filename,best_module_name,best_module_params,best_execution_time
|
||||
retrieval,2.parquet,HybridCC,"{'top_k': 10, 'normalize_method': 'dbsf', 'target_modules': ('VectorDB', 'BM25'), 'weight': 0.516, 'target_module_params': ({'top_k': 10, 'vectordb': 'chroma_dragonkue2'}, {'top_k': 10, 'bm25_tokenizer': 'ko_kiwi'})}",2.087514579296112
|
||||
passage_reranker,0.parquet,DragonKue2,{'top_k': 5},0.1218856453895568
|
||||
|
5
example/1/summary.csv
Executable file
5
example/1/summary.csv
Executable file
@@ -0,0 +1,5 @@
|
||||
node_line_name,node_type,best_module_filename,best_module_name,best_module_params,best_execution_time
|
||||
retrieve_node_line,retrieval,2.parquet,HybridCC,"{'top_k': 10, 'normalize_method': 'dbsf', 'target_modules': ('VectorDB', 'BM25'), 'weight': 0.516, 'target_module_params': ({'top_k': 10, 'vectordb': 'chroma_dragonkue2'}, {'top_k': 10, 'bm25_tokenizer': 'ko_kiwi'})}",2.087514579296112
|
||||
retrieve_node_line,passage_reranker,0.parquet,DragonKue2,{'top_k': 5},0.1218856453895568
|
||||
post_retrieve_node_line,prompt_maker,0.parquet,Fstring,"{'prompt': '### 작업: \n지침에 따라 제공된 컨텍스트를 활용하여 사용자 질문에 답변하세요. \n\n### 지침: \n- 답을 모를 경우, 모른다고 명확히 말하세요. \n- 확신이 없다면, 사용자에게 추가 설명을 요청하세요. \n- 사용자의 질문과 동일한 언어로 답변하세요. \n- 컨텍스트가 읽기 어렵거나 품질이 낮을 경우, 이를 사용자에게 알리고 최선의 답변을 제공하세요. \n- 컨텍스트에 답이 없지만 알고 있는 내용이라면, 이를 사용자에게 설명하고 자신의 지식을 바탕으로 답변하세요. \n- XML 태그를 사용하지 마세요. \n\n### 출력: \n사용자의 질문에 대해 명확하고 직접적인 답변을 제공하세요.\n\n<context>\n{retrieved_contents}\n</context>\n\n<user_query>\n{query}\n</user_query>\n'}",0.0003142237663269
|
||||
post_retrieve_node_line,generator,0.parquet,LlamaIndexLLM,"{'llm': 'ollama', 'model': 'gemma3:12b', 'temperature': 0.0, 'request_timeout': 300.0, 'batch': 8}",0.8519447922706604
|
||||
|
BIN
example/data/corpus.parquet
Executable file
BIN
example/data/corpus.parquet
Executable file
Binary file not shown.
BIN
example/data/qa.parquet
Executable file
BIN
example/data/qa.parquet
Executable file
Binary file not shown.
BIN
example/resources/bm25_ko_kiwi.pkl
Executable file
BIN
example/resources/bm25_ko_kiwi.pkl
Executable file
Binary file not shown.
BIN
example/resources/chroma/985a388c-103b-4534-a1a5-d7088ed74c0c/data_level0.bin
Executable file
BIN
example/resources/chroma/985a388c-103b-4534-a1a5-d7088ed74c0c/data_level0.bin
Executable file
Binary file not shown.
BIN
example/resources/chroma/985a388c-103b-4534-a1a5-d7088ed74c0c/header.bin
Executable file
BIN
example/resources/chroma/985a388c-103b-4534-a1a5-d7088ed74c0c/header.bin
Executable file
Binary file not shown.
Binary file not shown.
BIN
example/resources/chroma/985a388c-103b-4534-a1a5-d7088ed74c0c/length.bin
Executable file
BIN
example/resources/chroma/985a388c-103b-4534-a1a5-d7088ed74c0c/length.bin
Executable file
Binary file not shown.
BIN
example/resources/chroma/985a388c-103b-4534-a1a5-d7088ed74c0c/link_lists.bin
Executable file
BIN
example/resources/chroma/985a388c-103b-4534-a1a5-d7088ed74c0c/link_lists.bin
Executable file
Binary file not shown.
BIN
example/resources/chroma/chroma.sqlite3
Executable file
BIN
example/resources/chroma/chroma.sqlite3
Executable file
Binary file not shown.
7
example/resources/vectordb.yaml
Executable file
7
example/resources/vectordb.yaml
Executable file
@@ -0,0 +1,7 @@
---
# Vector store definition kept alongside the project resources. It carries the
# same name / collection / embedding model as the `vectordb` entry of the trial
# configs, but with a concrete relative `path`.
# NOTE(review): the alphabetical key order suggests this file is
# machine-written - confirm before editing by hand.
vectordb:
  - client_type: persistent
    collection_name: huggingface_drangonku-v2-ko
    db_type: chroma
    embedding_model: huggingface_drangonku-v2-ko
    name: chroma_dragonkue2
    # NOTE(review): relative path pointing outside `example/`; presumably
    # resolved against the working directory of the original run - verify.
    path: ../projects/daesan-dangjin_01/benchmark/resources/chroma
10
example/trial.json
Executable file
10
example/trial.json
Executable file
@@ -0,0 +1,10 @@
[
  {
    "trial_name": "0",
    "start_time": "2025-03-13 07:47:00"
  },
  {
    "trial_name": "1",
    "start_time": "2025-03-13 08:03:47"
  }
]
Reference in New Issue
Block a user