Initial commit
This commit is contained in:
@@ -0,0 +1,25 @@
|
||||
# You can use only one of the following modules at a time.
|
||||
modules:
  # Option 1: langchain_parse using the `directory` parse method.
  - module_type: langchain_parse
    file_type: all_files
    parse_method: directory

  # Option 2: langchain_parse using the `unstructured` parse method.
  - module_type: langchain_parse
    file_type: all_files
    parse_method: unstructured

  # Option 3: Upstage Document Parse, selected through langchain_parse.
  - module_type: langchain_parse
    file_type: all_files
    parse_method: upstagedocumentparse

  # Option 4: Naver Clova OCR with table detection switched on.
  - module_type: clova
    file_type: all_files
    table_detection: true

  # Option 5: Llama Parse with markdown output, the `ko` language code,
  # and the vendor multimodal model enabled.
  - module_type: llamaparse
    file_type: all_files
    result_type: markdown
    language: ko
    use_vendor_multimodal_model: true
    vendor_multimodal_model_name: openai-gpt-4o-mini
|
||||
@@ -0,0 +1,26 @@
|
||||
# One langchain_parse entry per file type; each entry names the
# parse_method applied to files of that type.
modules:
  # PDF files -> pdfminer
  - module_type: langchain_parse
    file_type: pdf
    parse_method: pdfminer

  # CSV files -> csv
  - module_type: langchain_parse
    file_type: csv
    parse_method: csv

  # JSON files -> json, with jq_schema set to `.content`
  # NOTE(review): jq_schema is presumably a jq-style selector for the
  # field to extract; confirm against the parser documentation.
  - module_type: langchain_parse
    file_type: json
    parse_method: json
    jq_schema: .content

  # Markdown files -> unstructuredmarkdown
  - module_type: langchain_parse
    file_type: md
    parse_method: unstructuredmarkdown

  # HTML files -> bshtml
  - module_type: langchain_parse
    file_type: html
    parse_method: bshtml

  # XML files -> unstructuredxml
  - module_type: langchain_parse
    file_type: xml
    parse_method: unstructuredxml
|
||||
@@ -0,0 +1,12 @@
|
||||
# Hybrid PDF parsing: one module is named for text and another for tables,
# each with its own parameter mapping.
modules:
  - module_type: table_hybrid_parse
    file_type: pdf

    # Text side: langchain_parse with the pdfplumber parse method.
    text_parse_module: langchain_parse
    text_params:
      parse_method: pdfplumber

    # Table side: llamaparse with markdown output, the `ko` language code,
    # and the vendor multimodal model enabled.
    table_parse_module: llamaparse
    table_params:
      result_type: markdown
      language: ko
      use_vendor_multimodal_model: true
      vendor_multimodal_model_name: openai-gpt-4o-mini
|
||||
11
autorag-workspace/example/sample_config/parse/parse_ko.yaml
Normal file
11
autorag-workspace/example/sample_config/parse/parse_ko.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
# Parse configuration listing three candidate parsers, each applied to
# all file types.
modules:
  # Llama Parse with markdown output and the `ko` language code.
  # The module_type is spelled `llamaparse` (no underscore).
  - module_type: llamaparse
    file_type: all_files
    result_type: markdown
    language: ko

  # Clova with table detection switched on.
  - module_type: clova
    file_type: all_files
    table_detection: true

  # Upstage Document Parse, selected through langchain_parse.
  - module_type: langchain_parse
    file_type: all_files
    parse_method: upstagedocumentparse
|
||||
@@ -0,0 +1,8 @@
|
||||
# Single Llama Parse module with the vendor multimodal model enabled.
modules:
  - module_type: llamaparse
    file_type: all_files
    result_type: markdown
    language: ko
    # Multimodal settings.
    # NOTE(review): use_own_key presumably makes the parser use the
    # caller's own vendor API key; confirm against the LlamaParse docs.
    use_vendor_multimodal_model: true
    vendor_multimodal_model_name: openai-gpt-4o-mini
    use_own_key: true
|
||||
10
autorag-workspace/example/sample_config/parse/parse_ocr.yaml
Normal file
10
autorag-workspace/example/sample_config/parse/parse_ocr.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
# OCR-oriented parse configuration listing three candidate parsers, each
# applied to all file types.
modules:
  # Upstage Document Parse, selected through langchain_parse.
  - module_type: langchain_parse
    file_type: all_files
    parse_method: upstagedocumentparse

  # Llama Parse with markdown output.
  # The module_type is spelled `llamaparse` (no underscore).
  - module_type: llamaparse
    file_type: all_files
    result_type: markdown

  # Clova with table detection switched on.
  - module_type: clova
    file_type: all_files
    table_detection: true
|
||||
@@ -0,0 +1,4 @@
|
||||
# Minimal parse configuration: PDF files only, parsed by langchain_parse
# with the pdfminer parse method.
modules:
  - module_type: langchain_parse
    file_type: pdf
    parse_method: pdfminer
|
||||
Reference in New Issue
Block a user