Initial commit

This commit is contained in:
2025-03-14 17:28:01 +09:00
commit ba9c1a4a5f
225 changed files with 22467 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
# Catalogue of parse modules that each target `file_type: all_files`.
# You can use only one of the following modules at a time.
# NOTE(review): all five entries are active as written; presumably the
# unused ones should be removed or commented out before running - confirm.
modules:
  # Use Directory Parse
  - module_type: langchain_parse
    file_type: all_files
    parse_method: directory
  # Use Unstructured
  - module_type: langchain_parse
    file_type: all_files
    parse_method: unstructured
  # Use Upstage Document Parse
  - module_type: langchain_parse
    file_type: all_files
    parse_method: upstagedocumentparse
  # Use Naver Clova OCR
  - module_type: clova
    file_type: all_files
    table_detection: true
  # Use Llama Parse
  - module_type: llamaparse
    file_type: all_files
    result_type: markdown
    language: ko
    use_vendor_multimodal_model: true
    vendor_multimodal_model_name: openai-gpt-4o-mini

View File

@@ -0,0 +1,26 @@
# One entry per file type: each list item binds a single `file_type` to a
# `langchain_parse` parse method. Every key below a `- module_type` line
# belongs to that same list item.
modules:
  # PDF
  - module_type: langchain_parse
    file_type: pdf
    parse_method: pdfminer
  # CSV
  - module_type: langchain_parse
    file_type: csv
    parse_method: csv
  # JSON
  - module_type: langchain_parse
    file_type: json
    parse_method: json
    # presumably a jq expression selecting the field to extract from each
    # JSON document - confirm against the loader's documentation
    jq_schema: .content
  # Markdown
  - module_type: langchain_parse
    file_type: md
    parse_method: unstructuredmarkdown
  # HTML
  - module_type: langchain_parse
    file_type: html
    parse_method: bshtml
  # XML
  - module_type: langchain_parse
    file_type: xml
    parse_method: unstructuredxml

View File

@@ -0,0 +1,12 @@
# Single `table_hybrid_parse` entry for PDF files. It names one module for
# text (`text_parse_module`) and one for tables (`table_parse_module`), each
# with its own parameter mapping.
# NOTE(review): presumably pages are routed to one module or the other
# depending on whether they contain tables - confirm with the module docs.
modules:
  - module_type: table_hybrid_parse
    file_type: pdf
    text_parse_module: langchain_parse
    # Parameters handed to the text parse module.
    text_params:
      parse_method: pdfplumber
    table_parse_module: llamaparse
    # Parameters handed to the table parse module.
    table_params:
      result_type: markdown
      language: ko
      use_vendor_multimodal_model: true
      vendor_multimodal_model_name: openai-gpt-4o-mini

View File

@@ -0,0 +1,11 @@
# Three parse module entries, each targeting `file_type: all_files`.
modules:
  # Spelled `llamaparse` (not `llama_parse`) to match the identifier used by
  # the other parse configs in this repository.
  # NOTE(review): confirm the spelling against the parse module registry.
  - module_type: llamaparse
    file_type: all_files
    result_type: markdown
    language: ko
  - module_type: clova
    file_type: all_files
    table_detection: true
  - module_type: langchain_parse
    file_type: all_files
    parse_method: upstagedocumentparse

View File

@@ -0,0 +1,8 @@
# Single `llamaparse` entry applied to all files, producing markdown output
# with the vendor multimodal model option switched on.
modules:
  - module_type: llamaparse
    file_type: all_files
    result_type: markdown
    language: ko
    use_vendor_multimodal_model: true
    vendor_multimodal_model_name: openai-gpt-4o-mini
    # presumably makes the parser use the caller's own vendor API key rather
    # than a service-provided one - TODO confirm
    use_own_key: true

View File

@@ -0,0 +1,10 @@
# Three parse module entries, each targeting `file_type: all_files`.
modules:
  - module_type: langchain_parse
    file_type: all_files
    parse_method: upstagedocumentparse
  # Spelled `llamaparse` (not `llama_parse`) to match the identifier used by
  # the other parse configs in this repository.
  # NOTE(review): confirm the spelling against the parse module registry.
  - module_type: llamaparse
    file_type: all_files
    result_type: markdown
  - module_type: clova
    file_type: all_files
    table_detection: true

View File

@@ -0,0 +1,4 @@
# Minimal parse config: one `langchain_parse` entry that handles PDF files
# with the `pdfminer` parse method.
modules:
  - module_type: langchain_parse
    file_type: pdf
    parse_method: pdfminer