Initial commit
This commit is contained in:
4
projects/example_01/parse/parse_config.yaml
Normal file
4
projects/example_01/parse/parse_config.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
modules:
|
||||
- module_type: langchain_parse
|
||||
file_type: pdf
|
||||
parse_method: [ pdfminer, pdfplumber, pypdfium2, pypdf, pymupdf ]
|
||||
BIN
projects/example_01/parse/parsed_result.parquet
Normal file
BIN
projects/example_01/parse/parsed_result.parquet
Normal file
Binary file not shown.
BIN
projects/example_01/parse/pdf.parquet
Normal file
BIN
projects/example_01/parse/pdf.parquet
Normal file
Binary file not shown.
6
projects/example_01/parse/summary.csv
Normal file
6
projects/example_01/parse/summary.csv
Normal file
@@ -0,0 +1,6 @@
|
||||
filename,module_name,module_params,execution_time
|
||||
pdf.parquet,langchain_parse,"{'file_type': 'pdf', 'parse_method': 'pymupdf'}",0.015248891783923638
|
||||
pdf.parquet,langchain_parse,"{'file_type': 'pdf', 'parse_method': 'pypdf'}",0.15360368810048916
|
||||
pdf.parquet,langchain_parse,"{'file_type': 'pdf', 'parse_method': 'pdfplumber'}",0.42682165052832627
|
||||
pdf.parquet,langchain_parse,"{'file_type': 'pdf', 'parse_method': 'pdfminer'}",0.44084878549343204
|
||||
pdf.parquet,langchain_parse,"{'file_type': 'pdf', 'parse_method': 'pypdfium2'}",0.008509700472761944
|
||||
|
Reference in New Issue
Block a user