53 lines
1.2 KiB
Python
53 lines
1.2 KiB
Python
from langchain_teddynote.document_loaders import HWPLoader
|
|
from markitdown import MarkItDown
|
|
|
|
|
|
def convert_hwp_to_md(input_path: str, output_path: str):
    """Extract the text of an HWP file and write it to ``output_path``.

    Args:
        input_path: Path of the ``.hwp`` file to read.
        output_path: Path of the Markdown file to create or overwrite (UTF-8).

    Returns:
        None. The result is the file written at ``output_path``.
    """
    loader = HWPLoader(input_path)
    docs = loader.load()

    # ``load()`` returns a list of document objects, not a string, so passing
    # it straight to ``f.write`` raises TypeError. Join the text of every
    # document instead; an empty list yields an empty file.
    text = "\n\n".join(doc.page_content for doc in docs)

    # Open the output only after loading succeeded so that a failed load does
    # not leave behind an empty file.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text)

    return None
|
|
|
|
|
|
def convert_txt_to_md(input_path: str, output_path: str):
    """Placeholder for plain-text to Markdown conversion.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function returns None.
    """
    # TODO: implement the conversion.
    return
|
|
|
|
|
|
def convert_html_to_md(input_path: str, output_path: str):
    """Placeholder for HTML to Markdown conversion.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function returns None.
    """
    # TODO: implement the conversion.
    return
|
|
|
|
|
|
def convert_docx_to_md(input_path: str, output_path: str):
    """Placeholder for DOCX to Markdown conversion.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function returns None.
    """
    # TODO: implement the conversion.
    return
|
|
|
|
|
|
def convert_pdf_to_md(
    input_path: str,
    output_path: str,
    docintel_endpoint: "str | None" = None,
):
    """Convert a PDF file to Markdown with MarkItDown and save the result.

    Args:
        input_path: Path of the PDF file to convert.
        output_path: Path of the Markdown file to create or overwrite (UTF-8).
        docintel_endpoint: Optional Azure Document Intelligence endpoint URL.
            When omitted or empty, MarkItDown is constructed without it.

    Returns:
        None. The result is the file written at ``output_path``.
    """
    # The endpoint used to be hard-coded to the documentation placeholder
    # "<document_intelligence_endpoint>", which is not a usable URL. Only
    # forward an endpoint when the caller actually supplies one.
    if docintel_endpoint:
        md = MarkItDown(docintel_endpoint=docintel_endpoint)
    else:
        md = MarkItDown()

    result = md.convert(input_path)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result.text_content)

    return None
|
|
|
|
|
|
def convert_ppt_to_md(input_path: str, output_path: str):
    """Placeholder for PowerPoint to Markdown conversion.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function returns None.
    """
    # TODO: implement the conversion.
    return
|
|
|
|
|
|
def convert_excel_to_md(input_path: str, output_path: str):
    """Placeholder for Excel to Markdown conversion.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function returns None.
    """
    # TODO: implement the conversion.
    return
|
|
|
|
|
|
def convert_csv_to_md(input_path: str, output_path: str):
    """Placeholder for CSV to Markdown conversion.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function returns None.
    """
    # TODO: implement the conversion.
    return
|
|
|
|
|
|
def convert_json_to_md(input_path: str, output_path: str):
    """Placeholder for JSON to Markdown conversion.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function returns None.
    """
    # TODO: implement the conversion.
    return
|
|
|
|
|
|
def convert_img_to_md(input_path: str, output_path: str):
    """Placeholder for image to Markdown conversion.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function returns None.
    """
    # TODO: implement the conversion.
    return
|