Skeleton Code commit
This commit is contained in:
52
workspace/convert_obj_to_md.py
Normal file
52
workspace/convert_obj_to_md.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from langchain_teddynote.document_loaders import HWPLoader
|
||||
from markitdown import MarkItDown
|
||||
|
||||
|
||||
def convert_hwp_to_md(input_path: str, output_path: str):
    """Extract the text of an HWP file and write it to ``output_path``.

    The file at ``input_path`` is loaded with ``HWPLoader``. The loader
    returns a list of document objects rather than a string, so the text
    of each one (its ``page_content``) is joined with a blank line between
    entries before being written as UTF-8.

    Args:
        input_path: Path of the HWP file to read.
        output_path: Path of the file to create or overwrite.

    Returns:
        None. The result is the file written at ``output_path``.
    """
    loader = HWPLoader(input_path)
    docs = loader.load()

    # f.write() accepts only str; passing the list from load() directly
    # raises TypeError, so flatten the documents into one string first.
    text = "\n\n".join(doc.page_content for doc in docs)

    with open(output_path, "w", encoding="UTF-8") as f:
        f.write(text)

    return None
|
||||
|
||||
|
||||
def convert_txt_to_md(input_path: str, output_path: str):
    """Placeholder for the txt-to-Markdown converter.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function always returns ``None``.
    """
    # TODO: implement the conversion.
    return
|
||||
|
||||
|
||||
def convert_html_to_md(input_path: str, output_path: str):
    """Placeholder for the HTML-to-Markdown converter.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function always returns ``None``.
    """
    # TODO: implement the conversion.
    return
|
||||
|
||||
|
||||
def convert_docx_to_md(input_path: str, output_path: str):
    """Placeholder for the docx-to-Markdown converter.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function always returns ``None``.
    """
    # TODO: implement the conversion.
    return
|
||||
|
||||
|
||||
def convert_pdf_to_md(
    input_path: str,
    output_path: str,
    *,
    docintel_endpoint: "str | None" = None,
):
    """Convert a PDF to Markdown with MarkItDown and write it to a file.

    Args:
        input_path: Path of the PDF file to convert.
        output_path: Path of the Markdown file to create or overwrite
            (written as UTF-8).
        docintel_endpoint: Optional Document Intelligence endpoint passed
            through to ``MarkItDown``. When omitted or empty, ``MarkItDown``
            is constructed without an endpoint so its default converters
            are used.

    Returns:
        None. The result is the file written at ``output_path``.
    """
    # The previous code always passed the literal README placeholder
    # "<document_intelligence_endpoint>", which is not a usable endpoint.
    # Only forward an endpoint when the caller actually supplies one.
    if docintel_endpoint:
        md = MarkItDown(docintel_endpoint=docintel_endpoint)
    else:
        md = MarkItDown()

    result = md.convert(input_path)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result.text_content)

    return None
|
||||
|
||||
|
||||
def convert_ppt_to_md(input_path: str, output_path: str):
    """Placeholder for the ppt-to-Markdown converter.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function always returns ``None``.
    """
    # TODO: implement the conversion.
    return
|
||||
|
||||
|
||||
def convert_excel_to_md(input_path: str, output_path: str):
    """Placeholder for the Excel-to-Markdown converter.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function always returns ``None``.
    """
    # TODO: implement the conversion.
    return
|
||||
|
||||
|
||||
def convert_csv_to_md(input_path: str, output_path: str):
    """Placeholder for the CSV-to-Markdown converter.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function always returns ``None``.
    """
    # TODO: implement the conversion.
    return
|
||||
|
||||
|
||||
def convert_json_to_md(input_path: str, output_path: str):
    """Placeholder for the JSON-to-Markdown converter.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function always returns ``None``.
    """
    # TODO: implement the conversion.
    return
|
||||
|
||||
|
||||
def convert_img_to_md(input_path: str, output_path: str):
    """Placeholder for the image-to-Markdown converter.

    Not implemented yet: both arguments are ignored, nothing is read or
    written, and the function always returns ``None``.
    """
    # TODO: implement the conversion.
    return
|
||||
Reference in New Issue
Block a user