Cleanup: Deleting 03.Code/업로드용/domain/hwpx/hwpx_utils.py

This commit is contained in:
2026-03-19 14:03:03 +09:00
parent 6ff8f84021
commit 71ef8e0f1b

View File

@@ -1,8 +0,0 @@
"""
HWPX document helpers: HTML clean-up and flattening utilities.

The functions here strip presentation attributes from parsed HTML nodes,
flatten container elements into an ordered list of nodes, restyle
table-of-contents entries, and split a report into its TOC / summary /
content parts.

NOTE(review): the original (Korean) module description was mis-encoded and
could not be recovered; this text describes only what the code below does.
"""

import os
import re
import json
from bs4 import BeautifulSoup

# Elements passed through untouched (their attributes are never stripped).
_ATOMIC_TAGS = ('table', 'img', 'figure')

# Wrapper elements that are dissolved: their children replace them in the
# flattened output.
_CONTAINER_TAGS = ('div', 'section', 'article')

# An element keeps its ``class`` attribute when any of its class names
# starts with one of these prefixes.
_KEPT_CLASS_PREFIXES = ('toc-', 'cover-', 'highlight-')


def _class_list(node):
    """Return the class names of *node* as a list of strings.

    BeautifulSoup normally exposes ``class`` as a list, but it can be a
    plain string (for example when multi-valued attributes are disabled or
    the attribute was assigned by hand).  Normalising here keeps the callers
    from iterating a string character by character or doing substring
    matches against it.
    """
    classes = node.get('class', [])
    if not classes:
        return []
    if isinstance(classes, str):
        return classes.split()
    return list(classes)


def detox(node):
    """Strip presentation attributes from *node* in place.

    Only *node* itself is modified; its descendants are not visited.

    * ``table``, ``img`` and ``figure`` elements are left untouched.
    * The inline ``style`` attribute is removed.
    * The ``class`` attribute is removed unless one of the class names
      starts with ``toc-``, ``cover-`` or ``highlight-``.

    Args:
        node: A parsed element exposing ``name``, ``has_attr``, ``get`` and
            item deletion (a BeautifulSoup ``Tag``).

    Returns:
        None.
    """
    # Atomic blocks keep every attribute.
    if node.name in _ATOMIC_TAGS:
        return

    # 1. Drop the inline style.
    if node.has_attr('style'):
        del node['style']

    # 2. Drop the class unless it carries a TOC / cover / highlight marker.
    if node.has_attr('class'):
        keep = any(
            name.startswith(_KEPT_CLASS_PREFIXES)
            for name in _class_list(node)
        )
        if not keep:
            del node['class']


def get_flat_nodes(element):
    """Return the element children of *element* as a flat, ordered list.

    Every child element is cleaned with :func:`detox`.  ``div``, ``section``
    and ``article`` children are not returned themselves; they are replaced
    by their own flattened children, recursively.  Children without a tag
    name (text and comment nodes in BeautifulSoup) are skipped, so bare text
    placed directly inside a container does not appear in the result.

    Args:
        element: A parsed element whose ``children`` are iterated.

    Returns:
        list: The cleaned element nodes in document order.
    """
    nodes = []
    for child in element.children:
        # Text / comment nodes have no tag name.
        if child.name is None:
            continue

        detox(child)

        if child.name in _CONTAINER_TAGS:
            # Dissolve the wrapper and splice in its children.
            nodes.extend(get_flat_nodes(child))
        else:
            nodes.append(child)

    return nodes


def format_toc(element):
    """Apply inline styles to every ``li`` found under *element*, in place.

    Items whose class list contains ``toc-lvl-1`` are rendered bold with a
    top margin; all other items are indented and slightly smaller.  Any
    existing ``style`` value on an item is overwritten.

    Args:
        element: A parsed element that is searched with ``find_all('li')``.

    Returns:
        None.
    """
    for li in element.find_all('li'):
        if 'toc-lvl-1' in _class_list(li):
            li['style'] = "font-weight: bold; margin-top: 10px;"
        else:
            li['style'] = "margin-left: 20px; font-size: 0.9em;"


def render_report(html_content, config):
    """Split an HTML report into flattened TOC, summary and content parts.

    The markup is parsed with ``html.parser``; the elements with ids
    ``box-toc``, ``box-summary`` and ``box-content`` are looked up and each
    one is flattened with :func:`get_flat_nodes`.  A missing section yields
    an empty list.

    Args:
        html_content: The HTML markup to parse.
        config: Currently unused; kept so existing callers keep working.

    Returns:
        dict: Keys ``'toc'``, ``'summary'`` and ``'content'``, each mapping
        to a list of the serialised (``str``) flattened nodes.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # 1. Locate the three report sections by id.
    section_ids = {
        'toc': 'box-toc',
        'summary': 'box-summary',
        'content': 'box-content',
    }

    # 2. Flatten each section that is present and serialise its nodes.
    report = {}
    for key, element_id in section_ids.items():
        section = soup.find(id=element_id)
        flat = get_flat_nodes(section) if section is not None else []
        report[key] = [str(node) for node in flat]

    return report