feat: MD 파일 병합 및 이미지 경로 통합 스크립트 추가 (#1)
- merge_markdown.py: 96개 페이지별 MD를 단일 파일로 병합
- 이미지를 output/images/ 폴더로 통합, p{NN}_ prefix로 파일명 충돌 방지
- file_range 파라미터로 부분 테스트 가능
- docs/tutorial.md: merge 명령어 및 사용법 문서화
- docs/history: 작업 이력 파일 추가
소요 시간: 10분 | Context: input 18k / output 2k tokens
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
94
update_image_paths.py
Normal file
94
update_image_paths.py
Normal file
@@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Update image paths in markdown files to point to extracted images
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import glob
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def update_markdown_image_paths(md_path: str, output_dir: str = "output"):
    """
    Rewrite JPEG image references in a markdown file to extracted PNG paths.

    Every ``![](<path>.jpeg)`` reference whose path contains ``_page_<N>_``
    is rewritten to ``![](<stem>_extracted_images/page_<N>_img_1.png)``,
    where ``<stem>`` is the markdown file name without its extension.
    References without a page number are left untouched. The result is
    written next to the source as ``<name>_updated<ext>``; the source file
    itself is never modified.

    Args:
        md_path: Path of the markdown file to process.
        output_dir: Directory expected to contain the
            ``<stem>_extracted_images`` folder.

    Returns:
        False when the extracted images folder does not exist, True
        otherwise (including when the file needed no changes).
    """
    md_file = Path(md_path)
    base_name = md_file.stem

    # Folder (relative to output_dir) holding the images for this document.
    extracted_images_dir = f"{base_name}_extracted_images"

    extracted_images_path = os.path.join(output_dir, extracted_images_dir)
    if not os.path.exists(extracted_images_path):
        print(f"No extracted images folder found: {extracted_images_path}")
        return False

    with open(md_path, 'r', encoding='utf-8') as f:
        content = f.read()

    original_content = content
    new_count = 0

    def replace_image_path(match):
        """Return the rewritten reference, or the original text if no page number."""
        nonlocal new_count
        old_path = match.group(1)

        # e.g. "_page_1_Figure_1.jpeg" -> page number "1"
        page_match = re.search(r'_page_(\d+)_', old_path)
        if page_match:
            page_num = page_match.group(1)
            # NOTE(review): every figure on a page maps to "img_1"; confirm
            # that the extractor really emits a single image per page.
            new_path = f"{extracted_images_dir}/page_{page_num}_img_1.png"
            new_count += 1
            return f'![]({new_path})'

        return match.group(0)

    # subn reports how many references matched, so no second scan is needed.
    content, old_count = re.subn(
        r'\!\[\]\(([^)]+\.jpeg)\)', replace_image_path, content
    )

    if content == original_content:
        print(f"No changes needed for {md_file.name}")
        return True

    # Insert the suffix before the extension only. A plain str.replace would
    # also rewrite ".md" inside directory names, and would leave a path
    # without ".md" unchanged (overwriting the source file).
    root, ext = os.path.splitext(md_path)
    output_path = f"{root}_updated{ext}"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"Updated markdown saved to: {output_path}")
    print(f" Replaced {new_count} image paths (out of {old_count} references)")

    return True
|
||||
|
||||
|
||||
def update_all_markdown_files(output_dir: str = "output"):
    """
    Run the image-path update over each markdown file in ``output_dir``.

    Files whose name ends in ``_updated.md`` are skipped, since those are
    outputs of a previous run. Prints a short progress report and returns
    nothing.
    """
    separator = "=" * 60

    pending = []
    for candidate in glob.glob(os.path.join(output_dir, "*.md")):
        if candidate.endswith('_updated.md'):
            continue
        pending.append(candidate)

    if len(pending) == 0:
        print(f"No markdown files found in {output_dir}")
        return

    print(f"Found {len(pending)} markdown files")
    print(separator)

    for path in pending:
        update_markdown_image_paths(path, output_dir)
        print()

    print(separator)
    print("Done!")
|
||||
|
||||
|
||||
# Script entry point: process every markdown file in the default "output" folder.
if __name__ == "__main__":
    update_all_markdown_files()
|
||||
Reference in New Issue
Block a user