refactor(#28): IMP-28 L4 _parse_json dedup (4 modules -> src/json_utils)

Consolidate duplicate _parse_json helpers from content_editor.py /
design_director.py / kei_client.py (fuller form) and pipeline.py (simple form)
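into shared src/json_utils.parse_json (strict superset). All 18 call-sites
preserved via `parse_json as _parse_json` alias import; no behavior change for
the three fuller-form modules, while pipeline.py (simple form) additionally
gains the list-prefix cleanup fallback.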

- src/json_utils.py (new): shared helper, fenced/plain-fence/bare-brace patterns
  + list-prefix cleanup fallback.
- tests/test_json_utils.py (new): 9 unit tests pinning parser semantics.
- src/content_editor.py / design_director.py: remove local helper +
  unused `import json` / `import re`.
- src/kei_client.py / pipeline.py: remove local helper; `json` / `re` retained
  (used elsewhere).

Targeted tests: 9 passed; full pytest: 374 passed (the 3 collection errors under
scripts/ are pre-existing: they reproduce on baseline 909bf75 and are unrelated
to IMP-28).
This commit is contained in:
2026-05-20 20:44:19 +09:00
parent 909bf75edc
commit 265d70ed91
6 changed files with 105 additions and 127 deletions

View File

@@ -8,9 +8,7 @@ Kei API 필수. fallback 없음. 성공할 때까지 무한 재시도.
"""
from __future__ import annotations
import json
import logging
import re
from typing import Any
import anthropic
@@ -18,6 +16,7 @@ import httpx
from src.config import settings
from src.design_director import BLOCK_SLOTS
from src.json_utils import parse_json as _parse_json
from src.sse_utils import stream_sse_tokens
logger = logging.getLogger(__name__)
@@ -441,38 +440,3 @@ async def fill_candidates(
logger.warning(f"[Phase P] 꼭지 {tid}: 텍스트 편집 파싱 실패")
return candidates
def _parse_json(text: str) -> dict[str, Any] | None:
    """Extract JSON from free-form text.

    Also handles responses in which the Kei API prefixes lines with a
    markdown list marker ("- ").

    Three regex patterns are tried in order -- a ```json fenced block, a
    plain ``` fenced block, then the span from the first "{" to the last
    "}" -- first against the original text and then against a copy in which
    every line has its leading whitespace and list marker removed.

    Returns:
        The value produced by ``json.loads`` for the first candidate that
        decodes, or ``None`` when no candidate decodes.
    """
    # NOTE(review): a fenced block holding a non-object JSON value (e.g. a
    # list) is returned as-is by json.loads, despite the dict annotation.
    # Preprocessing: strip the markdown list marker ("- " / "* ") from the
    # start of each line.
    lines = text.split("\n")
    cleaned_lines = []
    for line in lines:
        stripped = line.lstrip()
        if stripped.startswith("- "):
            cleaned_lines.append(stripped[2:])
        elif stripped.startswith("* "):
            cleaned_lines.append(stripped[2:])
        else:
            cleaned_lines.append(stripped)
    cleaned = "\n".join(cleaned_lines)
    # Try the original text first, then the cleaned version.
    for target in [text, cleaned]:
        patterns = [
            r"```json\s*(.*?)```",
            r"```\s*(.*?)```",
            r"(\{.*\})",
        ]
        for pattern in patterns:
            match = re.search(pattern, target, re.DOTALL)
            if match:
                try:
                    return json.loads(match.group(1).strip())
                except json.JSONDecodeError:
                    # This candidate is not valid JSON; fall through to the
                    # next pattern.
                    continue
    return None

View File

@@ -5,9 +5,7 @@ Step B: 프리셋 안에서 블록 매핑 + 글자 수 가이드 (Sonnet)
"""
from __future__ import annotations
import json
import logging
import re
from pathlib import Path
from typing import Any
@@ -15,6 +13,7 @@ import httpx
import yaml
from src.config import settings
from src.json_utils import parse_json as _parse_json
from src.sse_utils import stream_sse_tokens
logger = logging.getLogger(__name__)
@@ -1066,38 +1065,3 @@ def _validate_height_budget(blocks: list[dict], preset: dict) -> list[dict]:
})
return overflows
def _parse_json(text: str) -> dict[str, Any] | None:
    """Extract JSON from free-form text.

    Also handles responses in which the Kei API prefixes lines with a
    markdown list marker ("- ").

    Three regex patterns are tried in order -- a ```json fenced block, a
    plain ``` fenced block, then the span from the first "{" to the last
    "}" -- first against the original text and then against a copy in which
    every line has its leading whitespace and list marker removed.

    Returns:
        The value produced by ``json.loads`` for the first candidate that
        decodes, or ``None`` when no candidate decodes.
    """
    # NOTE(review): a fenced block holding a non-object JSON value (e.g. a
    # list) is returned as-is by json.loads, despite the dict annotation.
    # Preprocessing: strip the markdown list marker ("- " / "* ") from the
    # start of each line.
    lines = text.split("\n")
    cleaned_lines = []
    for line in lines:
        stripped = line.lstrip()
        if stripped.startswith("- "):
            cleaned_lines.append(stripped[2:])
        elif stripped.startswith("* "):
            cleaned_lines.append(stripped[2:])
        else:
            cleaned_lines.append(stripped)
    cleaned = "\n".join(cleaned_lines)
    # Try the original text first, then the cleaned version.
    for target in [text, cleaned]:
        patterns = [
            r"```json\s*(.*?)```",
            r"```\s*(.*?)```",
            r"(\{.*\})",
        ]
        for pattern in patterns:
            match = re.search(pattern, target, re.DOTALL)
            if match:
                try:
                    return json.loads(match.group(1).strip())
                except json.JSONDecodeError:
                    # This candidate is not valid JSON; fall through to the
                    # next pattern.
                    continue
    return None

46
src/json_utils.py Normal file
View File

@@ -0,0 +1,46 @@
"""JSON 추출 공용 유틸리티.
Kei / Claude API 응답 텍스트에서 JSON 객체를 추출한다.
content_editor, design_director, kei_client, pipeline 공통 헬퍼.
응답이 마크다운 리스트 접두사("- " / "* ")로 감싸진 경우에도 처리.
"""
from __future__ import annotations
import json
import re
from typing import Any
_JSON_PATTERNS: tuple[str, ...] = (
r"```json\s*(.*?)```",
r"```\s*(.*?)```",
r"(\{.*\})",
)
def parse_json(text: str) -> dict[str, Any] | None:
"""텍스트에서 JSON을 추출한다.
Kei API가 마크다운 리스트 접두사(- )를 붙여 응답하는 경우에도 처리.
원본 → 리스트 접두사 제거 버전 순서로 fenced JSON / plain fenced / 베어 brace 패턴을
차례로 시도한다. 모두 실패하면 None.
"""
lines = text.split("\n")
cleaned_lines: list[str] = []
for line in lines:
stripped = line.lstrip()
if stripped.startswith("- ") or stripped.startswith("* "):
cleaned_lines.append(stripped[2:])
else:
cleaned_lines.append(stripped)
cleaned = "\n".join(cleaned_lines)
for target in (text, cleaned):
for pattern in _JSON_PATTERNS:
match = re.search(pattern, target, re.DOTALL)
if match:
try:
return json.loads(match.group(1).strip())
except json.JSONDecodeError:
continue
return None

View File

@@ -13,6 +13,7 @@ from typing import Any
import httpx
from src.config import settings
from src.json_utils import parse_json as _parse_json
from src.sse_utils import stream_sse_tokens
logger = logging.getLogger(__name__)
@@ -889,42 +890,6 @@ async def call_kei_overflow_judgment(
return None
def _parse_json(text: str) -> dict[str, Any] | None:
    """Extract JSON from free-form text.

    Also handles responses in which the Kei API prefixes lines with a
    markdown list marker ("- ").

    Three regex patterns are tried in order -- a ```json fenced block, a
    plain ``` fenced block, then the span from the first "{" to the last
    "}" -- first against the original text and then against a copy in which
    every line has its leading whitespace and list marker removed.

    Returns:
        The value produced by ``json.loads`` for the first candidate that
        decodes, or ``None`` when no candidate decodes.
    """
    # NOTE(review): a fenced block holding a non-object JSON value (e.g. a
    # list) is returned as-is by json.loads, despite the dict annotation.
    # Preprocessing: strip the markdown list marker ("- " / "* ") from the
    # start of each line.
    # Covers the case where the Kei API wraps its JSON in a markdown list.
    lines = text.split("\n")
    cleaned_lines = []
    for line in lines:
        stripped = line.lstrip()
        if stripped.startswith("- "):
            cleaned_lines.append(stripped[2:])
        elif stripped.startswith("* "):
            cleaned_lines.append(stripped[2:])
        else:
            cleaned_lines.append(stripped)
    cleaned = "\n".join(cleaned_lines)
    # Try both the original and the cleaned version.
    for target in [text, cleaned]:
        patterns = [
            r"```json\s*(.*?)```",
            r"```\s*(.*?)```",
            r"(\{.*\})",
        ]
        for pattern in patterns:
            match = re.search(pattern, target, re.DOTALL)
            if match:
                try:
                    return json.loads(match.group(1).strip())
                except json.JSONDecodeError:
                    # This candidate is not valid JSON; fall through to the
                    # next pattern.
                    continue
    return None
async def select_best_candidate(
topic_results: list[dict[str, Any]],
analysis: dict[str, Any],

View File

@@ -36,6 +36,7 @@ from src.image_utils import get_image_sizes, embed_images
from src.space_allocator import calculate_container_specs
from src.slide_measurer import measure_rendered_heights, capture_slide_screenshot
from src.config import settings
from src.json_utils import parse_json as _parse_json
logger = logging.getLogger(__name__)
@@ -2080,20 +2081,3 @@ def _convert_kei_judgment(
new_adjs.append(adj)
review_result["adjustments"] = new_adjs
def _parse_json(text: str) -> dict[str, Any] | None:
    """Extract JSON from free-form text.

    Three regex patterns are tried in order against the text as given: a
    ```json fenced block, a plain ``` fenced block, then the span from the
    first "{" to the last "}". Unlike the shared ``parse_json`` helper, no
    list-marker cleanup pass is attempted.

    Returns:
        The value produced by ``json.loads`` for the first candidate that
        decodes, or ``None`` when no candidate decodes.
    """
    # NOTE(review): a fenced block holding a non-object JSON value (e.g. a
    # list) is returned as-is by json.loads, despite the dict annotation.
    patterns = [
        r"```json\s*(.*?)```",
        r"```\s*(.*?)```",
        r"(\{.*\})",
    ]
    for pattern in patterns:
        match = re.search(pattern, text, re.DOTALL)
        if match:
            try:
                return json.loads(match.group(1).strip())
            except json.JSONDecodeError:
                # This candidate is not valid JSON; fall through to the next
                # pattern.
                continue
    return None

55
tests/test_json_utils.py Normal file
View File

@@ -0,0 +1,55 @@
"""Unit tests for ``src.json_utils.parse_json``.
IMP-28 L4 — `_parse_json` dedup unit u2.
Pins the shared helper semantics that previously lived in
content_editor.py / design_director.py / kei_client.py (fuller form) and
pipeline.py (simple form). The fuller form is a strict superset of the
simple form; these tests cover both axes.
"""
from __future__ import annotations
from src.json_utils import parse_json
def test_parse_json_fenced_json_block():
    """A ```json fenced block is extracted from surrounding text."""
    payload = 'prefix\n```json\n{"a": 1, "b": "x"}\n```\nsuffix'
    expected = {"a": 1, "b": "x"}
    assert parse_json(payload) == expected
def test_parse_json_plain_fenced_block():
    """A fenced block without a language tag is extracted as well."""
    payload = 'prefix\n```\n{"x": 2}\n```\nsuffix'
    expected = {"x": 2}
    assert parse_json(payload) == expected
def test_parse_json_bare_braces():
    """An unfenced object embedded in other text is found via its braces."""
    payload = 'noise before {"y": 3, "z": [1, 2]} noise after'
    expected = {"y": 3, "z": [1, 2]}
    assert parse_json(payload) == expected
def test_parse_json_list_prefix_dash_cleanup():
    """An object behind a "- " list marker is still parsed."""
    payload = '- {"b": 4}'
    expected = {"b": 4}
    assert parse_json(payload) == expected
def test_parse_json_list_prefix_star_cleanup():
    """An object behind a "* " list marker is still parsed."""
    payload = '* {"c": 5}'
    expected = {"c": 5}
    assert parse_json(payload) == expected
def test_parse_json_no_json_returns_none():
    """Text with no JSON candidate at all yields None."""
    outcome = parse_json("no json here at all")
    assert outcome is None
def test_parse_json_malformed_returns_none():
    """Malformed input with an unclosed brace yields None."""
    outcome = parse_json("{ invalid json")
    assert outcome is None
def test_parse_json_prefix_free_no_op():
    """A bare object with no fence or list marker parses unchanged."""
    payload = '{"d": 6}'
    expected = {"d": 6}
    assert parse_json(payload) == expected
def test_parse_json_fenced_preferred_over_bare_braces():
    """The ```json fence wins over an earlier bare-brace object."""
    payload = 'outer {"outer": true} ```json\n{"inner": 1}\n```'
    expected = {"inner": 1}
    assert parse_json(payload) == expected