diff --git a/src/content_editor.py b/src/content_editor.py index 79b1247..9ce9fd9 100644 --- a/src/content_editor.py +++ b/src/content_editor.py @@ -8,9 +8,7 @@ Kei API 필수. fallback 없음. 성공할 때까지 무한 재시도. """ from __future__ import annotations -import json import logging -import re from typing import Any import anthropic @@ -18,6 +16,7 @@ import httpx from src.config import settings from src.design_director import BLOCK_SLOTS +from src.json_utils import parse_json as _parse_json from src.sse_utils import stream_sse_tokens logger = logging.getLogger(__name__) @@ -441,38 +440,3 @@ async def fill_candidates( logger.warning(f"[Phase P] 꼭지 {tid}: 텍스트 편집 파싱 실패") return candidates - - -def _parse_json(text: str) -> dict[str, Any] | None: - """텍스트에서 JSON을 추출한다. - - Kei API가 마크다운 리스트 접두사(- )를 붙여 응답하는 경우에도 처리. - """ - # 전처리: 각 줄 앞의 마크다운 리스트 접두사(- ) 제거 - lines = text.split("\n") - cleaned_lines = [] - for line in lines: - stripped = line.lstrip() - if stripped.startswith("- "): - cleaned_lines.append(stripped[2:]) - elif stripped.startswith("* "): - cleaned_lines.append(stripped[2:]) - else: - cleaned_lines.append(stripped) - cleaned = "\n".join(cleaned_lines) - - # 원본 먼저 시도 → 클린 버전 시도 - for target in [text, cleaned]: - patterns = [ - r"```json\s*(.*?)```", - r"```\s*(.*?)```", - r"(\{.*\})", - ] - for pattern in patterns: - match = re.search(pattern, target, re.DOTALL) - if match: - try: - return json.loads(match.group(1).strip()) - except json.JSONDecodeError: - continue - return None diff --git a/src/design_director.py b/src/design_director.py index bc59a04..25fa894 100644 --- a/src/design_director.py +++ b/src/design_director.py @@ -5,9 +5,7 @@ Step B: 프리셋 안에서 블록 매핑 + 글자 수 가이드 (Sonnet) """ from __future__ import annotations -import json import logging -import re from pathlib import Path from typing import Any @@ -15,6 +13,7 @@ import httpx import yaml from src.config import settings +from src.json_utils import parse_json as _parse_json from src.sse_utils import 
# Extraction patterns, tried in this order: a ```json fenced block, an
# anonymous fenced block, then the widest "{ ... }" span in the text.
_JSON_PATTERNS: tuple[str, ...] = (
    r"```json\s*(.*?)```",
    r"```\s*(.*?)```",
    r"(\{.*\})",
)

# Compiled once at import time. DOTALL lets a payload span several lines.
_COMPILED_JSON_PATTERNS: tuple[re.Pattern[str], ...] = tuple(
    re.compile(pattern, re.DOTALL) for pattern in _JSON_PATTERNS
)

# Markdown list markers that may be prepended to each response line.
# Both are two characters long, which is what the ``[2:]`` slice relies on.
_LIST_PREFIXES: tuple[str, ...] = ("- ", "* ")


def _strip_list_prefixes(text: str) -> str:
    """Return *text* with leading whitespace and one list marker removed per line."""
    cleaned_lines: list[str] = []
    for line in text.split("\n"):
        stripped = line.lstrip()
        if stripped.startswith(_LIST_PREFIXES):
            stripped = stripped[2:]
        cleaned_lines.append(stripped)
    return "\n".join(cleaned_lines)


def _decode_leading_object(text: str) -> dict[str, Any] | None:
    """Decode the JSON object that starts at the first ``{`` of *text*.

    Anything after the end of that object is ignored. Only the first ``{`` is
    tried on purpose: digging into later braces could hand back an inner
    fragment of a truncated reply instead of signalling failure.
    """
    start = text.find("{")
    if start == -1:
        return None
    try:
        value, _end = json.JSONDecoder().raw_decode(text[start:])
    except json.JSONDecodeError:
        return None
    # A document that begins with "{" can only decode to a dict.
    return value


def parse_json(text: str) -> dict[str, Any] | None:
    """Extract a JSON object from an API response text.

    The response may wrap the JSON in a fenced block, surround it with prose,
    or prefix every line with a markdown list marker ("- " or "* ").

    Strategy:
      1. On the raw text, then on the text with list markers stripped, try
         each pattern in ``_JSON_PATTERNS`` (```json fence, plain fence,
         widest brace span) and parse the captured payload.
      2. If nothing matched, decode the object starting at the first ``{``
         and ignore whatever follows it. This rescues replies where the
         greedy brace pattern swallowed trailing text containing ``}`` or a
         second object.

    Args:
        text: Raw response text.

    Returns:
        The first payload that parses to a JSON object (``dict``), or ``None``
        when the text is empty or no object can be decoded. Payloads that
        parse to a non-object value (list, number, string, ...) are skipped
        so the result always honours the declared return type.
    """
    if not text:
        return None

    targets = (text, _strip_list_prefixes(text))

    for target in targets:
        for pattern in _COMPILED_JSON_PATTERNS:
            match = pattern.search(target)
            if match is None:
                continue
            try:
                value = json.loads(match.group(1).strip())
            except json.JSONDecodeError:
                continue
            if isinstance(value, dict):
                return value

    # Last resort: the regex candidates were all unusable.
    for target in targets:
        value = _decode_leading_object(target)
        if value is not None:
            return value
    return None
- """ - # 전처리: 각 줄 앞의 마크다운 리스트 접두사(- ) 제거 - # Kei API가 JSON을 마크다운 리스트로 감싸서 응답하는 경우 대응 - lines = text.split("\n") - cleaned_lines = [] - for line in lines: - stripped = line.lstrip() - if stripped.startswith("- "): - cleaned_lines.append(stripped[2:]) - elif stripped.startswith("* "): - cleaned_lines.append(stripped[2:]) - else: - cleaned_lines.append(stripped) - cleaned = "\n".join(cleaned_lines) - - # 원본 + 클린 버전 둘 다 시도 - for target in [text, cleaned]: - patterns = [ - r"```json\s*(.*?)```", - r"```\s*(.*?)```", - r"(\{.*\})", - ] - for pattern in patterns: - match = re.search(pattern, target, re.DOTALL) - if match: - try: - return json.loads(match.group(1).strip()) - except json.JSONDecodeError: - continue - return None - - async def select_best_candidate( topic_results: list[dict[str, Any]], analysis: dict[str, Any], diff --git a/src/pipeline.py b/src/pipeline.py index f81d9c6..685a061 100644 --- a/src/pipeline.py +++ b/src/pipeline.py @@ -36,6 +36,7 @@ from src.image_utils import get_image_sizes, embed_images from src.space_allocator import calculate_container_specs from src.slide_measurer import measure_rendered_heights, capture_slide_screenshot from src.config import settings +from src.json_utils import parse_json as _parse_json logger = logging.getLogger(__name__) @@ -2080,20 +2081,3 @@ def _convert_kei_judgment( new_adjs.append(adj) review_result["adjustments"] = new_adjs - - -def _parse_json(text: str) -> dict[str, Any] | None: - """텍스트에서 JSON을 추출한다.""" - patterns = [ - r"```json\s*(.*?)```", - r"```\s*(.*?)```", - r"(\{.*\})", - ] - for pattern in patterns: - match = re.search(pattern, text, re.DOTALL) - if match: - try: - return json.loads(match.group(1).strip()) - except json.JSONDecodeError: - continue - return None diff --git a/tests/test_json_utils.py b/tests/test_json_utils.py new file mode 100644 index 0000000..04ca5be --- /dev/null +++ b/tests/test_json_utils.py @@ -0,0 +1,55 @@ +"""Unit tests for ``src.json_utils.parse_json``. 
# Regression tests for the shared ``parse_json`` helper (IMP-28 L4, dedup
# unit u2). They pin the semantics of the copies that used to live in
# content_editor.py, design_director.py and kei_client.py (fuller form) and in
# pipeline.py (simple form, without list-prefix cleanup).


def test_parse_json_fenced_json_block():
    """A ```json fenced payload is extracted from surrounding text."""
    response = 'prefix\n```json\n{"a": 1, "b": "x"}\n```\nsuffix'
    expected = {"a": 1, "b": "x"}
    assert parse_json(response) == expected


def test_parse_json_plain_fenced_block():
    """A fence without a language tag is accepted as well."""
    response = 'prefix\n```\n{"x": 2}\n```\nsuffix'
    expected = {"x": 2}
    assert parse_json(response) == expected


def test_parse_json_bare_braces():
    """An unfenced object embedded in prose is found via its braces."""
    response = 'noise before {"y": 3, "z": [1, 2]} noise after'
    expected = {"y": 3, "z": [1, 2]}
    assert parse_json(response) == expected


def test_parse_json_list_prefix_dash_cleanup():
    """A leading "- " list marker does not prevent extraction."""
    response = '- {"b": 4}'
    expected = {"b": 4}
    assert parse_json(response) == expected


def test_parse_json_list_prefix_star_cleanup():
    """A leading "* " list marker does not prevent extraction."""
    response = '* {"c": 5}'
    expected = {"c": 5}
    assert parse_json(response) == expected


def test_parse_json_no_json_returns_none():
    """Text without any JSON yields None."""
    outcome = parse_json("no json here at all")
    assert outcome is None


def test_parse_json_malformed_returns_none():
    """An unterminated object yields None instead of raising."""
    outcome = parse_json("{ invalid json")
    assert outcome is None


def test_parse_json_prefix_free_no_op():
    """A bare object with no decoration parses unchanged."""
    response = '{"d": 6}'
    expected = {"d": 6}
    assert parse_json(response) == expected


def test_parse_json_fenced_preferred_over_bare_braces():
    """A fenced payload wins over an earlier unfenced object."""
    response = 'outer {"outer": true} ```json\n{"inner": 1}\n```'
    expected = {"inner": 1}
    assert parse_json(response) == expected