프로젝트 분리 이동

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
minsung
2026-05-20 14:28:27 +09:00
commit ccba1266b5
24 changed files with 7900 additions and 0 deletions

565
tools/detect_all_objects.py Normal file
View File

@@ -0,0 +1,565 @@
"""
이미지에서 객체를 SAM3.1로 검출하여 색상별로 시각화.
전략:
- cols×rows 타일로 분할 (overlap 중복)
- --tiles 로 처리할 타일 번호 지정 (예: 9-24, 1,5,9, 전체=all)
- --categories 로 JSON 설정 파일 로드 (카테고리·프롬프트·색상 정의)
- 타일당 SAM3.1 1회 호출 (모든 카테고리 프롬프트 합산)
- 병렬 처리(ThreadPoolExecutor) → NMS → 색상 시각화
사용법:
python tools/detect_all_objects.py \\
--input data/역사이미지/slope/DJI_20260306113839_0005.JPG \\
--categories configs/railway_zone.json \\
--tiles 9-24 \\
--cols 8 --rows 6 --overlap 0.10 --workers 4
"""
import argparse
import base64
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
import cv2
import numpy as np
import requests
SAM3_SERVER = "http://localhost:8000"
SAM3_MODEL_ID = "segment_anything_3"
# 기본 카테고리 (--categories 미지정 시 사용)
_DEFAULT_CATEGORIES = [
{"name": "railway", "prompt": "railroad track, railway rail, steel rail",
"keywords": ["railroad", "railway rail", "steel rail"], "color_bgr": [0, 200, 255]},
{"name": "catenary_pole", "prompt": "railway catenary pole, overhead line pole, catenary mast",
"keywords": ["catenary pole", "overhead line pole", "catenary mast"], "color_bgr": [255, 130, 0]},
{"name": "highway", "prompt": "highway road, expressway asphalt, paved road lane",
"keywords": ["highway", "expressway", "paved road"], "color_bgr": [160, 160, 160]},
{"name": "vehicle", "prompt": "car, truck, vehicle, automobile",
"keywords": ["car", "truck", "vehicle", "automobile"], "color_bgr": [0, 255, 0]},
{"name": "building", "prompt": "building, house, rooftop, structure",
"keywords": ["building", "house", "rooftop", "structure"], "color_bgr": [50, 50, 255]},
{"name": "farmland", "prompt": "farmland, agricultural field, cropland, vegetable garden",
"keywords": ["farmland", "field", "cropland", "vegetable"], "color_bgr": [50, 200, 50]},
{"name": "vegetation", "prompt": "trees, forest, shrubs, vegetation, bushes",
"keywords": ["tree", "forest", "shrub", "vegetation", "bush"], "color_bgr": [0, 120, 0]},
{"name": "guardrail", "prompt": "guardrail, highway barrier, road fence, crash barrier",
"keywords": ["guardrail", "highway barrier", "road fence", "crash barrier"], "color_bgr": [200, 0, 200]},
{"name": "bridge", "prompt": "bridge, overpass, viaduct",
"keywords": ["bridge", "overpass", "viaduct"], "color_bgr": [0, 165, 255]},
{"name": "wire", "prompt": "overhead wire, catenary wire, electric cable line",
"keywords": ["catenary wire", "overhead wire", "electric cable"], "color_bgr": [200, 200, 255]},
]
# ── 타일 번호 파싱 ────────────────────────────────────────────────────────────
def parse_tiles(tile_str: str, total: int) -> set:
"""'9-24', '1,3,5', 'all' → tile index 집합 (1-based)."""
if tile_str.lower() == "all":
return set(range(1, total + 1))
result = set()
for part in tile_str.split(","):
part = part.strip()
if "-" in part:
a, b = part.split("-", 1)
result.update(range(int(a), int(b) + 1))
else:
result.add(int(part))
return result
# ── 카테고리 로드 ─────────────────────────────────────────────────────────────
def load_categories(json_path: str | None) -> list:
if json_path:
data = json.loads(Path(json_path).read_text(encoding="utf-8"))
return data["categories"]
return _DEFAULT_CATEGORIES
def label_to_category(label: str, categories: list) -> int:
label_l = label.lower()
for i, cat in enumerate(categories):
for kw in cat["keywords"]:
if kw in label_l:
return i
return -1
def build_combined_prompt(categories: list) -> str:
return ", ".join(cat["prompt"] for cat in categories)
# ── SAM3 호출 ─────────────────────────────────────────────────────────────────
def encode_image(image_bgr: np.ndarray, max_size: int = 1280) -> tuple:
h, w = image_bgr.shape[:2]
scale = 1.0
if max(h, w) > max_size:
scale = max_size / max(h, w)
image_bgr = cv2.resize(image_bgr, (int(w * scale), int(h * scale)))
_, buf = cv2.imencode(".jpg", image_bgr, [cv2.IMWRITE_JPEG_QUALITY, 90])
return base64.b64encode(buf).decode("utf-8"), scale
def sam3_segment_tile(tile_bgr: np.ndarray, prompt: str, conf: float) -> list:
b64, scale = encode_image(tile_bgr)
payload = {
"model": SAM3_MODEL_ID,
"image": b64,
"params": {"text_prompt": prompt, "conf_threshold": conf,
"show_masks": True, "show_boxes": False},
}
try:
r = requests.post(f"{SAM3_SERVER}/v1/predict", json=payload, timeout=120)
r.raise_for_status()
resp = r.json()
if not resp.get("success"):
return []
shapes = resp.get("data", {}).get("shapes", [])
shapes = [s if isinstance(s, dict) else s.dict() for s in shapes]
if scale < 1.0:
inv = 1.0 / scale
for s in shapes:
if s.get("shape_type") == "polygon":
s["points"] = [[x * inv, y * inv] for x, y in s["points"]]
return [s for s in shapes if s.get("shape_type") == "polygon"]
except Exception:
return []
# ── NMS ───────────────────────────────────────────────────────────────────────
def _bbox(pts):
xs = [p[0] for p in pts]; ys = [p[1] for p in pts]
return min(xs), min(ys), max(xs), max(ys)
def _nms_core(shapes: list, iou_thresh: float) -> list:
"""IoU 기반 NMS. shapes 각 항목에 score 필드 필요."""
if not shapes:
return []
bboxes = np.array([_bbox(s["points"]) for s in shapes], dtype=np.float32)
scores = np.array([float(s.get("score", 0.5)) for s in shapes])
order = scores.argsort()[::-1]
keep = []
while len(order):
i = order[0]; keep.append(i)
if len(order) == 1: break
xx1 = np.maximum(bboxes[i,0], bboxes[order[1:],0])
yy1 = np.maximum(bboxes[i,1], bboxes[order[1:],1])
xx2 = np.minimum(bboxes[i,2], bboxes[order[1:],2])
yy2 = np.minimum(bboxes[i,3], bboxes[order[1:],3])
inter = np.maximum(0, xx2-xx1) * np.maximum(0, yy2-yy1)
a_i = (bboxes[i,2]-bboxes[i,0])*(bboxes[i,3]-bboxes[i,1])
a_j = (bboxes[order[1:],2]-bboxes[order[1:],0])*(bboxes[order[1:],3]-bboxes[order[1:],1])
iou = inter / (a_i + a_j - inter + 1e-6)
order = order[1:][iou < iou_thresh]
return [shapes[i] for i in keep]
def nms_shapes(shapes: list, iou_thresh: float = 0.4) -> list:
return _nms_core(shapes, iou_thresh)
def _poly_orient(points: list, H: int, W: int) -> str: # post_merge_poles.py에서도 사용
"""폴리곤 장축 방향 판별 (render_skeleton_overlay.py 동일 로직).
V: 장축이 이미지 중심에서 방사형 방향과 정렬 (cos_sim > 0.7) → 세로 기둥
H: 장축이 radial 직교 방향 → 수평 빔
?: aspect ratio < 1.3 으로 판별 불가
"""
pts = np.array(points, dtype=np.float32)
rect = cv2.minAreaRect(pts)
(rx, ry), (rw, rh), angle = rect
if min(rw, rh) < 1:
return '?'
ar = max(rw, rh) / min(rw, rh)
if ar < 1.3:
return '?'
long_angle_deg = angle if rw >= rh else angle + 90
lx = float(np.cos(np.radians(long_angle_deg)))
ly = float(np.sin(np.radians(long_angle_deg)))
img_cx, img_cy = W / 2.0, H / 2.0
rdx, rdy = rx - img_cx, ry - img_cy
radial_norm = (rdx ** 2 + rdy ** 2) ** 0.5
if radial_norm < 1:
return '?'
rdx, rdy = rdx / radial_norm, rdy / radial_norm
cos_sim = abs(lx * rdx + ly * rdy)
return 'V' if cos_sim > 0.7 else 'H'
def merge_nonramen_poles(shapes: list, H: int, W: int,
x_overlap_thresh: float = 0.30,
y_gap_thresh: int = 150) -> list:
"""타일 경계 분할된 전철주 병합 — V+V 조합만 허용.
_poly_orient로 각 폴리곤 V/H 분류.
두 폴리곤 모두 V(세로 기둥)이고 공간 기준 충족 시만 병합.
H(수평 빔) 포함 쌍 = 라멘 관련 조각 → 병합 건너뜀.
"""
if len(shapes) <= 1:
return shapes
orients = [_poly_orient(s["points"], H, W) for s in shapes]
v_count = sum(1 for o in orients if o == 'V')
h_count = sum(1 for o in orients if o == 'H')
print(f" [orient] V={v_count}, H={h_count}, ?={len(orients)-v_count-h_count}")
def get_bbox(s):
xs = [p[0] for p in s["points"]]; ys = [p[1] for p in s["points"]]
return min(xs), min(ys), max(xs), max(ys)
def x_overlap_ratio(b1, b2):
ox = min(b1[2], b2[2]) - max(b1[0], b2[0])
ux = max(b1[2], b2[2]) - min(b1[0], b2[0])
return ox / ux if ux > 0 else 0.0
def y_gap(b1, b2):
return max(0.0, max(b1[1], b2[1]) - min(b1[3], b2[3]))
def merge_two(s1, s2):
mask = np.zeros((H, W), dtype=np.uint8)
for s in (s1, s2):
cv2.fillPoly(mask, [np.array(s["points"], dtype=np.int32)], 255)
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if not contours:
return s1
c = max(contours, key=cv2.contourArea)
eps = 0.002 * cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, eps, True)
merged = dict(s1)
merged["points"] = [[float(p[0][0]), float(p[0][1])] for p in approx]
merged["score"] = max(float(s1.get("score", 0)), float(s2.get("score", 0)))
return merged
merged_flags = [False] * len(shapes)
result = []
merged_count = 0
for i in range(len(shapes)):
if merged_flags[i]:
continue
cur = shapes[i]
cur_ori = orients[i]
cb = get_bbox(cur)
for j in range(i + 1, len(shapes)):
if merged_flags[j]:
continue
if cur_ori != 'V' or orients[j] != 'V':
continue # 둘 다 V가 아니면 병합 안 함
jb = get_bbox(shapes[j])
if x_overlap_ratio(cb, jb) >= x_overlap_thresh and y_gap(cb, jb) <= y_gap_thresh:
cur = merge_two(cur, shapes[j])
cur_ori = 'V'
cb = get_bbox(cur)
merged_flags[j] = True
merged_count += 1
result.append(cur)
print(f" [merge] 병합={merged_count}")
return result
def cross_class_nms(buckets: list, categories: list, iou_thresh: float) -> list:
"""클래스 간 NMS: 동일 영역에 다른 클래스가 중복 검출될 때 우선순위 높은 쪽 보존.
정렬 기준: (priority 오름차순, score 내림차순)
→ priority 낮은 값(=중요 클래스)이 우선 보존됨.
"""
# 모든 shape에 클래스 인덱스 태깅
tagged = []
for i, shapes in enumerate(buckets):
priority = categories[i].get("priority", 99)
for s in shapes:
tagged.append((priority, -float(s.get("score", 0.5)), i, s))
# priority 오름차순, score 내림차순 정렬
tagged.sort(key=lambda x: (x[0], x[1]))
if not tagged:
return [[] for _ in buckets]
all_shapes = [t[3] for t in tagged]
cls_ids = [t[2] for t in tagged]
bboxes = np.array([_bbox(s["points"]) for s in all_shapes], dtype=np.float32)
suppressed = [False] * len(all_shapes)
for i in range(len(all_shapes)):
if suppressed[i]:
continue
for j in range(i + 1, len(all_shapes)):
if suppressed[j]:
continue
xx1 = max(bboxes[i,0], bboxes[j,0])
yy1 = max(bboxes[i,1], bboxes[j,1])
xx2 = min(bboxes[i,2], bboxes[j,2])
yy2 = min(bboxes[i,3], bboxes[j,3])
inter = max(0, xx2-xx1) * max(0, yy2-yy1)
if inter == 0:
continue
a_i = (bboxes[i,2]-bboxes[i,0])*(bboxes[i,3]-bboxes[i,1])
a_j = (bboxes[j,2]-bboxes[j,0])*(bboxes[j,3]-bboxes[j,1])
iou = inter / (a_i + a_j - inter + 1e-6)
if iou >= iou_thresh:
suppressed[j] = True # i가 우선순위 높으므로 j 제거
new_buckets = [[] for _ in buckets]
for i, (keep, cls_i, s) in enumerate(zip(suppressed, cls_ids, all_shapes)):
if not keep:
new_buckets[cls_i].append(s)
return new_buckets
# ── 타일 그리드 검출 (병렬) ───────────────────────────────────────────────────
def detect_tiled(image_bgr: np.ndarray, cols: int, rows: int, overlap: float,
conf: float, workers: int, tile_filter: set,
prompt: str) -> list:
H, W = image_bgr.shape[:2]
base_w = W / cols
base_h = H / rows
pad_x = int(base_w * overlap)
pad_y = int(base_h * overlap)
tiles = []
for r in range(rows):
for c in range(cols):
idx = r * cols + c + 1
if idx not in tile_filter:
continue
x0 = max(0, int(c * base_w) - pad_x)
x1 = min(W, int((c + 1) * base_w) + pad_x)
y0 = max(0, int(r * base_h) - pad_y)
y1 = min(H, int((r + 1) * base_h) + pad_y)
tiles.append((idx, x0, y0, x1, y1))
total = len(tiles)
done = [0]
all_shapes = []
def process(args):
idx, x0, y0, x1, y1 = args
tile = image_bgr[y0:y1, x0:x1]
shapes = sam3_segment_tile(tile, prompt, conf)
for s in shapes:
s["points"] = [[px + x0, py + y0] for px, py in s["points"]]
return shapes
with ThreadPoolExecutor(max_workers=workers) as ex:
futs = {ex.submit(process, t): t for t in tiles}
for fut in as_completed(futs):
all_shapes.extend(fut.result())
done[0] += 1
print(f" 타일 {done[0]:02d}/{total} 완료, 누적 {len(all_shapes)}", end="\r")
print()
return all_shapes
# ── 시각화 ────────────────────────────────────────────────────────────────────
def draw_detections(image_bgr: np.ndarray, buckets: list,
categories: list, tile_filter: set,
cols: int, rows: int, overlap: float) -> np.ndarray:
vis = image_bgr.copy()
H, W = vis.shape[:2]
# 처리된 타일 경계 표시
base_w = W / cols
base_h = H / rows
for r in range(rows):
for c in range(cols):
idx = r * cols + c + 1
if idx in tile_filter:
bx0, by0 = int(c * base_w), int(r * base_h)
bx1, by1 = min(W, int((c+1)*base_w)), min(H, int((r+1)*base_h))
cv2.rectangle(vis, (bx0, by0), (bx1, by1), (255, 255, 255), 1)
# 마스크 + 순번 레이블
font = cv2.FONT_HERSHEY_SIMPLEX
font_sc = max(0.4, min(W, H) / 8000)
thickness = max(1, int(font_sc * 2))
for i, cat in enumerate(categories):
color = tuple(cat["color_bgr"])
prefix = cat["name"][:3].upper() # 예: RAI, CAT, BRA …
for seq, s in enumerate(buckets[i], start=1):
pts = np.array(s["points"], dtype=np.int32)
overlay = vis.copy()
cv2.fillPoly(overlay, [pts], color)
cv2.addWeighted(overlay, 0.35, vis, 0.65, 0, vis)
cv2.polylines(vis, [pts], True, color, 2)
# 무게중심에 순번 표시
cx = int(np.mean(pts[:, 0]))
cy = int(np.mean(pts[:, 1]))
score = float(s.get("score", 0.0))
label = f"{prefix}{seq:03d} {score:.2f}"
(tw, th), _ = cv2.getTextSize(label, font, font_sc, thickness)
tx, ty = cx - tw // 2, cy + th // 2
# 배경 박스는 검정, 텍스트는 흰색으로 변경
cv2.rectangle(vis, (tx - 2, ty - th - 2), (tx + tw + 2, ty + 2),
(0, 0, 0), -1)
cv2.putText(vis, label, (tx, ty), font, font_sc,
(255, 255, 255), thickness, cv2.LINE_AA)
# 범례
lx, ly = W - 280, 20
panel_h = len(categories) * 24 + 10
vis[0:panel_h, lx-8:W] = (vis[0:panel_h, lx-8:W] * 0.35).astype(np.uint8)
for i, cat in enumerate(categories):
color = tuple(cat["color_bgr"])
prefix = cat["name"][:3].upper()
cv2.rectangle(vis, (lx, ly-13), (lx+15, ly+3), color, -1)
cv2.putText(vis, f"[{prefix}] {cat['name']} ({len(buckets[i])})",
(lx+20, ly), cv2.FONT_HERSHEY_SIMPLEX, 0.50, color, 1, cv2.LINE_AA)
ly += 24
return vis
# ── 메인 ──────────────────────────────────────────────────────────────────────
def main():
ap = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
description=__doc__)
ap.add_argument("--input", required=True, help="입력 이미지 경로")
ap.add_argument("--output", default=None, help="출력 이미지 경로 (기본: 입력파일명_out.jpg)")
ap.add_argument("--categories", default=None, help="카테고리 JSON 경로 (기본: 내장 10개)")
ap.add_argument("--tiles", default="all", help="처리할 타일 번호: 9-24 / 1,5,9 / all (기본: all)")
ap.add_argument("--cols", type=int, default=8, help="가로 타일 수 (기본: 8)")
ap.add_argument("--rows", type=int, default=6, help="세로 타일 수 (기본: 6)")
ap.add_argument("--overlap", type=float, default=0.10, help="타일 중복 비율 (기본: 0.10)")
ap.add_argument("--conf", type=float, default=0.20, help="SAM3 신뢰도 임계값 (기본: 0.20)")
ap.add_argument("--workers", type=int, default=4, help="병렬 스레드 수 (기본: 4)")
ap.add_argument("--save-labels", action="store_true", help="YOLO 폴리곤 포맷 .txt 라벨 파일 저장")
ap.add_argument("--save-json", action="store_true", help="AnyLabeling JSON 포맷 저장 (railway.json 동일 양식)")
args = ap.parse_args()
img_path = Path(args.input)
buf = np.fromfile(str(img_path), dtype=np.uint8)
image_bgr = cv2.imdecode(buf, cv2.IMREAD_COLOR)
if image_bgr is None:
print(f"이미지 로드 실패: {img_path}"); return
H, W = image_bgr.shape[:2]
total_tiles = args.cols * args.rows
tile_filter = parse_tiles(args.tiles, total_tiles)
categories = load_categories(args.categories)
combined_prompt = build_combined_prompt(categories)
# cross-class NMS IoU: JSON > 기본값 0.45
cc_iou = 0.45
if args.categories:
raw = json.loads(Path(args.categories).read_text(encoding="utf-8"))
cc_iou = raw.get("cross_class_nms_iou", cc_iou)
# SAM3 호출용 conf = 모든 카테고리 conf 중 최솟값 (낮은 쪽부터 받아 후처리로 필터)
sam3_conf = min(cat.get("conf", args.conf) for cat in categories)
print(f"이미지 : {W}×{H}")
print(f"타일 그리드: {args.cols}×{args.rows}={total_tiles}개 | 처리 대상: {sorted(tile_filter)}")
print(f"카테고리 : {len(categories)}개 | 중복: {args.overlap*100:.0f}%")
print(f"SAM3 conf : {sam3_conf} (전체 최솟값) | cross-class NMS IoU: {cc_iou}")
print(f"SAM3 호출 : {len(tile_filter)}회 | 병렬: {args.workers}스레드\n")
t0 = time.time()
all_shapes = detect_tiled(image_bgr, args.cols, args.rows, args.overlap,
sam3_conf, args.workers, tile_filter, combined_prompt)
print(f"전체 검출 {len(all_shapes)}개 → 분류 + per-class conf 필터 + NMS...")
buckets = [[] for _ in categories]
unmatched = 0
for s in all_shapes:
idx = label_to_category(s.get("label", ""), categories)
if idx < 0:
unmatched += 1
continue
# per-class conf 필터
cat_conf = categories[idx].get("conf", args.conf)
if float(s.get("score", 0.0)) < cat_conf:
continue
buckets[idx].append(s)
# 클래스 내 NMS
print(" [1] 클래스 내 NMS")
for i, cat in enumerate(categories):
before = len(buckets[i])
buckets[i] = nms_shapes(buckets[i])
print(f" {cat['name']:18s}: {before:3d}{len(buckets[i]):3d}개 (conf≥{cat.get('conf', args.conf)})")
# 클래스 간 NMS
total_before = sum(len(b) for b in buckets)
print(f" [2] cross-class NMS (IoU≥{cc_iou})")
buckets = cross_class_nms(buckets, categories, cc_iou)
total_after = sum(len(b) for b in buckets)
print(f" {total_before}개 → {total_after}")
if unmatched:
print(f" (미분류/conf미달 {unmatched}개 제외)")
print(f"\n완료: {time.time()-t0:.0f}")
vis = draw_detections(image_bgr, buckets, categories,
tile_filter, args.cols, args.rows, args.overlap)
h, w = vis.shape[:2]
if max(h, w) > 4096:
s = 4096 / max(h, w)
vis = cv2.resize(vis, (int(w*s), int(h*s)))
if args.output:
out_path = Path(args.output)
out_path.parent.mkdir(parents=True, exist_ok=True)
else:
tile_tag = args.tiles.replace(",", "_").replace("-", "to")
cat_tag = Path(args.categories).stem if args.categories else "default"
out_dir = Path("output") / "detect" / img_path.stem
out_dir.mkdir(parents=True, exist_ok=True)
base_name = f"tiles{tile_tag}_{cat_tag}"
n = 1
while True:
out_path = out_dir / f"{base_name}_{n:03d}.jpg"
if not out_path.exists():
break
n += 1
cv2.imencode(".jpg", vis, [cv2.IMWRITE_JPEG_QUALITY, 93])[1].tofile(str(out_path))
print(f"저장: {out_path}")
if args.save_labels:
label_path = out_path.with_suffix(".txt")
with open(label_path, "w", encoding="utf-8") as f:
for cls_idx, shapes in enumerate(buckets):
for s in shapes:
pts_norm = [[px / W, py / H] for px, py in s["points"]]
coords = " ".join(f"{x:.6f} {y:.6f}" for x, y in pts_norm)
f.write(f"{cls_idx} {coords}\n")
print(f"라벨 저장: {label_path}")
if args.save_json:
import json as _json
json_shapes = []
for cls_idx, shapes in enumerate(buckets):
cat_name = categories[cls_idx]["name"] if cls_idx < len(categories) else str(cls_idx)
for s in shapes:
json_shapes.append({
"label": cat_name,
"score": float(s.get("score", 0.0)),
"points": [[float(px), float(py)] for px, py in s["points"]],
"group_id": None,
"description": None,
"shape_type": "polygon",
"flags": None,
})
anylabel = {
"version": "3.3.9",
"flags": {},
"shapes": json_shapes,
"imagePath": img_path.name,
"imageData": None,
"imageHeight": H,
"imageWidth": W,
}
json_path = out_path.with_suffix(".json")
json_path.write_text(_json.dumps(anylabel, ensure_ascii=False, indent=2),
encoding="utf-8")
print(f"JSON 저장: {json_path}")
if __name__ == "__main__":
main()