Files
BaronSSO/baron-sso/scripts/map_wasm_stack.py

241 lines
7.6 KiB
Python

#!/usr/bin/env python3
"""
Map `wasm-function[IDX]:0xOFFSET` frames from a WASM stack trace to function names and source lines.
Example usage:
python3 scripts/map_wasm_stack.py \
--wasm userfront/build/web/main.dart.wasm \
--sourcemap userfront/build/web/main.dart.wasm.map \
--frame "19112:0x2cd913" --frame "765:0x10af0e"
"""
from __future__ import annotations
import argparse
import bisect
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple
# Base64 alphabet used by source-map VLQ digits; a character's position in
# this string is its 6-bit value.
BASE64_CHARS = (
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/"
)
# Reverse lookup table: base64 character -> digit value (0..63).
BASE64_MAP = dict(zip(BASE64_CHARS, range(len(BASE64_CHARS))))
def read_u32_leb128(buf: bytes, i: int) -> Tuple[int, int]:
    """Decode one unsigned LEB128 integer from ``buf`` starting at index ``i``.

    Every byte contributes its low 7 bits, least-significant group first.
    A byte below 0x80 (high bit clear) is the last byte of the number.

    Returns:
        ``(value, next_index)`` where ``next_index`` is the position just
        past the last byte consumed.

    Raises:
        IndexError: if ``buf`` ends before a terminating byte is reached.
    """
    result = 0
    bit_pos = 0
    cursor = i
    byte = buf[cursor]
    # Continuation bytes (high bit set): fold in 7 payload bits and move on.
    while byte >= 0x80:
        result |= (byte & 0x7F) << bit_pos
        bit_pos += 7
        cursor += 1
        byte = buf[cursor]
    # Final byte has the high bit clear, so no masking is needed.
    return result | (byte << bit_pos), cursor + 1
def decode_vlq_segment(segment: str) -> List[int]:
    """Decode one base64-VLQ mapping segment into a list of signed integers.

    Each base64 character is looked up in ``BASE64_MAP``. Its low 5 bits are
    payload (least-significant group first) and bit 0x20 marks that another
    character follows for the same number. Once a number is assembled, its
    lowest bit is the sign flag and the remaining bits are the magnitude.

    Raises:
        KeyError: if ``segment`` contains a character not in ``BASE64_MAP``.
        IndexError: if ``segment`` ends while a continuation is still pending.
    """
    decoded: List[int] = []
    pos = 0
    end = len(segment)
    while pos < end:
        accum = 0
        bit_offset = 0
        more = True
        while more:
            digit = BASE64_MAP[segment[pos]]
            pos += 1
            accum |= (digit & 0x1F) << bit_offset
            bit_offset += 5
            more = bool(digit & 0x20)
        magnitude = accum >> 1
        decoded.append(-magnitude if accum & 1 else magnitude)
    return decoded
@dataclass
class SourcePoint:
    """One decoded source-map mapping entry.

    The source and name fields are ``None`` when the mapping segment did not
    carry them.
    """

    # Absolute generated column (running sum of each segment's first field).
    generated_col: int
    # Index into the source map's "sources" list, if present.
    source_index: Optional[int]
    # Source line as accumulated from the segment deltas, if present.
    source_line: Optional[int]
    # Source column as accumulated from the segment deltas, if present.
    source_col: Optional[int]
    # Index into the source map's "names" list, if present.
    name_index: Optional[int]
class WasmSourceMap:
    """In-memory view of a WASM source map, searchable by generated column.

    Attributes:
        sources: the map's "sources" list.
        names: the map's "names" list (empty when the key is absent).
        points: decoded mapping entries, in the order they appear in the map.
        columns: ``generated_col`` of every entry in ``points`` (same order),
            used as the bisect key in ``lookup``.
    """

    def __init__(self, sourcemap_path: Path):
        """Load the JSON source map at ``sourcemap_path`` and decode "mappings"."""
        raw = json.loads(sourcemap_path.read_text(encoding="utf-8"))
        self.sources: List[str] = raw["sources"]
        self.names: List[str] = raw.get("names", [])
        mappings: str = raw["mappings"]

        # A wasm source map uses a single generated line, so the mappings
        # string is split on ',' only.
        # Every segment field is a delta, so keep running totals.
        gen_col = src_idx = src_line = src_col = name_idx = 0
        decoded_points: List[SourcePoint] = []
        for segment in filter(None, mappings.split(",")):
            deltas = decode_vlq_segment(segment)
            gen_col += deltas[0]
            has_source = len(deltas) >= 4
            has_name = len(deltas) >= 5
            if has_source:
                src_idx += deltas[1]
                src_line += deltas[2]
                src_col += deltas[3]
            if has_name:
                name_idx += deltas[4]
            decoded_points.append(
                SourcePoint(
                    generated_col=gen_col,
                    source_index=src_idx if has_source else None,
                    source_line=src_line if has_source else None,
                    source_col=src_col if has_source else None,
                    name_index=name_idx if has_name else None,
                )
            )

        self.points = decoded_points
        self.columns = [point.generated_col for point in decoded_points]

    def lookup(self, offset: int) -> Optional["SourcePoint"]:
        """Return the last entry whose generated column is <= ``offset``.

        Returns ``None`` when ``offset`` lies before the first entry.
        The search is a bisect over ``self.columns``.
        """
        insert_at = bisect.bisect_right(self.columns, offset)
        return self.points[insert_at - 1] if insert_at > 0 else None

    def source_name(self, index: Optional[int]) -> Optional[str]:
        """Return ``self.sources[index]``, or ``None`` if ``index`` is ``None`` or out of range."""
        if index is not None and 0 <= index < len(self.sources):
            return self.sources[index]
        return None

    def symbol_name(self, index: Optional[int]) -> Optional[str]:
        """Return ``self.names[index]``, or ``None`` if ``index`` is ``None`` or out of range."""
        if index is not None and 0 <= index < len(self.names):
            return self.names[index]
        return None
def parse_wasm_function_names(wasm_path: Path) -> Dict[int, str]:
    """Extract function names from the custom "name" section of a wasm binary.

    Walks the top-level sections of the file. For every custom section
    (id 0) whose name is "name", reads its subsections and collects the
    entries of subsection id 1 (function names).

    Args:
        wasm_path: path to the wasm binary.

    Returns:
        Mapping of function index to function name. Empty when the binary
        has no function-name subsection.

    Raises:
        ValueError: if the file does not start with the ``\\0asm`` magic.
    """
    blob = wasm_path.read_bytes()
    if blob[:4] != b"\x00asm":
        raise ValueError(f"Not a wasm binary: {wasm_path}")

    function_names: Dict[int, str] = {}
    pos = 8  # skip the 4-byte magic and the 4-byte version
    while pos < len(blob):
        section_id = blob[pos]
        section_size, payload = read_u32_leb128(blob, pos + 1)
        section_end = payload + section_size
        if section_id == 0:  # custom section
            name_len, name_start = read_u32_leb128(blob, payload)
            name_end = name_start + name_len
            custom_name = blob[name_start:name_end].decode("utf-8", errors="replace")
            if custom_name == "name":
                k = name_end
                while k < section_end:
                    subsection_id = blob[k]
                    subsection_size, k = read_u32_leb128(blob, k + 1)
                    # The declared size is authoritative, but it must never
                    # reach past the enclosing section.
                    subsection_end = min(k + subsection_size, section_end)
                    if subsection_id == 1:  # function names
                        count, k = read_u32_leb128(blob, k)
                        for _ in range(count):
                            # A corrupt count must not walk into the bytes
                            # that follow this subsection.
                            if k >= subsection_end:
                                break
                            fn_idx, k = read_u32_leb128(blob, k)
                            nlen, k = read_u32_leb128(blob, k)
                            function_names[fn_idx] = blob[k : k + nlen].decode(
                                "utf-8", errors="replace"
                            )
                            k += nlen
                    # Always re-align to the declared end, whether the
                    # subsection was parsed or skipped.
                    k = subsection_end
        pos = section_end
    return function_names
# Matches a bare "IDX:0xOFFSET" or the raw stack-trace token
# "[prefix:]wasm-function[IDX]:0xOFFSET".
_FRAME_PATTERN = re.compile(
    r"""^\s*
    (?:(?:\S*:)?wasm-function\[(?P<wrapped>\d+)\]|(?P<bare>\d+))
    \s*:\s*
    (?P<offset>0x[0-9a-fA-F]+)
    \s*$""",
    re.VERBOSE,
)


def parse_frame(raw: str) -> Tuple[int, int]:
    """Parse one ``--frame`` value into ``(function_index, offset)``.

    Accepted forms (surrounding whitespace is ignored):

    * ``IDX:0xOFFSET``, for example ``19112:0x2cd913``
    * ``wasm-function[IDX]:0xOFFSET``, optionally preceded by a
      ``module:`` prefix, so a frame token can be pasted straight from a
      stack trace

    Args:
        raw: the frame string; IDX is decimal, OFFSET is ``0x``-prefixed hex.

    Returns:
        ``(function_index, offset)`` as integers.

    Raises:
        ValueError: if ``raw`` matches neither form.
    """
    m = _FRAME_PATTERN.match(raw)
    if not m:
        raise ValueError(f"Invalid --frame format: {raw!r} (expected IDX:0xOFFSET)")
    index_text = m.group("wrapped") or m.group("bare")
    return int(index_text), int(m.group("offset"), 16)
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of this script.

    Args:
        argv: argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so existing callers are unaffected.
            Passing an explicit list allows programmatic use.

    Returns:
        Namespace with ``wasm`` and ``sourcemap`` (``Path``) plus ``frame``
        and ``offset`` (lists of raw strings, empty when not given).
    """
    parser = argparse.ArgumentParser(description="Map wasm stack frames to source locations")
    parser.add_argument("--wasm", required=True, type=Path, help="WASM binary path")
    parser.add_argument("--sourcemap", required=True, type=Path, help="WASM sourcemap path")
    parser.add_argument(
        "--frame",
        action="append",
        default=[],
        help="Frame in IDX:0xOFFSET format (repeatable)",
    )
    parser.add_argument(
        "--offset",
        action="append",
        default=[],
        help="Offset only (hex), function index unknown",
    )
    return parser.parse_args(argv)
def main() -> None:
    """Resolve every requested frame/offset and print one JSON object per target.

    Loads the source map and the wasm function names, collects targets from
    ``--frame`` (function index + offset) and ``--offset`` (offset only,
    parsed as hex), then prints the lookup result for each target as a JSON
    line. Source line and column are printed as stored value + 1.

    Raises:
        SystemExit: if neither ``--frame`` nor ``--offset`` was given.
    """
    cli = parse_args()
    source_map = WasmSourceMap(cli.sourcemap)
    function_names = parse_wasm_function_names(cli.wasm)

    targets: List[Tuple[Optional[int], int]] = [parse_frame(spec) for spec in cli.frame]
    targets.extend((None, int(raw_offset, 16)) for raw_offset in cli.offset)
    if not targets:
        raise SystemExit("No targets. Provide --frame or --offset.")

    for fn_idx, off in targets:
        hit = source_map.lookup(off)
        # Fill the record with "unknown" first; dict insertion order fixes
        # the key order of the emitted JSON.
        report = {
            "function_index": fn_idx,
            "function_name": None if fn_idx is None else function_names.get(fn_idx),
            "offset_hex": hex(off),
            "mapped_generated_col_hex": None,
            "source": None,
            "source_line": None,
            "source_column": None,
            "symbol": None,
        }
        if hit:
            if hit.generated_col is not None:
                report["mapped_generated_col_hex"] = hex(hit.generated_col)
            report["source"] = source_map.source_name(hit.source_index)
            if hit.source_line is not None:
                report["source_line"] = hit.source_line + 1
            if hit.source_col is not None:
                report["source_column"] = hit.source_col + 1
            report["symbol"] = source_map.symbol_name(hit.name_index)
        print(json.dumps(report, ensure_ascii=False))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()