forked from baron/baron-sso
241 lines
7.6 KiB
Python
241 lines
7.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Map `wasm-function[IDX]:0xOFFSET` entries from a WASM stack trace to function names and source lines.

Usage example:
|
|
python3 scripts/map_wasm_stack.py \
|
|
--wasm userfront/build/web/main.dart.wasm \
|
|
--sourcemap userfront/build/web/main.dart.wasm.map \
|
|
--frame "19112:0x2cd913" --frame "765:0x10af0e"
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import bisect
|
|
import json
|
|
import re
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
|
|
# Base64 alphabet used for the VLQ digits in a source map's "mappings" string.
BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
# Reverse lookup: base64 character -> its position (0..63) in BASE64_CHARS.
BASE64_MAP = dict(zip(BASE64_CHARS, range(len(BASE64_CHARS))))
|
|
|
|
|
|
def read_u32_leb128(buf: bytes, i: int) -> Tuple[int, int]:
    """Decode one unsigned LEB128 integer from ``buf`` starting at index ``i``.

    Every byte contributes its low 7 bits, least-significant group first.
    A byte below 0x80 (continuation bit clear) terminates the value.

    Args:
        buf: Buffer that holds the encoded value.
        i: Index of the first byte of the encoded value.

    Returns:
        ``(value, next_index)`` where ``next_index`` is the index just past
        the last byte that was consumed.

    Raises:
        ValueError: If ``buf`` ends before a terminating byte is reached.
    """
    start = i
    end = len(buf)
    value = 0
    shift = 0
    while i < end:
        b = buf[i]
        i += 1
        value |= (b & 0x7F) << shift
        if b < 0x80:
            return value, i
        shift += 7
    # Running off the end means the binary is truncated or corrupt; report it
    # as a data error instead of leaking a bare IndexError.
    raise ValueError(f"Truncated LEB128 value at offset {start}")
|
|
|
|
|
|
def decode_vlq_segment(segment: str) -> List[int]:
    """Decode one base64-VLQ segment into its list of signed integers.

    Each base64 digit carries 5 payload bits (low bits first) and a
    continuation flag (0x20). Once a value is complete, its lowest bit is the
    sign and the remaining bits are the magnitude.

    Args:
        segment: A single comma-free segment of a "mappings" string.

    Returns:
        The decoded integers, in order of appearance.

    Raises:
        KeyError: If ``segment`` contains a character outside BASE64_CHARS.
        IndexError: If ``segment`` ends while a continuation flag is set.
    """
    decoded: List[int] = []
    pos = 0
    length = len(segment)
    while pos < length:
        accum = 0
        bit_pos = 0
        more = True
        while more:
            digit = BASE64_MAP[segment[pos]]
            pos += 1
            accum |= (digit & 0x1F) << bit_pos
            bit_pos += 5
            more = bool(digit & 0x20)
        # Lowest bit is the sign flag; everything above it is the magnitude.
        magnitude, is_negative = divmod(accum, 2)
        decoded.append(-magnitude if is_negative else magnitude)
    return decoded
|
|
|
|
|
|
@dataclass
class SourcePoint:
    """One decoded entry of a source map's "mappings" string.

    All values are absolute (already accumulated from the per-segment
    deltas). The optional fields are ``None`` when the segment that produced
    this point did not carry them.
    """

    # Absolute generated column; compared against the looked-up offset.
    generated_col: int
    # Index into the source map's "sources" list.
    source_index: Optional[int]
    # Zero-based line in the original source.
    source_line: Optional[int]
    # Zero-based column in the original source.
    source_col: Optional[int]
    # Index into the source map's "names" list.
    name_index: Optional[int]
|
|
|
|
|
|
class WasmSourceMap:
    """Lookup table from generated (wasm) offsets to source-map points.

    Attributes:
        sources: The source map's "sources" list.
        names: The source map's "names" list (empty when absent).
        points: Decoded ``SourcePoint`` entries in file order.
        columns: ``generated_col`` of every point, parallel to ``points``;
            used as the bisect key in ``lookup`` (which therefore expects
            the values to be in ascending order).
    """

    def __init__(self, sourcemap_path: Path):
        """Read the JSON source map at ``sourcemap_path`` and decode it.

        Raises:
            KeyError: If the JSON lacks "sources" or "mappings".
        """
        raw = json.loads(sourcemap_path.read_text(encoding="utf-8"))
        self.sources: List[str] = raw["sources"]
        self.names: List[str] = raw.get("names", [])
        mappings: str = raw["mappings"]

        decoded: List[SourcePoint] = []
        # Running totals: each VLQ field is a delta against the previous
        # segment that carried the same field.
        col = src = line = src_col = name = 0

        # A wasm source map uses a single generated line, so the mappings are
        # split on ',' only.
        for chunk in mappings.split(","):
            if not chunk:
                continue
            fields = decode_vlq_segment(chunk)
            col += fields[0]

            has_source = len(fields) >= 4
            has_name = len(fields) >= 5
            if has_source:
                src += fields[1]
                line += fields[2]
                src_col += fields[3]
            if has_name:
                name += fields[4]

            decoded.append(
                SourcePoint(
                    generated_col=col,
                    source_index=src if has_source else None,
                    source_line=line if has_source else None,
                    source_col=src_col if has_source else None,
                    name_index=name if has_name else None,
                )
            )

        self.points = decoded
        self.columns = [pt.generated_col for pt in decoded]

    def lookup(self, offset: int) -> Optional[SourcePoint]:
        """Return the last point whose generated column is <= ``offset``.

        Returns ``None`` when ``offset`` lies before the first point.
        """
        pos = bisect.bisect_right(self.columns, offset)
        return self.points[pos - 1] if pos > 0 else None

    def source_name(self, index: Optional[int]) -> Optional[str]:
        """Return ``sources[index]``, or ``None`` if it is missing or out of range."""
        if index is not None and 0 <= index < len(self.sources):
            return self.sources[index]
        return None

    def symbol_name(self, index: Optional[int]) -> Optional[str]:
        """Return ``names[index]``, or ``None`` if it is missing or out of range."""
        if index is not None and 0 <= index < len(self.names):
            return self.names[index]
        return None
|
|
|
|
|
|
def parse_wasm_function_names(wasm_path: Path) -> Dict[int, str]:
    """Collect function names from the custom "name" section of a wasm binary.

    Walks the top-level sections in order. For each custom section (id 0)
    whose name is "name", reads its subsections and records the entries of
    subsection 1 (function names). Everything else is skipped using the
    declared section/subsection sizes.

    Args:
        wasm_path: Path of the wasm binary to read.

    Returns:
        Mapping of function index to function name; empty when the binary
        has no function-name subsection.

    Raises:
        ValueError: If the file does not start with the ``\\0asm`` magic.
    """
    b = wasm_path.read_bytes()
    if b[:4] != b"\x00asm":
        raise ValueError(f"Not a wasm binary: {wasm_path}")

    function_names: Dict[int, str] = {}
    i = 8  # magic + version

    while i < len(b):
        section_id = b[i]
        i += 1
        section_size, i = read_u32_leb128(b, i)
        section_end = i + section_size

        if section_id == 0:  # custom section
            name_len, j = read_u32_leb128(b, i)
            custom_name = b[j : j + name_len].decode("utf-8", errors="replace")
            payload_start = j + name_len
            if custom_name == "name":
                k = payload_start
                while k < section_end:
                    subsection_id = b[k]
                    k += 1
                    subsection_size, k = read_u32_leb128(b, k)
                    subsection_end = k + subsection_size
                    if subsection_id == 1:  # function names
                        count, k = read_u32_leb128(b, k)
                        for _ in range(count):
                            fn_idx, k = read_u32_leb128(b, k)
                            nlen, k = read_u32_leb128(b, k)
                            name = b[k : k + nlen].decode("utf-8", errors="replace")
                            k += nlen
                            function_names[fn_idx] = name
                    # Always resynchronise on the declared subsection size so
                    # that a subsection whose entries do not consume exactly
                    # that many bytes cannot derail the following subsections.
                    k = subsection_end

        # The declared size is authoritative for finding the next section.
        i = section_end
    return function_names
|
|
|
|
|
|
def parse_frame(raw: str) -> Tuple[int, int]:
    """Parse one ``--frame`` value into ``(function_index, offset)``.

    Two spellings are accepted, with optional surrounding whitespace:

    * ``IDX:0xOFFSET`` (e.g. ``19112:0x2cd913``)
    * ``wasm-function[IDX]:0xOFFSET`` (e.g. ``wasm-function[765]:0x10af0e``)

    Args:
        raw: The frame text given on the command line.

    Returns:
        The decimal function index and the hexadecimal offset as integers.

    Raises:
        ValueError: If ``raw`` matches neither spelling.
    """
    m = re.match(
        r"^\s*(?:wasm-function\[(\d+)\]|(\d+))\s*:\s*(0[xX][0-9a-fA-F]+)\s*$",
        raw,
    )
    if not m:
        raise ValueError(f"Invalid --frame format: {raw!r} (expected IDX:0xOFFSET)")
    # Exactly one of the two index groups participates in a successful match.
    index_text = m.group(1) if m.group(1) is not None else m.group(2)
    return int(index_text), int(m.group(3), 16)
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Options:
        --wasm, --sourcemap: required paths (converted to ``Path``).
        --frame, --offset: repeatable; each collects into a list that
            defaults to empty.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(description="Map wasm stack frames to source locations")

    required_paths = (
        ("--wasm", "WASM binary path"),
        ("--sourcemap", "WASM sourcemap path"),
    )
    for flag, help_text in required_paths:
        parser.add_argument(flag, required=True, type=Path, help=help_text)

    repeatable = (
        ("--frame", "Frame in IDX:0xOFFSET format (repeatable)"),
        ("--offset", "Offset only (hex), function index unknown"),
    )
    for flag, help_text in repeatable:
        parser.add_argument(flag, action="append", default=[], help=help_text)

    return parser.parse_args()
|
|
|
|
|
|
def main() -> None:
    """Resolve every requested frame/offset and print one JSON object per target.

    Loads the source map and the wasm function-name table, then looks up
    each target offset. Source line and column are printed 1-based (the
    stored values plus one). Fields that cannot be resolved are ``null``.

    Raises:
        SystemExit: If neither ``--frame`` nor ``--offset`` was supplied.
    """
    args = parse_args()
    source_map = WasmSourceMap(args.sourcemap)
    function_names = parse_wasm_function_names(args.wasm)

    # Frames carry a function index; bare offsets do not (index is None).
    targets: List[Tuple[Optional[int], int]] = [parse_frame(raw) for raw in args.frame]
    targets.extend((None, int(text, 16)) for text in args.offset)

    if not targets:
        raise SystemExit("No targets. Provide --frame or --offset.")

    for fn_idx, off in targets:
        point = source_map.lookup(off)

        # Start from the "nothing resolved" record; key order is the output order.
        record = {
            "function_index": fn_idx,
            "function_name": None if fn_idx is None else function_names.get(fn_idx),
            "offset_hex": hex(off),
            "mapped_generated_col_hex": None,
            "source": None,
            "source_line": None,
            "source_column": None,
            "symbol": None,
        }
        if point:
            record["mapped_generated_col_hex"] = hex(point.generated_col)
            record["source"] = source_map.source_name(point.source_index)
            if point.source_line is not None:
                record["source_line"] = point.source_line + 1
            if point.source_col is not None:
                record["source_column"] = point.source_col + 1
            record["symbol"] = source_map.symbol_name(point.name_index)

        print(json.dumps(record, ensure_ascii=False))


if __name__ == "__main__":
    main()
|