0%

IDAapi学习

Angr的力量是有极限的 所以我要转行学idaapi了!

获取二进制文件信息

获取硬件信息

inf_is_64bit() -> bool | inf_is_32bit_exactly() -> bool

判断当前IDA打开的二进制文件是64位或32位的

inf_is_be() -> bool

判断当前程序的模式是大端序(返回 True)或是小端序

inf_get_procname()[.lower()] -> str

返回当前处理器模块(架构)的名称

1
2
3
4
5
6
7
8
9
{
== 'metapc' : 'ARCH_X86',
.startswith('arm') : ['ARCH_ARM64', 'ARCH_ARM'],
.startswith('sparc') : 'ARCH_SPARC',
.startswith('ppc') : 'ARCH_PPC',
.startswith('mips') : 'ARCH_MIPS',
.startswith('systemz') : 'ARCH_SYSTEMZ',
.startswith('s390x') : 'ARCH_SYSTEMZ'
}

idc.get_sreg(ea, reg) -> int

Thumb模式下 用户代码处的第20号段寄存器(即T寄存器)存放的值是 1 通过这个可以判断当前ARM架构下的代码是否处于Thumb模式:

1
2
3
4
5
6
def is_thumb(address):
    """Return True when the 'T' segment register at `address` holds 1 (Thumb mode)."""
    return idc.get_sreg(address, 'T') == 1


# idc.get_sreg() is given the register by name, idaapi.get_sreg() by number;
# per the original note 'T' corresponds to register number 20
# (NOTE(review): confirm the number against the ARM processor module).
# <=>
def is_thumb(address):
    """Equivalent check that passes the register number (20) instead of its name."""
    return idaapi.get_sreg(address, 20) == 1

获取内存信息

is_mapped(ea) -> bool

判断地址ea是否是有效地址(在程序内)

get_bytes(ea, size[, gmb_flags=0x01]) -> bytes | get_wide_byte(ea) -> int

分别用来获取位于地址ea处的size个字节数据和1个字节的数据 get_bytes按内存中的原始顺序(地址从低到高)返回bytes 不做端序转换 需要整数时用int.from_bytes()自行指定端序 除此之外还有包括获取1字 双字等的函数 但是这两个应该够用了

[2024 长城杯] tmaze

主函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
/*
 * Hex-Rays pseudocode of the challenge's main().
 *
 * argv[1] is interpreted as a path: each character ('x', 'y' or 'z') selects
 * one of three pointers stored in the current node.  The success message is
 * printed only when the whole input was consumed and the current node equals
 * the target node computed before the walk.
 *
 * NOTE: this is decompiler output, not compilable C.  mapx / mapy / mapz are
 * not declared in this listing, and several pointer casts appear to have been
 * dropped.
 */
int __fastcall __noreturn main(int argc, const char **argv, const char **envp)
{
  const char *in; // rsi
  unsigned int v5; // eax
  __int64 v6; // rcx
  __int64 v7; // rdi
  int v8; // eax
  __int64 v9; // rcx
  int v10; // eax
  int v11; // eax
  bool v12; // al
  __int64 *v13; // rax
  unsigned int step; // ebx
  __int64 *map; // rcx
  size_t _step; // r15
  size_t len; // rax
  int now; // edx
  __int64 v19; // r14

  in = argv[1];
  v5 = 0;
  // Count the characters of argv[1] including the terminating NUL,
  // so v5 ends up as strlen(in) + 1.
  do
    v6 = v5++;
  while ( in[v6] );
  // Only a 42-character input (43 with the NUL) is processed any further.
  if ( v5 == 43 )
  {
    // NOTE(review): presumably builds the maze and returns its root node - confirm in IDA.
    v7 = sub_7FF6C8B81230(&unk_7FF6C8BB7000, dword_7FF6C8BB7FA0);
    mapz = v7;
    v8 = 6;
    v9 = v7;
    // Starting node: from the root, six times load the pointer at +16 and dereference it.
    do
    {
      v9 = **(v9 + 16);
      --v8;
    }
    while ( v8 );
    mapx = v9;
    v10 = 6;
    // Target node: the same six steps from the root ...
    do
    {
      v7 = **(v7 + 16);
      --v10;
    }
    while ( v10 );
    v11 = 12;
    // ... followed by twelve steps through the pointer at +8.
    do
    {
      v7 = *(v7 + 8);
      --v11;
    }
    while ( v11 );
    mapy = v7;
    if ( argc != 1 )
    {
      // v12 means "input fully consumed"; it stays false if the walk aborts early.
      v12 = *in == 0;
      if ( *in )
      {
        step = 1;
        map = mapx;
        _step = 0LL;
        do
        {
          now = in[_step];
          // Pick the next node for this character.  A null pointer or a
          // non-zero flag for that direction aborts the walk (goto check).
          // NOTE(review): the flag offsets 24/25/26 are most likely byte
          // offsets from the node start (cast dropped) - confirm in IDA.
          switch ( now )
          {
            case 'z':
              v19 = map[2];
              if ( !v19 || *(map + 26) )
                goto check;
              break;
            case 'y':
              v19 = map[1];
              if ( !v19 || *(map + 25) )
                goto check;
              break;
            case 'x':
              v19 = *map;
              if ( !*map || *(map + 24) )
                goto check;
              break;
            default:
              // Any other character aborts the walk.
              goto check;
          }
          // Move to the chosen node and write 1 at its offset 27.
          mapx = v19;
          *(v19 + 27) = 1;
          _step = step;
          len = strlen(in);
          ++step;
          map = v19;
          // True once every input character has been processed.
          v12 = len <= _step;
        }
        while ( !v12 );
      }
check:
      // Success requires the whole input consumed and the walk ending on the
      // target node (v7 holds the same value that was stored in mapy).
      if ( v12 && mapx == v7 )
      {
        v13 = sub_7FF6C8B81770(&qword_7FF6C8BB92C0, "yes flag is flag{UUID(md5(your input))}");
        sub_7FF6C8B81B70(v13);
      }
    }
  }
  exit(0);
}

前面的逻辑不解释 总之就是用x, y, z来选择要走的分支 然后看这个位置有没有能解引用的地址 最终目标是让mapx到达mapy 每条分支除了要看能不能解引用还要看接下来的三个字节 为0代表对应的分支可走

image-20240908202100528

这意味着这题如果手动找解的话要不停的telescope来手绘地图 这里就可以用get_bytes()和get_wide_byte()来轻松获取全部路径:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import idaapi
import idc

# Node layout read by this script:
#   +0 / +8 / +16   : 8-byte little-endian pointers to the x / y / z neighbour
#   +24 / +25 / +26 : one flag byte per direction; the checker in main() only
#                     takes a direction when its flag byte is zero
PTR_SIZE = 8
FLAG_OFFSET = 24

# Address of the node the walk starts from
# (NOTE(review): looks like a heap address from a live debugging session).
starts = [0x1A9F16BD830]
# node address -> [x, y, z] neighbours still worth visiting (0 = not usable)
way = {}

print('\n' * 100)

# Breadth-first walk: `starts` is the current level, `frontier` the next one.
while starts:
    frontier = []
    for start in starts:
        to_add = []
        for i in range(3):
            target = int.from_bytes(
                idaapi.get_bytes(start + PTR_SIZE * i, PTR_SIZE), 'little')
            flag = idaapi.get_wide_byte(start + FLAG_OFFSET + i)
            # Any non-zero flag blocks the direction, matching the binary's
            # check (the former `flag ^ 1` multiplier was only right for 0/1).
            to_add.append(target if flag == 0 else 0)
        print(hex(start) + '->' + str([hex(x) for x in to_add]))
        # Drop neighbours that were already recorded, either as a visited node
        # or as a neighbour of one, so the walk never goes backwards.
        for path in way:
            for i in range(3):
                if (to_add[i] and to_add[i] in way[path]) or to_add[i] == path:
                    print(f'Deleting {hex(to_add[i])} for it in {[hex(x) for x in way[path]]}')
                    to_add[i] = 0
        way[start] = to_add
        frontier += [x for x in to_add if x]
    starts = frontier

# Dump the discovered graph, one node per line.
with open('XXXX', 'w') as f:
    for path in way:
        f.write(f'{hex(path)} -> {[hex(x) for x in way[path]]}\n')

得到result:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
0x1a9f16bd830 -> ['0x0', '0x1a9f16bd880', '0x1a9f16bdc90']
0x1a9f16bd880 -> ['0x0', '0x1a9f16bd8d0', '0x1a9f16bd4c0']
0x1a9f16bdc90 -> ['0x1a9f16bdc40', '0x0', '0x0']
0x1a9f16bd8d0 -> ['0x0', '0x0', '0x1a9f16bdd30']
0x1a9f16bd4c0 -> ['0x0', '0x1a9f16bd510', '0x0']
0x1a9f16bdc40 -> ['0x0', '0x0', '0x1a9f16be140']
0x1a9f16bdd30 -> ['0x0', '0x1a9f16bdd80', '0x0']
0x1a9f16bd510 -> ['0x0', '0x1a9f16bd560', '0x1a9f16bd1f0']
0x1a9f16be140 -> ['0x1a9f16be0f0', '0x0', '0x0']
0x1a9f16bdd80 -> ['0x0', '0x1a9f16bddd0', '0x1a9f16be280']
0x1a9f16bd560 -> ['0x0', '0x0', '0x1a9f16bd920']
0x1a9f16bd1f0 -> ['0x0', '0x1a9f16bd240', '0x0']
0x1a9f16be0f0 -> ['0x0', '0x0', '0x1a9f16be690']
0x1a9f16bddd0 -> ['0x0', '0x0', '0x0']
0x1a9f16be280 -> ['0x1a9f16be230', '0x0', '0x0']
0x1a9f16bd920 -> ['0x0', '0x1a9f16bd970', '0x0']
0x1a9f16bd240 -> ['0x0', '0x0', '0x1a9f16bcfc0']
0x1a9f16be690 -> ['0x1a9f16be640', '0x1a9f16be6e0', '0x0']
0x1a9f16be230 -> ['0x0', '0x0', '0x1a9f16be7d0']
0x1a9f16bd970 -> ['0x0', '0x1a9f16bd9c0', '0x0']
0x1a9f16bcfc0 -> ['0x0', '0x1a9f16bd010', '0x0']
0x1a9f16be640 -> ['0x0', '0x0', '0x0']
0x1a9f16be6e0 -> ['0x0', '0x0', '0x0']
0x1a9f16be7d0 -> ['0x1a9f16be780', '0x1a9f16be820', '0x0']
0x1a9f16bd9c0 -> ['0x0', '0x1a9f16bda10', '0x0']
0x1a9f16bd010 -> ['0x0', '0x0', '0x1a9f16bca60']
0x1a9f16be780 -> ['0x1a9f16be730', '0x0', '0x0']
0x1a9f16be820 -> ['0x0', '0x0', '0x0']
0x1a9f16bda10 -> ['0x0', '0x0', '0x0']
0x1a9f16bca60 -> ['0x0', '0x1a9f16bcab0', '0x0']
0x1a9f16be730 -> ['0x0', '0x0', '0x1a9f16be190']
0x1a9f16bcab0 -> ['0x0', '0x1a9f16bcb00', '0x1a9f16bc970']
0x1a9f16be190 -> ['0x0', '0x1a9f16be1e0', '0x0']
0x1a9f16bcb00 -> ['0x0', '0x0', '0x1a9f16bd0b0']
0x1a9f16bc970 -> ['0x0', '0x1a9f16bc9c0', '0x0']
0x1a9f16be1e0 -> ['0x0', '0x0', '0x1a9f16bdce0']
0x1a9f16bd0b0 -> ['0x1a9f16bd060', '0x0', '0x0']
0x1a9f16bc9c0 -> ['0x0', '0x1a9f16bca10', '0x1a9f16bc920']
0x1a9f16bdce0 -> ['0x0', '0x0', '0x0']
0x1a9f16bd060 -> ['0x0', '0x0', '0x1a9f16bd2e0']
0x1a9f16bca10 -> ['0x0', '0x0', '0x1a9f16bcb50']
0x1a9f16bc920 -> ['0x0', '0x0', '0x0']
0x1a9f16bd2e0 -> ['0x1a9f16bd290', '0x1a9f16bd330', '0x0']
0x1a9f16bcb50 -> ['0x0', '0x1a9f16bcba0', '0x0']
0x1a9f16bd290 -> ['0x0', '0x0', '0x1a9f16bd5b0']
0x1a9f16bd330 -> ['0x0', '0x1a9f16bd380', '0x0']
0x1a9f16bcba0 -> ['0x0', '0x0', '0x1a9f16bd150']
0x1a9f16bd5b0 -> ['0x0', '0x1a9f16bd600', '0x0']
0x1a9f16bd380 -> ['0x0', '0x1a9f16bd3d0', '0x0']
0x1a9f16bd150 -> ['0x1a9f16bd100', '0x1a9f16bd1a0', '0x0']
0x1a9f16bd600 -> ['0x0', '0x1a9f16bd650', '0x0']
0x1a9f16bd3d0 -> ['0x0', '0x0', '0x1a9f16bd6f0']
0x1a9f16bd100 -> ['0x0', '0x0', '0x0']
0x1a9f16bd1a0 -> ['0x0', '0x0', '0x1a9f16bd420']
0x1a9f16bd650 -> ['0x0', '0x0', '0x0']
0x1a9f16bd6f0 -> ['0x1a9f16bd6a0', '0x0', '0x0']
0x1a9f16bd420 -> ['0x0', '0x1a9f16bd470', '0x0']
0x1a9f16bd6a0 -> ['0x0', '0x0', '0x1a9f16bda60']
0x1a9f16bd470 -> ['0x0', '0x0', '0x0']
0x1a9f16bda60 -> ['0x0', '0x1a9f16bdab0', '0x0']
0x1a9f16bdab0 -> ['0x0', '0x1a9f16bdb00', '0x1a9f16bdf10']
0x1a9f16bdb00 -> ['0x0', '0x0', '0x1a9f16bd740']
0x1a9f16bdf10 -> ['0x1a9f16bdec0', '0x0', '0x0']
0x1a9f16bd740 -> ['0x0', '0x1a9f16bd790', '0x0']
0x1a9f16bdec0 -> ['0x1a9f16bde70', '0x0', '0x1a9f16be3c0']
0x1a9f16bd790 -> ['0x0', '0x1a9f16bd7e0', '0x0']
0x1a9f16bde70 -> ['0x1a9f16bde20', '0x0', '0x0']
0x1a9f16be3c0 -> ['0x0', '0x1a9f16be410', '0x0']
0x1a9f16bd7e0 -> ['0x0', '0x0', '0x0']
0x1a9f16bde20 -> ['0x0', '0x0', '0x1a9f16be320']
0x1a9f16be410 -> ['0x0', '0x1a9f16be460', '0x0']
0x1a9f16be320 -> ['0x1a9f16be2d0', '0x1a9f16be370', '0x0']
0x1a9f16be460 -> ['0x0', '0x1a9f16be4b0', '0x1a9f16bdf60']
0x1a9f16be2d0 -> ['0x0', '0x0', '0x1a9f16be870']
0x1a9f16be370 -> ['0x0', '0x0', '0x0']
0x1a9f16be4b0 -> ['0x0', '0x0', '0x0']
0x1a9f16bdf60 -> ['0x0', '0x0', '0x0']
0x1a9f16be870 -> ['0x0', '0x1a9f16be8c0', '0x0']
0x1a9f16be8c0 -> ['0x0', '0x1a9f16be910', '0x0']
0x1a9f16be910 -> ['0x0', '0x1a9f16be960', '0x0']
0x1a9f16be960 -> ['0x0', '0x1a9f16be9b0', '0x0']
0x1a9f16be9b0 -> ['0x0', '0x1a9f16bffd0', '0x0']
0x1a9f16bffd0 -> ['0x0', '0x1a9f16c0020', '0x0']
0x1a9f16c0020 -> ['0x0', '0x1a9f16c0070', '0x0']
0x1a9f16c0070 -> ['0x0', '0x1a9f16c00c0', '0x0']
0x1a9f16c00c0 -> ['0x0', '0x1a9f16c0110', '0x1a9f16be550']
0x1a9f16c0110 -> ['0x0', '0x1a9f16c0160', '0x0']
0x1a9f16be550 -> ['0x1a9f16be500', '0x1a9f16be5a0', '0x0']
0x1a9f16c0160 -> ['0x0', '0x1a9f16c01b0', '0x1a9f16be5f0']
0x1a9f16be500 -> ['0x0', '0x0', '0x1a9f16be000']
0x1a9f16be5a0 -> ['0x0', '0x0', '0x1a9f16be0a0']
0x1a9f16c01b0 -> ['0x0', '0x0', '0x0']
0x1a9f16be5f0 -> ['0x0', '0x0', '0x0']
0x1a9f16be000 -> ['0x1a9f16bdfb0', '0x0', '0x0']
0x1a9f16be0a0 -> ['0x1a9f16be050', '0x0', '0x0']
0x1a9f16bdfb0 -> ['0x0', '0x0', '0x1a9f16bdb50']
0x1a9f16be050 -> ['0x0', '0x0', '0x0']
0x1a9f16bdb50 -> ['0x0', '0x1a9f16bdba0', '0x0']
0x1a9f16bdba0 -> ['0x0', '0x1a9f16bdbf0', '0x0']
0x1a9f16bdbf0 -> ['0x0', '0x0', '0x0']

与IDA交互

让IDA生成信息更具可操作性

get_name_value(_from, name) -> (typ, value) | get_dtype_size(dtype)

获取IDA生成信息对应的值和值的类型 例如:

image-20240904220744244

1
2
3
4
# test.py
import idaapi, idc

# Resolve the name 'var_18' as seen from 0x40D2C2; the result is a (type, value) pair.
typ, value = idaapi.get_name_value(0x40D2C2, 'var_18')
print(f"Type:{typ}\nValue:{hex(value)}")
# Print two of the NT_* type codes for comparison with the result above.
print(f'NT_SEG:{idaapi.NT_SEG}, NT_NONE:{idaapi.NT_NONE}')

image-20240904220843115

其中typ以及后面会出现的dtype成员代表该数据的类型 参考IDA SDK: Operand value types 如果传入的name在程序未开始运行时不存在值 返回的value是-1且typ为NT_NONE(0x0) 否则value为这个IDA名称所对应的真实值 可以用来编写汇编代码 若不确定_from的话应该使用idc.BADADDR占位

get_dtype_size(dtype) 返回某个dtype代号对应的数据类型长度(in bytes)

get_item_head(ea) -> int | get_item_size(ea) -> int

get_item_head用于获取IDA中某个地址所在Item的首地址 而get_item_size用于获取某个地址距离所在Item结尾地址的长度 例如汇编指令:

image-20240905134621494

1
2
3
import idaapi, idc

# First address of the item that contains 0x40D2A8.
item_start = idaapi.get_item_head(0x40D2A8)
# Number of bytes from 0x40D2A9 to the end of its item.
item_rest = idaapi.get_item_size(0x40D2A9)
print(f'The Start address of the ITEM at 0x40D2A8 : {hex(item_start)}')
print(f'The lenth of the rest of the ITEM at 0x40D2A9 : {hex(item_rest)}')

image-20240905194255927

class insn_t() | decode_insn(insn, ea) -> int

insn_t对象实例化时不需要也不能传递任何参数 实例化出来的对象所有的成员都被初始化成一个固定的初始值(大部分是-1) 使用decode_insn()传入已经实例化的insn_t对象和要解析的指令的首地址 函数返回指令的长度(in bytes) 此时insn就记录了这条指令的所有信息 其中最重要的是这条指令操作数(op_t) 操作数对象还包含了更多相关信息 介绍一下其中几个比较常用的成员

insn.ops[n].dtype : 操作数的数据类别 参考IDA SDK: Operand value types

insn.ops[n].type : 操作数的类别 参考IDA SDK: Operand types

insn.ops[n].value : 如果该操作数是立即数 返回这个值

get_func_attr(ea, attr) -> any | set_func_attr(ea, attr, value) -> int

分别用于获取和设置ea地址所在的函数的attr属性 设置属性时返回1表示成功返回0表示失败

这些属性(FUNCATTR_*)以及FUNCATTR_FLAGS属性中可用的标志位(FUNC_*)是:

Attribution (idc.) Value
FUNCATTR_ARGSIZE 28
FUNCATTR_COLOR 36
FUNCATTR_END 4
FUNCATTR_FLAGS 8
FUNCATTR_FPD 32
FUNCATTR_FRAME 16
FUNCATTR_FRREGS 24
FUNCATTR_FRSIZE 20
FUNCATTR_OWNER 16
FUNCATTR_REFQTY 20
FUNCATTR_START 0
FUNC_BOTTOMBP 256
FUNC_FAR 2
FUNC_FRAME 16
FUNC_HIDDEN 64
FUNC_LIB 4
FUNC_LUMINA 65536
FUNC_NORET 1
FUNC_NORET_PENDING 512
FUNC_OUTLINE 131072
FUNC_PURGED_OK 16384
FUNC_SP_READY 1024
FUNC_STATIC 8
FUNC_TAIL 32768
FUNC_THUNK 128
FUNC_USERFAR 32

对二进制文件进行操作

patch_byte(ea, x) -> bool | patch_bytes(ea, buf) -> bool

分别用于将ea处的1个和多个字节patch成目标字节x或目标字节串buf 返回patch的结果是否成功

用IDA去虚拟跳转混淆

之前使用Angr符号执行的方式去虚拟跳转时提到过 IDA可以直接计算出跳转的终点 可以利用这个特性来大大简化去除混淆的过程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import idaapi
import idc
import idautils
from keystone import Ks, KS_ARCH_ARM, KS_MODE_THUMB

_HEX_DIGITS = set('0123456789abcdefABCDEF')


def _jump_target_digits(tokens):
    """Return the hex digits of the jump target for a `MOV PC, ...` line, else None.

    `tokens` is the whitespace-split disassembly text.  The target is taken
    from the part of the last token that follows the first '_'.  None is
    returned for any other instruction, or when that part is not a plain hex
    number, so that keystone is never handed a malformed `B 0x`.
    """
    if len(tokens) < 2 or tokens[0] != 'MOV' or tokens[1] != 'PC,':
        return None
    digits = tokens[-1].partition('_')[2]
    if not digits or not set(digits) <= _HEX_DIGITS:
        return None
    return digits


print('\n' * 20)
ks = Ks(KS_ARCH_ARM, KS_MODE_THUMB)
# Thumb NOP encoding; assembled once because it never changes.
NOP = bytes(ks.asm('NOP')[0])

funcs = list(idautils.Functions())
for func_ea in funcs:
    ea = func_ea
    end = idc.get_func_attr(func_ea, idc.FUNCATTR_END)
    while ea < end:
        digits = _jump_target_digits(idc.GetDisasm(ea).split())
        if digits is not None:
            new_op = f"B 0x{digits}"
            # The branch is assembled for, and written at, ea - 2, i.e. it also
            # replaces the two bytes in front of the MOV
            # (NOTE(review): presumably the instruction that loads the jump register).
            new_opcode = ks.asm(new_op, ea - 2)[0]
            # print(f"0x{ea:08x}: {idc.GetDisasm(ea)} -> {new_op} with {new_opcode}")
            # Blank the region with two NOPs first so that nothing stale is
            # left behind if the branch encoding is shorter than the region.
            idaapi.patch_bytes(ea - 2, NOP * 2)
            idaapi.patch_bytes(ea - 2, bytes(new_opcode))
        length = idaapi.decode_insn(idaapi.insn_t(), ea)
        # Always advance by at least 2 bytes, even when the decoded length is smaller.
        ea += max(length, 2)