Baby MIPS

感谢奈沙夜影师傅!
IDA打开发现几个字符串结构都很清晰,提供16个变量,然后进行16次方程校验,但是运行会发现在中间就因为段错误而异常,尝试许久以后发现几个不太对劲的指令,突兀出现的t, sp, 跳转等等的机器码都为EB02开头,猜测为花指令,于是使用IDC脚本去花。

注意MIPS为定长指令集,每个指令都为4字节,因此需要固定监测指令的头部,否则可能会误清除掉正常指令,例如方程参数的赋值 (╯‵□′)╯︵┻━┻

#include <idc.idc>
// Compare the bytes starting at StartAddr against a hex pattern string.
//   StartAddr - address of the first byte to test
//   Match     - hex digits, two characters per byte; "?" matches any nibble.
//               Digits must be upper-case because they are compared with the
//               output of form("%02X", ...).
// Returns 1 when every nibble of the pattern matches, 0 at the first mismatch.
static matchBytes(StartAddr, Match)
{
    auto Len, i, PatSub, SrcSub;
    Len = strlen(Match);
    i = 0;  // start explicitly at the first pattern character
    while (i < Len)
    {
        PatSub = substr(Match, i, i + 1);
        SrcSub = form("%02X", Byte(StartAddr));
        // even i selects the high nibble of the byte, odd i the low nibble
        SrcSub = substr(SrcSub, i % 2, (i % 2) + 1);
        if (PatSub != "?" && PatSub != SrcSub)
        {
            return 0;
        }
        if (i % 2 == 1)
        {
            // both nibbles of this byte were checked; move to the next byte
            StartAddr++;
        }
        i++;
    }
    return 1;
}
// Walk Size/4 four-byte instruction slots starting at 0x400420 and overwrite
// every slot whose first two bytes are EB 02 with zero bytes, then ask IDA to
// re-analyze the scanned range. Stepping is always 4 bytes so that only the
// head of an instruction is ever tested against the pattern.
static main()
{
    auto StartVa, SavedStartVa, StopVa, Size, i, j;
    StartVa = 0x400420;
    StopVa = 0x403233;
    Size = StopVa - StartVa;
    SavedStartVa = StartVa;
    for (i = 0; i < Size / 4; i++)
    {
        if (matchBytes(StartVa, "EB02????"))
        {
            Message("Find%x:%02x%02x%02x%02x\n", StartVa, Byte(StartVa), Byte(StartVa + 1), Byte(StartVa + 2), Byte(StartVa + 3));
            // Zero the whole instruction first ...
            for (j = 0; j < 4; j++)
            {
                PatchByte(StartVa + j, 0x00);
            }
            // ... then recreate code once, at the aligned instruction start.
            MakeCode(StartVa);
        }
        StartVa = StartVa + 4;
    }
    AnalyzeArea(SavedStartVa, StopVa);
    Message("Clear eb02 Opcode Ok ");
}

去花后再次分析即可得到清晰的赋值和check过程

有三种求解方法:

方法一:简单粗暴反汇编

写了一个伪执行汇编的py脚本来得到参数,最后清洗一下即可得到方程,通过z3限制BitVec即可跑出整数解

# Names the replayed statements read and write (MIPS register names) and
# their starting values.
a0 = 0x76ff270
v0 = 0xd0000
v1 = 8
# Simulated stack frame; the replay indexes it by byte offset.
fp = [0] * 0x500
# Mnemonics the replay loop skips as junk ("flower") instructions.
flower = ["slti", "sdc1"]
# Disassembly listing to replay, read one line at a time.
f = open("code.txt", mode="r")
# 32-bit words that the fill loop further down copies into fp[] at
# consecutive 4-byte offsets (presumably a dump of the stack frame taken
# from a debugger -- confirm against the original session).
table = [
    0x0, 0x42d1f0, 0x0, 0x42d1f0,
    0xa, 0xa, 0x0, 0x9,
    0x4250bc, 0x9, 0x426630, 0x42d1f0,
    0x40a3ec, 0x37343431, 0x363434, 0x0,
    0x0, 0x42d1f0, 0x0, 0x4250bc,
    0x0, 0x0, 0x425060, 0x42d1f0,
    0x403ad0, 0x0, 0x0, 0x1000,
    0x425088, 0x76fff184, 0x412fcd, 0x1,
    0x410570, 0x425190, 0x40ca48, 0x0,
    0x0, 0x42d1f0, 0x0, 0x42d1f0,
    0x425088, 0xffffffff, 0x4106c4, 0xffffffff,
    0x76fff184, 0x412fcd, 0x1, 0x42d1f0,
    0x0, 0x425088, 0x40ccac, 0x0,
    0x0, 0x0, 0x0, 0x42d1f0,
    0x0, 0x425190, 0x76ffeef8, 0x425190,
    0x10, 0x425088, 0x40baac, 0x42d1f0,
    0x412fcd, 0x1, 0x425088, 0x40baac,
    0x76fff184, 0x412fce, 0x40b684, 0x0,
    0x0, 0x0, 0x0, 0x42d1f0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x42d1f0, 0x0, 0x42d1f0,
    0x0, 0x4250bc, 0x413081, 0x9,
    0x403f24, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x42d1f0,
    0x0, 0x413078, 0x0, 0x0,
    0x0, 0x0, 0xd0000, 0xf1f4,
    0xcf8, 0xf5f1, 0x7883, 0xe2c6,
    0x67, 0xeccc, 0xc630, 0xba2e,
    0x6e41, 0x641d, 0x716d, 0x4505,
    0x76fff224, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0xfffffffe, 0x0,
    0x76fff2ac, 0x412fcd, 0x1, 0x0,
    0x6, 0x7fffffff, 0x1, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0,
    0xa, 0xa, 0x425088, 0x8,
    0x7ffffff8, 0x100, 0x413f38, 0x1,
    0x413f38, 0x0, 0x2, 0x76fff0f8,
    0x0, 0x0, 0x7fffffff, 0x76fff220,
    0x405050, 0x550001, 0x0, 0x425000,
    0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x76fff220,
    0x404d84, 0x42d1f0, 0x0, 0x500,
    0x5, 0x42d1f0, 0xb3b, 0x76fff224,
    0x115, 0x1a131100, 0x76fff220, 0x76fff270,
    0x76fff2ac, 0xffbecf88, 0xa, 0x405880,
]
# Number of "beq" checks seen so far; incremented by the replay loop.
functions = 0
# Copy table words into the simulated frame at byte offsets 0xb4, 0xb8, ...,
# 0x40c; zip stops when the offsets run out, so surplus table words are unused.
j = 0
for i, word in zip(range(0xb4, 0x410, 4), table):
    fp[i] = word
    j += 1
# Placeholder input words: the decimal digits of each index repeated three
# times and parsed as hex (index 1 -> 0x111). The name shadows the builtin,
# but the statements generated by the replay loop refer to it as "input[...]".
input = [int(3 * str(n), 16) for n in range(16)]
# Replay the disassembly listing line by line. Each supported MIPS mnemonic is
# rewritten as a one-line Python statement over the register names / fp[] /
# input[] and exec'd, while the pieces of each equation ("0x..*", "a[n]", "+",
# "-", "=0x..") are printed as they are encountered.
#
# NOTE: exec() runs text derived from code.txt; only feed it listings you
# produced yourself.
code = ""  # keeps the error report below valid even if the first read fails
# Register/immediate mnemonics that all translate to "reg = reg <op> imm".
imm_ops = {"sll": "<<", "sra": ">>", "xori": "^", "addiu": "+"}
try:
    for code in f:
        # Labels and nops carry no data flow.
        if code[:3] == "loc":
            continue
        if "nop" in code:
            continue
        # Operands are located by splitting on the "$" register prefix.
        code = code.split("$")
        c = code[0].strip()
        if c == "sw":
            n1 = code[1].split(",")[0]
            # Frame slot: 0x410 minus the hex suffix of the "var_XX" operand.
            n2 = 0x410 - int("0x" + code[1].split("_")[1].split("(")[0], 16)
            code = "fp[" + hex(n2) + "] = " + n1
        elif c == "li":
            n1 = code[1].split(",")[0]
            n2 = code[1].split(",")[1].strip()
            code = n1 + " = " + n2
        elif c == "lw":
            n1 = code[1].split(",")[0]
            if "fp" in "".join(code):
                # Load from the frame: the slot value is printed as a coefficient.
                n2 = 0x410 - int("0x" + code[1].split("_")[1].split("(")[0], 16)
                code = n1 + " = fp[" + hex(n2) + "]"
                # emit the equation term
                print("0x%x*" % fp[n2], end='')
            else:
                # Load relative to v0: treated as a read of input word
                # (v0 + displacement) // 4.
                n2 = (v0 + int(code[1].split(",")[1].replace("(", ""))) // 4
                code = n1 + " = input[" + str(n2) + "]"
                print("a[%d]" % n2)
        elif c in imm_ops:
            n1 = code[1].split(",")[0]
            n2 = code[1].split(",")[1].strip()
            code = n1 + " = " + n1 + imm_ops[c] + n2
        elif c == "mul":
            n1 = code[1].split(",")[0]
            n2 = code[2].split(",")[0].strip()
            n3 = code[3].strip()
            code = n1 + " = " + n2 + "*" + n3
        elif c == "addu":
            n1 = code[1].split(",")[0]
            n2 = code[2].split(",")[0].strip()
            code = n1 + " = " + n1 + "+" + n2
            print("+")
        elif c == "subu":
            n1 = code[1].split(",")[0]
            n2 = code[2].split(",")[0].strip()
            code = n1 + " = " + n1 + "-" + n2
            print("-")
        elif c == "beq":
            # End of one check: v0 holds the value the sum is compared against.
            print("=0x%x" % (v0))
            print("================================================one function=====================================")
            functions += 1
            continue
        elif c == "negu":
            n1 = code[1].split(",")[0]
            n2 = code[2].split(",")[0].strip()
            code = n1 + " = " + "-" + n2
            print("-")
        elif c == "lui":
            n1 = code[1].split(",")[0]
            n2 = code[1].split(",")[1].strip()
            # lui places the immediate in the upper 16 bits of the register.
            code = n1 + " = " + n2 + "<<16"
        elif c == "move" or c == "and":
            continue
        elif c in flower:
            continue
        else:
            print("[x]:\tFind unknown code | " + "".join(code))
            break
        exec(code)
    else:
        # Reached only when the listing was consumed without hitting "break".
        print("finish")
except Exception as e:
    print(repr(e))
    print(code)
finally:
    f.close()
print(functions)

方法二:优雅反编译

在某zhao师傅的提醒下想起来jeb的MIPS版本可以对汇编进行简单的反编译:

虽然数组全部是通过指针+偏移的方式来访问,不过可以全部复制下来再用正则来整理数据,将*(par00+x)替换为par00[x/4]的形式(可不要像某zhao师傅一样将参数一个个抄下来哟,不然就会像他一样把参数不慎抄错几个然后纠结若干小时XDDDDDD)

上述两种方法得到方程以后就可以通过z3, numpy, matlab一类的数学工具求解方程组了,下面给出z3py的示例代码

from z3 import *
# One 32-bit bit-vector unknown per input word, named a0 .. a15.
a = [BitVec("a%d"%i, 32) for i in range(16)]
s = Solver()
# The 16 linear equations, one s.add() per check. All arithmetic is 32-bit
# bit-vector arithmetic, so large constants such as 0xffff52cf or 0xff6a5aea
# presumably stand for negative values in two's complement -- confirm against
# the disassembly before editing any coefficient.
s.add(0xca6a*a[0] -0xd9ee*a[1] +0xc5a7*a[2] +0x19ee*a[3] +0xb223*a[4] +0x42e4*a[5] +0xc112*a[6] -0xcf45*a[7] +0x260d*a[8] +0xd78d*a[9] +0x99cb*a[10] -0x3e58*a[11] -0x97cb*a[12] +0xfba9*a[13] -0xdc28*a[14] +0x859b*a[15]  == 0xaa2ed7)
s.add(0xf47d*a[0] +0x12d3*a[1] -0x4102*a[2] +0xcedf*a[3] -0xafcf*a[4] -0xeb20*a[5] -0x2065*a[6] +0x36d2*a[7] -0x30fc*a[8] -0x7e5c*a[9] +0xeea8*a[10] +0xd8dd*a[11] -0xae2*a[12] +0xc053*a[13] +0x5158*a[14] -0x8d42*a[15]  == 0x69d32e)
s.add(0xffff52cf*a[0] -0x4fea*a[1] +0x2075*a[2] +0x9941*a[3] -0xbd78*a[4] +0x9e58*a[5] +0x40ad*a[6] -0x8637*a[7] -0x2e08*a[8] +0x4414*a[9] +0x2748*a[10] +0x1773*a[11] +0xe414*a[12] -0x7b19*a[13] +0x6b71*a[14] -0x3dcf*a[15]  == 0x3b89d9)
s.add(0xffffedd7*a[0] -0x1df0*a[1] +0x8115*a[2] +0x54bd*a[3] -0xf2ba*a[4] +0xdbd*a[5] +0x1dcf*a[6] +0x272*a[7] -0x2fcc*a[8] -0x93d8*a[9] -0x6f6c*a[10] -0x98ff*a[11] +0x2148*a[12] -0x6be2*a[13] +0x2e56*a[14] -0x7bdf*a[15]  == 0xff6a5aea)
s.add(0xffffa8c1*a[0] +0xdc78*a[1] -0x380f*a[2] +0x33c0*a[3] -0x7252*a[4] -0xe5a9*a[5] +0x7a53*a[6] -0x4082*a[7] -0x584a*a[8] +0xc8db*a[9] +0xd941*a[10] +0x6806*a[11] -0x8b97*a[12] +0x23d4*a[13] +0xac2a*a[14] +0x20ad*a[15]  == 0x953584)
s.add(0x5bb7*a[0] -0xfdb2*a[1] +0xaaa5*a[2] -0x50a2*a[3] -0xa318*a[4] +0xbcba*a[5] -0x5e5a*a[6] +0xf650*a[7] +0x4ab6*a[8] -0x7e3a*a[9] -0x660c*a[10] +0xaed9*a[11] -0xa60f*a[12] +0xf924*a[13] -0xff1d*a[14] +0xc888*a[15]  == 0xffd31341)
s.add(0x812d*a[0] -0x402c*a[1] +0xaa99*a[2] -0x33b*a[3] +0x311b*a[4] -0xc0d1*a[5] -0xfad*a[6] -0xc1bf*a[7] -0x1560*a[8] -0x445b*a[9] -0x9b78*a[10] +0x3b94*a[11] +0x2531*a[12] -0xfb03*a[13] +0x8*a[14] +0x8721*a[15]  == 0xff9a6b57)
s.add(0x15c5*a[0] +0xb128*a[1] -0x957d*a[2] +0xdf80*a[3] +0xee68*a[4] -0x3483*a[5] -0x4b39*a[6] -0x3807*a[7] -0x4f77*a[8] +0x652f*a[9] -0x686f*a[10] -0x7fc1*a[11] -0x5d2b*a[12] -0xb326*a[13] -0xacde*a[14] +0x1f11*a[15]  == 0xffd6b3d3)
s.add(0xaf37*a[0] +0x709*a[1] +0x4a95*a[2] -0xa445*a[3] -0x4c32*a[4] -0x6e5c*a[5] -0x45a6*a[6] +0xb989*a[7] +0xf5b7*a[8] +0x3980*a[9] -0x151d*a[10] +0xaf13*a[11] +0xa134*a[12] +0x67ff*a[13] +0xce*a[14] +0x79cf*a[15]  == 0xc6ea77)
s.add(0xffff262a*a[0] +0xdf05*a[1] -0x148e*a[2] -0x4758*a[3] -0xc6b2*a[4] -0x4f94*a[5] -0xf1f4*a[6] +0xcf8*a[7] +0xf5f1*a[8] -0x7883*a[9] -0xe2c6*a[10] -0x67*a[11] +0xeccc*a[12] -0xc630*a[13] -0xba2e*a[14] -0x6e41*a[15]  == 0xff1daae5)
s.add(0xffff9be3*a[0] -0x716d*a[1] +0x4505*a[2] -0xb99d*a[3] +0x1f00*a[4] +0x72bc*a[5] -0x7ff*a[6] +0x8945*a[7] -0xcc33*a[8] -0xab8f*a[9] +0xde9e*a[10] -0x6b69*a[11] -0x6380*a[12] +0x8cee*a[13] -0x7a60*a[14] +0xbd39*a[15]  == 0xff5be0b4)
s.add(0x245e*a[0] +0xf2c4*a[1] -0xeb20*a[2] -0x31d8*a[3] -0xe329*a[4] +0xa35a*a[5] +0xaacb*a[6] +0xe24d*a[7] +0xeb33*a[8] +0xcb45*a[9] -0xdf3a*a[10] +0x27a1*a[11] +0xb775*a[12] +0x713e*a[13] +0x5946*a[14] +0xac8e*a[15]  == 0x144313b)
s.add(0x157*a[0] -0x5f9c*a[1] -0xf1e6*a[2] +0x550*a[3] -0x441b*a[4] +0x9648*a[5] +0x8a8f*a[6] +0x7d23*a[7] -0xe1b2*a[8] -0x5a46*a[9] -0x5461*a[10] +0xee5f*a[11] -0x47e6*a[12] +0xa1bf*a[13] +0x6cf0*a[14] -0x746b*a[15]  == 0xffd18bd2)
s.add(0xf81b*a[0] -0x76cb*a[1] +0x543d*a[2] -0x4a85*a[3] +0x1468*a[4] +0xd95a*a[5] +0xfbb1*a[6] +0x6275*a[7] +0x30c4*a[8] -0x9595*a[9] -0xdbff*a[10] +0x1d1d*a[11] +0xb1cf*a[12] -0xa261*a[13] +0xf38e*a[14] +0x895c*a[15]  == 0xb5cb52)
s.add(0xffff6b97*a[0] +0xd61d*a[1] +0xe843*a[2] -0x8c64*a[3] +0xda06*a[4] +0xc5ad*a[5] +0xd02a*a[6] -0x2168*a[7] +0xa89*a[8] +0x2dd*a[9] -0x80cc*a[10] -0x9340*a[11] -0x3f07*a[12] +0x4f74*a[13] +0xb834*a[14] +0x1819*a[15]  == 0xa6014d)
s.add(0x48ed*a[0] +0x2141*a[1] +0x33ff*a[2] +0x85a9*a[3] -0x1c88*a[4] +0xa7e6*a[5] -0xde06*a[6] +0xbaf6*a[7] +0xc30f*a[8] -0xada6*a[9] -0xa114*a[10] -0x86e9*a[11] +0x70f9*a[12] +0x7580*a[13] -0x51f8*a[14] -0x492f*a[15]  == 0x2fde7c)
# Solve once, then print every unknown followed by the string formed from the
# low byte of each value. Any result other than sat is reported instead of
# being silently ignored.
result = s.check()
if result == sat:
    m = s.model()
    values = [m[a[i]].as_long() for i in range(16)]
    for i, value in enumerate(values):
        print("a[%d]=%d" % (i, value))
    # Only the low 8 bits of each 32-bit word carry a character.
    print("".join(chr(value & 0xff) for value in values))
else:
    print("no solution found: %s" % result)

方法三:符号执行

无名侠师傅提出了使用angr来全自动求解的方法,注意二进制文件也需要先去花。我这边不知道是因为capstone没有支持MIPS反汇编的版本,还是地址扒错了,一直跑不出来,只好直接附上师傅的脚本。

注意其中find和avoid的值由于各人的bin文件不同,因此地址需要自行修正。

from angr import *
import logging
import IPython
# Symbolically execute the de-flowered binary from 0x400420 with a0 pointing
# at 64 symbolic input bytes, looking for a path that reaches the `find`
# address while steering clear of the `avoid` addresses.
logging.getLogger('angr.manager').setLevel(logging.DEBUG)
p = Project('mips2')
state = p.factory.blank_state(addr=0x400420)
DATA_ADDR = 0xA0000
state.regs.a0 = DATA_ADDR
for i in range(16 * 4):
    vec = state.solver.BVS("c{}".format(i), 8, explicit_name=True)
    # Printable-ASCII range for the low byte; built but left disabled below.
    cond = state.solver.And(vec >= 32, vec <= 126)
    state.memory.store(DATA_ADDR + i, vec)
    if i % 4 == 0:
        # state.add_constraints(cond)
        pass
sm = p.factory.simulation_manager(state)
# These addresses differ between individual copies of the binary; adjust
# them to match yours.
res = sm.explore(
    find=0x403150,
    avoid=[
        0x403644, 0x401940, 0x0401ADC, 0x401C74,
        0x401E10, 0x401FA8, 0x402144,
        0x4022DC, 0x402478, 0x402610, 0x4027A8,
        0x402940, 0x402AD8, 0x402C74, 0x402E10,
        0x402FA8, 0x403144,
    ],
)
if res.found:
    found = res.found[0]
    mem = found.memory.load(DATA_ADDR, 16 * 4)
    print(found.solver.eval(mem))
else:
    print("no path reached the find address")

本文章首发在 网安wangan.com 网站上。

上一篇 下一篇
讨论数量: 0
只看当前版本


暂无话题~