LLVM字符串加密学习笔记
之前看chenx6大佬的博客学习了一下编写基础的LLVM Pass,但是那个有很明显的问题是,作者为了处理Function内部重复引用的多次解密的问题,特判了引用次数,如果存在多处对global string的引用是无法进行混淆的。
但是实际的编程中很难避免在多处引用同一个字符串,所以那个Pass只能混淆简单的代码。我后面根据评论区的说法对此优化了一下,改成在Function的EntryBasicBlock处解密,但是生成的IR过不了llvm-dis,感觉是在用alloca分配栈变量的时候出了问题,暂时不知道怎么解决,后面如果有能力的话再重新写一遍吧。
之后学习了一下pluto-obfuscator(https://github.com/bluesadi/Pluto-Obfuscator)项目,里面有一份GlobalEncryption.cpp
,借此机会学习一下,顺便写一份New PassManager版本的。
pluto-obfuscator的全局变量加密对全局整数和数组变量进行了处理,采用的方式是编译期加密,运行期解密,就是在IR阶段对全局变量加密,然后为每个全局变量生成一个解密函数并添加到.ctors
中,让程序在运行期进行全局构造的时候解密。
看雪有一个帖子(https://bbs.kanxue.com/thread-263107.htm)对几种加密方式总结得比较好,搬运一下。
runOnModule
首先获取Module的LLVMContext,获取所有的全局变量,添加到GVs
中。
// Per the surrounding notes, INIT_CONTEXT(M) records the module's LLVMContext.
INIT_CONTEXT(M);
// Take a snapshot of pointers to every global variable in the module so the
// later passes over GVs do not iterate the module's own list directly.
vector<GlobalVariable *> GVs;
for (GlobalVariable &GV : M.getGlobalList()) {
  GVs.push_back(&GV);
}
然后筛选出需要加密的全局变量,ObfuTimes
是混淆次数,默认为1。
// Excerpt: the scopes opened here are intentionally left open; the article
// continues the body in the following excerpt.
for (int i = 0; i < ObfuTimes; i++) {
  for (GlobalVariable *GV : GVs) {
    // Only integer and array globals are encrypted.
    if (!GV->getValueType()->isIntegerTy() &&
        !GV->getValueType()->isArrayTy()) {
      continue;
    }
    // Needs an initializer; when OnlyStr is set, only globals whose name
    // contains ".str" are kept. LLVM IR metadata must be left untouched.
    if (GV->hasInitializer() && GV->getInitializer() &&
        (GV->getName().contains(".str") || !OnlyStr)
        // Do not encrypt globals having a section named "llvm.metadata"
        && !GV->getSection().equals("llvm.metadata")) {
      Constant *initializer = GV->getInitializer();
      // At most one of these is non-null; it selects the branch taken below.
      ConstantInt *intData = dyn_cast<ConstantInt>(initializer);
      ConstantDataArray *arrData = dyn_cast<ConstantDataArray>(initializer);
然后分别处理数组和整数类型的全局变量,在这里直接进行加密,然后调用insertArrayDecryption
和insertIntDecryption
将解密函数添加到全局构造函数表中。
if (arrData) { // 获取数组的长度和数组元素的大小 uint32_t eleSize = arrData->getElementByteSize(); uint32_t eleNum = arrData->getNumElements(); uint32_t arrLen = eleNum * eleSize; char *data = const_cast(arrData->getRawDataValues().data()); char *dataCopy = new char[arrLen]; memcpy(dataCopy, data, arrLen); // 生成密钥 uint64_t key = cryptoutils->get_uint64_t(); // A simple xor encryption for (uint32_t i = 0; i < arrLen; i++) { dataCopy[i] ^= ((char *)&key)[i % eleSize]; } GV->setInitializer(ConstantDataArray::getRaw( StringRef(dataCopy, arrLen), eleNum, arrData->getElementType())); GV->setConstant(false); insertArrayDecryption(M, {GV, key, eleNum}); } else if (intData) { uint64_t key = cryptoutils->get_uint64_t(); ConstantInt *enc = CONST(intData->getType(), key ^ intData->getZExtValue()); GV->setInitializer(enc); GV->setConstant(false); insertIntDecryption(M, {GV, key, 1LL}); }
insertArrayDecryption
整数和数组的解密方式其实很像,所以这里只记录比较复杂的数组解密:用IRBuilder构造一个for循环,逐个元素进行解密;整数全局变量只需要和密钥异或一次就行了,原理是一样的。
首先构造一个函数,返回值是void。
// The decryptor takes no parameters and returns void.
vector<Type *> args;
FunctionType *funcType =
    FunctionType::get(Type::getVoidTy(M.getContext()), args, false);
// The function name is derived from the global being decrypted.
string funcName = genHashedName(encGV.GV);
FunctionCallee callee = M.getOrInsertFunction(funcName, funcType);
Function *func = cast<Function>(callee.getCallee());
然后构造五个BasicBlock(entry、for.cond、for.body、for.inc、for.end),用于实现for循环的几个阶段。
// Skeleton of: for (int i = 0; i < length; i++) { ... }
BasicBlock *entry = BasicBlock::Create(*CONTEXT, "entry", func);
// Loop condition.
BasicBlock *forCond = BasicBlock::Create(*CONTEXT, "for.cond", func);
// Loop body.
BasicBlock *forBody = BasicBlock::Create(*CONTEXT, "for.body", func);
// i++
BasicBlock *forInc = BasicBlock::Create(*CONTEXT, "for.inc", func);
// Loop exit. The original labelled this block "for.inc" as well, which made
// the emitted IR misleading to read.
BasicBlock *forEnd = BasicBlock::Create(*CONTEXT, "for.end", func);
首先获取func
的EntryBasicBlock,然后初始化循环变量。
// entry: allocate the loop counter "i", set it to 0, then jump to the
// condition block.
IRBuilder<> builder(*CONTEXT);
builder.SetInsertPoint(entry);
AllocaInst *indexPtr = builder.CreateAlloca(TYPE_I32, CONST_I32(1), "i");
builder.CreateStore(CONST_I32(0), indexPtr);
builder.CreateBr(forCond);
forCond
实现i < length
的部分,如果i < length
成立则跳转到循环体,如果不成立则跳出循环。
// for.cond: reload i and compare it (signed) against the element count;
// branch to the body while i < len, otherwise leave the loop.
builder.SetInsertPoint(forCond);
LoadInst *index = builder.CreateLoad(TYPE_I32, indexPtr);
ICmpInst *cond =
    cast<ICmpInst>(builder.CreateICmpSLT(index, CONST_I32(encGV.len)));
builder.CreateCondBr(cond, forBody, forEnd);
循环体内就是解密的过程了,其实就是把数组的每个元素和(截断到元素宽度的)密钥进行异或,最后跳转到i++。
// for.body: XOR element i of the global array with the key, in place.
builder.SetInsertPoint(forBody);
ArrayType *arrTy = cast<ArrayType>(encGV.GV->getValueType());
Type *eleTy = arrTy->getElementType();
// GEP indices {0, i}: step through the pointer to the array, then pick
// element i.
Value *indexList[2] = {CONST_I32(0), index};
// The explicitly typed GEP/load overloads are used so the code does not rely
// on the pointer's pointee type.
Value *ele =
    builder.CreateGEP(arrTy, encGV.GV, ArrayRef<Value *>(indexList, 2));
Value *encEle =
    builder.CreateXor(builder.CreateLoad(eleTy, ele), CONST(eleTy, encGV.key));
builder.CreateStore(encEle, ele);
builder.CreateBr(forInc);
forInc
实现的就是i++
了,这里比较简单,最后跳转到forCond
进行条件判断,这样就实现了循环。
// for.inc: store i + 1 back into the counter slot and loop back to the
// condition block. `index` is the value loaded in for.cond.
builder.SetInsertPoint(forInc);
builder.CreateStore(builder.CreateAdd(index, CONST_I32(1)), indexPtr);
builder.CreateBr(forCond);
forEnd
就是返回ret
,最后再将函数写入.ctors
中,实现运行期全局构造。
// for.end: return, then register the finished function as a global
// constructor with priority 0.
builder.SetInsertPoint(forEnd);
builder.CreateRetVoid();
appendToGlobalCtors(M, func, 0);
以下是我改写成New PassManager的GlobalsEncryption.cpp
,加密部分没做修改,所有代码在libObfuscator/tree/pluto-enc(https://github.com/AimiP02/libObfuscator/tree/pluto-enc)。
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <iomanip>
#include <sstream>
#include <string>
#include <vector>

#include "CryptoUtils.h"

using namespace llvm;

/// An encrypted global together with what its runtime decryptor needs.
struct EncryptedGV {
  GlobalVariable *GV; ///< Global whose initializer has been XOR-encrypted.
  uint64_t key;       ///< XOR key; narrowed to the value/element type on use.
  uint32_t len;       ///< Number of elements (1 for a scalar integer).
};

namespace {

static cl::opt<int>
    ObfuTimes("gvobfus-times", cl::init(1),
              cl::desc("Run GlobalsEncryption pass time(s)"));
static cl::opt<bool> OnlyStr("onlystr", cl::init(false),
                             cl::desc("Encrypt string variable only"));

/// Module pass that XOR-encrypts integer and integer-array global
/// initializers at compile time and registers one global constructor per
/// encrypted global to undo the XOR at program start-up.
class GVObfuscator : public PassInfoMixin<GVObfuscator> {
public:
  /// Context of the module being processed; set at the start of run().
  LLVMContext *ctx = nullptr;

  void InsertIntDecryption(Module &M, EncryptedGV encGV);
  void InsertArrayDecryption(Module &M, EncryptedGV encGV);
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};

/// Builds the decryptor's symbol name: the hex SHA-1 of
/// "<module name>_<metadata kind id of the global's name>".
std::string GenHashedName(GlobalVariable *GV) {
  Module &M = *GV->getParent();
  std::string funcName =
      formatv("{0}_{1:x-}", M.getName(), M.getMDKindID(GV->getName()));
  SHA1 sha1;
  sha1.update(funcName);
  // `auto` + range-for only needs the digest to be an iterable byte sequence.
  auto digest = sha1.final();
  std::stringstream ss;
  ss << std::hex << std::setfill('0');
  for (auto byte : digest) {
    ss << std::setw(2) << (static_cast<unsigned>(byte) & 0xFFu);
  }
  return ss.str();
}

/// Creates a fresh, empty `void()` function that will hold the decryptor for
/// \p GV.
///
/// Function::Create is used instead of getOrInsertFunction on purpose: if the
/// hashed name is already taken (a second obfuscation round, or two globals
/// hashing to the same name) a distinct function is still created rather than
/// appending blocks to one that already has a body.
Function *CreateDecryptor(Module &M, GlobalVariable *GV) {
  FunctionType *funcType =
      FunctionType::get(Type::getVoidTy(M.getContext()), /*isVarArg=*/false);
  return Function::Create(funcType, GlobalValue::ExternalLinkage,
                          GenHashedName(GV), &M);
}

/// True for globals the pass must never touch: anything named "llvm.*"
/// (appendToGlobalCtors deletes and recreates llvm.global_ctors, so a stored
/// pointer to it would dangle), anything placed in the "llvm.metadata"
/// section, and, as in the previous revision, names containing
/// "llvm.metadata".
bool IsReservedGlobal(const GlobalVariable &GV) {
  StringRef name = GV.getName();
  return name.find("llvm.") == 0 || name.contains("llvm.metadata") ||
         GV.getSection() == "llvm.metadata";
}

/// Emits `GV ^= key` as a global constructor for a scalar integer global.
void GVObfuscator::InsertIntDecryption(Module &M, EncryptedGV encGV) {
  Function *func = CreateDecryptor(M, encGV.GV);
  Type *valueTy = encGV.GV->getValueType();

  IRBuilder<> builder(BasicBlock::Create(M.getContext(), "entry", func));
  LoadInst *val = builder.CreateLoad(valueTy, encGV.GV);
  Value *xorVal = builder.CreateXor(val, ConstantInt::get(valueTy, encGV.key));
  builder.CreateStore(xorVal, encGV.GV);
  builder.CreateRetVoid();

  appendToGlobalCtors(M, func, 0);
}

/// Emits `for (i = 0; i < len; i++) GV[i] ^= key` as a global constructor
/// for an integer-array global.
void GVObfuscator::InsertArrayDecryption(Module &M, EncryptedGV encGV) {
  LLVMContext &C = M.getContext();
  Function *func = CreateDecryptor(M, encGV.GV);

  BasicBlock *entry = BasicBlock::Create(C, "entry", func);
  BasicBlock *forCond = BasicBlock::Create(C, "for.cond", func);
  BasicBlock *forBody = BasicBlock::Create(C, "for.body", func);
  BasicBlock *forInc = BasicBlock::Create(C, "for.inc", func);
  // Previously mislabelled "for.inc".
  BasicBlock *forEnd = BasicBlock::Create(C, "for.end", func);

  IRBuilder<> builder(C);
  Type *Int32Ty = builder.getInt32Ty();
  ArrayType *arrTy = cast<ArrayType>(encGV.GV->getValueType());
  Type *eleTy = arrTy->getElementType();

  // entry: i = 0
  builder.SetInsertPoint(entry);
  AllocaInst *indexPtr =
      builder.CreateAlloca(Int32Ty, ConstantInt::get(Int32Ty, 1, false), "i");
  builder.CreateStore(ConstantInt::get(Int32Ty, 0), indexPtr);
  builder.CreateBr(forCond);

  // for.cond: i < len ?
  builder.SetInsertPoint(forCond);
  LoadInst *index = builder.CreateLoad(Int32Ty, indexPtr);
  Value *cond =
      builder.CreateICmpSLT(index, ConstantInt::get(Int32Ty, encGV.len));
  builder.CreateCondBr(cond, forBody, forEnd);

  // for.body: GV[i] ^= key
  builder.SetInsertPoint(forBody);
  Value *indexList[2] = {ConstantInt::get(Int32Ty, 0), index};
  Value *ele =
      builder.CreateGEP(arrTy, encGV.GV, ArrayRef<Value *>(indexList, 2));
  Value *encEle = builder.CreateXor(builder.CreateLoad(eleTy, ele),
                                    ConstantInt::get(eleTy, encGV.key));
  builder.CreateStore(encEle, ele);
  builder.CreateBr(forInc);

  // for.inc: i++
  builder.SetInsertPoint(forInc);
  builder.CreateStore(builder.CreateAdd(index, ConstantInt::get(Int32Ty, 1)),
                      indexPtr);
  builder.CreateBr(forCond);

  // for.end
  builder.SetInsertPoint(forEnd);
  builder.CreateRetVoid();

  appendToGlobalCtors(M, func, 0);
}

/// Encrypts every eligible global ObfuTimes times and registers a matching
/// decryptor for each encryption.
///
/// Eligible: has an initializer that is a ConstantInt of at most 64 bits or
/// a ConstantDataArray of integers, is not a reserved LLVM global, and, when
/// -onlystr is given, has ".str" in its name.
PreservedAnalyses GVObfuscator::run(Module &M, ModuleAnalysisManager &MAM) {
  outs() << "Pass start...\n";
  ctx = &M.getContext();

  // Snapshot the candidates up front: inserting constructors below changes
  // the module's global list while we work.
  std::vector<GlobalVariable *> GVs;
  for (auto &GV : M.globals()) {
    if (!IsReservedGlobal(GV)) {
      GVs.push_back(&GV);
    }
  }

  bool changed = false;
  for (int round = 0; round < ObfuTimes; round++) {
    outs() << "Current ObfuTimes: " << round << "\n";
    for (GlobalVariable *GV : GVs) {
      // Only integer and array globals are encrypted.
      Type *valueTy = GV->getValueType();
      if (!valueTy->isIntegerTy() && !valueTy->isArrayTy()) {
        continue;
      }
      if (!GV->hasInitializer()) {
        continue;
      }
      if (OnlyStr && !GV->getName().contains(".str")) {
        continue;
      }

      Constant *initializer = GV->getInitializer();
      if (auto *arrayData = dyn_cast<ConstantDataArray>(initializer)) {
        // Float/double arrays cannot be XORed by the generated decryptor.
        if (!arrayData->getElementType()->isIntegerTy()) {
          continue;
        }
        outs() << "Get global arraydata\n";
        uint32_t eleSize = arrayData->getElementByteSize();
        uint32_t eleNum = arrayData->getNumElements();
        // Private, self-freeing copy of the raw element bytes.
        std::string encrypted = arrayData->getRawDataValues().str();
        outs() << "Global Variable: " << *GV << "\n"
               << "Array Length: " << eleSize << " * " << eleNum << " = "
               << encrypted.size() << "\n";

        // Generate the key.
        uint64_t key = cryptoutils->get_uint64_t();
        // Byte i is XORed with key byte (i % eleSize), i.e. every element is
        // XORed with the first eleSize bytes of the key.
        // NOTE(review): this equals the truncated key the decryptor uses
        // only when the compiler runs on a little-endian host - confirm if
        // big-endian hosts matter.
        const char *keyBytes = reinterpret_cast<const char *>(&key);
        for (size_t i = 0; i < encrypted.size(); i++) {
          encrypted[i] ^= keyBytes[i % eleSize];
        }

        GV->setInitializer(ConstantDataArray::getRaw(
            StringRef(encrypted), eleNum, arrayData->getElementType()));
        // The decryptor stores into the global at start-up.
        GV->setConstant(false);
        InsertArrayDecryption(M, {GV, key, eleNum});
        changed = true;
      } else if (auto *intData = dyn_cast<ConstantInt>(initializer)) {
        // getZExtValue() requires the value to fit in 64 bits.
        if (intData->getBitWidth() > 64) {
          continue;
        }
        uint64_t key = cryptoutils->get_uint64_t();
        ConstantInt *enc = ConstantInt::get(intData->getType(),
                                            key ^ intData->getZExtValue());
        GV->setInitializer(enc);
        // Was missing: without it the constructor writes to a constant.
        GV->setConstant(false);
        InsertIntDecryption(M, {GV, key, 1u});
        changed = true;
      }
    }
  }

  outs() << "Pass end...\n";
  // The module was rewritten, so cached analyses must not be reused.
  return changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}

} // namespace

// Register the pass so that `-passes=gvobfus` selects it.
extern "C" PassPluginLibraryInfo llvmGetPassPluginInfo() {
  return {.APIVersion = LLVM_PLUGIN_API_VERSION,
          .PluginName = "GVObfuscator",
          .PluginVersion = LLVM_VERSION_STRING,
          .RegisterPassBuilderCallbacks = [](PassBuilder &PB) {
            PB.registerPipelineParsingCallback(
                [](StringRef Name, ModulePassManager &MPM,
                   ArrayRef<PassBuilder::PipelineElement>) -> bool {
                  if (Name == "gvobfus") {
                    MPM.addPass(GVObfuscator());
                    return true;
                  }
                  return false;
                });
          }};
}
写个代码测试一下。
#include <stdio.h>

/* Integer global: exercises the scalar-integer encryption path. */
int a = 10;

/* Prints a fixed string, then the caller-supplied one. */
void func(const char *s) {
  puts("!!!The testing string!!!");
  puts(s);
}

int main(void) {
  puts("This is a testing string!");
  /* getchar() returns int, so keep the result in an int. */
  int ch = getchar();
  if (ch == '6') {
    printf("6666%c\n", ch);
  } else {
    printf("WTF?!\n");
  }
  func("!!!The testing string!!!");
  return 0;
}
混淆前后的IR对比。
; 混淆前 @a = dso_local global i32 10, align 4 @.str = private unnamed_addr constant [25 x i8] c"!!!The testing string!!!\00", align 1 @.str.1 = private unnamed_addr constant [26 x i8] c"This is a testing string!\00", align 1 @.str.2 = private unnamed_addr constant [8 x i8] c"6666%c\0A\00", align 1 @.str.3 = private unnamed_addr constant [7 x i8] c"WTF?!\0A\00", align 1 ; 混淆后 @a = dso_local global i32 -660274230, align 4 @.str = private unnamed_addr global [25 x i8] c"\C5\C5\C5\B0\8C\81\C4\90\81\97\90\8D\8A\83\C4\97\90\96\8D\8A\83\C5\C5\C5\E4", align 1 @.str.1 = private unnamed_addr global [26 x i8] c"\1B'&@.str.2 = private unnamed_addr global [8 x i8] c"\F6\F6\F6\F6\E5\A3\CA\C0", align 1 @.str.3 = private unnamed_addr global [7 x i8] c"ji{\02\1C7=", align 1 @llvm.global_ctors = appending global [5 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @cc2b6b071cb0cb47a4171a4b1d76a06963d6f5e6, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @e176df9cb36840d9378338da84362465dd29b20a, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @"2ec1d2c5cdff4d08047220c5c1ee639ae45deb5a", i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @d7db60557e37f256d7c62e73e03a42051365a247, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @"50c74527ef4457f6934c3f7d6291948f2f509e58", i8* null }]
当然这对动态调试来说是没啥用的,因为解密发生在全局构造阶段,程序运行起来之后内存里就已经是明文了。
对静态分析来说,这个强度的加密还是有点弱,解密函数很容易就会被看出来,还可以再加上控制流平坦化之类的混淆,提高分析解密函数的难度。
