背景:
在分析某站点接口时,发现此前存在漏洞的JS在修复后又被加上了强混淆,看起来十分抽象
image-20221118194253741.png
于是乎搁置在一边没有继续分析,直到前几日在图书馆发现了小肩膀大佬写的爬虫混淆AST对抗,书中描述的几种混淆方式与该站点使用的十分相似,遂尝试使用AST对该JS进行一定程度的还原
本篇针对该JS中的字符串混淆进行还原
字符串是如何混淆的
解密方式
想要对字符串反混淆就要先分析该样本是如何对字符串进行混淆的
以一个字符串的解密为例子,可以发现他将字符串解密拆分成一串函数调用并对立即数进行减法操作来防止通用解密
image-20221118203212673.png
而处于全局作用域的_0x1f1a68
实际上也是对另一个函数的调用
// Global-scope wrapper taken from the obfuscated sample: it forwards to _0x4903,
// passing its 5th parameter minus 0x252 and its 3rd parameter; the other three
// parameters are unused.
function _0x1f1a68(_0x1be822, _0x79fd7, _0x340561, _0x170aa8, _0x35407a) { return _0x4903(_0x35407a - 0x252, _0x340561);}
经过在VSCode
中对每个字符串解密函数查找定义,发现所有的字符串解密最终都是调用的_0x4903
由于每个函数的调用时机跟作用域都不同,获取每一个字符串解密函数的结果是不明智的
于是这里需要实现的第一个功能就是将每一个字符串的解析还原成对_0x4903
的调用,也就是将不同字符串解密函数的调用替换成对最根本的解密函数_0x4903
的等价调用形式
还原
函数调用还原实现
举个例子
function _0x3cb10b(_0x9056d3, _0xd6da67, _0x4e8aa3, _0x575cfa, _0x50067e) { return _0x1f1a68(_0x9056d3 - 0x1ca, _0xd6da67 - 0x97, _0x4e8aa3, _0x575cfa - 0x13c, _0xd6da67 - 0x119); }function _0x362f86(_0xeb8495, _0x2bb06b, _0x3bc6ce, _0x59c29b, _0x141499) { return _0x3cb10b(_0xeb8495 - 0x1a0, _0xeb8495 - -0x370, _0x3bc6ce, _0x59c29b - 0x19c, _0x141499 - 0x120);}function _0x1f1a68(_0x1be822, _0x79fd7, _0x340561, _0x170aa8, _0x35407a) { return _0x4903(_0x35407a - 0x252, _0x340561);}
我们的目标是将
_0x362f86(0x9a3, 0xef2, '1vkx', 0x369, 0xb40)
转换成
_0x3cb10b(0x9a3- 0x1a0, 0x9a3 - -0x370, '1vkx', 0x369 - 0x19c, 0xb40 - 0x120);
继而转换成
_0x1f1a68(0x9a3- 0x1a0 - 0x1ca, 0x9a3 - -0x370 - 0x97, '1vkx', 0x369 - 0x19c - 0x13c, 0x9a3 - -0x370 - 0x119);
最终转换成
_0x4903(0x9a3 - -0x370 - 0x119 - 0x252, '1vkx');
image-20221118211109561.png
image-20221118211121682.png
那么如何使用AST实现呢,为了尽可能实现上下文无关减少状态,这里采用像示例中的一样一层一层的处理
在代码实现上我将其分为了多个部分
/**
 * Renames the parameter reference found at the left-most leaf of `arg` to its
 * positional placeholder (`arg0`, `arg1`, ...). The leaf node is mutated in place.
 *
 * Wrapper arguments look like `param` or `param - 0x1 - 0x2`; chained
 * subtractions nest to the left, so the variable is always the left-most leaf.
 *
 * @param {Array<{name?: string}>} funcargs - Parameter nodes of the wrapper function.
 * @param {object} arg - Identifier or (nested) BinaryExpression node to rewrite.
 * @returns {boolean} `true` if the leaf is (or already was) a placeholder,
 *   `false` if the leaf is not a named node or matches no parameter.
 */
function replaceArgsToIndex(funcargs, arg) {
    // Walk down the left spine instead of recursing.
    let leaf = arg;
    while (leaf && leaf.type === "BinaryExpression") {
        leaf = leaf.left;
    }

    // A literal (or any node without a name) cannot be a parameter reference;
    // report it instead of crashing on `undefined.startsWith`.
    if (!leaf || typeof leaf.name !== "string") {
        console.log("unsupported arg node " + leaf?.type + " at " + leaf?.loc?.start.line);
        return false;
    }

    // Already converted by an earlier pass. Match `arg<digits>` exactly so that
    // real parameters such as `args` are still looked up below.
    if (/^arg\d+$/.test(leaf.name)) {
        return true;
    }

    const index = funcargs.findIndex((param) => param.name === leaf.name);
    if (index !== -1) {
        leaf.name = "arg" + index;
        return true;
    }

    console.log("not found arg " + leaf.name + " at " + leaf.loc?.start.line);
    return false;
}
第一步是将函数内的参数名转换成参数下标,这样就可以从CallExpression
中直接用下标获取对应的参数进行表达式替换,这里处理了BinaryExpression
是因为参数中存在减法表达式的情况,但变量永远在第一位,所以递归到最左面的变量再进行处理,同时如果参数已经被转化成argN的形式便不做处理。
这里放一下二元表达式(BinaryExpression)在AST中的表示
image-20221120130744482.png
如图,每个红框都是一个二元表达式,外层的二元表达式将内层的二元表达式作为左操作数(left),所以当参数为
xxx - 0x123 -0x456 -0x789
形式时我们要递归地获取左操作数,直到拿到最左边的变量。
转换后的形式为:
// Intermediate form after parameter names were replaced by positional placeholders:
// arg4 / arg2 stand for "5th / 3rd argument of the call site" and are not real
// bindings, so this snippet is illustrative only and is not meant to be executed.
function _0x1f1a68(_0x1be822, _0x79fd7, _0x340561, _0x170aa8, _0x35407a) { return _0x4903(arg4 - 0x252, arg2);}
这样就可以检测所有对_0x1f1a68
的调用,获取其中的第5个参数和第3个参数并把它们放入_0x4903
调用的对应位置,然后将_0x1f1a68
替换为_0x4903
将参数下标替换成参数的代码如下
/**
 * Substitutes a positional placeholder (`arg0`, `arg1`, ...) with the matching
 * argument node of a call site, rebuilding nested BinaryExpressions on the way
 * so that `arg4 - 0x252` becomes `<5th call argument> - 0x252`.
 *
 * @param {Array<object>} funcargs - Argument nodes of the CallExpression being rewritten.
 * @param {object} arg - Identifier or (nested) BinaryExpression taken from the wrapper body.
 * @returns {object|undefined} The substituted expression; the node itself when
 *   it is not a placeholder; `undefined` (after logging) when the placeholder
 *   index exceeds the number of call-site arguments.
 */
function convertIndexToArg(funcargs, arg) {
    if (arg.type === "BinaryExpression") {
        // Only the left spine can contain the variable; the right operand is kept as is.
        return btypes.binaryExpression(arg.operator, convertIndexToArg(funcargs, arg.left), arg.right);
    }

    // Literals and other unnamed nodes, as well as names that are not exactly
    // `arg<digits>`, are not placeholders and pass through unchanged.
    const placeholder = typeof arg.name === "string" ? /^arg(\d+)$/.exec(arg.name) : null;
    if (placeholder === null) {
        return arg;
    }

    const index = Number.parseInt(placeholder[1], 10);
    if (index < funcargs.length) {
        return funcargs[index];
    }

    console.log("not found arg index with name " + arg.name + " at " + arg.loc?.start.line);
    return undefined;
}
其中funcargs
为CallExpression
中的参数,该函数同样递归处理二元表达式
实现函数展开只需要遍历所有的函数定义,判断是否满足混淆函数的格式,然后通过binding寻找他的调用表达式进行处理,下面为代码实现
/**
 * Babel visitor that inlines ONE level of "forwarding wrapper" functions.
 *
 * A wrapper is a function declaration whose body is a single
 * `return callee(<param or param - const ...>, ...)` statement. Every call to
 * such a wrapper is replaced by a direct call to `callee`, with the call-site
 * arguments substituted for the wrapper's parameters. Unreferenced function
 * declarations are removed.
 *
 * Only one level is handled per traversal; run the traversal repeatedly to
 * unwind chains of wrappers.
 */
let doFlatten = {
    FunctionDeclaration(path) {
        const funcName = path.node.id?.name;
        const binding = funcName ? path.scope.getBinding(funcName) : undefined;
        if (!binding) {
            // Anonymous declaration (e.g. `export default function () {}`) or
            // unresolved binding: nothing to inline.
            return;
        }
        if (!binding.referenced) {
            // The function is never referenced: delete it and refresh the scope.
            path.remove();
            path.scope.crawl();
            return;
        }

        // Shape check: the body must be exactly `return <CallExpression>;`.
        const statements = path.node.body.body;
        if (statements.length !== 1) return;
        const onlyStatement = statements[0];
        if (!btypes.isReturnStatement(onlyStatement)) return;
        const callExp = onlyStatement.argument;
        if (!btypes.isCallExpression(callExp)) return;

        const calleeArgs = callExp.arguments; // arguments passed on by the wrapper
        const funcArgs = path.node.params;    // parameters of the wrapper itself

        // Validate every forwarded argument BEFORE rewriting anything, so a
        // rejected wrapper is never left half-converted.
        for (const arg of calleeArgs) {
            if (!btypes.isBinaryExpression(arg) && !btypes.isIdentifier(arg)) {
                console.log("callee arg not recognizable at line: " + path.node.loc?.start.line);
                return;
            }
        }

        // Build placeholder templates (`arg4 - 0x252`, `arg2`, ...) on clones, so
        // the wrapper's own body stays valid for any reference that cannot be
        // inlined below. replaceArgsToIndex walks to the left-most leaf itself.
        const templateArgs = calleeArgs.map((arg) => btypes.cloneNode(arg));
        for (const template of templateArgs) {
            replaceArgsToIndex(funcArgs, template);
        }

        for (const referPath of binding.referencePaths) {
            const callPath = referPath.parentPath;
            // Only inline when the reference is the CALLEE of a call; a wrapper
            // passed as an argument (`foo(wrapper)`) must be left untouched.
            if (!callPath || !btypes.isCallExpression(callPath.node) || callPath.node.callee !== referPath.node) {
                console.log("abnormal reference at line: " + referPath.node.loc?.start.line);
                continue;
            }

            const args = callPath.node.arguments;
            // Substitute the call-site arguments into the templates. Clone the
            // result because one call-site argument may be used several times and
            // the same template feeds every call site; AST nodes must not be shared.
            const newArgs: btypes.Expression[] = templateArgs.map((template) =>
                btypes.cloneNode(convertIndexToArg(args, template))
            );
            callPath.replaceWith(btypes.callExpression(btypes.cloneNode(callExp.callee), newArgs));
        }

        // Refresh bindings so the next pass sees the newly created calls.
        path.parentPath.scope.crawl();
    }
};
traverse["default"](root, doFlatten);
由于每次我们仅处理一层,所以这里多次处理,这样就不必为先后顺序发愁
// Each pass unwinds only one wrapper level, so the transform is applied three times in a row.
for (let remaining = 3; remaining > 0; remaining -= 1) {
    removeConstFunc(root);
}
字符串函数调用
上一步中我们将字符串混淆替换成了形似_0x4903(0x9a3 - -0x370 - 0x119 - 0x252, '1vkx');
的调用,这一步中我们要将对该函数的调用还原为字符串。
以下为_0x4903
的实现
/**
 * Root string decoder of the obfuscated sample.
 *
 * On first use it replaces itself with the real decoder, which looks up entry
 * `index - 109` of the string array returned by _0x8976, decodes it (custom
 * alphabet base64 followed by an RC4-style XOR keyed by the second argument)
 * and caches the result.
 *
 * @param _0x41f1e9 Numeric index into the string array (offset by 109).
 * @param _0x3130bc Key string used by the RC4-style decryption.
 * @returns The decoded string.
 */
function _0x4903(_0x41f1e9, _0x3130bc) {
    var _0x5e7ec4 = _0x8976();
    return _0x4903 = function (_0x899a2d, _0x5835f7) {
        _0x899a2d = _0x899a2d - 109;
        var _0x3e8c46 = _0x5e7ec4[_0x899a2d];
        if (_0x4903.HfpBsi === undefined) {
            // Base64 decoder with a lowercase-first alphabet. Keep this function on ONE line:
            // it concatenates its own source text (_0x5f42a1) and compares characters of it
            // with char code 10 (line feed), so inserting line breaks can corrupt the output.
            // `>>` picks the decoded byte out of the accumulated 6-bit groups.
            var _0x1cbb5e = function (_0x50d26d) { var _0x5a42a9 = '', _0x12cc8d = '', _0x5f42a1 = _0x5a42a9 + _0x1cbb5e; for (var _0x2829d6 = 0, _0x49459b, _0x390f91, _0x46a986 = 0; _0x390f91 = _0x50d26d.charAt(_0x46a986++); ~_0x390f91 && (_0x49459b = _0x2829d6 % 4 ? _0x49459b * 64 + _0x390f91 : _0x390f91, _0x2829d6++ % 4) ? _0x5a42a9 += _0x5f42a1.charCodeAt(_0x46a986 + 10) - 10 !== 0 ? String.fromCharCode(255 & _0x49459b >> (-2 * _0x2829d6 & 6)) : _0x2829d6 : 0) { _0x390f91 = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/='.indexOf(_0x390f91); } for (var _0x42631c = 0, _0x4ab4be = _0x5a42a9.length; _0x42631c < _0x4ab4be; _0x42631c++) { _0x12cc8d += '%' + ('00' + _0x5a42a9.charCodeAt(_0x42631c).toString(16)).slice(-2); } return decodeURIComponent(_0x12cc8d); };
            // RC4-style decryption: base64-decode the input, run the key schedule over a
            // 256-entry state with the key string, then XOR the keystream with the data.
            var _0x6b448 = function (_0x1b5efd, _0x1ba8dd) {
                var _0x45d44d = [], _0xd3ad3a = 0, _0x1e5c57, _0x567392 = '';
                _0x1b5efd = _0x1cbb5e(_0x1b5efd);
                var _0x579ae7;
                for (_0x579ae7 = 0; _0x579ae7 < 256; _0x579ae7++) {
                    _0x45d44d[_0x579ae7] = _0x579ae7;
                }
                for (_0x579ae7 = 0; _0x579ae7 < 256; _0x579ae7++) {
                    _0xd3ad3a = (_0xd3ad3a + _0x45d44d[_0x579ae7] + _0x1ba8dd.charCodeAt(_0x579ae7 % _0x1ba8dd.length)) % 256, _0x1e5c57 = _0x45d44d[_0x579ae7], _0x45d44d[_0x579ae7] = _0x45d44d[_0xd3ad3a], _0x45d44d[_0xd3ad3a] = _0x1e5c57;
                }
                _0x579ae7 = 0, _0xd3ad3a = 0;
                for (var _0x577b0d = 0; _0x577b0d < _0x1b5efd.length; _0x577b0d++) {
                    _0x579ae7 = (_0x579ae7 + 1) % 256, _0xd3ad3a = (_0xd3ad3a + _0x45d44d[_0x579ae7]) % 256, _0x1e5c57 = _0x45d44d[_0x579ae7], _0x45d44d[_0x579ae7] = _0x45d44d[_0xd3ad3a], _0x45d44d[_0xd3ad3a] = _0x1e5c57, _0x567392 += String.fromCharCode(_0x1b5efd.charCodeAt(_0x577b0d) ^ _0x45d44d[(_0x45d44d[_0x579ae7] + _0x45d44d[_0xd3ad3a]) % 256]);
                }
                return _0x567392;
            };
            // The `arguments` object of this first call doubles as the decoded-string cache.
            _0x4903.MMnWus = _0x6b448, _0x41f1e9 = arguments, _0x4903.HfpBsi = !![];
        }
        var _0x22abc4 = _0x5e7ec4[0], _0x244987 = _0x899a2d + _0x22abc4, _0x238d8e = _0x41f1e9[_0x244987];
        if (!_0x238d8e) {
            if (_0x4903.CBAcVv === undefined) {
                var _0xdfbdcc = function (_0xacf633) {
                    // kJPlqW and YBMSQk are two halves of a regular expression.
                    // wpMLDB must keep its minified text exactly as written (see NgtXeG).
                    this.kDnxVr = _0xacf633, this.IoBPQs = [1, 0, 0], this.wpMLDB = function () {return 'newState';}, this.kJPlqW = '\x5cw+\x20*\x5c(\x5c)\x20*{\x5cw+\x20*', this.YBMSQk = '[\x27|\x22].+[\x27|\x22];?\x20*}';
                };
                _0xdfbdcc.prototype.NgtXeG = function () {
                    // Tests the source text of wpMLDB against the regex, i.e. detects whether
                    // the script has been reformatted. wpMLDB was manually restored to its
                    // minified form here so that the check passes.
                    var _0x18c3d0 = new RegExp(this.kJPlqW + this.YBMSQk), _0xa664a8 = _0x18c3d0.test(this.wpMLDB.toString()) ? --this.IoBPQs[1] : --this.IoBPQs[0];
                    return this.vbKnou(_0xa664a8);
                }, _0xdfbdcc.prototype.vbKnou = function (_0x561c7b) {
                    if (!Boolean(~_0x561c7b)) return _0x561c7b;
                    // Reformatting detected: DtzlIA keeps pushing to IoBPQs while extending
                    // its own loop bound, exhausting memory.
                    return this.DtzlIA(this.kDnxVr);
                }, _0xdfbdcc.prototype.DtzlIA = function (_0x386581) {
                    for (var _0x2adaa0 = 0, _0x5245a5 = this.IoBPQs.length; _0x2adaa0 < _0x5245a5; _0x2adaa0++) {
                        this.IoBPQs.push(Math.round(Math.random())), _0x5245a5 = this.IoBPQs.length;
                    }
                    return _0x386581(this.IoBPQs[0]);
                }, new _0xdfbdcc(_0x4903).NgtXeG(), _0x4903.CBAcVv = !![];
            }
            _0x3e8c46 = _0x4903.MMnWus(_0x3e8c46, _0x5835f7), _0x41f1e9[_0x244987] = _0x3e8c46;
        } else _0x3e8c46 = _0x238d8e;
        return _0x3e8c46;
    }, _0x4903(_0x41f1e9, _0x3130bc);
}

// Restores the order of the string array: rotates it (push(shift())) until the
// checksum computed from eight decoded entries equals 214580. _0x8976 is a
// function returning the global string array; the array itself is omitted here
// because it is too long.
(function (_0x4dbad8, _0x3b7f07) {
    var _0x37755d = _0x4dbad8();
    while (!![]) {
        try {
            var _0x39c0da = parseInt(_0x4903(4069, 'u]yp')) / 1 * (parseInt(_0x4903(2125, 'Jx@]')) / 2) + parseInt(_0x4903(140, 'kFVy')) / 3 + -parseInt(_0x4903(2566, 'j1TD')) / 4 + parseInt(_0x4903(3272, 'a*Xk')) / 5 + -parseInt(_0x4903(2587, 'EnP@')) / 6 + -parseInt(_0x4903(743, '5h*C')) / 7 + parseInt(_0x4903(5102, 'dVlJ')) / 8;
            if (_0x39c0da === _0x3b7f07) break;else _0x37755d.push(_0x37755d.shift());
        } catch (_0x696156) {
            _0x37755d.push(_0x37755d.shift());
        }
    }
})(_0x8976, 214580);