Android源码分析 之 分析Dalvik下Dex加载流程寻找
前言
本文将讨论Android Dalvik加载dex时所经过的流程,本文重点在native的c++代码中。了解c++中dex加载可以在加载时对dex文件进行dump。从而实现脱壳的通用解决方案。
我们需要找到加载进内存的dex起始地址和dex的大小,这样我们才能进行内存dump
本文代码选择Android4.4.2_r1
分析
我们知道dex加载最后的逻辑实现基本都是在BaseDexClassLoader中所以我们直接来到BaseDexClassLoader的构造方法中
public class BaseDexClassLoader extends ClassLoader {
private final DexPathList pathList;
...
public BaseDexClassLoader(String dexPath, File optimizedDirectory,
String libraryPath, ClassLoader parent) {
//这里就是设置parent的地方
super(parent);
this.pathList = new DexPathList(this, dexPath, libraryPath, optimizedDirectory);
}
...
}
我们注意力来到DexPathList的构造方法中
public DexPathList(ClassLoader definingContext, String dexPath,
String libraryPath, File optimizedDirectory) {
...
this.definingContext = definingContext;
ArrayList<IOException> suppressedExceptions = new ArrayList<IOException>();
this.dexElements = makeDexElements(splitDexPath(dexPath), optimizedDirectory,
suppressedExceptions);
...
}
省略了一堆判断,其中最关键的代码就是makeDexElements从名字也可以看出这个是创建dex的真正方法
private static Element[] makeDexElements(ArrayList<File> files, File optimizedDirectory,
ArrayList<IOException> suppressedExceptions) {
ArrayList<Element> elements = new ArrayList<Element>();
/*
* Open all files and load the (direct or contained) dex files
* up front.
*/
for (File file : files) {
File zip = null;
DexFile dex = null;
String name = file.getName();
if (name.endsWith(DEX_SUFFIX)) {
// Raw dex file (not inside a zip/jar).
try {
dex = loadDexFile(file, optimizedDirectory);
} catch (IOException ex) {
System.logE("Unable to load dex file: " + file, ex);
}
} else if (name.endsWith(APK_SUFFIX) || name.endsWith(JAR_SUFFIX)
...
} else if (file.isDirectory()) {
...
} else {
System.logW("Unknown file type for: " + file);
}
if ((zip != null) || (dex != null)) {
elements.add(new Element(file, false, zip, dex));
}
}
return elements.toArray(new Element[elements.size()]);
}
makeDexElements方法中循环去文件数组,最后构建Element添加到elements 中我们的dex文件最后走的就是loadDexFile方法
private static DexFile loadDexFile(File file, File optimizedDirectory)
throws IOException {
if (optimizedDirectory == null) {
return new DexFile(file);
} else {
String optimizedPath = optimizedPathFor(file, optimizedDirectory);
return DexFile.loadDex(file.getPath(), optimizedPath, 0);
}
}
这里optimizedDirectory不为null,所以走的就是DexFile.loadDex方法,继续追
static public DexFile loadDex(String sourcePathName, String outputPathName,
int flags) throws IOException {
/*
* TODO: we may want to cache previously-opened DexFile objects.
* The cache would be synchronized with close(). This would help
* us avoid mapping the same DEX more than once when an app
* decided to open it multiple times. In practice this may not
* be a real issue.
*/
return new DexFile(sourcePathName, outputPathName, flags);
}
private DexFile(String sourceName, String outputName, int flags) throws IOException {
...
mCookie = openDexFile(sourceName, outputName, flags);
mFileName = sourceName;
guard.open("close");
}
看名字就知道openDexFile就是我们打开dex的方法
private static int openDexFile(String sourceName, String outputName,
int flags) throws IOException {
return openDexFileNative(new File(sourceName).getCanonicalPath(),
(outputName == null) ? null : new File(outputName).getCanonicalPath(),
flags);
}
native private static int openDexFileNative(String sourceName, String outputName,
int flags) throws IOException;
sourceName参数就是我们要加载dex的路径啦~,可以看到openDexFileNative是一个native方法所以我们现在要进入到c++代码中的,对于jni不懂的可以看看我以前的文章
Android源码中native文件命名就是以native方法所在路径命名的,所以我们直接找到dalvik_system_DexFile.cpp
static void Dalvik_dalvik_system_DexFile_openDexFileNative(const u4* args,
JValue* pResult)
{
...
/*
* Try to open it directly as a DEX if the name ends with ".dex".
* If that fails (or isn't tried in the first place), try it as a
* Zip with a "classes.dex" inside.
*/
if (hasDexExtension(sourceName)
&& dvmRawDexFileOpen(sourceName, outputName, &pRawDexFile, false) == 0) {
ALOGV("Opening DEX file '%s' (DEX)", sourceName);
pDexOrJar = (DexOrJar*) malloc(sizeof(DexOrJar));
pDexOrJar->isDex = true;
pDexOrJar->pRawDexFile = pRawDexFile;
pDexOrJar->pDexMemory = NULL;
} else if (dvmJarFileOpen(sourceName, outputName, &pJarFile, false) == 0) {
...
} else {
...
}
...
}
hasDexExtension判断文件是否是dex后缀,dvmRawDexFileOpen就是去打开dex文件了
int dvmRawDexFileOpen(const char* fileName, const char* odexOutputName,
RawDexFile** ppRawDexFile, bool isBootstrap)
{
...
dexFd = open(fileName, O_RDONLY);
...
//生成优化后的dex文件路径
if (odexOutputName == NULL) {
cachedName = dexOptGenerateCacheFileName(fileName, NULL);
if (cachedName == NULL)
goto bail;
} else {
cachedName = strdup(odexOutputName);
}
...
if (newFile) {
u8 startWhen, copyWhen, endWhen;
bool result;
off_t dexOffset;
dexOffset = lseek(optFd, 0, SEEK_CUR);
result = (dexOffset > 0);
if (result) {
startWhen = dvmGetRelativeTimeUsec();
result = copyFileToFile(optFd, dexFd, fileSize) == 0;
copyWhen = dvmGetRelativeTimeUsec();
}
if (result) {
//进入对dex文件的优化流程中
result = dvmOptimizeDexFile(optFd, dexOffset, fileSize,
fileName, modTime, adler32, isBootstrap);
}
if (!result) {
ALOGE("Unable to extract+optimize DEX from '%s'", fileName);
goto bail;
}
endWhen = dvmGetRelativeTimeUsec();
ALOGD("DEX prep '%s': copy in %dms, rewrite %dms",
fileName,
(int) (copyWhen - startWhen) / 1000,
(int) (endWhen - copyWhen) / 1000);
}
...
return result;
}
我们来关注一下优化dex的函数dvmOptimizeDexFile
bool dvmOptimizeDexFile(int fd, off_t dexOffset, long dexLength,
const char* fileName, u4 modWhen, u4 crc, bool isBootstrap)
{
const char* lastPart = strrchr(fileName, '/');
if (lastPart != NULL)
lastPart++;
else
lastPart = fileName;
ALOGD("DexOpt: --- BEGIN '%s' (bootstrap=%d) ---", lastPart, isBootstrap);
pid_t pid;
/*
* This could happen if something in our bootclasspath, which we thought
* was all optimized, got rejected.
*/
if (gDvm.optimizing) {
ALOGW("Rejecting recursive optimization attempt on '%s'", fileName);
return false;
}
pid = fork();
if (pid == 0) {
static const int kUseValgrind = 0;
//需要调用优化的程序dexopt
static const char* kDexOptBin = "/bin/dexopt";
static const char* kValgrinder = "/usr/bin/valgrind";
static const int kFixedArgCount = 10;
static const int kValgrindArgCount = 5;
static const int kMaxIntLen = 12; // '-'+10dig+'\0' -OR- 0x+8dig
int bcpSize = dvmGetBootPathSize();
int argc = kFixedArgCount + bcpSize
+ (kValgrindArgCount * kUseValgrind);
const char* argv[argc+1]; // last entry is NULL
char values[argc][kMaxIntLen];
char* execFile;
const char* androidRoot;
int flags;
/* change process groups, so we don't clash with ProcessManager */
setpgid(0, 0);
/* full path to optimizer */
androidRoot = getenv("ANDROID_ROOT");
if (androidRoot == NULL) {
ALOGW("ANDROID_ROOT not set, defaulting to /system");
androidRoot = "/system";
}
execFile = (char*)alloca(strlen(androidRoot) + strlen(kDexOptBin) + 1);
strcpy(execFile, androidRoot);
strcat(execFile, kDexOptBin);
/*
* Create arg vector.
*/
int curArg = 0;
if (kUseValgrind) {
/* probably shouldn't ship the hard-coded path */
argv[curArg++] = (char*)kValgrinder;
argv[curArg++] = "--tool=memcheck";
argv[curArg++] = "--leak-check=yes"; // check for leaks too
argv[curArg++] = "--leak-resolution=med"; // increase from 2 to 4
argv[curArg++] = "--num-callers=16"; // default is 12
assert(curArg == kValgrindArgCount);
}
//拼接命令
argv[curArg++] = execFile;
argv[curArg++] = "--dex";
sprintf(values[2], "%d", DALVIK_VM_BUILD);
argv[curArg++] = values[2];
sprintf(values[3], "%d", fd);
argv[curArg++] = values[3];
sprintf(values[4], "%d", (int) dexOffset);
argv[curArg++] = values[4];
sprintf(values[5], "%d", (int) dexLength);
argv[curArg++] = values[5];
argv[curArg++] = (char*)fileName;
sprintf(values[7], "%d", (int) modWhen);
argv[curArg++] = values[7];
sprintf(values[8], "%d", (int) crc);
argv[curArg++] = values[8];
...
if (kUseValgrind)
execv(kValgrinder, const_cast<char**>(argv));
else
//这里执行了/bin/dexopt程序
execv(execFile, const_cast<char**>(argv));
ALOGE("execv '%s'%s failed: %s", execFile,
kUseValgrind ? " [valgrind]" : "", strerror(errno));
exit(1);
} else {
...
}
}
此函数中会拼接命令行,然后调用execv是执行/bin/dexopt程序等待此程序去优化dex文件
它的源码就在/dalvik/dexopt/OptMain.cpp下
它的main函数如下
int main(int argc, char* const argv[])
{
...
if (argc > 1) {
if (strcmp(argv[1], "--zip") == 0)
return fromZip(argc, argv);
else if (strcmp(argv[1], "--dex") == 0)
return fromDex(argc, argv);
else if (strcmp(argv[1], "--preopt") == 0)
return preopt(argc, argv);
}
...
return 1;
}
此处肯定是调用了fromDex函数了
static int fromDex(int argc, char* const argv[])
{
...
/* do the optimization */
if (!dvmContinueOptimization(fd, offset, length, debugFileName,
modWhen, crc, (flags & DEXOPT_IS_BOOTSTRAP) != 0))
{
ALOGE("Optimization failed");
goto bail;
}
result = 0;
...
}
省略掉一顿准备工作,我们重点关注dvmContinueOptimization
bool dvmContinueOptimization(int fd, off_t dexOffset, long dexLength,
const char* fileName, u4 modWhen, u4 crc, bool isBootstrap)
{
...
{
/*
* Map the entire file (so we don't have to worry about page
* alignment). The expectation is that the output file contains
* our DEX data plus room for a small header.
*/
bool success;
void* mapAddr;
//对当前dex进行内存映射
mapAddr = mmap(NULL, dexOffset + dexLength, PROT_READ|PROT_WRITE,
MAP_SHARED, fd, 0);
...
//重写dex
success = rewriteDex(((u1*) mapAddr) + dexOffset, dexLength,
doVerify, doOpt, &pClassLookup, NULL);
if (success) {
DvmDex* pDvmDex = NULL;
u1* dexAddr = ((u1*) mapAddr) + dexOffset;
if (dvmDexFileOpenPartial(dexAddr, dexLength, &pDvmDex) != 0) {
ALOGE("Unable to create DexFile");
success = false;
} else {
/*
* If configured to do so, generate register map output
* for all verified classes. The register maps were
* generated during verification, and will now be serialized.
*/
if (gDvm.generateRegisterMaps) {
pRegMapBuilder = dvmGenerateRegisterMaps(pDvmDex);
if (pRegMapBuilder == NULL) {
ALOGE("Failed generating register maps");
success = false;
}
}
DexHeader* pHeader = (DexHeader*)pDvmDex->pHeader;
updateChecksum(dexAddr, dexLength, pHeader);
dvmDexFileFree(pDvmDex);
}
}
...
}
...
return result;
}
我们来看看rewriteDex
/**
第一个参数:dex的起始地址
第二个参数:dex的字节数
**/
static bool rewriteDex(u1* addr, int len, bool doVerify, bool doOpt,
DexClassLookup** ppClassLookup, DvmDex** ppDvmDex)
{
DexClassLookup* pClassLookup = NULL;
u8 prepWhen, loadWhen, verifyOptWhen;
DvmDex* pDvmDex = NULL;
bool result = false;
const char* msgStr = "???";
/* if the DEX is in the wrong byte order, swap it now */
if (dexSwapAndVerify(addr, len) != 0)
goto bail;
/*
* Now that the DEX file can be read directly, create a DexFile struct
* for it.
*/
//此函数也有我们需要的dex的起始地址和字节数
if (dvmDexFileOpenPartial(addr, len, &pDvmDex) != 0) {
ALOGE("Unable to create DexFile");
goto bail;
}
...
result = true;
bail:
...
return result;
}
这个函数就有我们想要的东西了,就是dex的起始地址和dex的字节数
其中dvmDexFileOpenPartial方法就是一个脱壳点,其代码如下
int dvmDexFileOpenPartial(const void* addr, int len, DvmDex** ppDvmDex)
{
...
pDexFile = dexFileParse((u1*)addr, len, parseFlags);
...
result = 0;
bail:
return result;
}
可以看出dexFileParse也有我们想要的参数,所以这里也可以进行内存dump脱壳
至此我们就找到了俩个脱壳点
dvmDexFileOpenPartial
dexFileParse
尾言
其实脱壳点还有很多我这里值举例说明一下,感兴趣的同学可以去继续深究android源码