您的位置:首页 > 其它

MachO文件详解--逆向开发

2019-11-23 23:05 3139 查看

今天是逆向开发的第5天内容--MachO文件(Mac 和 iOS 平台可执行的文件),在逆向开发中是比较重要的,下面我们着重讲解一下MachO文件的基本内容和使用。

一、MachO概述

1. 概述

Mach-O是Mach Object文件格式的缩写,是iOS以及Mac上可执行文件的格式,类似于Windows上的exe格式、Linux上的elf格式。Mach-O是一种用于可执行文件、动态库以及目标代码的文件格式,是a.out格式的替代,提供了更强的扩展性。

2.常见格式

Mach-O常见格式如下:

  • 目标文件 .o
  • 库文件
  1. .a
  2. .dylib
  3. .framework
  • 可执行文件
  • dyld
  • .dsym

  可以通过 file 命令(file 文件路径)查看文件类型

我们通过部分实例代码来简单研究一下。

2.1目标文件.o

通过test.c 文件,可以使用clang命令将其编译成目标文件.o

//
// Entry point for dyld.  The kernel loads dyld and jumps to __dyld_start which
// sets up some registers and calls this function.
//
// Returns address of main() in target program which __dyld_start jumps to
//
// NOTE(review): this listing is an abridged excerpt; the lines consisting only
// of "..." appear to mark code that was elided and are not valid C++.
// The numbered "Step N" comments below divide the launch sequence into nine stages.
//
uintptr_t
_main(const macho_header* mainExecutableMH, uintptr_t mainExecutableSlide,
int argc, const char* argv[], const char* envp[], const char* apple[],
uintptr_t* startGlue)
{
// Grab the cdHash of the main executable from the environment
// Step 1: set up the runtime environment
uint8_t mainExecutableCDHashBuffer[20];
const uint8_t* mainExecutableCDHash = nullptr;
if ( hexToBytes(_simple_getenv(apple, "executable_cdhash"), 40, mainExecutableCDHashBuffer) )
// Conversion succeeded: point at the decoded hash of the main executable
mainExecutableCDHash = mainExecutableCDHashBuffer;

// Trace dyld's load
notifyKernelAboutImage((macho_header*)&__dso_handle, _simple_getenv(apple, "dyld_file"));
#if !TARGET_IPHONE_SIMULATOR
// Trace the main executable's load
notifyKernelAboutImage(mainExecutableMH, _simple_getenv(apple, "executable_file"));
#endif

uintptr_t result = 0;
// Record the main executable's macho_header
sMainExecutableMachHeader = mainExecutableMH;
// Record the main executable's slide value
sMainExecutableSlide = mainExecutableSlide;

CRSetCrashLogMessage("dyld: launch started");
// Set up the context information
setContext(mainExecutableMH, argc, argv, envp, apple);

// Pickup the pointer to the exec path.
// (i.e. get the path of the main executable)
sExecPath = _simple_getenv(apple, "executable_path");

// <rdar://problem/13868260> Remove interim apple[0] transition code from dyld
if (!sExecPath) sExecPath = apple[0];

if ( sExecPath[0] != '/' ) {
// have relative path, use cwd to make absolute
char cwdbuff[MAXPATHLEN];
if ( getcwd(cwdbuff, MAXPATHLEN) != NULL ) {
// maybe use static buffer to avoid calling malloc so early...
// +2 leaves room for the '/' separator and the terminating NUL
char* s = new char[strlen(cwdbuff) + strlen(sExecPath) + 2];
strcpy(s, cwdbuff);
strcat(s, "/");
strcat(s, sExecPath);
sExecPath = s;
}
}

// Remember short name of process for later logging
// (the part of sExecPath after the last '/', or the whole path if there is none)
sExecShortName = ::strrchr(sExecPath, '/');
if ( sExecShortName != NULL )
++sExecShortName;
else
sExecShortName = sExecPath;

// Configure process restrictions
configureProcessRestrictions(mainExecutableMH);

// Check the environment variables
checkEnvironmentVariables(envp);
defaultUninitializedFallbackPaths(envp);

// If DYLD_PRINT_OPTS is set, call printOptions() to print the arguments
if ( sEnv.DYLD_PRINT_OPTS )
printOptions(argv);
// If DYLD_PRINT_ENV is set, call printEnvironmentVariables() to print the environment variables
if ( sEnv.DYLD_PRINT_ENV )
printEnvironmentVariables(envp);
// Get host info (the original note describes this as getting the current program's architecture)
getHostInfo(mainExecutableMH, mainExecutableSlide);
//------------- end of step 1 -------------

// load shared cache
// Step 2: load the shared cache
// Check whether the shared region is disabled (original note: it must be enabled on iOS)
checkSharedRegionDisable((mach_header*)mainExecutableMH);
if ( gLinkContext.sharedRegionMode != ImageLoader::kDontUseSharedRegion ) {
mapSharedCache();
}
...

try {
// add dyld itself to UUID list
addDyldImageToUUIDList();

// instantiate ImageLoader for main executable
// Step 3: instantiate the main executable
sMainExecutable = instantiateFromLoadedImage(mainExecutableMH, mainExecutableSlide, sExecPath);
gLinkContext.mainExecutable = sMainExecutable;
gLinkContext.mainExecutableCodeSigned = hasCodeSignatureLoadCommand(mainExecutableMH);

// Now that shared cache is loaded, set up any versioned dylib overrides
#if SUPPORT_VERSIONED_PATHS
checkVersionedPaths();
#endif

// dyld_all_image_infos image list does not contain dyld
// add it as dyldPath field in dyld_all_image_infos
// for simulator, dyld_sim is in image list, need host dyld added
#if TARGET_IPHONE_SIMULATOR
// get path of host dyld from table of syscall vectors in host dyld
void* addressInDyld = gSyscallHelpers;
#else
// get path of dyld itself
void*  addressInDyld = (void*)&__dso_handle;
#endif
char dyldPathBuffer[MAXPATHLEN+1];
int len = proc_regionfilename(getpid(), (uint64_t)(long)addressInDyld, dyldPathBuffer, MAXPATHLEN);
if ( len > 0 ) {
dyldPathBuffer[len] = '\0'; // proc_regionfilename() does not zero terminate returned string
// only replace the recorded path when it differs from the one just looked up
if ( strcmp(dyldPathBuffer, gProcessInfo->dyldPath) != 0 )
gProcessInfo->dyldPath = strdup(dyldPathBuffer);
}

// load any inserted libraries
// Step 4: load the inserted dynamic libraries
if  ( sEnv.DYLD_INSERT_LIBRARIES != NULL ) {
for (const char* const* lib = sEnv.DYLD_INSERT_LIBRARIES; *lib != NULL; ++lib)
loadInsertedDylib(*lib);
}
// record count of inserted libraries so that a flat search will look at
// inserted libraries, then main, then others.
// (i.e. remember how many dynamic libraries were inserted)
sInsertedDylibCount = sAllImages.size()-1;

// link main executable
// Step 5: link the main executable
gLinkContext.linkingMainExecutable = true;
#if SUPPORT_ACCELERATE_TABLES
if ( mainExcutableAlreadyRebased ) {
// previous link() on main executable has already adjusted its internal pointers for ASLR
// work around that by rebasing by inverse amount
sMainExecutable->rebase(gLinkContext, -mainExecutableSlide);
}
#endif
link(sMainExecutable, sEnv.DYLD_BIND_AT_LAUNCH, true, ImageLoader::RPathChain(NULL, NULL), -1);
sMainExecutable->setNeverUnloadRecursive();
if ( sMainExecutable->forceFlat() ) {
gLinkContext.bindFlat = true;
gLinkContext.prebindUsage = ImageLoader::kUseNoPrebinding;
}

// link any inserted libraries
// do this after linking main executable so that any dylibs pulled in by inserted
// dylibs (e.g. libSystem) will not be in front of dylibs the program uses
// Step 6: link the inserted dynamic libraries
if ( sInsertedDylibCount > 0 ) {
for(unsigned int i=0; i < sInsertedDylibCount; ++i) {
// i+1 skips index 0 -- presumably the main executable; confirm against sAllImages ordering
ImageLoader* image = sAllImages[i+1];
link(image, sEnv.DYLD_BIND_AT_LAUNCH, true, ImageLoader::RPathChain(NULL, NULL), -1);
image->setNeverUnloadRecursive();
}
// only INSERTED libraries can interpose
// register interposing info after all inserted libraries are bound so chaining works
for(unsigned int i=0; i < sInsertedDylibCount; ++i) {
ImageLoader* image = sAllImages[i+1];
image->registerInterposing();
}
}

// <rdar://problem/19315404> dyld should support interposition even without DYLD_INSERT_LIBRARIES
// covers the images after the inserted ones, skipping any that live in the shared cache
for (long i=sInsertedDylibCount+1; i < sAllImages.size(); ++i) {
ImageLoader* image = sAllImages[i];
if ( image->inSharedCache() )
continue;
image->registerInterposing();
}
...

// apply interposing to initial set of images
for(int i=0; i < sImageRoots.size(); ++i) {
sImageRoots[i]->applyInterposing(gLinkContext);
}
gLinkContext.linkingMainExecutable = false;

// <rdar://problem/12186933> do weak binding only after all inserted images linked
// Step 7: perform weak symbol binding
sMainExecutable->weakBind(gLinkContext);

// If cache has branch island dylibs, tell debugger about them
if ( (sSharedCacheLoadInfo.loadAddress != NULL) && (sSharedCacheLoadInfo.loadAddress->header.mappingOffset >= 0x78) && (sSharedCacheLoadInfo.loadAddress->header.branchPoolsOffset != 0) ) {
uint32_t count = sSharedCacheLoadInfo.loadAddress->header.branchPoolsCount;
dyld_image_info info[count];
const uint64_t* poolAddress = (uint64_t*)((char*)sSharedCacheLoadInfo.loadAddress + sSharedCacheLoadInfo.loadAddress->header.branchPoolsOffset);
// <rdar://problem/20799203> empty branch pools can be in development cache
if ( ((mach_header*)poolAddress)->magic == sMainExecutableMachHeader->magic ) {
for (int poolIndex=0; poolIndex < count; ++poolIndex) {
// stored pool address plus the shared cache slide
uint64_t poolAddr = poolAddress[poolIndex] + sSharedCacheLoadInfo.slide;
info[poolIndex].imageLoadAddress = (mach_header*)(long)poolAddr;
info[poolIndex].imageFilePath = "dyld_shared_cache_branch_islands";
info[poolIndex].imageFileModDate = 0;
}
// add to all_images list
addImagesToAllImages(count, info);
// tell gdb about new branch island images
gProcessInfo->notification(dyld_image_adding, count, info);
}
}

CRSetCrashLogMessage("dyld: launch, running initializers");
...
// run all initializers
// Step 8: run the initializers
initializeMainExecutable();

// notify any monitoring processes that this process is about to enter main()
dyld3::kdebug_trace_dyld_signpost(DBG_DYLD_SIGNPOST_START_MAIN_DYLD2, 0, 0);
notifyMonitoringDyldMain();

// find entry point for main executable
// Step 9: find the entry point and return it
result = (uintptr_t)sMainExecutable->getThreadPC();
if ( result != 0 ) {
// main executable uses LC_MAIN, needs to return to glue in libdyld.dylib
if ( (gLibSystemHelpers != NULL) && (gLibSystemHelpers->version >= 9) )
*startGlue = (uintptr_t)gLibSystemHelpers->startGlueToCallExit;
else
halt("libdyld.dylib support not present for LC_MAIN");
}
else {
// main executable uses LC_UNIXTHREAD, dyld needs to let "start" in program set up for main()
result = (uintptr_t)sMainExecutable->getMain();
*startGlue = 0;
}
}
catch(const char* message) {
// a C-string exception carries the failure message: sync images, then halt with it
syncAllImages();
halt(message);
}
catch(...) {
// any other exception is only logged here; control then continues after the try block
dyld::log("dyld: launch failed\n");
}
...

return result;
}
以上是dyld的_main函数,其步骤总结如下:

  1. 配置运行环境,获取当前运行架构
  2. 加载共享缓存,映射到当前运行架构
  3. 进行实例化主程序
  4. 开始加载插入的动态库
  5. 然后链接主程序
  6. 开始链接插入的动态库
  7. 弱符号绑定
  8. 初始化方法
  9. 寻找主程序的入口

要把dyld讲清楚是非常不容易的,因为它的加载过程本身就比较复杂,上面仅仅是我自己梳理、抽取出来的主要步骤。下面再画一张流程图,帮助大家理解。

 

四、总结

MachO文件对于逆向开发是非常重要的,通过本次讲解,希望对大家理解逆向开发有所帮助,也希望大家真正可以提高技术,应对iOS市场的大环境,下一篇我们将讲述Hook原理--逆向开发。谢谢!!!

 

 

 

 

 

 

 

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: