PythonPython学习程序员

Python之利用机器学习检测安卓恶意软件实现(一)

2016-09-27  本文已影响1992人  CaptainXero

前言

上一篇文章写了如何使用Python写一个简单的爬虫,批量抓取APK的下载链接。这篇文章记录下如何批量拆包APK文件并提取想要的信息。

准备工作

下载Androguard.png

下载完成后得到一个压缩包,解压后进入目录,把Androguard目录下所有的文件拷贝至Python的根目录下,合并同名文件夹即可。

检查环境.png

注意:在cmd下直接调用python命令,需要先将Python的安装目录加入到环境变量PATH中。

基础知识

首先APK文件可以用普通解压缩的方式拆包,如下图。

解压完毕.png

熟悉安卓开发的人肯定对解压完毕后的文件很熟悉。这里主要介绍部分文件的作用。

assets.png 库文件.png 乱码.png

没错,是乱码。这说明仅仅依靠解压缩APK的方式,无法获得我们关心的信息。

使用Python获取APK信息

到此为止,相信大家对APK的结构已经有了一定了解。下面以获取APP申请的权限为例,使用Python完成APK的拆包和信息提取。

__author__ = 'Administrator'
#coding=utf-8
from androguard.core.bytecodes import apk, dvm
from androguard.core.analysis import analysis
import re
# Running index shared with writeToTxt, which uses it to name each output
# file ("<count>.txt") and increments it after every file written.
count = 1

def get_permissions(path, filename):
    """Print, record and return the permissions of the APK at *path*.

    The permission list obtained from ``APK.get_permissions()`` is printed,
    handed to ``writeToTxt`` under the label "Permission:" together with
    *filename*, and then returned to the caller.
    """
    declared = apk.APK(path).get_permissions()
    print(declared)
    writeToTxt("Permission:", declared, filename)
    return declared

def get_apis(path, filename):
    """Collect the method calls an APK makes into classes it does not define.

    Every instruction of every method that has code is rendered with
    ``get_output()`` and matched against a ``Lclass;->name(args)...``
    pattern. Calls whose target class is one of the APK's own classes are
    dropped; the remaining call strings are de-duplicated and sorted.

    The sorted list is printed, passed to ``writeToTxt`` under the label
    "Methods:" together with *filename*, and returned.

    NOTE(review): ``analysis.newVMAnalysis`` belongs to an older Androguard
    API -- confirm it exists in the installed version.
    """
    app = apk.APK(path)
    app_dex = dvm.DalvikVMFormat(app.get_dex())
    app_x = analysis.newVMAnalysis(app_dex)

    # A set makes the "is this one of the APK's own classes?" test O(1);
    # it runs once per matching instruction.
    own_classes = set(cc.get_name() for cc in app_dex.get_classes())
    # Compiled once instead of being looked up for every instruction.
    call_pattern = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')

    methods = set()
    for method in app_dex.get_methods():
        if method.get_code() is None:
            continue
        # Only methods that have code need their analysis object.
        g = app_x.get_method(method)
        for block in g.get_basic_blocks().get():
            for ins in block.get_instructions():
                match = call_pattern.search(ins.get_output())
                if match and match.group(1) not in own_classes:
                    methods.add(match.group())

    methods = sorted(methods)
    print("methods:" + "\n")
    print(methods)
    writeToTxt("Methods:", methods, filename)
    return methods
def get_providers(path, filename):
    """Print, record and return the providers of the APK at *path*.

    The value of ``APK.get_providers()`` is printed after a "providers:"
    header, handed to ``writeToTxt`` under the label "Providers:" together
    with *filename*, and then returned.
    """
    found = apk.APK(path).get_providers()
    print("providers:" + "\n")
    print(found)
    writeToTxt("Providers:", found, filename)
    return found
def get_package(path, filename):
    """Print, record and return the package name of the APK at *path*.

    ``writeToTxt`` iterates over its second argument, so the package name is
    wrapped in a one-element list; passing the bare string would make it
    process the name one character at a time.

    Returns the package name exactly as ``APK.get_package()`` reports it.
    """
    packname = apk.APK(path).get_package()
    print("packageName:" + "\n")
    print(packname)
    writeToTxt("PackageName:", [packname], filename)
    return packname
def get_activities(path, filename):
    """Print, record and return the activities of the APK at *path*.

    The value of ``APK.get_activities()`` is printed after an
    "ActivityName:" header, handed to ``writeToTxt`` under the label
    "Activitys:" together with *filename*, and then returned.
    """
    found = apk.APK(path).get_activities()
    print("ActivityName:" + "\n")
    print(found)
    writeToTxt("Activitys:", found, filename)
    return found
def get_receivers(path, filename):
    """Print, record and return the receivers of the APK at *path*.

    The value of ``APK.get_receivers()`` is printed after a "Receivers:"
    header, handed to ``writeToTxt`` under the label "Receivers:" together
    with *filename*, and then returned.
    """
    found = apk.APK(path).get_receivers()
    print("Receivers:" + "\n")
    print(found)
    writeToTxt("Receivers:", found, filename)
    return found
def get_services(path, filename):
    """Print, record and return the services of the APK at *path*.

    The value of ``APK.get_services()`` is printed after a "Services:"
    header, handed to ``writeToTxt`` under the label "Services:" together
    with *filename*, and then returned.
    """
    found = apk.APK(path).get_services()
    print("Services:" + "\n")
    print(found)
    writeToTxt("Services:", found, filename)
    return found
def writeToTxt(str, file, filename):
    """Write the last dotted component of each item to the next numbered file.

    The output file is named after the module-level counter ``count``
    ("1.txt", "2.txt", ...), which is incremented once the file has been
    written. Each item is split on "." and only its final component is
    written, followed by a tab.

    Args:
        str: Section label. Accepted for the callers' sake; not written.
        file: Iterable of strings, or a single string. A single string is
            treated as one item rather than being iterated per character.
        filename: Unused; the output name comes from ``count``.
    """
    global count
    # The builtin ``str`` is shadowed by the first parameter, so the text
    # types are derived from literals instead of being named directly.
    text_types = (type(""), type(u""))
    items = [file] if isinstance(file, text_types) else file

    # ``with`` closes the file even if one of the writes fails.
    with open('%d' % count + '.txt', 'w') as fm:
        for item in items:
            fm.write(item.split('.')[-1])
            fm.write("\t")
    count += 1

def main(path, apkname):
    """Run the enabled feature extractors on the APK at *path*.

    Each extractor is called as ``extract(path, apkname)``. Only the
    permission extractor is enabled; the others are listed as comments and
    can be switched on by uncommenting them.
    """
    extractors = (
        get_permissions,
        # get_apis,
        # get_providers,
        # get_package,
        # get_activities,
        # get_receivers,
        # get_services,
    )
    for extract in extractors:
        extract(path, apkname)

if __name__ == '__main__':
    # NOTE(review): get_permissions passes this path straight to apk.APK,
    # yet it looks like a directory rather than a single .apk file -- confirm.
    main("D:/sample/Good", "sampleInfo.txt")

__author__ = 'Administrator'
#-*- coding:GBK -*-
import os
import os.path
import sys
import subprocess
import getFeatures

# Root directory that is scanned for .apk samples.
rootdir = "D:/Sample/Good//"
# Destination directory for unpacked output.
# NOTE(review): nothing visible in this script writes here -- confirm it is still needed.
destdir = "D:/Sample/workSample/badDone//"
# apktool command line; not referenced by any live code in this script.
command = "java -jar D://apktool.jar"
class Packages:
    """Walk a directory tree and run feature extraction on every APK in it.

    Attributes:
        sdir: Directory that check() scans.
        ddir: Destination directory given at construction; stored only,
            check() does not use it.
    """

    def __init__(self, srcdir, desdir):
        self.sdir = srcdir
        self.ddir = desdir

    def check(self):
        """Print every file under ``self.sdir`` and extract features from APKs.

        Each file name is printed. For files whose extension is exactly
        ".apk", ``getFeatures.main`` is called with the full path and the
        file name without its extension. An IOError raised by the extraction
        is printed and the process then exits with status 1.
        """
        print("--------------------starting unpackage!---------------------")
        # Scan the directory this instance was built with, not a module global.
        for dirpath, dirnames, filenames in os.walk(self.sdir):
            for filename in filenames:
                thefile = os.path.join(dirpath, filename)
                apkname, extension = os.path.splitext(filename)
                print(filename)
                if extension != ".apk":
                    continue
                try:
                    getFeatures.main(thefile, apkname)
                except IOError as err:
                    print(err)
                    # Non-zero status so a failed run is visible to the caller.
                    sys.exit(1)
                else:
                    print("******************well done******************")

if __name__ == "__main__":
    # Build the walker and start processing straight away.
    Packages(rootdir, 'e:/').check()
程序运行.png 处理前.png 输出的txt.png txt中权限.png

总结

到此为止,我们就把安卓中的权限信息提取出来了。这为后面使用机器学习方式对安卓应用进行检测提供了基本的数据。在接下来的文章中将会进一步介绍如何使用Python实现机器学习的方式检测安卓恶意应用。

上一篇 下一篇

猜你喜欢

热点阅读