您的位置:首页 > 其它

malware detection and machine learning(EMBER)

2022-05-29 17:24 1431 查看

EMBER

https://github.com/elastic/ember
paper:  https://arxiv.org/abs/1804.04637

特征

9个特征组,可以分为两大部分

文件结构无关特征

  • 字节直方图

  • 字节熵直方图

  • 可打印字符串统计

    {'numstrings': 3967,
    'avlength': 16.07159062263675,
    'printabledist': [3729,65,……],
    'printables': 63756,
    'entropy': 5.877838134765625,
    'paths': 4,
    'urls': 26,
    'registry': 0,
    'MZ': 11}

文件结构相关特征

  • general
  • file header
  • sections
  • imports
  • exports
  • datadirectories

分别如下:

  • general

    # 直接使用数值作为特征数值
    {'size': 1237896,
    'vsize': 1241088,
    'has_debug': 1,
    'exports': 0,
    'imports': 314,
    'has_relocations': 1,
    'has_resources': 1,
    'has_signature': 1,
    'has_tls': 1,
    'symbols': 0}
  • file header

    coff header
    optional header
# 数值保持原始;文本进行hash
{'coff': {'timestamp': 1639042586,
'machine': 'I386',
'characteristics': ['CHARA_32BIT_MACHINE', 'EXECUTABLE_IMAGE']},
'optional': {'subsystem': 'WINDOWS_GUI',
'dll_characteristics': ['DYNAMIC_BASE',
'NX_COMPAT',
'TERMINAL_SERVER_AWARE'],
'magic': 'PE32',
'major_image_version': 0,
'minor_image_version': 0,
'major_linker_version': 14,
'minor_linker_version': 29,
'major_operating_system_version': 6,
'minor_operating_system_version': 0,
'major_subsystem_version': 6,
'minor_subsystem_version': 0,
'sizeof_code': 368640,
'sizeof_headers': 1024,
'sizeof_heap_commit': 4096}}
  • sections

    # 数值+hash
    {'entry': '.text',
    'sections': [{'name': '.text',
    'size': 368640,
    'entropy': 6.463957857941052,
    'vsize': 368140,
    'props': ['CNT_CODE', 'MEM_EXECUTE', 'MEM_READ']},
    {'name': '.rdata',
    'size': 104960,
    'entropy': 4.837026560868303,
    'vsize': 104760,
    'props': ['CNT_INITIALIZED_DATA', 'MEM_READ']},
    {'name': '.data',
    'size': 28672,
    'entropy': 0.6108592144000272,
    'vsize': 32760,
    'props': ['CNT_INITIALIZED_DATA', 'MEM_READ', 'MEM_WRITE']},
    {'name': '.rsrc',
    'size': 703488,
    'entropy': 5.868256562445707,
    'vsize': 703408,
    'props': ['CNT_INITIALIZED_DATA', 'MEM_READ']},
    {'name': '.reloc',
    'size': 22016,
    'entropy': 6.754089624508025,
    'vsize': 21584,
    'props': ['CNT_INITIALIZED_DATA', 'MEM_DISCARDABLE', 'MEM_READ']}]}
  • imports

    # dll+导入函数名: hash
    {'NETAPI32.dll': ['NetUserGetGroups', 'NetUserGetLocalGroups'],
    'RPCRT4.dll': ['UuidFromStringW'],
    'VERSION.dll': ['GetFileVersionInfoW',
    'GetFileVersionInfoSizeW',
    'VerQueryValueW'],
    'KERNEL32.dll': ['FindFirstFileExW',
    'FindClose',
    'GetConsoleOutputCP',
    'SetFilePointerEx',
    'GetFileSizeEx',
    'ReadConsoleW',
    'ReadConsoleInputW',
    'SetConsoleMode',
    ……]}
  • exports

    # 导出函数: hash
  • datadirectories

    # 直接使用 size 和 virtual_address 数值作为特征数值
    [{'name': 'EXPORT_TABLE', 'size': 0, 'virtual_address': 0},
    {'name': 'IMPORT_TABLE', 'size': 300, 'virtual_address': 470148},
    {'name': 'RESOURCE_TABLE', 'size': 703408, 'virtual_address': 512000},
    {'name': 'EXCEPTION_TABLE', 'size': 0, 'virtual_address': 0},
    {'name': 'CERTIFICATE_TABLE', 'size': 9096, 'virtual_address': 1228800},
    {'name': 'BASE_RELOCATION_TABLE', 'size': 21584, 'virtual_address': 1216512},
    {'name': 'DEBUG', 'size': 112, 'virtual_address': 452584},
    {'name': 'ARCHITECTURE', 'size': 0, 'virtual_address': 0},
    {'name': 'GLOBAL_PTR', 'size': 0, 'virtual_address': 0},
    {'name': 'TLS_TABLE', 'size': 24, 'virtual_address': 452928},
    {'name': 'LOAD_CONFIG_TABLE', 'size': 64, 'virtual_address': 452696},
    {'name': 'BOUND_IMPORT', 'size': 0, 'virtual_address': 0},
    {'name': 'IAT', 'size': 1368, 'virtual_address': 372736},
    {'name': 'DELAY_IMPORT_DESCRIPTOR', 'size': 0, 'virtual_address': 0},
    {'name': 'CLR_RUNTIME_HEADER', 'size': 0, 'virtual_address': 0}]
模型

    lightgbm

    params = {
    "boosting": "gbdt",
    "objective": "binary",
    "num_iterations": 1000,
    "learning_rate": 0.05,
    "num_leaves": 2048,
    "max_depth": 15,
    "min_data_in_leaf": 50,
    "feature_fraction": 0.5
    }

    malconv

    maxlen = 2**20 # 1MB
    embedding_size = 8
    
    # define model structure
    inp = Input( shape=(maxlen,))
    emb = Embedding( input_dim, embedding_size )( inp )
    filt = Conv1D( filters=128, kernel_size=500, strides=500, use_bias=True, activation='relu', padding='valid' )(emb)
    attn = Conv1D( filters=128, kernel_size=500, strides=500, use_bias=True, activation='sigmoid', padding='valid')(emb)
    gated = Multiply()([filt,attn])
    feat = GlobalMaxPooling1D()( gated )
    dense = Dense(128, activation='relu')(feat)
    outp = Dense(1, activation='sigmoid')(dense)
    
    basemodel = Model( inp, outp )
    内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
    标签: 
    相关文章推荐