#!/usr/bin/python # Copyright (C) 2010 Michael Ligh # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # [NOTES] ----------------------------------------------------------- # 1) Tested on Linux (Ubuntu), Windows XP/7, and Mac OS X # 2) This script requires the avsubmit.py module from Chapter 4 # 3) You must NOT use this script if any respective vendors prohibit # you from doing so. See all relevant acceptable usage policies. #-------------------------------------------------------------------- import os, sys from sqlite3 import * from avsubmit import ThreatExpert from optparse import OptionParser import string DBNAME = "artifacts.db" class FileSystem: def __init__(self, data): self.data = data self.column_names = {0 : 'ID', 1 : 'Name', 2 : 'Size', 3 : 'Hash', 4 : 'Alias'} def process_filenames(self, str): files = [] lines = str.split('
') for line in lines: line = line.strip() line = line.rstrip() a_start = line.find('') f_end = copy.find('<') if f_start != -1 and f_end != -1: files.append("%s%s" % (line[0:a_start], copy[f_start+1:f_end])) elif line.lower().startswith('c:'): files.append(line) return files def process_hashes(self, str): hashes = {} lines = str.split('
') for line in lines: pair = line.split(':') hashes[pair[0]] = pair[1].strip() return hashes def process_column(self, column, ncol): start_value = column.find('>') if start_value == -1: return column = column[start_value+1:] end_column = column.find('') if end_column == -1: return str = column[0:end_column] if self.column_names[ncol] == 'Name': files = self.process_filenames(str) return {'files': files} elif self.column_names[ncol] == 'Hash': hashes = self.process_hashes(str) return {'hashes': hashes} return None def process_row(self, row): end_row = row.find('') if end_row == -1: return row = row[0:end_row] offset = 0 ncol = 0 row_info = {} while row[offset:].find('') if end_table != -1: table_data = data[start_table:start_table+end_table] offset = 0 nrow = 0 file_info = [] while table_data[offset:].find('') != -1: ofs = table_data[offset:].find('') + 4 row = table_data[offset+ofs:] if nrow > 0: row_info = self.process_row(row) file_info.append(row_info) offset += ofs nrow += 1 return file_info class BulletParser: def __init__(self, data, mark): self.data = data self.mark = mark def parse(self): data = self.data mark = self.mark values = [] mark = data.find(mark) if mark != -1: start = data[mark:].find('

') if start != -1: end = data[mark+start+4:].find('') if end != -1: data = data[mark+start+4:mark+start+4+end] str = data.split('

') for s in str: s = s.rstrip() if s.endswith('

'): values.append(s[0:-5]) return values def bulkimport(page): import httplib conn = httplib.HTTPConnection('www.threatexpert.com') conn.request('GET', '/reports.aspx?page=%d' % page) response = conn.getresponse().read() lines = response.split('\n') for line in lines: if line.startswith('