#!/usr/bin/env python3

#    DNSRecon Data Parser
#
#    Copyright (C) 2012  Carlos Perez
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; Applies version 2 of the License.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

# Module metadata: the version string is printed by usage(); the author
# entry records the maintainer's name and contact address.
__version__ = '0.0.7'
__author__ = 'Carlos Perez, Carlos_Perez@darkoperator.com'

import csv
import getopt
import os
import re
import sys
import xml.etree.ElementTree as cElementTree

from netaddr import *

# Function Definitions
# ------------------------------------------------------------------------------


def print_status(message=''):
    """
    Print a status message prefixed with a bold blue ``[*]`` marker.
    """
    marker = '\033[1;34m[*]\033[1;m'
    print(f'{marker} {message}')


def print_good(message=''):
    """
    Print a result message prefixed with a bold green ``[*]`` marker.
    """
    marker = '\033[1;32m[*]\033[1;m'
    print(f'{marker} {message}')


def print_error(message=''):
    """
    Print an error message prefixed with a bold red ``[-]`` marker.
    """
    marker = '\033[1;31m[-]\033[1;m'
    print(f'{marker} {message}')


def print_debug(message=''):
    """
    Print a debug message prefixed with a bold red ``[!]`` marker.
    """
    marker = '\033[1;31m[!]\033[1;m'
    print(f'{marker} {message}')


def print_line(message=''):
    """
    Print the message on its own line with no prefix or colouring.
    """
    text = format(message)
    print(text)


def process_range(arg):
    """
    Convert the text form of an IPv4 or IPv6 range into an iterable of IPs.

    Two notations are accepted:

      * CIDR, e.g. ``192.0.2.0/24``           -> netaddr ``IPNetwork``
      * first-last, e.g. ``192.0.2.1-192.0.2.9`` -> netaddr ``IPRange``

    Both netaddr objects iterate over their individual addresses, so the
    result can be handed straight to ``list.extend()``.

    When the argument matches neither notation, or netaddr rejects it, an
    error message is printed and an empty list is returned. The function
    never returns None.
    """
    ip_list = None
    try:
        # Tolerate whitespace left over from splitting a comma separated list.
        text = arg.strip()
        if re.match(r'\S*/\S*', text):
            ip_list = IPNetwork(text)
        else:
            bounds = text.split('-')
            if len(bounds) == 2:
                ip_list = IPRange(bounds[0].strip(), bounds[1].strip())
    except Exception:
        # netaddr signals bad input with several exception types; every one
        # of them simply means "not a usable range" here.
        ip_list = None

    if ip_list is None:
        print_error(f'Range provided is not valid: {arg}')
        return []
    return ip_list


def xml_parse(xm_file, ifilter, tfilter, nfilter, list):
    """
    Parse an XML file created by DNSRecon and print the records that pass
    the given filters.

    xm_file -- path of the XML file to read.
    ifilter -- collection of IP addresses to keep; an empty collection
               disables IP filtering.
    tfilter -- regular expression matched (case-insensitive, anchored at the
               start) against the record type.
    nfilter -- regular expression searched (case-insensitive) in the host
               name carried by the record.
    list    -- when true, print only the unique addresses of the matching
               records, one per line, after the whole file has been read;
               otherwise print one formatted line per matching record.

    Records without an ``address`` attribute are only reported when they are
    TXT or SPF records and ``list`` is false.
    """
    # Ordered table: record-type pattern, attribute holding the host name the
    # name filter applies to, and the attributes printed for a match.
    # The first entry whose type AND name both match wins.
    record_layouts = (
        (r'PTR|^[A]$|AAAA', 'name', ('type', 'name', 'address')),
        (r'NS', 'target', ('type', 'target', 'address')),
        (r'SOA', 'mname', ('type', 'mname', 'address')),
        (r'MX', 'exchange', ('type', 'exchange', 'address')),
        (r'SRV', 'target', ('type', 'name', 'address', 'target', 'port')),
    )

    iplist = []
    seen = set()  # O(1) duplicate check; iplist keeps first-seen order

    for _event, elem in cElementTree.iterparse(xm_file):
        if elem.tag != 'record':
            continue
        attrib = elem.attrib

        if 'address' not in attrib:
            # Only TXT and SPF records are reported without an address.
            if re.match(tfilter, attrib['type'], re.I):
                if re.search(r'TXT|SPF', attrib['type']) and not list:
                    print_good('{0} {1}'.format(attrib['type'], attrib['strings']))
            continue

        address = attrib['address']
        # Reject the 'no_ip' placeholder before IPAddress() can choke on it.
        if address == 'no_ip':
            continue
        if len(ifilter) != 0 and not (address and IPAddress(address) in ifilter):
            continue
        if not re.match(tfilter, attrib['type'], re.I):
            continue

        for type_pattern, name_attr, fields in record_layouts:
            if re.search(type_pattern, attrib['type']) and re.search(nfilter, attrib[name_attr], re.I):
                if list:
                    if address and address not in seen:
                        seen.add(address)
                        iplist.append(address)
                else:
                    print_good(' '.join(attrib[field] for field in fields))
                break

    # Emit the unique address list collected in list mode.
    if iplist:
        try:
            for ip in iplist:
                print(ip)
        except OSError:
            # Output stream went away (e.g. closed pipe); stop quietly.
            sys.exit(0)


def csv_parse(csv_file, ifilter, tfilter, nfilter, list):
    """
    Parse a CSV file created by DNSRecon and print the rows that pass the
    given filters.

    The first row is treated as a header and skipped. Each remaining row is
    read as: column 0 = record type, column 1 = host name, column 2 =
    address. Rows with fewer than three columns are ignored.

    csv_file -- path of the CSV file to read.
    ifilter  -- collection of IP addresses to keep; an empty collection
                disables IP filtering.
    tfilter  -- regular expression searched (case-insensitive) in the type.
    nfilter  -- regular expression searched (case-insensitive) in the name.
    list     -- when true, print each distinct non-empty address once, in
                first-seen order; otherwise print every matching row with
                its fields joined by single spaces.
    """
    seen = set()
    # newline='' lets the csv module do its own line-ending handling.
    with open(csv_file, newline='') as handle:
        reader = csv.reader(handle, delimiter=',')
        next(reader, None)  # skip the header; an empty file yields no rows
        for row in reader:
            if len(row) < 3:
                continue
            rtype, name, address = row[0], row[1], row[2]

            # Reject the 'no_ip' placeholder before IPAddress() can choke
            # on it; an empty address can never be inside an IP filter.
            if address == 'no_ip':
                continue
            if len(ifilter) != 0 and not (address and IPAddress(address) in ifilter):
                continue

            if not (re.search(tfilter, rtype, re.I) and re.search(nfilter, name, re.I)):
                continue

            if list:
                if address and address not in seen:
                    seen.add(address)
                    print(address)
            else:
                print_good(' '.join(row))


def extract_hostnames(file):
    """
    Collect the unique host names found in a DNSRecon XML or CSV file.

    A host name is the part of the record's name before its first dot.
    For XML input only records carrying an ``address`` attribute are used,
    and the attribute read depends on the record type (``name`` for
    A/AAAA/PTR, ``target`` for NS and SRV, ``mname`` for SOA, ``exchange``
    for MX). For CSV input the second column of every row after the header
    is used.

    file -- path of the file to read; its format is determined with
            detect_type().

    Returns a sorted list of the distinct, non-empty host names.
    """
    # Ordered table: record-type pattern and the attribute holding the name.
    # The first pattern that matches the record type wins.
    name_attrs = (
        (r'PTR|^[A]$|AAAA', 'name'),
        (r'NS', 'target'),
        (r'SOA', 'mname'),
        (r'MX', 'exchange'),
        (r'SRV', 'target'),
    )

    host_names = set()
    file_type = detect_type(file)

    if file_type == 'xml':
        for _event, elem in cElementTree.iterparse(file):
            if elem.tag != 'record' or 'address' not in elem.attrib:
                continue
            for type_pattern, name_attr in name_attrs:
                if re.search(type_pattern, elem.attrib['type']):
                    host_names.add(elem.attrib[name_attr].partition('.')[0])
                    break

    elif file_type == 'csv':
        # newline='' lets the csv module do its own line-ending handling.
        with open(file, newline='') as handle:
            reader = csv.reader(handle, delimiter=',')
            next(reader, None)  # skip the header row
            for row in reader:
                if len(row) > 1:
                    host_names.add(row[1].partition('.')[0])

    # A real list (not a lazy filter object) so callers can sort or index it.
    return sorted(name for name in host_names if name)


def detect_type(file):
    """
    Detect the type of a file by inspecting its first line.

    Returns 'xml' when the first line contains the text "xml version", or
    'csv' when it contains at least two commas (three comma separated
    fields). Raises Exception('Unsupported File Type') for anything else,
    including an empty file.
    """
    # Only the first line is needed for the check
    with open(file) as handle:
        header = handle.readline()

    if re.search('(xml version)', header):
        return 'xml'
    if re.search(r'\w*,[^,]*,[^,]*', header):
        return 'csv'
    raise Exception('Unsupported File Type')


def usage():
    """
    Print the version banner and the command line help, then exit with
    status 0.
    """
    help_lines = (
        f'Version: {__version__}',
        'DNSRecon output file parser',
        'Usage: parser.py <options>\n',
        'Options:',
        '   -h, --help               Show this help message and exit',
        '   -f, --file    <file>     DNSRecon XML or CSV output file to parse.',
        '   -l, --list               Output an unique IP List that can be used with other tools.',
        '   -i, --ips     <ranges>   IP Ranges in a comma separated list each in formats (first-last)',
        '                            or in (range/bitmask) for ranges to be included from output.',
        '                            For A, AAAA, NS, MX, SOA, SRV and PTR Records.',
        '   -t, --type    <type>     Resource Record Types as a regular expression to filter output.',
        '                            For A, AAAA, NS, MX, SOA, TXT, SPF, SRV and PTR Records.',
        '   -s, --str     <regex>    Regular expression between quotes for filtering host names on.',
        '                            For A, AAAA, NS, MX, SOA, SRV and PTR Records.',
        '   -n, --name               Return list of unique host names.',
        '                            For A, AAAA, NS, MX, SOA, SRV and PTR Records.',
    )
    for help_line in help_lines:
        print(help_line)
    sys.exit(0)


def main():
    """
    Command line entry point.

    Reads the options from ``sys.argv``, then either prints the unique host
    names found in the given file (``-n``) or prints its records filtered by
    IP range (``-i``), record type (``-t``) and host name (``-s``),
    optionally reduced to a unique address list (``-l``).

    Exits with status 1 when the file passed to ``-f`` does not exist, and
    prints the usage text (which exits with status 0) when no file is given
    or help is requested. Returns without parsing when an unknown option is
    supplied.
    """
    #
    # Option Variables
    #
    ip_filter = []
    name_filter = '(.*)'
    type_filter = '(.*)'
    target_list = False
    file = None
    names = False

    #
    # Define options
    #
    try:
        options, _args = getopt.getopt(
            sys.argv[1:],
            'hi:t:s:lf:n',
            ['help', 'ips=', 'type=', 'str=', 'list', 'file=', 'name'],
        )

    except getopt.GetoptError as error:
        print_error('Wrong Option Provided!')
        print_error(error)
        return

    #
    # Parse options
    #
    for opt, arg in options:
        if opt in ('-t', '--type'):
            type_filter = arg

        elif opt in ('-i', '--ips'):
            for ip_range_text in arg.split(','):
                ip_range = process_range(ip_range_text)
                # Guard against a range that could not be parsed.
                if ip_range is not None:
                    ip_filter.extend(ip_range)

        elif opt in ('-s', '--str'):
            name_filter = f'({arg})'

        elif opt in ('-l', '--list'):
            target_list = True

        elif opt in ('-f', '--file'):
            # Check if the file to parse exists
            if os.path.isfile(arg):
                file = arg
            else:
                print_error(f'File {arg} does not exist!')
                sys.exit(1)

        elif opt in ('-n', '--name'):
            names = True

        # Tuple membership, so the long form --help is recognised as well.
        elif opt in ('-h', '--help'):
            usage()

    # start execution based on options
    if file:
        if names:
            try:
                # sorted() accepts any iterable, whatever extract_hostnames
                # hands back.
                for host_name in sorted(extract_hostnames(file)):
                    print_line(host_name)
            except OSError:
                sys.exit(0)
        else:
            file_type = detect_type(file)
            if file_type == 'xml':
                xml_parse(file, ip_filter, type_filter, name_filter, target_list)
            elif file_type == 'csv':
                csv_parse(file, ip_filter, type_filter, name_filter, target_list)
    else:
        print_error('A DNSRecon XML or CSV output file must be provided to be parsed')
        usage()


# Run the parser only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()