#!/usr/bin/env python
#
#  File: afl-cov
#
#  Version: 0.1
#
#  Purpose: Perform lcov coverage diffs against each AFL queue file to see
#           how function and line coverage evolve over an AFL fuzzing cycle.
#
#  Copyright (C) 2015 Michael Rash (mbr@cipherdyne.org)
#
#  License (GNU General Public License):
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
#  USA
#

from shutil import rmtree
import re
import subprocess
import glob
import string
import argparse
import time
import sys, os

def main():
    """Drive afl-cov: walk the AFL queue and diff coverage file by file.

    Parses and validates the command line, then repeatedly imports the
    id:* files from <afl-fuzzing-dir>/queue.  For each new file the
    coverage command is run (when --coverage-cmd is set), lcov results are
    collected, and the zero-coverage data is diffed against the previous
    queue file.  In --live mode the queue is re-scanned until afl-fuzz
    is no longer seen running.

    Returns the process exit status: 0 on success, 1 when validation
    fails or when a requested --func-search / --line-search target was
    never found.
    """

    version      = '0.1'
    exit_failure = 1
    exit_success = 0
    found        = 0
    afl_files    = set()   # queue files already seen (membership only)
    prev_file    = ''
    cov_paths    = {}
    file_num     = 1
    tot_files    = 0
    es           = exit_success

    cargs = handle_cmdline()

    if cargs.version:
        print("afl-cov-" + version)
        return exit_success

    if not validate_args(cargs):
        return exit_failure

    queue_dir = cargs.afl_fuzzing_dir + '/queue'

    ### validate_args() may have forced quiet mode, so evaluate this
    ### only after validation
    chatty = cargs.verbose or not cargs.quiet

    while True:

        is_final  = 0
        new_files = []

        for f in import_dir(queue_dir):
            if f not in afl_files:
                afl_files.add(f)
                new_files.append(f)
        tot_files += len(new_files)

        if cargs.live:
            if is_afl_fuzz_running(cargs):
                if not new_files:
                    print("[-] No new AFL queue files, sleeping for %d seconds"
                            % cargs.sleep)
                    time.sleep(cargs.sleep)
                    continue
            else:
                ### NOTE(review): files discovered in this last scan are
                ### counted in tot_files but are not processed - confirm
                ### whether that is intended
                print("[+] afl-fuzz appears to be stopped...")
                is_final = 1
                break

        if chatty:
            print("[+] Imported %d files from: %s"
                    % (len(new_files), queue_dir))

        for f in new_files:
            if chatty:
                print("[+] AFL file: %s (%d / %d)"
                        % (os.path.basename(f), file_num, tot_files))

            if not cargs.live:
                if file_num == tot_files:
                    is_final = 1
                elif cargs.afl_queue_id_limit \
                        and file_num > cargs.afl_queue_id_limit-1:
                    is_final = 1

            cov_paths = gen_paths(cargs, prev_file, f)

            if cargs.coverage_cmd:
                ### execute the command to generate code coverage stats
                ### for the current AFL queue file
                run_cmd(cargs.coverage_cmd.replace('AFL_FILE', f),
                        cargs, 0)

                ### collect the code coverage stats
                gen_coverage(cargs, cov_paths, f, is_final)

            ### diff to the previous code coverage and look for
            ### new lines/functions
            if file_num > 1:
                skip_search = 0
                if found:
                    skip_search = 1
                if coverage_diff(cargs, cov_paths,
                        prev_file, f, is_final, skip_search):
                    if cargs.func_search or cargs.line_search:
                        found = 1
                        ### pure search mode: stop at the first hit
                        if not cargs.coverage_cmd:
                            break

            prev_file = f
            file_num += 1

            if cargs.afl_queue_id_limit and file_num > cargs.afl_queue_id_limit:
                print("[+] queue/ id limit of %d reached..."
                        % cargs.afl_queue_id_limit)
                break

        if not cargs.live:
            break

    ### cov_paths is still empty when afl-fuzz stopped before any queue
    ### file was processed; there is nothing to report on in that case
    if cargs.live and cov_paths:
        if not cargs.disable_lcov_web:
            if is_final or cargs.lcov_web_all:
                gen_web_cov_report(cargs, cov_paths, is_final)
        if is_final:
            zero_cov_final(cov_paths['zero_cov'],
                    cargs.afl_fuzzing_dir + '/cov/zero-cov/zero-cov-final')

    if chatty:
        print("[+] Processed %d / %d files" % (file_num-1, tot_files))

    if not found:
        if cargs.func_search:
            print("[-] Function '%s' not found..." % cargs.func_search)
            es = exit_failure
        elif cargs.line_search:
            print("[-] Line %s not found..." % cargs.line_search)
            es = exit_failure

    return es

def coverage_diff(cargs, cov_paths, a, b, is_final, skip_search):
    """Diff the zero-coverage data of two consecutive AFL queue files.

    cargs       -- parsed command line arguments
    cov_paths   -- path dictionary from gen_paths() for queue file b; must
                   contain 'prev_lcov_info_final', 'lcov_info_final',
                   'zero_cov' and 'diff'
    a, b        -- previous and current AFL queue file (used for display)
    is_final    -- when true, link the final zero coverage report
    skip_search -- when true, do not run the --func-search/--line-search
                   lookup (the caller already found the target)

    Writes the zero coverage report for b, and (unless quiet) prints and
    logs every function/line that had zero coverage in a but not in b.

    Returns 1 if the searched function/line was found in either lcov
    file, 0 otherwise.
    """

    found     = 0
    do_search = 0
    chatty    = cargs.verbose or not cargs.quiet

    if chatty:
        print("[+] Coverage diff %s %s"
                % (os.path.basename(a), os.path.basename(b)))

    ### the parentheses matter: without them 'and' binds tighter than 'or'
    ### and skip_search would be ignored whenever --line-search is set
    if not skip_search and (cargs.func_search or cargs.line_search):
        do_search = 1

    (old_coverage, search_rv) \
            = extract_coverage(cov_paths['prev_lcov_info_final'], do_search, cargs)

    if search_rv:
        ### don't search for the function/line again since we just
        ### found it
        found = 1
        do_search = 0

    (new_coverage, search_rv) \
            = extract_coverage(cov_paths['lcov_info_final'], do_search, cargs)

    if search_rv:
        found = 1

    ### link the final zero coverage report to the current file's report
    ### (the report itself is written just below)
    if is_final:
        zero_cov_final(cov_paths['zero_cov'],
                cargs.afl_fuzzing_dir + '/cov/zero-cov/zero-cov-final')

    ### write out the zero coverage result for the current file
    with open(cov_paths['zero_cov'], 'w') as cfile:
        for f in new_coverage:
            cfile.write("[+] File: %s\n" % f)
            for ctype in sorted(new_coverage[f]):
                if ctype == 'function':
                    for val in sorted(new_coverage[f][ctype]):
                        cfile.write("    %s: %s\n" % (ctype, val))
                elif ctype == 'line':
                    if cargs.coverage_include_lines:
                        for val in sorted(new_coverage[f][ctype], key=int):
                            cfile.write("    %s: %s\n" % (ctype, val))

    ### diff the two dictionaries: anything with zero coverage before
    ### that no longer has zero coverage is newly covered.  The diff is
    ### only printed/logged when not in quiet mode.
    if chatty:
        for f in old_coverage:
            if f not in new_coverage:
                continue
            printed_file = 0
            for ctype in old_coverage[f]:
                for val in sorted(old_coverage[f][ctype]):
                    if val in new_coverage[f][ctype]:
                        continue
                    if not printed_file:
                        tee_print("    Src file: " + f, cov_paths['diff'])
                        printed_file = 1
                    tee_print("      New '" + ctype + "' coverage: " + val,
                            cov_paths['diff'])
    return found

def zero_cov_final(path, lpath):
    """Create the symlink lpath -> path for the final zero coverage report.

    Nothing is done when lpath already exists.  os.path.lexists() is used
    so that an existing symlink whose target is missing (the report may
    not have been written yet) is also treated as existing, instead of
    making os.symlink() fail because the link name is taken.
    """
    if not os.path.lexists(lpath):
        print("[+] Final zero coverage report in: %s" % lpath)
        os.symlink(path, lpath)
    return

def _report_search_hit(kind, name, current_file, id_file, cargs):
    """Print a search hit for a function or line; return 1 if it counts.

    When --src-file is set, the hit only counts (and is only printed) if
    it occurred in that source file.
    """
    if cargs.src_file:
        if cargs.src_file != current_file:
            return 0
        print("[+] %s '%s' in file: '%s' executed by: %s"
                % (kind, name, current_file, id_file))
        return 1
    print("[+] %s '%s' executed by: %s" % (kind, name, id_file))
    return 1

def extract_coverage(lcov_file, do_search, cargs):
    """Parse an lcov tracefile and collect functions/lines never executed.

    lcov_file -- path to a *.lcov_info_final tracefile
    do_search -- when true, also look for positive coverage of
                 cargs.func_search / cargs.line_search and print a
                 message for each hit
    cargs     -- parsed command line arguments (func_search, line_search
                 and src_file are read)

    Returns a tuple (coverage, search_rv).  coverage maps each source
    file (SF: record) to {'function': {...}, 'line': {...}} whose keys
    are the 'name()' functions and line numbers (as strings) with a zero
    execution count.  search_rv is 1 if the searched function/line was
    executed, else 0.
    """

    search_rv = 0
    coverage  = {}
    id_file   = ''

    if do_search:
        ### AFL queue file name the tracefile was generated for
        id_file = os.path.basename(lcov_file).replace('.lcov_info_final', '')

    ### populate old lcov output for functions/lines that were called
    ### zero times
    with open(lcov_file, 'r') as f:
        current_file = ''
        for line in f:
            line = line.strip()

            m = re.search(r'SF:(\S+)', line)
            if m and m.group(1):
                current_file = m.group(1)
                coverage[current_file] = {'function': {}, 'line': {}}
                continue

            if not current_file:
                continue

            m = re.search(r'^FNDA:(\d+),(\S+)', line)
            if m and m.group(2):
                fcn = m.group(2) + '()'
                if m.group(1) == '0':
                    ### the function was never called
                    coverage[current_file]['function'][fcn] = ''
                elif do_search and cargs.func_search \
                        and fcn == cargs.func_search:
                    ### positive coverage for this function
                    if _report_search_hit('Function', fcn,
                            current_file, id_file, cargs):
                        search_rv = 1
                continue

            ### look for lines that were never called
            m = re.search(r'^DA:(\d+),(\d+)', line)
            if m and m.group(1):
                lnum = m.group(1)
                if m.group(2) == '0':
                    ### the line was never executed
                    coverage[current_file]['line'][lnum] = ''
                elif do_search and cargs.line_search \
                        and lnum == cargs.line_search:
                    ### positive coverage for this line
                    if _report_search_hit('Line', lnum,
                            current_file, id_file, cargs):
                        search_rv = 1

    return coverage, search_rv

def gen_coverage(cargs, cov_paths, afl_file, is_final):
    """Collect lcov coverage data for the AFL queue file just executed.

    cargs     -- parsed command line arguments (code_dir, lcov web flags)
    cov_paths -- path dictionary from gen_paths() for this queue file
    afl_file  -- the AFL queue file (currently unused here)
    is_final  -- true when this is the last queue file to be processed

    Steps: capture a baseline (--initial) tracefile, capture the current
    counters, merge the two, then strip /usr/include/* from the merged
    data into cov_paths['lcov_info_final'].  The lines/functions/branches
    summary printed by the last lcov run is echoed, and the lcov web
    report is generated when requested.
    """

    lcov = "lcov --rc lcov_branch_coverage=1 --no-checksum"

    ### baseline data so that never-loaded code shows up with zero counts
    run_cmd(lcov + " --capture --initial"
            + " --directory " + cargs.code_dir
            + " --output-file " + cov_paths['lcov_base'],
            cargs, 0)

    run_cmd(lcov + " --capture --directory " + cargs.code_dir
            + " --output-file " + cov_paths['lcov_info'],
            cargs, 0)

    ### merge into an intermediate file; previously the merge result was
    ### written to lcov_info_final and then overwritten by the filter
    ### step below (which read lcov_info), discarding the baseline data
    merged = cov_paths['lcov_info_final'] + '.merged'
    run_cmd(lcov + " -a " + cov_paths['lcov_base']
            + " -a " + cov_paths['lcov_info']
            + " --output-file " + merged,
            cargs, 0)

    ### fall back to the un-merged capture if the merge produced nothing
    filter_src = merged if os.path.exists(merged) else cov_paths['lcov_info']

    try:
        out = run_cmd(lcov + " -r " + filter_src
                + " /usr/include/*  --output-file "
                + cov_paths['lcov_info_final'], cargs, 1)
    finally:
        if os.path.exists(merged):
            os.remove(merged)

    ### echo the lcov summary lines
    for line in out.splitlines():
        m = re.search(r'^\s+((?:lines|functions|branches)\.\..*\:\s.*)', line)
        if m and m.group(1):
            print("    " + m.group(1))

    if not cargs.disable_lcov_web:
        if is_final or cargs.lcov_web_all:
            gen_web_cov_report(cargs, cov_paths, is_final)

    return

def gen_web_cov_report(cargs, cov_paths, is_final):
    """Generate the genhtml web report for the current queue file.

    cargs     -- parsed command line arguments
    cov_paths -- path dictionary from gen_paths(); 'lcov_web_dir' and
                 'lcov_info_final' are read
    is_final  -- when true, also link <afl-fuzzing-dir>/cov/web/lcov-web-final
                 to this report's directory

    The output directory and the final symlink are only created when they
    do not exist yet, so a report for the same queue file can be
    generated again (e.g. the closing report in --live mode after
    --lcov-web-all already produced one) without raising OSError.
    """
    web_dir = cov_paths['lcov_web_dir']

    if is_final:
        final_link = cargs.afl_fuzzing_dir + '/cov/web/lcov-web-final'
        print("[+] Generating lcov web report in: %s" % final_link)
        ### lexists() so that a not-yet-resolvable link counts as present
        if not os.path.lexists(final_link):
            os.symlink(web_dir, final_link)

    if not os.path.isdir(web_dir):
        os.mkdir(web_dir)

    run_cmd("genhtml --branch-coverage --output-directory "
            + web_dir + " "
            + cov_paths['lcov_info_final'],
            cargs, 0)
    return

def is_afl_fuzz_running(cargs):
    """Return 1 if an afl-fuzz process shows up in 'ps auxww', else 0.

    A process counts when its command (the text right after the TIME
    column) is afl-fuzz followed by arguments, optionally invoked through
    a path such as ./afl-fuzz or /usr/local/bin/afl-fuzz.
    """
    ### root  12765 10.1  0.2 7336 2616 pts/5   S+  08:45  0:00 afl-fuzz -T ...
    out = run_cmd("ps auxww", cargs, 1)

    for line in out.splitlines():
        if re.search(r'\d:\d{2}\s+(?:\S*/)?afl-fuzz\s', line):
            return 1

    return 0

def gen_paths(cargs, prev_afl_file, afl_file):
    """Build the dictionary of coverage output paths for one queue file.

    All paths live below <afl-fuzzing-dir>/cov and are derived from the
    base name of afl_file.  When prev_afl_file is non-empty, the path of
    the previous file's final lcov tracefile is included as
    'prev_lcov_info_final'.
    """
    cov_root  = cargs.afl_fuzzing_dir + '/cov'
    lcov_root = cov_root + '/lcov/'
    name      = os.path.basename(afl_file)

    paths = {
        'diff':            cov_root + '/diff/' + name + '.cov_diff',
        'zero_cov':        cov_root + '/zero-cov/' + name + '.zero_cov',
        'lcov_web_dir':    cov_root + '/web/' + name,
        'lcov_base':       lcov_root + name + '.lcov_base',
        'lcov_info':       lcov_root + name + '.lcov_info',
        'lcov_info_final': lcov_root + name + '.lcov_info_final',
    }

    if prev_afl_file:
        prev_name = os.path.basename(prev_afl_file)
        paths['prev_lcov_info_final'] = lcov_root + prev_name \
                + '.lcov_info_final'

    return paths

def run_cmd(cmd, cargs, collect):
    """Run an external command.

    cmd     -- command line as a single string
    cargs   -- parsed command line arguments (verbose and
               disable_cmd_redirection are read)
    collect -- when true, the command is split on whitespace, executed
               without a shell and its stdout is returned
               (subprocess.check_output raises CalledProcessError on a
               non-zero exit status); otherwise the command is run
               through the shell, its exit status is ignored, and '' is
               returned

    Unless --disable-cmd-redirection is set, the output of non-collected
    commands is sent to os.devnull.
    """

    if cargs.verbose:
        print("    CMD: %s" % cmd)

    if collect:
        return subprocess.check_output(cmd.split())

    fh = None
    if not cargs.disable_cmd_redirection:
        fh = open(os.devnull, 'w')

    try:
        subprocess.call(cmd, stdin=None,
                stdout=fh, stderr=subprocess.STDOUT, shell=True)
    finally:
        ### always release the devnull handle, even if the call raises
        if fh is not None:
            fh.close()

    return ''

def import_dir(qdir):
    """Return the sorted list of id:* paths found directly in qdir."""
    queue_files = glob.glob(qdir + "/id:*")
    queue_files.sort()
    return queue_files

def validate_args(cargs):
    """Validate the command line and prepare the coverage directory tree.

    Returns 1 when afl-cov can proceed and 0 (after printing a message)
    otherwise.  In --live mode this blocks until the AFL fuzzing
    directory exists and afl-fuzz is seen running.

    Side effects on success: normalizes cargs.func_search to end in '()',
    may force cargs.quiet in pure search mode, (re)creates
    <afl-fuzzing-dir>/cov and its sub-directories when needed, and resets
    the lcov counters unless --disable-coverage-init is set.  The
    directory tree is only touched after every check has passed, so a
    failed validation neither deletes existing results nor leaves a
    half-initialized cov directory behind.
    """

    if not cargs.afl_fuzzing_dir:
        print("[*] Must specify either --afl-fuzzing-dir")
        return 0

    if cargs.live:
        while not fuzzing_dir_exists(cargs):
            print("[-] Sleep for %d seconds for AFL fuzzing directory to be created..."
                    % cargs.sleep)
            time.sleep(cargs.sleep)

        ### if we make it here then afl-fuzz is presumably running
        while not is_afl_fuzz_running(cargs):
            print("[-] Sleep for %d seconds waiting for afl-fuzz to be started...."
                    % cargs.sleep)
            time.sleep(cargs.sleep)
    else:
        if not fuzzing_dir_exists(cargs):
            print("[*] It doesn't look like directory '%s' exists"
                % (cargs.afl_fuzzing_dir + '/queue'))
            return 0

    cov_dir   = cargs.afl_fuzzing_dir + '/cov'
    searching = cargs.func_search or cargs.line_search

    remove_existing = 0
    create_cov_dirs = 0
    if os.path.exists(cov_dir):
        if cargs.overwrite:
            remove_existing = 1
            create_cov_dirs = 1
        else:
            if not searching:
                print("[*] Existing coverage dir found, use --overwrite to "
                        "re-calculate coverage")
                return 0
    else:
        create_cov_dirs = 1

    if create_cov_dirs and not cargs.coverage_cmd:
        print("[*] Must set --coverage-cmd unless using --func-search "
                "against existing afl-cov directory")
        return 0

    if cargs.coverage_cmd and 'AFL_FILE' not in cargs.coverage_cmd:
        print("[*] --coverage-cmd must contain AFL_FILE")
        return 0

    if cargs.code_dir:
        if not os.path.exists(cargs.code_dir):
            print("[*] --code-dir path does not exist")
            return 0
    else:
        ### --code-dir is needed whenever coverage is (re)generated; it is
        ### only optional when searching existing results
        if cargs.coverage_cmd or not searching:
            print("[*] Must set --code-dir unless using --func-search "
                    "against existing afl-cov directory")
            return 0

    if searching:
        ### func_search is None when only --line-search was given
        if cargs.func_search and '()' not in cargs.func_search:
            cargs.func_search += '()'
        if not cargs.verbose and not cargs.coverage_cmd:
            cargs.quiet = 1
        if cargs.line_search and not cargs.src_file:
            print("[*] Must set --src-file in --line-search mode")
            return 0

    ### all checks passed - now it is safe to modify the file system
    if remove_existing:
        rmtree(cov_dir)

    if create_cov_dirs:
        os.mkdir(cov_dir)
        for sub_dir in ('web', 'lcov', 'zero-cov', 'diff'):
            os.mkdir(cov_dir + '/' + sub_dir)

        if not cargs.disable_coverage_init:
            ### reset code coverage counters
            run_cmd("lcov --rc lcov_branch_coverage=1 "
                    + "--no-checksum --zerocounters --directory "
                    + cargs.code_dir, cargs, 0)

    return 1

def fuzzing_dir_exists(cargs):
    """Return 1 if the AFL fuzzing dir and its queue/ path exist, else 0."""
    top_dir = cargs.afl_fuzzing_dir
    if os.path.exists(top_dir) and os.path.exists(top_dir + '/queue'):
        return 1
    return 0

def tee_print(pstr, pfile):
    """Print pstr to stdout and append it, newline-terminated, to pfile."""
    print(pstr)
    with open(pfile, 'a') as log:
        log.write("%s\n" % pstr)
    return

def handle_cmdline():
    """Define the afl-cov command line options and parse sys.argv.

    Returns the argparse namespace holding the parsed arguments.
    """

    parser = argparse.ArgumentParser()
    add = parser.add_argument

    ### what to run and where to look
    add("-e", "--coverage-cmd", type=str,
        help="set command to exec (including args, and assumes code coverage support)")
    add("-d", "--afl-fuzzing-dir", type=str,
        help="top level AFL fuzzing directory")
    add("-c", "--code-dir", type=str,
        help="directory where the code lives (compiled with code coverage support)")
    add("-O", "--overwrite", default=False, action='store_true',
        help="overwrite existing coverage results")

    ### behavior toggles
    add("--disable-cmd-redirection", default=False, action='store_true',
        help="disable redirection of command results to /dev/null")
    add("--disable-lcov-web", default=False, action='store_true',
        help="disable generation of all lcov web code coverage reports")
    add("--disable-coverage-init", default=False, action='store_true',
        help="disable initialization of code coverage counters at afl-cov startup")
    add("--coverage-include-lines", default=False, action='store_true',
        help="include lines in zero-coverage status files")

    ### live mode
    add("--live", default=False, action='store_true',
        help="process a live AFL directory, and afl-cov will exit when it appears afl-fuzz has been stopped")
    add("--sleep", default=60, type=int,
        help="In --live mode, # of seconds to sleep between checking for new queue files")
    add("--lcov-web-all", default=False, action='store_true',
        help="generate lcov web reports for all id:NNNNNN* files instead of just the last one")

    ### coverage searches
    add("--func-search", type=str,
        help="search for coverage of a specific function")
    add("--line-search", type=str,
        help="search for coverage of a specific line number (requires --src-file)")
    add("--src-file", type=str,
        help="restrict function or line search to a specfic source file")
    add("--afl-queue-id-limit", default=0, type=int,
        help="limit the number of id:NNNNNN* files processed in the AFL queue/ directory")

    ### output control
    add("-v", "--verbose", default=False, action='store_true',
        help="verbose mode")
    add("-V", "--version", default=False, action='store_true',
        help="print version and exit")
    add("-q", "--quiet", default=False, action='store_true',
        help="quiet mode")

    return parser.parse_args()

### Script entry point: run main() only when executed directly and use
### its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
