#!/usr/bin/env python
#
#  File: afl-cov
#
#  Version: 0.2
#
#  Purpose: Perform lcov coverage diffs against each AFL queue file to see
#           how function and line coverage evolve over an AFL fuzzing cycle.
#
#  Copyright (C) 2015 Michael Rash (mbr@cipherdyne.org)
#
#  License (GNU General Public License):
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
#  USA
#

from shutil import rmtree
import errno
import re
import subprocess
import glob
import string
import argparse
import time
import sys, os

### values for the 'collect' argument of run_cmd(): WANT_OUTPUT asks for
### the command's captured output to be returned, NO_OUTPUT just runs it
WANT_OUTPUT = 1
NO_OUTPUT   = 0

def main():
    """Run afl-cov over an AFL queue/ directory.

    Parses the command line, validates it, then walks the id:NNNNNN* test
    cases in queue/ (repeatedly in --live mode), generating lcov coverage
    for each one and recording the coverage deltas. Final zero/positive
    coverage reports and the lcov web report are written when coverage was
    actually generated by this run.

    Returns the process exit status: 0 on success, 1 on failure (invalid
    arguments, or a --func-search / --line-search that found nothing).
    """

    version      = '0.2'
    exit_failure = 1
    exit_success = 0
    found        = 0
    afl_files    = set()
    prev_file    = ''
    cov_paths    = {}
    gcov         = {}
    zero_cov     = {}
    file_num     = 0
    have_cov     = False
    done         = False
    es           = exit_success

    cargs = handle_cmdline()

    if cargs.version:
        print("afl-cov-" + version)
        return exit_success

    (gpaths, rv) = global_paths(cargs)

    if not rv:
        return exit_failure

    if not validate_args(gpaths, cargs):
        return exit_failure

    queue_dir = cargs.afl_fuzzing_dir + '/queue'

    ### validate_args() may have forced quiet mode, so evaluate this
    ### only after it has run
    chatty = cargs.verbose or not cargs.quiet

    while not done:

        ### a set keeps the "already seen" test O(1) per queue file
        new_files = [f for f in import_dir(queue_dir) if f not in afl_files]
        afl_files.update(new_files)

        ### highest zero-based index among all queue files seen so far;
        ### recomputed from scratch so it cannot drift across passes
        last_id = len(afl_files) - 1

        if cargs.live:
            if is_afl_fuzz_running(cargs):
                if not new_files:
                    print("[-] No new AFL queue files, sleeping for %d seconds"
                            % cargs.sleep)
                    time.sleep(cargs.sleep)
                    continue
            else:
                print("[+] afl-fuzz appears to be stopped...")
                ### still process files written just before afl-fuzz
                ### stopped, then leave the loop
                done = True
                if not new_files:
                    break
        else:
            done = True

        if chatty:
            print("[+] Imported %d files from: %s"
                    % (len(new_files), queue_dir))

        for f in new_files:

            curr_cycle = get_cycle_num(file_num, cargs)

            if chatty:
                print("[+] AFL file: %s (%d / %d), cycle: %d"
                        % (os.path.basename(f), file_num, last_id, curr_cycle))

            cov_paths = gen_paths(prev_file, f, gpaths, cargs)

            if cargs.coverage_cmd:
                ### execute the command to generate code coverage stats
                ### for the current AFL queue file
                run_cmd(cargs.coverage_cmd.replace('AFL_FILE', f),
                        cargs, NO_OUTPUT)

                ### collect the code coverage stats
                gen_coverage(gpaths, cov_paths, f, cargs)

                ### extract and write out the coverage result for this
                ### id:NNNNNN* test case
                append_id_delta_cov(gcov, curr_cycle, cov_paths, gpaths, cargs)

                if file_num > 0:
                    ### diff to the previous code coverage and look for
                    ### new lines/functions
                    zero_cov = coverage_diff(cov_paths, gpaths,
                            prev_file, f, cargs)
                else:
                    ### nothing to diff against yet, but the zero
                    ### coverage report must still reflect this file
                    zero_cov = extract_coverage(
                            cov_paths['lcov_info_final'], cargs)[0]

                if not cargs.disable_lcov_web and cargs.lcov_web_all:
                    gen_web_cov_report(gpaths, cov_paths, cargs)

                have_cov = True

            prev_file = f
            file_num += 1

            if cargs.func_search or cargs.line_search:
                if not found:
                    found = search_cov(gpaths, cargs)
                if found and not cargs.live:
                    break

            if cargs.afl_queue_id_limit and file_num > cargs.afl_queue_id_limit:
                print("[+] queue/ id limit of %d reached..."
                        % cargs.afl_queue_id_limit)
                ### the limit ends the whole run, including in --live mode
                done = True
                break

    if chatty:
        print("[+] Processed id:%d / %d files.\n" % (file_num-1, last_id))

    ### only (re)write the final reports when this run generated coverage;
    ### a pure search against an existing afl-cov directory must not
    ### clobber the reports already there
    if cargs.coverage_cmd:
        write_zero_cov(gpaths['zero_cov'], zero_cov, cargs)
        write_pos_cov(gpaths['pos_cov'], gcov, cargs)

        if have_cov and not cargs.disable_lcov_web:
            ### with --lcov-web-all the report for the last file has
            ### already been generated inside the loop
            if not cargs.lcov_web_all:
                gen_web_cov_report(gpaths, cov_paths, cargs)

            final_link = gpaths['web_dir'] + '/lcov-web-final.html'
            print("[+] Final lcov web report: %s" % final_link)

            ### replace a stale link, and point at an absolute target so
            ### the link resolves even if the fuzzing dir was given as a
            ### relative path
            if os.path.lexists(final_link):
                os.remove(final_link)
            os.symlink(
                    os.path.abspath(cov_paths['lcov_web_dir'] + '/index.html'),
                    final_link)

    if not found:
        if cargs.func_search:
            print("[-] Function '%s' not found..." % cargs.func_search)
            es = exit_failure
        elif cargs.line_search:
            print("[-] Line %s not found..." % cargs.line_search)
            es = exit_failure

    return es

def coverage_diff(cov_paths, gpaths, a, b, cargs):
    """Report coverage gained between the previous and current queue file.

    Reads the zero-coverage sets from the previous and the current final
    lcov files and, unless output is suppressed, prints (and appends to
    cov_paths['diff']) every function/line that was uncovered before but
    is no longer listed as uncovered now. The previous lcov files are
    removed after being read unless --preserve-all-lcov-files is set.

    a / b are the previous and current AFL queue files (used only for the
    progress message). Returns the current zero-coverage dictionary.
    """

    chatty = cargs.verbose or not cargs.quiet

    if chatty:
        print("[+] Coverage diff %s %s"
                % (os.path.basename(a), os.path.basename(b)))

    prev_zero = extract_coverage(cov_paths['prev_lcov_info_final'], cargs)[0]

    ### the previous lcov results have been read, so the files behind
    ### them can go unless we were told to keep them
    if not cargs.preserve_all_lcov_files:
        rm_prev_cov_files(cov_paths)

    curr_zero = extract_coverage(cov_paths['lcov_info_final'], cargs)[0]

    ### nothing is reported in quiet mode, so only walk the dictionaries
    ### when there is something to print
    if chatty:
        for src in prev_zero:
            if src not in curr_zero:
                continue
            header_shown = False
            for ctype in prev_zero[src]:
                still_zero = curr_zero[src][ctype]
                for val in sorted(prev_zero[src][ctype]):
                    if val in still_zero:
                        continue
                    if not header_shown:
                        tee_print("    Src file: " + src, cov_paths['diff'])
                        header_shown = True
                    tee_print("      New '" + ctype + "' coverage: " + val,
                            cov_paths['diff'])

    return curr_zero

def write_zero_cov(cpath, zero_cov, cargs):
    """Create the zero-coverage report at cpath.

    Writes a fresh comment header (truncating any existing file) and then
    lets write_cov() append the entries from zero_cov.
    """
    print("[+] Final zero coverage report: %s" % cpath)
    header = (
        "# All functions / lines in this file were never executed by any\n",
        "# AFL test case.\n",
    )
    with open(cpath, 'w') as report:
        for text in header:
            report.write(text)
    write_cov(cpath, zero_cov, cargs)
    return

def write_pos_cov(cpath, gcov, cargs):
    """Create the positive-coverage report at cpath.

    Writes a fresh comment header (truncating any existing file) and then
    lets write_cov() append the entries from gcov.
    """
    print("[+] Final positive coverage report: %s" % cpath)
    header = (
        "# All functions / lines in this file were executed by at\n",
        "# least one AFL test case. See the cov/id-delta-cov file\n",
        "# for more information.\n",
    )
    with open(cpath, 'w') as report:
        for text in header:
            report.write(text)
    write_cov(cpath, gcov, cargs)
    return

def write_cov(cpath, cov, cargs):
    """Append a coverage dictionary to the report file cpath.

    cov maps source file -> coverage type ('function' / 'line') -> values.
    Functions are written in sorted order; lines are written in numeric
    order and only when --coverage-include-lines was given.
    """
    with open(cpath, 'a') as report:
        for src in cov:
            report.write("File: %s\n" % src)
            by_type = cov[src]
            for ctype in sorted(by_type):
                if ctype == 'function':
                    ordered = sorted(by_type[ctype])
                elif ctype == 'line' and cargs.coverage_include_lines:
                    ### line numbers are stored as strings
                    ordered = sorted(by_type[ctype], key=int)
                else:
                    continue
                for val in ordered:
                    report.write("    %s: %s\n" % (ctype, val))

    return

def rm_prev_cov_files(cov_paths):
    """Delete the three lcov files recorded for the previous queue file."""
    for key in ('prev_lcov_base', 'prev_lcov_info', 'prev_lcov_info_final'):
        os.remove(cov_paths[key])
    return

def append_id_delta_cov(gcov, curr_cycle, cov_paths, gpaths, cargs):
    """Record coverage first reached by the current queue file.

    Every function (and, with --coverage-include-lines, every line) with
    positive coverage in the current final lcov file that is not yet in
    gcov is added to gcov and appended to the id-delta-cov file as
    "id_file, cycle, src_file, coverage_type, value".
    """

    covered = extract_coverage(cov_paths['lcov_info_final'], cargs)[1]

    with open(gpaths['id_delta_cov'], 'a') as delta:
        for src in covered:
            if src not in gcov:
                cov_init(src, gcov)
            for ctype in sorted(covered[src]):
                if ctype == 'function':
                    ordered = sorted(covered[src][ctype])
                elif ctype == 'line' and cargs.coverage_include_lines:
                    ### line numbers are stored as strings
                    ordered = sorted(covered[src][ctype], key=int)
                else:
                    continue

                known = gcov[src][ctype]
                for val in ordered:
                    if val in known:
                        continue
                    known[val] = ''
                    delta.write("%s, %s, %s, %s, %s\n"
                            % (cov_paths['id_file'],
                                curr_cycle, src, ctype, val))

    return

def cov_init(cfile, cov):
    """(Re)set cov[cfile] to empty 'function' and 'line' dictionaries."""
    cov[cfile] = {'function': {}, 'line': {}}
    return

def extract_coverage(lcov_file, cargs):
    """Parse an lcov tracefile into zero- and positive-coverage maps.

    lcov_file is the path of the tracefile to read; cargs is accepted for
    interface compatibility and is not used.

    Returns (zero_cov, pos_cov). Both map source file -> {'function': {},
    'line': {}}, where the inner dictionaries are keyed by "name()" for
    functions and by the line number (as a string) for lines; the values
    are always ''. An entry lands in zero_cov when its hit count is the
    literal '0' and in pos_cov otherwise.
    """

    ### compiled once instead of once per line; the SF record is anchored
    ### and captures the rest of the line so that source paths containing
    ### spaces are not truncated
    sf_re   = re.compile(r'^SF:(.+)$')
    fnda_re = re.compile(r'^FNDA:(\d+),(\S+)')
    da_re   = re.compile(r'^DA:(\d+),(\d+)')

    zero_cov = {}
    pos_cov  = {}

    with open(lcov_file, 'r') as f:
        current_file = ''
        for line in f:
            line = line.strip()

            m = sf_re.match(line)
            if m:
                current_file = m.group(1)
                ### same per-file shape as cov_init() produces
                zero_cov[current_file] = {'function': {}, 'line': {}}
                pos_cov[current_file]  = {'function': {}, 'line': {}}
                continue

            ### records seen before the first SF: line belong to no file
            if not current_file:
                continue

            ### function hit counts
            m = fnda_re.match(line)
            if m:
                fcn = m.group(2) + '()'
                if m.group(1) == '0':
                    ### the function was never called
                    zero_cov[current_file]['function'][fcn] = ''
                else:
                    pos_cov[current_file]['function'][fcn] = ''
                continue

            ### line hit counts
            m = da_re.match(line)
            if m:
                lnum = m.group(1)
                if m.group(2) == '0':
                    ### the line was never executed
                    zero_cov[current_file]['line'][lnum] = ''
                else:
                    pos_cov[current_file]['line'][lnum] = ''

    return zero_cov, pos_cov

def search_cov(gpaths, cargs):
    """Search the id-delta-cov file for the requested function or line.

    Each data line of the file has the form
    "id_file, cycle, src_file, coverage_type, value". A function match
    requires coverage_type 'function' and value == cargs.func_search
    (restricted to cargs.src_file when that is set); a line match requires
    coverage_type 'line', value == cargs.line_search and
    src_file == cargs.src_file. Every match is printed.

    Returns 1 if at least one match was found, 0 otherwise.
    """

    search_rv = 0

    with open(gpaths['id_delta_cov'], 'r') as f:
        for line in f:
            line = line.strip()

            ### skip the comment header and blank lines
            if not line or line.startswith('#'):
                continue

            ### id:NNNNNN*_file, cycle, src_file, cov_type, fcn/line
            fields = line.split(', ')
            if len(fields) != 5:
                ### malformed / partially written record
                continue
            (id_file, cycle_num, src_file, cov_type, val) = fields

            if cargs.func_search and cov_type == 'function' \
                    and val == cargs.func_search:
                if cargs.src_file:
                    if cargs.src_file == src_file:
                        print("[+] Function '%s' in file: '%s' executed by: '%s', cycle: %s"
                                % (val, src_file, id_file, cycle_num))
                        search_rv = 1
                else:
                    print("[+] Function '%s' executed by: '%s', cycle: %s"
                            % (val, id_file, cycle_num))
                    search_rv = 1

            if cargs.line_search and cov_type == 'line' \
                    and cargs.src_file == src_file \
                    and val == cargs.line_search:
                print("[+] Line '%s' in file: '%s' executed by: '%s', cycle: %s"
                        % (val, src_file, id_file, cycle_num))
                search_rv = 1

    return search_rv

def search_diff(cov_paths, curr_cycle, cargs):
    """Search one coverage diff file for the requested function or line.

    The diff file (cov_paths['diff']) consists of "Src file: <path>" lines
    followed by "New '<type>' coverage: <value>" lines for that source
    file. A function matches when its value equals cargs.func_search
    exactly (and, if cargs.src_file is set, it belongs to that file); a
    line matches when its value equals cargs.line_search and it belongs to
    cargs.src_file. Every match is printed.

    Returns 1 if at least one match was found, 0 otherwise.
    """
    src_re = re.compile(r"^Src file: (.+)$")
    cov_re = re.compile(r"^New '(\w+)' coverage: (.+)$")

    found = 0
    current_src = ''

    with open(cov_paths['diff'], 'r') as f:
        for line in f:
            line = line.strip()

            m = src_re.match(line)
            if m:
                current_src = m.group(1)
                continue

            ### blank or unrecognized lines carry no coverage entry
            m = cov_re.match(line)
            if not m:
                continue
            (ctype, val) = m.groups()

            ### cargs.func_search includes the closing '()'; compare the
            ### whole value so 'foo()' does not match 'xfoo()'
            if ctype == 'function' and cargs.func_search \
                    and val == cargs.func_search \
                    and (not cargs.src_file or cargs.src_file == current_src):
                print("[+] Function '%s' executed by: '%s', cycle: %d"
                        % (cargs.func_search, cov_paths['id_file'], curr_cycle))
                found = 1

            ### the message names cargs.src_file, so only accept the line
            ### number when it was reported for that source file
            if ctype == 'line' and cargs.line_search \
                    and val == cargs.line_search \
                    and cargs.src_file == current_src:
                print("[+] Line '%s' in file: '%s' executed by: '%s', cycle: %d"
                        % (cargs.line_search, cargs.src_file,
                        cov_paths['id_file'], curr_cycle))
                found = 1
    return found

def get_cycle_num(id_num, cargs):
    """Look up the AFL cycle recorded for a queue id in plot_data.

    Scans <afl_fuzzing_dir>/plot_data for the first line whose third
    field equals id_num and returns that line's second field as an int.
    Returns -1 when plot_data cannot be opened or no line matches.
    """

    cycle_num = -1
    wanted = str(id_num)

    try:
        f = open(cargs.afl_fuzzing_dir + '/plot_data')
    except IOError:
        ### no plot_data (yet) - the cycle is simply unknown
        return cycle_num

    with f:
        for line in f:
            ### unix_time, cycles_done, cur_path, paths_total, pending_total,...
            ### 1427742641, 11, 54, 419, 45, 0, 2.70%, 0, 0, 9, 1645.47
            vals = line.strip().split(', ')

            ### blank or truncated lines can show up while afl-fuzz is
            ### still writing the file
            if len(vals) < 3:
                continue

            ### test the id number against the current path
            if vals[2] == wanted:
                try:
                    cycle_num = int(vals[1])
                except ValueError:
                    continue
                break
    return cycle_num

def gen_coverage(gpaths, cov_paths, afl_file, cargs):
    """Run lcov to produce the final tracefile for the current queue file.

    Steps: capture a baseline (--initial) into cov_paths['lcov_base'],
    capture the current counters into cov_paths['lcov_info'], merge the
    two, then strip /usr/include/* from the merged data and write the
    result to cov_paths['lcov_info_final']. The lines / functions /
    branches summary printed by the last lcov command is echoed.

    gpaths and afl_file are accepted for interface compatibility and are
    not used here.
    """

    lcov = "lcov --rc lcov_branch_coverage=1 --no-checksum"

    ### intermediate file for the baseline + capture merge; it lives only
    ### for the duration of this call
    merged = cov_paths['lcov_info_final'] + '.merged'

    run_cmd(lcov + " --capture --initial"
            + " --directory " + cargs.code_dir
            + " --output-file " + cov_paths['lcov_base'],
            cargs, NO_OUTPUT)

    run_cmd(lcov + " --capture --directory " + cargs.code_dir
            + " --output-file " + cov_paths['lcov_info'],
            cargs, NO_OUTPUT)

    run_cmd(lcov + " -a " + cov_paths['lcov_base']
            + " -a " + cov_paths['lcov_info']
            + " --output-file " + merged,
            cargs, NO_OUTPUT)

    ### filter the *merged* data; filtering lcov_info instead would
    ### overwrite the merge result and silently drop the baseline
    try:
        out = run_cmd(lcov + " -r " + merged
                + " /usr/include/*  --output-file "
                + cov_paths['lcov_info_final'], cargs, WANT_OUTPUT)
    finally:
        try:
            os.remove(merged)
        except OSError:
            ### the merge step produced nothing to clean up
            pass

    summary_re = re.compile(r'^\s+((?:lines|functions|branches)\.\..*\:\s.*)')
    for line in out.splitlines():
        m = summary_re.search(line)
        if m and m.group(1):
            print("    " + m.group(1))
    return

def gen_web_cov_report(gpaths, cov_paths, cargs):
    """Generate the lcov HTML report for the current queue file.

    Runs genhtml on cov_paths['lcov_info_final'] with
    cov_paths['lcov_web_dir'] as the output directory, creating that
    directory if needed. gpaths is accepted for interface compatibility
    and is not used here.

    Raises OSError if the output directory cannot be created.
    """
    web_dir = cov_paths['lcov_web_dir']

    ### the directory may already exist, e.g. when the final report is
    ### generated for a file already covered by --lcov-web-all
    try:
        os.mkdir(web_dir)
    except OSError as e:
        if e.errno != errno.EEXIST or not os.path.isdir(web_dir):
            raise

    run_cmd("genhtml --branch-coverage --output-directory "
            + web_dir + " "
            + cov_paths['lcov_info_final'],
            cargs, NO_OUTPUT)
    return

def is_afl_fuzz_running(cargs):
    """Tell whether the afl-fuzz process named in fuzzer_stats is alive.

    Reads the first "fuzzer_pid : N" line from
    <afl_fuzzing_dir>/fuzzer_stats and probes that pid with signal 0.

    Returns 1 if the process exists (including when it exists but we lack
    permission to signal it), 0 otherwise - also when fuzzer_stats cannot
    be opened or holds no pid.
    """

    found = 0
    pid_re = re.compile(r'fuzzer_pid\s+\:\s+(\d+)')

    try:
        f = open(cargs.afl_fuzzing_dir + '/fuzzer_stats', 'r')
    except IOError:
        ### afl-fuzz has not written its stats file (yet), so as far as
        ### we can tell it is not running
        return found

    with f:
        for line in f:
            ### fuzzer_pid     : 13238
            m = pid_re.search(line.strip())
            if not m:
                continue

            pid = int(m.group(1))
            try:
                ### signal 0 performs the existence/permission check
                ### without delivering a signal
                os.kill(pid, 0)
            except OSError as e:
                ### EPERM: the process exists but belongs to someone else
                if e.errno == errno.EPERM:
                    found = 1
            else:
                found = 1
            break

    return found

def gen_paths(prev_afl_file, afl_file, gpaths, cargs):
    """Build the per-test-case path dictionary for afl_file.

    The returned dictionary holds the diff file, the id file name, the
    web report directory and the three raw lcov file paths for afl_file.
    When prev_afl_file is non-empty, the matching 'prev_lcov_*' paths for
    that file are included too. cargs is not used.
    """

    basename = os.path.basename(afl_file)
    lcov_suffixes = ('lcov_base', 'lcov_info', 'lcov_info_final')

    cov_paths = {
        ### coverage diffs from one ID file to the next
        'diff': '%s/%s' % (gpaths['diff_dir'], basename),
        ### current id:NNNNNN* test case file
        'id_file': basename,
        ### web files
        'lcov_web_dir': '%s/%s' % (gpaths['web_dir'], basename),
    }

    ### raw lcov files
    curr_stem = '%s/%s' % (gpaths['lcov_dir'], basename)
    for suffix in lcov_suffixes:
        cov_paths[suffix] = curr_stem + '.' + suffix

    ### the same three files, as named for the previous test case
    if prev_afl_file:
        prev_stem = '%s/%s' % (gpaths['lcov_dir'],
                os.path.basename(prev_afl_file))
        for suffix in lcov_suffixes:
            cov_paths['prev_' + suffix] = prev_stem + '.' + suffix

    return cov_paths

def run_cmd(cmd, cargs, collect):
    """Run an external command, optionally returning its stdout.

    cmd is the command line as a single string. With collect ==
    WANT_OUTPUT it is split on whitespace, run without a shell, and its
    stdout is returned; subprocess.CalledProcessError propagates if it
    exits non-zero. Otherwise it is run through the shell, its exit
    status is ignored and '' is returned.

    Unless --disable-cmd-redirection is set, command output that is not
    being collected is sent to os.devnull.
    """

    out = ''

    if cargs.verbose:
        print("    CMD: %s" % cmd)

    fh = None
    if not cargs.disable_cmd_redirection:
        fh = open(os.devnull, 'w')

    try:
        if collect == WANT_OUTPUT:
            ### stdout is captured; stderr follows the redirection
            ### setting (fh is None -> inherit our stderr)
            out = subprocess.check_output(cmd.split(), stderr=fh)
        else:
            subprocess.call(cmd, stdin=None,
                    stdout=fh, stderr=subprocess.STDOUT, shell=True)
    finally:
        ### close the devnull handle even if the command raised
        if fh is not None:
            fh.close()

    return out

def import_dir(qdir):
    """Return the sorted list of id:* paths found directly under qdir."""
    queue_files = glob.glob(qdir + "/id:*")
    queue_files.sort()
    return queue_files

def global_paths(cargs):
    """Derive the afl-cov output paths from the AFL fuzzing directory.

    Returns (gpaths, 1) where gpaths holds the cov/ directory layout and
    the summary report paths, or ({}, 0) after printing an error when no
    fuzzing directory was given.
    """
    if not cargs.afl_fuzzing_dir:
        print("[*] Must specify AFL fuzzing dir with --afl-fuzzing-dir or -d")
        return {}, 0

    top = cargs.afl_fuzzing_dir + '/cov'
    gpaths = {'top_dir': top}

    ### working directories below cov/
    for sub in ('web', 'lcov', 'diff'):
        gpaths[sub + '_dir'] = top + '/' + sub

    ### summary coverage results
    for key, leaf in (('id_delta_cov', 'id-delta-cov'),
                      ('zero_cov', 'zero-cov'),
                      ('pos_cov', 'pos-cov')):
        gpaths[key] = top + '/' + leaf

    return gpaths, 1

def validate_args(gpaths, cargs):
    """Validate the command line and prepare the cov/ directory.

    In --live mode this first blocks until the AFL fuzzing directory
    exists and afl-fuzz appears to be running. All argument checks are
    done before anything on disk is touched, so that a rejected command
    line never leaves a half-created (or freshly deleted) cov/ directory
    behind. On success the cov/ directories and the id-delta-cov header
    are created when needed and, unless disabled, the lcov counters are
    zeroed.

    Side effects on cargs: '()' is appended to func_search if missing,
    and quiet is forced on for a search without -v / --coverage-cmd.

    Returns 1 when afl-cov may proceed, 0 after printing an error.
    """

    searching = cargs.func_search or cargs.line_search

    if cargs.live:
        while not fuzzing_dir_exists(cargs):
            print("[-] Sleep for %d seconds for AFL fuzzing directory to be created..."
                    % cargs.sleep)
            time.sleep(cargs.sleep)

        ### if we make it here then afl-fuzz is presumably running
        while not is_afl_fuzz_running(cargs):
            print("[-] Sleep for %d seconds waiting for afl-fuzz to be started...."
                    % cargs.sleep)
            time.sleep(cargs.sleep)
    else:
        if not fuzzing_dir_exists(cargs):
            print("[*] It doesn't look like directory '%s' exists"
                % (cargs.afl_fuzzing_dir + '/queue'))
            return 0

    if cargs.coverage_cmd:
        if 'AFL_FILE' not in cargs.coverage_cmd:
            print("[*] --coverage-cmd must contain AFL_FILE")
            return 0
    elif not searching:
        print("[*] Must set --coverage-cmd or --func-search/--line-search")
        return 0

    if cargs.code_dir:
        if not os.path.exists(cargs.code_dir):
            print("[*] --code-dir path does not exist")
            return 0
    elif cargs.coverage_cmd or not searching:
        ### every lcov invocation needs --code-dir, so it is required
        ### whenever coverage is (re)generated - search options or not
        print("[*] Must set --code-dir unless using --func-search "
                "against existing afl-cov directory")
        return 0

    if cargs.line_search and not cargs.src_file:
        print("[*] Must set --src-file in --line-search mode")
        return 0

    ### decide what has to happen to the cov/ directory, without doing
    ### it yet
    cov_dir_exists = os.path.exists(gpaths['top_dir'])
    if cov_dir_exists and not cargs.overwrite and not searching:
        print("[*] Existing coverage dir found, use --overwrite to "
                "re-calculate coverage")
        return 0

    create_cov_dirs = 0
    if not cov_dir_exists or cargs.overwrite:
        create_cov_dirs = 1

    if create_cov_dirs and not cargs.coverage_cmd:
        print("[*] Must set --coverage-cmd unless using --func-search "
                "against existing afl-cov directory")
        return 0

    ### all checks passed - from here on we modify state
    if searching:
        if cargs.func_search and '()' not in cargs.func_search:
            cargs.func_search += '()'
        if not cargs.verbose and not cargs.coverage_cmd:
            cargs.quiet = 1

    if create_cov_dirs:
        if cov_dir_exists:
            rmtree(gpaths['top_dir'])

        for k in ['top_dir', 'web_dir', 'lcov_dir', 'diff_dir']:
            os.mkdir(gpaths[k])

        ### write coverage results in the following format
        with open(gpaths['id_delta_cov'], 'w') as cfile:
            cfile.write("# id:NNNNNN*_file, cycle, src_file, coverage_type, fcn/line\n")

        if not cargs.disable_coverage_init:
            ### reset code coverage counters
            run_cmd("lcov --rc lcov_branch_coverage=1 "
                    + "--no-checksum --zerocounters --directory "
                    + cargs.code_dir, cargs, NO_OUTPUT)

    return 1

def fuzzing_dir_exists(cargs):
    """Return 1 if the AFL fuzzing dir and its queue/ subdir exist, else 0."""
    top = cargs.afl_fuzzing_dir
    if os.path.exists(top) and os.path.exists(top + '/queue'):
        return 1
    return 0

def tee_print(pstr, pfile):
    """Print pstr and also append it, newline-terminated, to file pfile."""
    print(pstr)
    with open(pfile, 'a') as sink:
        sink.write("%s\n" % pstr)
    return

def handle_cmdline():
    """Define the afl-cov command line and return the parsed arguments.

    Options are registered in the order they appear below. Returns the
    argparse namespace produced from sys.argv.
    """

    parser = argparse.ArgumentParser()

    ### small wrappers for the three kinds of options used below
    def text(*names, **extra):
        parser.add_argument(*names, type=str, **extra)

    def number(*names, **extra):
        parser.add_argument(*names, type=int, **extra)

    def flag(*names, **extra):
        parser.add_argument(*names, action='store_true', default=False,
                **extra)

    text("-e", "--coverage-cmd",
            help="set command to exec (including args, and assumes code coverage support)")
    text("-d", "--afl-fuzzing-dir",
            help="top level AFL fuzzing directory")
    text("-c", "--code-dir",
            help="directory where the code lives (compiled with code coverage support)")
    flag("-O", "--overwrite",
            help="overwrite existing coverage results")
    flag("--disable-cmd-redirection",
            help="disable redirection of command results to /dev/null")
    flag("--disable-lcov-web",
            help="disable generation of all lcov web code coverage reports")
    flag("--disable-coverage-init",
            help="disable initialization of code coverage counters at afl-cov startup")
    flag("--coverage-include-lines",
            help="include lines in zero-coverage status files")
    flag("--live",
            help="process a live AFL directory, and afl-cov will exit when it appears afl-fuzz has been stopped")
    number("--sleep", default=60,
            help="In --live mode, # of seconds to sleep between checking for new queue files")
    flag("--lcov-web-all",
            help="generate lcov web reports for all id:NNNNNN* files instead of just the last one")
    flag("--preserve-all-lcov-files",
            help="Keep all lcov files (not usually necessary)")
    text("--func-search",
            help="search for coverage of a specific function")
    text("--line-search",
            help="search for coverage of a specific line number (requires --src-file)")
    text("--src-file",
            help="restrict function or line search to a specfic source file")
    number("--afl-queue-id-limit", default=0,
            help="limit the number of id:NNNNNN* files processed in the AFL queue/ directory")
    flag("-v", "--verbose",
            help="verbose mode")
    flag("-V", "--version",
            help="print version and exit")
    flag("-q", "--quiet",
            help="quiet mode")

    return parser.parse_args()

### run only when executed as a script; main()'s return value becomes
### the process exit status
if __name__ == "__main__":
    sys.exit(main())
