#!/usr/bin/python3
#    brd - scans directories and files for damage due to decay of medium.
#    Copyright (C) 2013 Jeff Backus <jeff.backus@gmail.com>
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License along
#    with this program; if not, write to the Free Software Foundation, Inc.,
#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


from __future__ import unicode_literals
import logging
import os
import stat
import hashlib
import argparse
from argparse import RawTextHelpFormatter
import sqlite3
import operator
import time
import io
import sys
import datetime

###########
# Globals #
###########
table_names = { 'files': 'fp_files', 'dirs': 'fp_dirs', 
                'tmp_dirs' : 'tmp_dirs' }

version = 0
default_db = os.path.basename(sys.argv[0]) + ".db"

help_desc = """
bit_rot_detector, or brd, is a tool to scan a directory tree and check each file
for corruption caused by damage to the physical storage medium or by damage from
malicious programs. Files are fingerprinted using the SHA-1 algorithm. File
fingerprints, sizes, and modification times are stored in a SQLite database.

Multiple unrelated directory trees can be stored in the same database. All
trees are identified by the base directory, or root, as specified on the 
command-line. Note that the following are considered different trees:
    some_dir/
    ./some_dir/

Subtrees can be specified anywhere roots can as long as the full root to subtree
path is specified. However, the subtree must already exist in the database. If
the subtree is not currently in the database, it will be considered to be a
new root. For example, the following will scan some_dir as a new root and add 
all of its children to the database, then scan the subtree some_subtree for
problems and/or changes:
    # Add new root
    brd scan some_dir
    # Scan only part of a root for problems and changes.
    brd scan some_dir/some_subtree

Note: the default behavior for scan is to update the database for any files
that are newer than the database record. Files that have been tampered with will
likely be flagged as updated and not as damaged. To properly check for files
that have been tampered with, run scan with the --dry-run option.

Since scanning large trees or trees with large files can take quite a while,
it is possible to exclude files in directories that have recently been checked
via the -s option. How recent is "recent" is controlled by the --expr option and
defaults to 30 days.

The integrity of the database can be checked via the checkdb subcommand.

In addition to checking files for corruption, brd provides the ability to search
the database for duplicate files and subtrees, as well as diff subtrees.
See the dupe_files, dupe_trees, and diff subcommands for details.
"""

help_epilog = """
For more information: 
http://github.com/jsbackus/bit_rot_detector

(c) 2013 Jeff Backus
Released under the GPL v2.
"""

#############
# Functions #
#############
def parse_args():
    """Sets up the argparse object and attempts to parse the command-line.
    """
    
    # Create parser object
    parser = argparse.ArgumentParser(description=help_desc, epilog=help_epilog,
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('--version', action="version", 
                        version="%(prog)s version "+str(version) + 
                        " (c) 2013 Jeff Backus")
    parser.add_argument('-l', '--log', nargs='?', 
                        default='', help='log to file instead of console')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='only display warnings and errors')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='display debugging information')
    parser.add_argument('--db', default=default_db,
                        help='Database that contains file fingerprint info.' +
                        '\nDefaults to: ' + default_db)

    subparsers = parser.add_subparsers(title='subcommands', dest='subcommand',
                                       description='valid subcommands',
                                       help='Subcommand Help')
    # Scan subparser
    scan_mode = subparsers.add_parser('scan', 
                                      help='Scans and checks the filesystem')

    scan_mode.add_argument('--use-root', default='',
                           help='Strip path information from all targets and ' +
                           'replace with the specified string when ' +
                           'interacting with the database.')
    scan_mode.add_argument('--root-prefix', default='',
                           help='Append the specified string to all targets ' +
                           'when interacting with the database.')
    scan_mode.add_argument('-p', '--prune', action='store_true',
                           help='Prunes directories and files from database ' +
                           'that no longer exist on the filesystem.')
    scan_mode.add_argument('-P', '--progress', action='store_true',
                           help='Displays progress meter.')
    scan_mode.add_argument('--check-only', action='store_true',
                           help='Scans the specified target but does not ' +
                           'update the database when changes are detected.' +
                           ' Useful for checking for malicious damage when ' +
                           ' scanning files that are known to not have ' +
                           'changed.')
    scan_mode.add_argument('--dry-run', action='store_true', 
                           dest='check_only', 
                           help='Alias for --check-only.')
    scan_mode.add_argument('-s', '--skip-recent', action='store_true',
                           help='When scanning, skips files in directories that'
                           + ' have been checked recently. See --expr.')
    scan_mode.add_argument('--expr', default=30, type=int,
                           help="Specifies how old 'recent' is, in days, when" +
                           " skipping recent files.")
    scan_mode.add_argument('target', nargs='+', 
                           help='Directory tree to scan and check. Can be ' +
                           'a subdirectory of a root that already exists in ' +
                           'the database. If this target is not already in ' +
                           'the database, it will be added as a new root.')

    # Dupe files subparser
    dupe_files_mode = subparsers.add_parser('dupe_files', 
                                            help='Scans the database for ' +
                                            'duplicate files by fingerprint.')
    dupe_files_mode.add_argument('-o', '--output', nargs='?', default='',
                                 help='Optional file to dump list of ' +
                                 'duplicates to. Useful when -v or -d is ' +
                                 'used.')

    # Dupe subtrees subparser
    dupe_trees_mode = subparsers.add_parser('dupe_trees', 
                                             help='Scans the database for ' +
                                             'duplicate subtrees.')
    dupe_trees_mode.add_argument('--nofilefp', action='store_true',
                                 help="Don't include file fingerprints in " +
                                 "directory fingerprints.")
    dupe_trees_mode.add_argument('--nofilename', action='store_true',
                                 help="Don't include file names in directory " +
                                 "fingerprints.")
    dupe_trees_mode.add_argument('--nosubdirfp', action='store_true',
                                 help="Don't include subdirectory " +
                                 "fingerprints in directory fingerprints.")
    dupe_trees_mode.add_argument('--nosubdirname', action='store_true',
                                 help="Don't include subdirectory names in " +
                                 "directory fingerprints.")
    dupe_trees_mode.add_argument('--nodirname', action='store_true',
                                 help="Don't include a directory's name in " +
                                 "its own fingerprint.")
    dupe_trees_mode.add_argument('-o', '--output', nargs='?', default='',
                                 help='Optional file to dump list of ' +
                                 'duplicates to. Useful when -v or -d is ' +
                                 'used.')

    # Diff Root subparser
    diff_mode = subparsers.add_parser('diff', 
                                      help='Compares subtrees like diff -R.')
    diff_mode.add_argument('-o', '--output', nargs='?', default='',
                           help='Optional file to diff output to. ' +
                           'Useful when -v or -d is used.')
    diff_mode.add_argument('--use-root', default='',
                           help='Strip path information from all targets ' +
                           'and replace with the specified string when ' +
                           'interacting with the database.')
    diff_mode.add_argument('--root-prefix', default='',
                           help='Append the specified string to all ' +
                           'targets when interacting with the database.')
    diff_mode.add_argument('target', nargs=2, 
                           help='Names of roots/subtrees to check.')

    # Check DB subparser
    checkdb_mode = subparsers.add_parser('checkdb', 
                                         help='Fingerprints the database in ' +
                                         'order to verify it.')
    checkdb_mode.add_argument('-P', '--progress', action='store_true',
                              help='Displays progress meter.')
    checkdb_mode.add_argument('--check-only', action='store_true',
                              help='Calculates and compares the fingerprint ' +
                              'the database against the fingerprint file but ' +
                              'does not update the fingerprint file if the ' +
                              'database has been modified since the file was ' +
                              'last updated.')
    checkdb_mode.add_argument('--dry-run', action='store_true', 
                              dest='check_only', 
                              help='Alias for --check-only.')
    
    # list subparser
    list_mode = subparsers.add_parser('list',
                                      help='Displays database contents')
    list_mode.add_argument('--use-root', default='',
                           help='Strip path information from all targets and ' +
                           'replace with the specified string when ' +
                           'interacting with the database.')
    list_mode.add_argument('--root-prefix', default='',
                           help='Append the specified string to all targets ' +
                           'when interacting with the database.')
    list_mode.add_argument('-e', '--expanded', action='store_true',
                           help="Display additional info for directories.")
    list_mode.add_argument('-m', '--minimal', action='store_true',
                           help="Display only directory contents.")
    list_mode.add_argument('target', nargs='*', default='',
                           help='Entry/entries to list, or lists roots if none'
                           + ' specified. Format: <root>/<path>')

    # rm subparser
    rm_mode = subparsers.add_parser('rm', help='Recursively removes items ' +
                                    'from the database.')
    rm_mode.add_argument('--use-root', default='',
                         help='Strip path information from all targets ' +
                         'and replace with the specified string when ' +
                         'interacting with the database.')
    rm_mode.add_argument('--root-prefix', default='',
                         help='Append the specified string to all ' +
                         'targets when interacting with the database.')
    rm_mode.add_argument('target', nargs='+', default='',
                         help='Root to remove.')
    rm_mode.add_argument('--dry-run', action='store_true',
                         help='Simulates deleting specified target(s) ' +
                         'without actually deleting anything.' )

    # Create namespace from command-line
    return parser.parse_args()

def setup_logger(verbose_mode, debug_mode, filename=''):
    """Sets up the global logging object.
    """

    # Set up root logger
    rootLogger = logging.getLogger()
    rootLogger.setLevel(logging.DEBUG)

    # Set up formatter info
    formatter_str = '[%(asctime)s %(levelname)s] %(message)s'
    formatter_date = '%Y-%m-%d %I:%M:%S'

    formatter = logging.Formatter(formatter_str, formatter_date)

    # Set up file logger, if appropriate
    if 0 < len(filename):
        fh = logging.FileHandler(filename)
        
        # Determine log level. For files, assume INFO
        if debug_mode:
            fh.setLevel(logging.DEBUG)
        else:
            fh.setLevel(logging.INFO)

        # Set formatter
        fh.setFormatter(formatter)

        # Add to root logger
        rootLogger.addHandler(fh)

        # Log file-only messages
        logging.info('Logging started')

        if debug_mode:
            logging.info('Logging debug messages is enabled')
        
    # Set up console logger
    con = logging.StreamHandler()
    # Set formatter
    formatter = logging.Formatter('\r' + formatter_str, formatter_date)
    con.setFormatter(formatter)

    # Set level
    if debug_mode:
        con.setLevel(logging.DEBUG)
    elif verbose_mode:
        con.setLevel(logging.INFO)
    else:
        con.setLevel(logging.WARNING)

    # Add to root logger
    rootLogger.addHandler(con)

def serialize_hash_of_lists( hash_of_lists ):
    """Generator to serialize a hash of lists. It goes through the keys of the
    hash in no particular order, returning the items of each list one at a time
    as a tuple.
    """
    keys_list = list(hash_of_lists.keys())
    cur_key = ""
    cur_idx = -1
    while True:
        if cur_idx < 0 or len(hash_of_lists[ cur_key ]) <= cur_idx:
            if 0 < len(keys_list):
                cur_key = keys_list.pop()
            else:
                break
            cur_idx = 0
        if cur_idx < len(hash_of_lists[ cur_key ]):
            tmp = (hash_of_lists[ cur_key ][ cur_idx ],)
            yield tmp
            cur_idx += 1
        else:
            break

def read_in_chunks(file_obj, chunk_size=1024*1024):
    """Generator to read data from the specified file in chunks.
    Default chunk size is 1M.
    """
    while True:
        data = file_obj.read(chunk_size)
        if not data:
            break
        yield data

def calc_fingerprint(filename, file_size=0):
    """Returns a string containing the SHA1 fingerprint of this file.
    """
    # Create a new SHA1 object and read from the specified file in chunks.
    result = hashlib.sha1()
    
    # Generate progress-friendly version of the filename
    if cmd_args.progress:
        bytes_read = 0
        last_line_len = 0
        prog_fn = os.path.basename(filename)
        if 21 < len(prog_fn):
            prog_fn = prog_fn[:9] + '...' + prog_fn[-9:]
        start_time = time.time()

    with open(filename, 'rb') as f:
        for read_data in read_in_chunks(f):
            result.update(read_data)

            if cmd_args.progress:
                bytes_read += len(read_data)
                avg_speed = time.time() - start_time

                if 0 < avg_speed:
                    avg_speed = bytes_read / avg_speed
                else:
                    avg_speed = "N/A"

                status_line = prog_fn + ': ' + str(bytes_read) + ' of '
                status_line += str(file_size) + ' bytes @ ' 
                if 1e6 < avg_speed:
                    status_line += '{:03.2f}'.format(avg_speed / 1e6) + 'MB/s'
                elif 1e3 < avg_speed:
                    status_line += '{:03.2f}'.format(avg_speed / 1e6) + 'KB/s'
                else:
                    status_line += '{:03.2f}'.format(avg_speed / 1e6) + 'B/s'

                # print new line
                sys.stdout.write('\r'+status_line + 
                                 (' ' * (last_line_len-len(status_line) + 1)))
                last_line_len = len(status_line)
                
    if cmd_args.progress:
        sys.stdout.write('\n')

    # Return fingerprint
    return result.hexdigest()
    
def process_file(node, filename, mode, file_db_dict, cursor):
    """Processes the specified file. This function is responsible for
    calculating the fingerprint of the file and comparing it against the
    database. If the fingerprints don't match, the modification times are
    compared.

    Returns a dict that can be used to update stats. 
    """

    # Concatenate path and filename
    fullname = os.path.join(node[0], filename)

    # Log the file
    logging.info('Processing file \'%s\'', fullname)
    
    # Generate fingerprint
    fp_cpu_time = time.clock()
    fp_real_time = time.time()
    fp = calc_fingerprint(fullname, mode.st_size)
    fp_cpu_time = time.clock() - fp_cpu_time
    fp_real_time = time.time() - fp_real_time
    logging.debug('File \'%s\' finished in %.4f seconds (%.4f CPU seconds)', 
                  fullname, fp_real_time, fp_cpu_time)

    # Update stats with throughput info
    ret_val = { "bytes" : mode.st_size, "time" : fp_real_time }

    # Attempt to access this file's entry in the DB dict. If we throw an
    # exception, we'll need to add the file to the database. If no exception
    # process the file.
    try:
        # Split out entries of file_rec to local variables to avoid confusion
        logging.debug('Entry: %s', file_db_dict[filename])
        db_id = file_db_dict[filename][0]
        db_mtime = file_db_dict[filename][1]
        db_fp = file_db_dict[filename][2]
        db_size = file_db_dict[filename][3]

        # Remove file from db dict to indicate that we've seen it.
        del(file_db_dict[filename])
        logging.debug("Marking file '" + fullname + "' as seen.")

        # Check file against database
        if db_fp != fp:
            # Fingerprints don't match...
            logging.debug('File \'' + fullname + '\' does not match ' +
                          'fingerprint in database')
                    
            # Compare modification times. If this file is newer, 
            # update database. Otherwise issue a warning.
            if float(db_mtime) < mode.st_mtime:
                if not cmd_args.check_only:
                    logging.info('File \'' + fullname + '\' is newer than '
                                 + 'database record. Updating...')
                    update_file(db_id, fp, mode, cursor)

                    # Return dict indicating update
                    ret_val['updated'] = 1
                else:
                    logging.warning('File \'' + fullname + '\' is newer than '
                                 + 'database record. Marking as "bad"...')
                    ret_val['bad'] = 1
                    
                return ret_val
            else:
                logging.warning('File \'' + fullname + '\' does not ' +
                                'match fingerprint in database ' +
                                'and is not newer. File could be damaged!')

                # Return dict indicating problem
                ret_val['bad'] = 1
                return ret_val
        else:
            logging.debug('File \'' + fullname + '\' matches ' +
                          'fingerprint in database')

            # Double-check size to make sure we aren't fooling 
            # ourselves.
            if db_size != mode.st_size:
                logging.warning('File sizes do not match for ' +
                                'file \'' + fullname + 
                                '\'! File could be damaged!')
                # Return dict indicating problem
                ret_val['bad'] = 1
                return ret_val
            else:
                logging.debug('File \'' + fullname + 
                              '\' matches file size in database')
                # Return dict indicating file ok
                ret_val['good'] = 1
                return ret_val

    except KeyError:
        # File doesn't exist in database - add
        if not cmd_args.check_only:
            logging.debug('File \'' + fullname + 
                          '\' not in database. Adding...')
            add_file(filename, fp, mode, node[1], cursor)
        else:
            logging.debug('File \'' + fullname + 
                          '\' not in database. Marking as "added", but ' +
                          'database not actually touched.')

        # Return dict indicating addition
        ret_val['added'] = 1
        return ret_val

def get_dir_items_from_db(cursor, parent_id, check_files):
    """Searches the database using the specified cursor object for all items
    in the dirs and files tables with the specified Parent_ID.
    
    A dict with the following structure is returned:
    * 'dir_entries' = A dict of dir names => (Path_ID) of all dirs in db with
      the specified parent.
    * 'file_entries' = A dict of filenames => (File_ID, LastModified, FP, Size)
      of all files in db with the specified parent.
    """

    ret_val = { 'dir_entries': dict() }

    # Search the database for all directories with the specified parent.
    cursor.execute("SELECT Path_ID, Name, LastChecked from '" + 
                   table_names['dirs'] + "' WHERE Parent_ID=?", (parent_id,))

    for entry in cursor.fetchall():
        dir_id = entry[0]
        dir_name = entry[1]
        last_checked = entry[2]
        logging.debug("Found dir '%s' (ID='%s') with parent '%s'.", 
                      dir_name, dir_id, parent_id)
        ret_val['dir_entries'][dir_name] = (dir_id, last_checked)

    # Search the database for all files with the specified parent, unless
    # we're skipping the files in this directory.
    if check_files:
        ret_val['file_entries'] = get_file_items_from_db( cursor, parent_id )
    else:
        ret_val['file_entries'] = dict()

    # Return entry list
    return ret_val

def get_file_items_from_db(cursor, parent_id):
    """Searches the database using the specified cursor object for all items
    in the files tables with the specified Parent_ID.
    
    A dict with the following structure is returned:
    * filename => (File_ID, LastModified, FP, Size) of all files in db with the
      specified parent.
    """

    ret_val = dict()

    cursor.execute("SELECT File_ID, Name, LastModified, Fingerprint, Size" +
                   " from '" + table_names['files'] + 
                   "' WHERE Parent_ID=?", (parent_id,))

    for entry in cursor.fetchall():
        file_id = entry[0]
        file_name = entry[1]
        file_mtime = entry[2]
        file_fp = entry[3]
        file_size = entry[4]
        logging.debug("Found file '%s' (ID='%s') with parent '%s'.", 
                      file_name, file_id, parent_id)
        ret_val[file_name] = (file_id, file_mtime, file_fp, file_size)

    return ret_val

def add_file(filename, fp, mode, parent_id, cursor):
    """ Adds the specified file with the specified mode, fingerprint, and
    parent_id to the 'files' table. Returns the new entry's File_ID.
    """
    cursor.execute("INSERT INTO '" + table_names['files'] + 
                   "'(Name, Parent_ID, LastModified, " +
                   "Fingerprint," +
                   " Size) VALUES(?, ?, ?, ?, ?)", 
                   (filename, parent_id, mode.st_mtime, fp, mode.st_size))
    ret_val = cursor.lastrowid
    logging.debug("File '%s' with parent %s' added to database with ID = %s",
                  filename, parent_id, str(ret_val))

    return ret_val

def update_file(file_id, fp, mode, cursor):
    """ Updates the specified file with the specified mode, fingerprint, and
    parent_id to the 'files' table.
    """
    cursor.execute("UPDATE '" + table_names['files'] + 
                   "' SET LastModified=?, Fingerprint=?, Size=? WHERE " +
                   "File_ID=?", (mode.st_mtime, fp, mode.st_size, file_id))

def add_dir(path, parent_id, cursor):
    """Adds the specified path with specified parent_id to the 'dirs' table.
    Returns the new entry's Path_ID.
    """

    cursor.execute("INSERT INTO '" + table_names['dirs'] + 
                   "'(Name, Parent_ID) VALUES(?, ?)", 
                   (path, str(parent_id)))
    ret_val = (cursor.lastrowid, None)
    logging.debug('Directory \'' + str(path) + '\' added with ID = ' + 
                  str(ret_val))
    return ret_val

def mark_dir_checked(dir_id, cursor):
    """Marks the specified directory as "recently seen".
    """

    tmp_now = datetime.datetime.now()
    cursor.execute("UPDATE '" + table_names['dirs'] + 
                   "' SET LastChecked=? WHERE Path_ID=?", (tmp_now, dir_id))
                                                           

def prune_files(path, file_data, cursor):
    """This routine is responsible for iterating over the list of files in the
    database but not on the filesystem and either removing them from the 
    database or simply alerting the user, depending on command-line arguments.
    Returns a tuple to be added to the files stats tuple.
    """
    
    if 0 < len(file_data):
        for item in file_data.keys():
            if ok_to_prune:
                cursor.execute("DELETE FROM '" + table_names['files'] + 
                               "' WHERE File_ID = ?",
                               (file_data[item][0],))
                logging.info("File '%s' pruned from directory '%s'.", item, 
                             path)
            else:
                if cmd_args.subcommand == 'scan':
                    logging.warning("File '%s' no longer exists in directory " +
                                    "'%s'!", item, path)
                elif cmd_args.subcommand == 'rm':
                    logging.info("File '%s' would be pruned from directory " +
                                 "'%s'.", item, path)
                else:
                    logging.warning("File '%s' no longer exists in directory " +
                                    "'%s'!", item, path)

    return { 'missing' : len(file_data) }

def prune_dirs(path, dir_data, cursor):
    """This routine is responsible for iterating over the list of directories in
    the database but not on the filesystem and either removing them from the 
    database or simply alerting the user, depending on command-line arguments.
    Note that it is necessary to remove subdirectories and files!
    Returns a tuple of tuples that contain the updated stats: (files, dirs)
    """

    done = False
    ret_val = [ { 'missing' : 0}, { 'missing' : 0 } ]
    item_data = { 'file_entries': dict(), 'dir_entries': dir_data }

    # Build a queue like in crawl_tree of all directories to be pruned.
    dir_queue = []

    while not done:
        # Each round, append any directories in the database dict to the queue.
        for item in item_data['dir_entries'].keys():
            dir_queue.append( ( path, item, item_data['dir_entries'][item]) )
            
        # Prune any files
        ret_val[0] = add_dicts(ret_val[0], prune_files(path, item_data[ \
                    'file_entries'], cursor))
        
        if 0 < len(dir_queue):
            # Pop the next directory off of the stack
            node = dir_queue.pop()
            logging.debug('Attempting to prune item: %s', node)

            # Update path to refer to this item
            path = os.path.join(node[0], node[1])

            # Grab all entries for this directory from the database
            item_data = get_dir_items_from_db(cursor, node[2][0], True)

            # Delete this item from the database, if appropriate
            if ok_to_prune:
                cursor.execute("DELETE FROM '" + table_names['dirs'] + 
                               "' WHERE Path_ID = ?",
                               (node[2][0],))
                logging.info("Subdirectory '%s' pruned from directory '%s'.", 
                             node[1], node[0])
            else:
                if cmd_args.subcommand == 'scan':
                    logging.warning("Subdirectory '%s' no longer exists in " +
                                    "directory '%s'!", node[1], node[0])
                elif cmd_args.subcommand == 'rm':
                    logging.info("Subdirectory '%s' would be pruned from " +
                                 "directory '%s'.", node[1], node[0])
                else:
                    logging.warning("Subdirectory '%s' no longer exists in " +
                                    "directory '%s'!", node[1], node[0])
            ret_val[1]['missing'] += 1
        else:
            done = True

    return ret_val

def add_dicts(d1, d2):
    """Properly vector-adds the specified dicts-of-numbers and returns the 
    result.
    """
    ret_val = dict()
    for key in d1.keys():
        if key in d2:
            ret_val[ key ] = d1[ key ] + d2[ key ]
        else:
            ret_val[ key ] = d1[ key ]

    for key in d1.keys():
        if not key in ret_val:
            if key in d1:
                ret_val[ key ] = d1[ key ] + d2[key]
            else:
                ret_val[ key ] = d2[ key ]

    return ret_val

def scan_target(target, db_conn):
    """Scans the specified root, checking all files and directories that
    belong to it.
    Returns 0 if successful or 1 if error.
    """

    # Get DB cursor object
    cursor = db_conn.cursor()

    # Attempt to resolve the target
    target_info = resolve_target(target, cursor, True)

    return crawl_tree(target, target_info, cursor)

def gen_file_stats_dict():
    """Returns a properly formated file_stats dictionary.
    """
    # Probably should implement as a class, but meh.
    return { 'added' : 0, 'good' : 0, 'updated' : 0, 'bad' : 0, 
             'missing' : 0, 'skipped' : 0, 'bytes' : 0, 'time' : 0.0 }
        
def gen_dir_stats_dict():
    """Returns a properly formated dir_stats dictionary.
    """
    # Probably should implement as a class, but meh.
    return { 'added' : 0, 'good' : 0, 'updated' : 0, 'bad' : 0, 
             'missing' : 0, 'skipped' : 0}


def crawl_tree(target, target_info, cursor):
    """Crawls the specified directory, processing all files that it finds.
    Returns 0 if successful or 1 if error.
    """

    # Build timedelta object used to determine if entities have expired.
    expr_check = datetime.timedelta(cmd_args.expr)

    file_stats = gen_file_stats_dict()
    dir_stats = gen_dir_stats_dict()

    # Make sure target is a directory
    if target_info['file_id'] == None:
        # Push root onto queue to start process
        dir_queue = [(target, target_info['dir_id'], 
                      target_info['last_checked'])]
    else:
        # Otherwise, process single file and move on
        dir_queue = list()
        # TO DO
        entry_stat = os.stat(target)
        node = ( os.path.dirname( target ), )
        file_name = os.path.basename( target )
        file_db_data = get_file_items_from_db( cursor, target_info['dir_id'] )
                    
        file_stats = add_dicts(file_stats, process_file(node, file_name, 
                                                        entry_stat, 
                                                        file_db_data, cursor ) )

    error_flag = 0

    try:
        # Walk the filesystem
        while 0 < len(dir_queue):
            node = dir_queue.pop()

            # Log the directory that we are processing
            logging.info("Processing directory '%s'", node[0])

            # Figure out if we're skipping the files in this directory
            check_files = True
            if cmd_args.skip_recent:
                if (node[2] != None) and (0 < len(str(node[2]))):
                    tmp_delta = datetime.datetime.now() - node[2]
                    if not (expr_check < tmp_delta):
                        check_files = False
                        logging.info("Skipping files in '%s' because it was " +
                                     "last checked %s days ago.", node[0], 
                                     tmp_delta.days)

            # Query the database
            db_cpu_time = time.clock()
            db_real_time = time.time()
            dir_db_data = get_dir_items_from_db(cursor, node[1], check_files)
            logging.debug("Dir '%s' DB fetched in %.4f seconds (%.4f CPU " +
                          "seconds)",
                          node[0], time.time() - db_real_time, 
                          time.clock() - db_cpu_time)

            ## Process directory contents
            for entry in os.listdir(node[0]):
                # Generate full path to entry
                entry_full_name = os.path.join(node[0], entry)
                
                # Grab entry filesystem stats
                try:
                    entry_stat = os.stat(entry_full_name)

                    if os.path.islink(entry_full_name):
                        # Entry is a symbolic link, which we ignore.
                        logging.info("Skipping symbolic link '%s'",
                                     entry_full_name)
                        if stat.S_ISREG(entry_stat.st_mode):
                            file_stats['skipped'] += 1
                        elif stat.S_ISDIR(entry_stat.st_mode):
                            dir_stats['skipped'] += 1
                    
                    elif stat.S_ISREG(entry_stat.st_mode):
                        # Entry is a regular file.
                        
                        # Process file and update stats, unless we're skipping
                        # files in this directory
                        if check_files:
                            file_stats = add_dicts(file_stats, 
                                                   process_file(node, entry, 
                                                                entry_stat, 
                                                                dir_db_data[ \
                                        'file_entries'],
                                                                cursor))
                            # Update number of bytes read
                            
                        else:
                            file_stats['skipped'] += 1
                        
                    elif stat.S_ISDIR(entry_stat.st_mode):
                        # Attempt to push directory onto stack using data from 
                        # db. If there is an error, assume that the directory is
                        # new. Add it to the DB then push it onto the stack.
                        try:
                            tmp_data = dir_db_data['dir_entries'][entry]
                            dir_queue.append( (entry_full_name, tmp_data[0],
                                               tmp_data[1]) )
                            
                            del(dir_db_data['dir_entries'][entry])
                            logging.debug('Marking directory \'' + 
                                          entry_full_name +
                                          '\' as seen.')
                            
                            # Update stats
                            dir_stats['good'] += 1
                        except KeyError:
                            # Item not in list. Add then append to queue.
                            if not cmd_args.check_only:
                                tmp_data = add_dir(entry, node[1], cursor)
                            else:
                                logging.info('Dir %s not in database. Marking' +
                                             ' as "added", but database has ' +
                                             'not been touched!')

                            dir_queue.append( (entry_full_name, tmp_data[0],
                                               tmp_data[1]) )

                            # Update stats.
                            dir_stats['added'] += 1

                except OSError as e:
                    logging.warning("OSError({0}): {1}".format(e.errno, 
                                                               e.strerror) +
                                    " on file '" + entry_full_name + "'")

            file_stats = add_dicts(file_stats, prune_files(node[0], 
                                                           dir_db_data[ \
                        'file_entries'], cursor))
            tmp_stats = prune_dirs(node[0], dir_db_data['dir_entries'], cursor)
            file_stats = add_dicts(file_stats, tmp_stats[0])
            dir_stats = add_dicts(dir_stats, tmp_stats[1])

            # Mark this directory has recently checked, if appropriate
            if check_files:
                mark_dir_checked(node[1], cursor)

    except KeyboardInterrupt:
        error_flag = 1
        logging.error("Interrupt detected, aborting crawl and " +
                      "committing all changes.")
    
    # Commit
    db_conn.commit()

    logging.info('Finished processing root \'' + target + '\'.')


    # Return list of error code and stats infos
    return [error_flag, file_stats, dir_stats]

def print_scan_stats(file_stats, dir_stats):
    """Prints file and dir scan statistics.
    """

    logging.info('Scan Results:')
    action_names = ('added', 'good', 'updated', 'BAD', 'MISSING', 'skipped')
    logging.info('    Files:')
    for action in action_names:
        logging.info('      %s: %s', action, file_stats[action.lower()])
    logging.info('    Directories:')
    for action in action_names:
        logging.info('      %s: %s', action, dir_stats[action.lower()])

    if 0 < file_stats['time']:
        speed_info = str(file_stats['bytes']) + " bytes processed in "
        speed_info += '{:03.2f}'.format(file_stats['time'])
        speed_info += ' seconds for an average speed of '
        
        avg_speed = file_stats['bytes'] / file_stats['time']
        if 1e6 < avg_speed:
            speed_info += '{:03.2f}'.format(avg_speed / 1e6) + 'MB/s'
        elif 1e3 < avg_speed:
            speed_info += '{:03.2f}'.format(avg_speed / 1e6) + 'KB/s'
        else:
            speed_info += '{:03.2f}'.format(avg_speed / 1e6) + 'B/s'

        logging.info(speed_info)

def open_db(db_url):
    """Function to open the specified SQLite database and return a Connection
    object to it. If the requisite table structure does not exist, it will be
    created.
    """

    # Connect to database
    conn = sqlite3.connect(database=db_url, 
                           detect_types=sqlite3.PARSE_DECLTYPES)

    # Look for fingerprints table
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    found = {'files': False, 'dirs': False }
    for table in cursor.fetchall():
        logging.debug("Found table '" + table[0] + "'")
        for fp_table in table_names:
            if (table[0] == table_names[fp_table]):
                logging.debug("Found " + fp_table + " fingerprint table '" + 
                              table[0] + "'.")
                found[fp_table] = True

    # If not found, create
    if not(found['files']):
        cursor.execute("CREATE TABLE '" + table_names['files'] + 
                       "'(File_ID INTEGER PRIMARY KEY AUTOINCREMENT, " +
                       "Name TEXT, Parent_ID INTEGER, " +
                       "LastModified TEXT, Fingerprint TEXT, Size INTEGER)")
        cursor.execute("CREATE INDEX file_parent_idx ON " + table_names['files']
                      + "(Parent_ID)")
        
    if not(found['dirs']):
        cursor.execute("CREATE TABLE '" + table_names['dirs'] + 
                       "'(Path_ID INTEGER PRIMARY KEY AUTOINCREMENT, " +
                       "Name TEXT, Parent_ID INT, LastChecked TIMESTAMP)")
        cursor.execute("CREATE INDEX dir_parent_idx ON " + table_names['dirs']
                      + "(Parent_ID)")
        
    return conn

def reconstruct_tree(cursor):
    """Iteratively fetches ancestores for all items in the tmp_dirs table until
    all ancestors have been retrieved. Then reconstructs the paths for each 
    entry, returning a dict of dicts: 
    * roots : <path_id> : <root_name>
    * dirs : <path_id> : (<full path sans root name>, root id)
    """
    
    ret_val = { "roots" : {}, "dirs" : {} }
    done = False
    first_pass = True
    while not done:
        logging.debug("Reconstructing tree: First Pass? %s, Row Count: %s", 
                      str(first_pass), str(cursor.rowcount))
        if first_pass or ( 0 < cursor.rowcount ):
            cursor.execute("INSERT INTO '" + table_names['tmp_dirs'] + 
                           "' (Path_ID,Parent_ID" +
                           ") SELECT Path_ID,Parent_ID FROM '" + 
                           table_names['dirs'] + "' WHERE Path_ID IN " +
                           "(SELECT Parent_ID FROM '" + 
                           table_names['tmp_dirs'] + "' WHERE Parent_ID NOT " +
                           "IN (SELECT Path_ID FROM '" + 
                           table_names['tmp_dirs'] + "'))")
            first_pass = False
        else:
            done = True

    # Now retrieve directory names and place into a hash of path_id -> name
    # so that we can rebuild path names.
    # Note: we're expecting parents to have a numerically smaller ID than
    # their children. This will hold true as long as parents are always 
    # added to the database before processing children.
    parent_map = dict()
    cursor.execute("SELECT Path_ID,Parent_ID,Name FROM '" + 
                   table_names['dirs'] +
                   "' WHERE Path_ID IN (SELECT Path_ID FROM '" +
                   table_names['tmp_dirs'] + "') ORDER BY Path_ID")
    
    dir_node_count = 0
    for entry in cursor.fetchall():
        if entry[1] < 0:
            # Root, so wrap it in brackets
            ret_val['roots'][ entry[0] ] = entry[2]
        else:
            # Check to see if this directory's parent is in the list.
            # If not, parent is a root, so check there.
            if(entry[1] in ret_val['dirs']):
                parent_node = ret_val['dirs'][ entry[1] ]
                ret_val['dirs'][ entry[0] ] = (os.path.join( \
                        parent_node[0], entry[2] ), parent_node[1])
                dir_node_count += 1
            elif(entry[1] in ret_val['roots']): 
                ret_val['dirs'][ entry[0] ] = (os.path.join( "", entry[2] ), 
                                               entry[1] )
                dir_node_count += 1
            else:
                logging.error("Parent %s for directory %s (%s) not in map!",
                              entry[1], entry[2], entry[0])
                                                  
            
    logging.info("Directory nodes processed: " + str(dir_node_count))
    
    return ret_val

def gen_db_url(tree_info, parent_id, item):
    """Generates a suitable string representation of the specified entry in the
    database using the reconstructed tree information from reconstruct_tree().
    """

    if parent_id < 0:
        ret_val = '[' + item + ']'
    elif parent_id in tree_info['dirs']:
        root_id = tree_info['dirs'][ parent_id ][1]
        ret_val = '[' + tree_info['roots'][ root_id ] + ']'
        ret_val = os.path.join(ret_val, tree_info['dirs'][ parent_id ][0], 
                               item)
    else:
        ret_val = '[' + tree_info['roots'][ parent_id ] + ']'
        ret_val = os.path.join(ret_val, item)
        
    return ret_val

def create_tmp_dir_table(cursor):
    """Creates the temporary directory table
    """
    cursor.execute("CREATE TEMP TABLE '" + table_names['tmp_dirs'] + 
                   "' (Path_ID INTEGER " +
                   "PRIMARY KEY, Parent_ID INTEGER)")

def drop_tmp_dir_table(cursor):
    """Deletes the temporary dir table
    """
    cursor.execute("DROP TABLE '" + table_names['tmp_dirs'] + "'")

def check_dupe_files(db_conn):
    """Scans the database looking for duplicate files by fingerprint and prints
    results to STDOUT or optional output file.
    """

    # Dict of lists by fingerprint
    file_hash = dict()
    
    # Dict of directories by ID
    dir_hash = dict()

    # Count of duplicates
    count = 0
    
    # Get DB cursor object
    cursor = db_conn.cursor()

    # Create a temporary table containing all duplicate files
    cursor.execute("CREATE TEMP TABLE TMP_DUPES AS SELECT " +
                   "File_ID,Name,Fingerprint,Parent_ID FROM '" + 
                   table_names['files'] + "' WHERE " +
                   "Fingerprint IN (SELECT Fingerprint from '" + 
                   table_names['files'] + "' GROUP BY Fingerprint HAVING " +
                   "1<COUNT(*)) ORDER BY Fingerprint")

    # Select all duplicate files
    cursor.execute("SELECT * FROM TMP_DUPES")
    for entry in cursor.fetchall():
        count += 1
        try:
            # Attempt to append the current entry to the end of list for 
            # its fingerprint
            file_hash[entry[2]].append( (entry[0], entry[1], entry[3]) )
        except KeyError:
            # New entry
            file_hash[entry[2]] = [ (entry[0], entry[1], entry[3]) ]

    # Populate directory lookup table so that we can reconstruct each path.
    # We'll use a SQL to do the heavy lifting to avoid shipping data back and
    # forth.
    if 0 < count:
        # Create temp table
        create_tmp_dir_table(cursor)

        # First pass, direct parents from files temp table
        cursor.execute("INSERT INTO '" + table_names['tmp_dirs'] + 
                       "' (Path_ID,Parent_ID) " +
                       "SELECT Path_ID,Parent_ID FROM '" + table_names['dirs'] +
                       "' WHERE Path_ID IN (SELECT Parent_ID FROM TMP_DUPES " +
                       "GROUP BY Parent_ID)")

        # Now reconstruct tree
        tree_info = reconstruct_tree(cursor)

        ## Display results

        # Open dupes file, if specified
        fh = None
        if 0 < len(cmd_args.output):
            fh = io.open(cmd_args.output, 'wt')
        elif not cmd_args.verbose or (0 < len(cmd_args.log)):
            fh = io.open(sys.stdout.fileno(), 'wt')
        
        for fp in file_hash.keys():
            # Send to log
            logging.info(str(len(file_hash[fp])) + " files with Fingerprint 0x" 
                         + fp + ":")

            for entry in file_hash[fp]:
                path_name = gen_db_url( tree_info, entry[2], entry[1] )
                logging.info("    " + path_name + " (ID = " + str(entry[0]) + 
                             ")")

            # Send to file/STDOUT if appropriate
            if not (fh == None):
                fh.write(str(len(file_hash[fp])) + " files with Fingerprint 0x" 
                         + fp + ":" + os.linesep)

                for entry in file_hash[fp]:
                    path_name = gen_db_url( tree_info, entry[2], entry[1] )
                    logging.info("    " + path_name + " (ID = " + str(entry[0]) 
                             + ")")
                    fh.write("    " + path_name + os.linesep)

        if not (fh == None):
            fh.close()

    logging.info("Duplicates found: " + str(count))

def check_dupe_trees(db_conn):
    """Scans the database looking for duplicate subtrees and prints results
    to STDOUT or optional output file.
    """

    # Get DB cursor object
    cursor = db_conn.cursor()

    # Dict of directories by ID: { ID -> [ name, parent id, dir fp ] }
    dirs_by_id = dict()

    # Dict of directories by parent: { parent id -> [ dir ids ] }
    dirs_by_parent = dict()

    # Dict of directories by fp: { dir fp -> [ dir ids ] }
    dirs_by_fp = dict()

    # Count of duplicates
    count = 0

    # dict of directories waiting to be fingerprinted: 
    # { dir fp -> [ child ids waiting for fp ] }
    waiting_for_fp = dict()

    # Algorithm:
    # 0. Build dirs_by_id and dirs_by_parent from database.
    # 1. Start with all directories that do not have any child directories:
    #    a. Populate the "queue" of dirs waiting for fp with these directories.
    # 2. Iterate over the dirs waiting to be fingerprinted. For each dir:
    #    a. See if any of its children are in in the "queue" to be 
    #       fingerprinted. If so, skip this dir.
    #    b. Grab all files in this dir from the database.
    #    c. Generate the fingerprint for this directory:
    #       i. Add this directory's name to the cypher, if appropriate.
    #       ii. Sort child files list alphabetically.
    #       iii. Add file names (if applicable) and file fingerprints 
    #           (if applicable) to the cypher.
    #       iv. Sort list of child directories alphabetically
    #       v. Add dir names (if applicable) and dir fingerprints (if 
    #          applicable) to encoder.
    #       vi. Update entry in list of dirs by id.
    #       vii. Add fingerprint to list of fingerprints from this pass.
    #       vii. Remove from list of dirs waiting for fp.
    # 3. Iterate over list of fingerprints generated this pass. For each fp,
    #    check the number of entries with the same fp. If > 1:
    #    b. Grab parent dir information from database.
    #    c. Add parent dirs to list of dirs waiting for fp.
    #    d. Add parent dirs to list of dirs by id and dirs by parent.
    # 4. Repeat 2 and 3 until list of dirs waiting to be fingerprinted is empty.
    # 5. Iterate over list of fingerprints. For each fp, see the parents of
    #    each of the directories associated with the current fingerprint also 
    #    have the same fingerprint. If all do, delete this fingerprint.
    # 6. Reconstruct trees for all remaining entries and report results

    # Build dirs_by_id and dirs_by_parent from database
    cursor.execute("SELECT Path_ID,Name,Parent_ID FROM '" +
                   table_names['dirs'] + "'")
    for entry in cursor.fetchall():
        # For the sake of legibility, lets create some local copies of
        # what we retrieved from the database.
        dir_id = entry[0]
        dir_name = entry[1]
        dir_parent = entry[2]
        
        # Add to dict of dirs by id.
        dirs_by_id[ dir_id ] = [ dir_name, dir_parent, None ]

        # Add to dict of dirs by parent.
        if not dir_parent in dirs_by_parent:
            dirs_by_parent[ dir_parent ] = [ dir_id ]
        else:
            dirs_by_parent[ dir_parent ].append( dir_id )

        # Add debug message
        logging.debug( "Found directory '%s', ID=%s, Parent=%s",
                       dir_name, dir_id, dir_parent )
        
    # Select all directories that don't have any child directories into
    # temporary table
    cursor.execute("SELECT Path_ID FROM '" +
                   table_names['dirs'] + "' WHERE Path_ID NOT" +
                   " IN (SELECT Parent_ID FROM '" + table_names['dirs'] + "')")

    for entry in cursor.fetchall():
        # Add to "queue" of directories to be fingerprinted.
        waiting_for_fp[ entry[0] ] = []

        # Add debug message
        logging.debug( "Adding leaf node '%s', ID=%s, Parent=%s", 
                       dirs_by_id[ entry[0] ][ 0 ], entry[0], 
                       dirs_by_id[ entry[0] ][ 1 ])
        
    while 0 < len(waiting_for_fp):
        logging.debug("Waiting for fingerprint: %s", waiting_for_fp.keys())
        logging.debug("dirs_by_parent: %s", dirs_by_parent )

        for dir_id in list(waiting_for_fp.keys()):
            logging.debug("Checking '%s' (%s)", dirs_by_id[ dir_id ][ 0 ],
                          dir_id)
            # Make sure we haven't already removed this id from the list
            if not dir_id in waiting_for_fp:
                continue

            # Check children for any that still need to be fingerprinted.
            no_outstanding_children = True
            for child_id in waiting_for_fp[ dir_id ]:
                # See if the child is in the list to be fingerprinted.
                if child_id in waiting_for_fp[ dir_id ]:
                    no_outstanding_children = False
                    break
                else:
                    # If not, make sure its fingerprint actually exists.
                    # If so, remove it from this list and move on.
                    if dirs_by_id[ child_id ][ 2 ] != None:
                        waiting_for_fp[ dir_id ].remove( child_id )

            # Pop out if this directory is still waiting on children
            if not no_outstanding_children:
                logging.debug("Dir ID %s waiting for children: %s", dir_id, 
                              waiting_for_fp[ dir_id ] )
                continue
            
            ## Start generating a fingerprint
            logging.debug("Fingerprinting %s (%s)", dirs_by_id[ dir_id ][ 0 ],
                          dir_id )
            dir_fp = hashlib.sha1()
            
            # Add directory name to hash, if appropriate
            if not cmd_args.nodirname and 0 <= dirs_by_id[ dir_id ][ 1 ]:
                tmp_data = dirs_by_id[ dir_id ][ 0 ].encode('ascii')
                logging.debug( "Adding '%s' to hash", tmp_data )
                dir_fp.update( tmp_data )

            # Add files, if appropriate
            if not (cmd_args.nofilefp and cmd_args.nofilename):
                # Grab all files with the current dir as parent
                cursor.execute("SELECT Name,Fingerprint FROM '" + 
                               table_names['files'] + "' WHERE Parent_ID=?",
                               (dir_id,))
                # Stuff results into a dict of { file name -> fp }
                tmp_file_list = dict()
                for entry in cursor.fetchall():
                    logging.debug("Found file entry: %s", entry)
                    tmp_file_list[ entry[0] ] = entry[1]

                # Now sort results and add to cypher.
                for name in sorted( tmp_file_list.keys() ):
                    if not cmd_args.nofilename:
                        tmp_data = name.encode('ascii')
                        logging.debug( "Adding '%s' to hash", tmp_data )
                        dir_fp.update( tmp_data )
                    if not cmd_args.nofilefp:
                        tmp_data =  tmp_file_list[ name ].encode('ascii') 
                        logging.debug( "Adding '%s' to hash", tmp_data )
                        dir_fp.update( tmp_data )
                    
            # Add subdirectories, if appropriate
            if not (cmd_args.nosubdirname and cmd_args.nosubdirfp) and \
                    dir_id in dirs_by_parent:
                # Build a list of { directory name -> fp }
                logging.debug( "Processing children: %s", 
                               dirs_by_parent[ dir_id ] )
                tmp_dir_list = dict()
                for entry in dirs_by_parent[ dir_id ]:
                    dir_info = dirs_by_id[ entry ]
                    tmp_dir_list[ dir_info[ 0 ] ] = dir_info[ 2 ]

                # Now sort results and add to cypher.
                for name in sorted( tmp_dir_list.keys() ):
                    if not cmd_args.nosubdirname:
                        tmp_data =  name.encode('ascii') 
                        logging.debug( "Adding '%s' to hash", tmp_data )
                        dir_fp.update( tmp_data )
                    if not cmd_args.nosubdirfp:
                        tmp_data = tmp_dir_list[ name ].encode('ascii') 
                        logging.debug( "Adding '%s' to hash", tmp_data )
                        dir_fp.update( tmp_data )
            
            # Finalize fingerprint and update data structures
            dir_fp = dir_fp.hexdigest()
            dirs_by_id[ dir_id ][ 2 ] = dir_fp
            if not dir_fp in dirs_by_fp:
                dirs_by_fp[ dir_fp ] = [ dir_id ]
            else:
                dirs_by_fp[ dir_fp ].append( dir_id )
            logging.debug("Directory %s (%s) has fingerprint '%s'", 
                          dirs_by_id[ dir_id ][ 0 ], dir_id, dir_fp )

            # Remove from list of to-be-fp'd "queue". Add parent if this dir
            # is valid.
            del(waiting_for_fp[ dir_id ])
            dir_parent = dirs_by_id[ dir_id ][ 1 ]
            if 0 <= dir_parent:
                # If parent isn't already in the list, add an entry and copy
                # over contents from dirs_by_parent
                if not dir_parent in waiting_for_fp:
                    waiting_for_fp[ dir_parent ] = list(dirs_by_parent[ dir_parent ])
                # Remove this dir from the list of children waiting for
                # fingerprints
                waiting_for_fp[ dir_parent ].remove( dir_id )
        logging.debug( "Number of items waiting for fingerprint: %s", 
                      len(waiting_for_fp) )

    # Iterate over list of fingerprints and remove any that only have 1 entry.
    for fp in list(dirs_by_fp.keys()):
        if len(dirs_by_fp[ fp ]) < 2:
            logging.debug( "Removing fp '%s' with sole directory '%s'",
                          fp, dirs_by_id[ dirs_by_fp[ fp ][ 0 ] ][ 0 ] )
            del(dirs_by_fp[ fp ])

    for fp in list(dirs_by_fp.keys()):
        parent_fp = None
        all_ancestors_same_fp = True
        for dir_id in dirs_by_fp[ fp ]:
            dir_parent = dirs_by_id[ dir_id ][ 1 ]
            if 0 <= dir_parent:
                # Set the parent's fingerprint if this is the first pass
                if parent_fp == None:
                    parent_fp = dirs_by_id[ dir_parent ][ 2 ]
                else:
                    if dirs_by_id[ dir_parent ][ 2 ] != parent_fp:
                        all_ancestors_same_fp = False
                        break
            else:
                all_ancestors_same_fp = False
                break

        # Delete this fingerprint if the parents of all items have same fp.
        if all_ancestors_same_fp:
            del(dirs_by_fp[ fp ])
            logging.debug("Removing fingerprint '%s' - all subtrees match.",
                          fp)
        else:
            logging.debug("Not removing fingerprint '%s' - one or more " +
                          "parents differ.", fp)

    logging.debug("Number of matches: %s", len(dirs_by_fp))
                
    # Create temp table
    create_tmp_dir_table(cursor)

    # Iterate through dirs_by_fp, adding all directories to the temporary table
    cursor.executemany("INSERT INTO '" + table_names['tmp_dirs'] + 
                       "' (Path_ID,Parent_ID) SELECT Path_ID,Parent_ID FROM '" +
                       table_names['dirs'] + "' WHERE Path_ID=?", 
                       serialize_hash_of_lists( dirs_by_fp ) )

    # Now reconstruct tree
    tree_info = reconstruct_tree(cursor)

    ## Display results
    cursor.execute("SELECT COUNT(*) FROM '" + table_names['tmp_dirs'] + "'")
    if 0 < cursor.fetchone()[0]:
        # Open dupes file, if specified
        fh = None
        if 0 < len(cmd_args.output):
            logging.info("Writing to file '" + cmd_args.output + "'")
            fh = io.open(cmd_args.output, 'wt')
        elif not cmd_args.verbose or (0 < len(cmd_args.log)):
            fh = io.open(sys.stdout.fileno(), 'wt')
        
        count = 0
        for fp in dirs_by_fp.keys():
            # Send to log
            logging.info(str(len(dirs_by_fp[fp])) + " dirs with Fingerprint 0x" 
                         + fp + ":")

            for entry in dirs_by_fp[fp]:
                tmp_info = dirs_by_id[ entry ]
                path_name = gen_db_url( tree_info, tmp_info[1], tmp_info[0])
                logging.info("    " + path_name + " (ID = " + str(entry) + 
                             ")")

            # Send to file/STDOUT if appropriate
            if not (fh == None):
                fh.write(str(len(dirs_by_fp[fp])) + " dirs with Fingerprint 0x" 
                         + fp + ":" + os.linesep)

                for entry in dirs_by_fp[fp]:
                    tmp_info = dirs_by_id[ entry ]
                    path_name = gen_db_url( tree_info, tmp_info[1], tmp_info[0])
                    fh.write("    " + path_name + os.linesep)

            count += 1
        if not (fh == None):
            fh.close()

    logging.info("Duplicates found: " + str(count))

def diff_trees_notify(msg, fh):
    """Helper function for diff_trees to alert user.
    """
    # Log
    logging.info("%s", msg)

    # Send to file/STDOUT if appropriate
    if not (fh == None):
        fh.write(msg + os.linesep)

def diff_trees(db_conn, lhs_target, rhs_target):
    """Recursively compares the two subtrees, producing output similar to diff.
    """

    # Get DB cursor object
    cursor = db_conn.cursor()

    # Attempt to resolve the targets
    lhs_info = resolve_target(lhs_target, cursor)
    rhs_info = resolve_target(rhs_target, cursor)
    if lhs_info == None:
        logging.error("'%s' not in database!", lhs_target)
    if rhs_info == None:
        logging.error("'%s' not in database!", rhs_target)
    if lhs_info == None or rhs_info == None:
        return
    logging.debug("Targets resolved to: lhs=%s, rhs=%s", lhs_info, rhs_info)
    
    # Open output file, if specified
    fh = None
    if 0 < len(cmd_args.output):
        fh = io.open(cmd_args.output, 'wt')
    elif not cmd_args.verbose or (0 < len(cmd_args.log)):
        fh = io.open(sys.stdout.fileno(), 'wt')

    # If targets are directories, build a queue and crawl it, 
    # similar to crawl_trees, only include the lhs and rhs info in each 
    # node. If they are both files, compare and move on. If they are not
    # the same type, alert user.
    if lhs_info['file_id'] != None and rhs_info['file_id'] != None:
        # Check fingerprints
        lhs_db_data = get_file_items_from_db( cursor, lhs_info['dir_id'] )
        rhs_db_data = get_file_items_from_db( cursor, rhs_info['dir_id'] )
        logging.debug(" lhs: %s, rhs: %s", lhs_db_data, rhs_db_data)
        if( lhs_db_data[ lhs_info[ 'file_name' ] ][ 3 ] != \
                rhs_db_data[ rhs_info[ 'file_name' ] ][ 3 ] ):
            diff_trees_notify( lhs_target + " and " + 
                               rhs_target + " differ.", fh)
        return
    elif lhs_info['file_id'] == None and rhs_info['file_id'] == None:
        # Both are trees.
        dir_queue = [( (lhs_target, lhs_info['dir_id']), 
                       (rhs_target, rhs_info['dir_id']) )]
    else:
        # They're different alert user.
        if lhs_info['file_id'] == None:
            diff_trees_notify(lhs_target + " is a directory.", fh)
        else:
            diff_trees_notify(lhs_target + " is a file.", fh)

        if rhs_info['file_id'] == None:
            diff_trees_notify(rhs_target + " is a directory.", fh)
        else:
            diff_trees_notify(rhs_target + " is a file.", fh)
        return

    try:
        # Walk the subtrees
        while 0 < len(dir_queue):
            node = dir_queue.pop()
            lhs = node[0]
            rhs = node[1]

            logging.debug("%s", node)
        
            # Query the database
            lhs_db_data = get_dir_items_from_db(cursor, lhs[1], True)
            rhs_db_data = get_dir_items_from_db(cursor, rhs[1], True)

            # Check files first
            for entry in list(lhs_db_data['file_entries'].keys()):
                if entry in lhs_db_data['file_entries'] and \
                        entry in rhs_db_data['file_entries']:
                    # Check fingerprints
                    if( lhs_db_data['file_entries'][ entry ][ 3 ] != \
                            rhs_db_data['file_entries'][ entry ][ 3 ] ):
                        diff_trees_notify( os.path.join(lhs[0], entry) + 
                                           " and " + 
                                           os.path.join(rhs[0], entry) + 
                                           " differ.", fh)

                    del( rhs_db_data['file_entries'][ entry ] )
                else:
                    diff_trees_notify("Only in " + 
                                      lhs[0] + ": " + entry, fh)
                del( lhs_db_data['file_entries'][ entry ] )

            for entry in list(rhs_db_data['file_entries'].keys()):
                diff_trees_notify("Only in " + 
                                  rhs[0] + ": " + entry, fh)
                del( rhs_db_data['file_entries'][ entry ] )
                

            # Check directories
            for entry in list(lhs_db_data['dir_entries'].keys()):
                if entry in rhs_db_data['dir_entries']:
                    # Push onto stack to check later
                    dir_queue = [( (os.path.join(lhs[0], entry), 
                                    lhs_db_data['dir_entries'][ entry ][ 0 ]), 
                                   (os.path.join(rhs[0], entry), 
                                    rhs_db_data['dir_entries'][ entry ][ 0 ]) )]
                    del( rhs_db_data['dir_entries'][ entry ] )
                else:
                    diff_trees_notify("Only in " + 
                                      lhs[0] + ": " + entry, fh)
                del( lhs_db_data['dir_entries'][ entry ] )

            for entry in list(rhs_db_data['dir_entries'].keys()):
                diff_trees_notify("Only in " + 
                                  rhs[0] + ": " + entry, fh)
                del( rhs_db_data['dir_entries'][ entry ] )

    except KeyboardInterrupt:
        logging.error("Interrupt detected.")
                
def del_targets(db_conn):
    """Attempts to remove the specified target(s)
    """

    file_stats = { 'missing' : 0 }
    dir_stats = { 'missing' : 0 }

    # Get DB cursor object
    cursor = db_conn.cursor()
    for target in cmd_args.target:
        # Remove any trailing OS separators and split into tokens
        target = target.rstrip(os.sep)
        logging.debug('Attempting to delete target "%s"', target)

        # Check for root in database
        target_info = resolve_target(target, cursor, False)
        if target_info != None:
            logging.debug('Target resolved to: %s', target_info)

            if target_info['file_id'] != None:
                # Target is a file
                tmp_data = { target_info['file_name'] : 
                             (target_info['file_id'], ) }
                tmp_stats = prune_files(os.path.dirname(target), tmp_data, 
                                        cursor)
                file_stats = add_dicts(file_stats, tmp_stats)
            else:
                if target_info['dir_name'] == None:
                    # Target is a root
                    tmp_data = { target_info['root_name'] : 
                                 (target_info['root_id'], 
                                  target_info['last_checked']) }
                    path = ''
                else:
                    # Target is a directory
                    tmp_data = { target_info['dir_name'] : 
                                 (target_info['dir_id'], 
                                  target_info['last_checked']) }
                    path = os.path.dirname(target)

                # Delete tree
                tmp_stats = prune_dirs(path, tmp_data, cursor)
                file_stats = add_dicts(file_stats, tmp_stats[0])
                dir_stats = add_dicts(dir_stats, tmp_stats[1])
        else:
            logging.warning("Target '" + target + "' not in database.")

    # Commit
    db_conn.commit()

    # Print stats!
    logging.info("Finished deleting target(s) '%s': ", cmd_args.target)
    logging.info("    Files: %s", file_stats['missing'])
    logging.info('    Directories: %s', dir_stats['missing'])

def resolve_target(target, cursor, add_root=False):
    """Attempts to resolve the specified target into root name + subdir. Returns
    a dict containing the root/dir/file names and IDs. Nonappicable fields will 
    be None. If the root cannot be resolved, None is returned.
    """

    ### NOTES: Need to modify get_root_from_db or call directly...

    ret_val = None

    # Remove any trailing OS separators
    target = target.rstrip(os.sep)

    # If a root prefix was specified, prepend it
    if 0 < len(cmd_args.root_prefix):
        target = os.path.join(cmd_args.root_prefix, target)

    # If a surrogate root was specified, remove last token and attach it to
    # the surrogate.
    if 0 < len(cmd_args.use_root):
        target = os.path.join(cmd_args.use_root, os.path.basename(target))

    # Split target into tokens
    path_nodes = target.split(os.sep)
    logging.debug('Target tokens: %s', path_nodes)

    # Step through all nodes but the last one, probing database. Bail if we
    # can't find one.
    parent_id = -1
    idx = 1
    tmp_root = path_nodes[0]
    while parent_id < 0:
        cursor.execute("SELECT Path_ID,LastChecked FROM '" + 
                       table_names['dirs'] + 
                       "' WHERE Parent_ID=? AND Name=?", (parent_id, 
                                                          tmp_root) )
        tmp_row = cursor.fetchone()
        if tmp_row == None:
            if idx < len(path_nodes):
                tmp_root = os.path.join(tmp_root, path_nodes[idx])
                idx += 1
            elif add_root:
                # Add to database, if appropriate.
                logging.debug("Unable to locate target '%s'. " + 
                              "Adding as new root.", target)
                cursor.execute("INSERT INTO '" + table_names['dirs'] + 
                               "'(Name,Parent_ID) VALUES(?,?)", 
                               (target,-1))
                parent_id = -1
                tmp_row = (cursor.lastrowid, '')
            else:
                logging.debug("Can't find root for %s!", target)
                return ret_val
        else:
            parent_id = tmp_row[0]

    logging.debug("Found root '%s' in target '%s'", tmp_root, target)
    ret_val = {'root_name' : tmp_root, 'root_id' : parent_id, 'dir_name' : None,
               'dir_id' : None, 'last_checked' : '', 'file_name' : None, 
               'file_id' : None }
    logging.debug("idx: %s of %s", idx, len(path_nodes))
    if len(path_nodes) <= idx:
        ret_val['dir_id'] = tmp_row[0]
        ret_val['last_checked'] = tmp_row[1]
        return ret_val

    for idx in range(idx, len(path_nodes)-1):
        logging.debug("Checking for directory '%s' in parent ID %s", 
                      path_nodes[idx], parent_id)
        cursor.execute("SELECT Path_ID,LastChecked FROM '" + 
                       table_names['dirs'] + 
                       "' WHERE Parent_ID=? AND Name=?", (parent_id, 
                                                          path_nodes[idx]) )
        tmp_row = cursor.fetchone()
        if tmp_row == None:
            logging.info("Can't find directory '%s' in root '%s'!", 
                          path_nodes[idx], tmp_root)
            return None
        else:
            logging.debug("Directory '%s' (%s) is a child of dir ID %s",
                          path_nodes[idx], tmp_row[0], parent_id)
            parent_id = tmp_row[0]

    # Check dir table for last node
    if idx < len(path_nodes):
        logging.debug("Checking for directory '%s' in parent ID %s", 
                      path_nodes[-1], parent_id)
        cursor.execute("SELECT Path_ID,LastChecked FROM '" + 
                       table_names['dirs'] + 
                       "' WHERE Parent_ID=? AND Name=?", (parent_id, 
                                                          path_nodes[-1]) )
        tmp_row = cursor.fetchone()

    # Return results
    if (tmp_row != None):
        # Found entry in directories table.
        logging.debug("Directory '%s' (%s) is a child of dir ID %s",
                      path_nodes[-1], tmp_row[0], parent_id)
        ret_val['dir_name'] = path_nodes[-1]
        ret_val['dir_id'] = tmp_row[0]
        ret_val['last_checked'] = tmp_row[1]
        logging.debug("Found target directory '%s' (%s) in root '%s' (%s)", 
                      ret_val['dir_name'], ret_val['dir_id'], 
                      ret_val['root_name'], ret_val['root_id'])

    else:
        # Try the file table
        logging.debug("Checking for file '%s' in parent ID %s", 
                      path_nodes[-1], parent_id)
        cursor.execute("SELECT File_ID FROM '" + table_names['files'] + 
                       "' WHERE Parent_ID=? AND Name GLOB ?", 
                       (parent_id, path_nodes[-1]) )
        tmp_row = cursor.fetchone()
        if (tmp_row != None):
            ret_val['dir_id'] = parent_id
            ret_val['file_name'] = path_nodes[-1]
            ret_val['file_id'] = tmp_row[0]
            logging.debug("Found target file '%s' (%s) in root '%s' (%s)", 
                          ret_val['file_name'], ret_val['file_id'], 
                          ret_val['root_name'], ret_val['root_id'])
            
        else:
            logging.info("Unable to locate target '%s'.", target)
            return None

    return ret_val

def list_db(db_conn, target):
    """Lists all items that have target has a parent or display info on file if
    target is a file.
    """

    # Get DB cursor object
    cursor = db_conn.cursor()

    count = 0

    if cmd_args.minimal:
        indent = ''
    else:
        indent = ' ' * 4

    # If target is None, then print list of roots.
    if target == None:
        if not cmd_args.minimal:
            print( '[Roots]')
        cursor.execute("SELECT Name,Path_ID FROM '" + 
                       table_names['dirs'] + "' WHERE Parent_ID=?", 
                       (-1,) )
        for row in cursor.fetchall():
            if cmd_args.expanded:
                print(indent + os.path.join(row[0],'') + ' (' + str(row[1])
                      + ')')
            else:
                print(indent + os.path.join(row[0],''))
            count += 1
    else:
        # Otherwise, try to locate the target
        target_info = resolve_target(target, cursor)
        
        if (target_info == None) or (target_info['dir_id'] == None):
            print(target + " is not in database." + os.linesep)
            return 0
            
        # Try the file table
        if (target_info['file_id'] != None):
            if not cmd_args.minimal:
                print("Files matching '" + target + "':" + os.linesep)

            headers = ("Name", "ID", "Last Modified", "Fingerprint", "Size")
            cursor.execute("SELECT Name,File_ID,LastModified,Fingerprint,Size "
                           + "FROM '" + table_names['files'] + 
                           "' WHERE Parent_ID=? AND " +
                           "Name GLOB ?", 
                           (target_info['dir_id'], target_info['file_name']) )

            for row in cursor.fetchall():
                if cmd_args.minimal:
                    # Print name only
                    print(row[0])
                else:
                    # Print name
                    print(row[0] + ':')

                    # Print ID
                    print(indent + 'ID: ' + str(row[1]))

                    # Print date last modified
                    if row[2] != None:
                        print(indent + 'Last Modified: ' + \
                            str(datetime.datetime.fromtimestamp(float(row[2]))))
                    else:
                        print(indent + 'Last Modified: ')

                    # Print Fingerprint
                    print(indent + 'Fingerprint: 0x' + row[3])

                    # Print Size
                    print(indent + 'Size: ' + str(row[4]) + ' bytes')
                count += 1
        elif (target_info['dir_id'] != None):
            # Retrieve all children
            parent_id = target_info['dir_id']
            if (not cmd_args.minimal) or (1 < len(cmd_args.target)):
                print(target + ":")
            if cmd_args.expanded:
                indent = ' ' * 8
                print( (' ' * 4) + '[INFO]')
                cursor.execute("SELECT Parent_ID,LastChecked FROM '" + 
                               table_names['dirs'] + "' WHERE Path_ID=?", 
                               (parent_id,))
                row = cursor.fetchone()

                # Dir ID
                print(indent + 'ID: ' + str(parent_id))

                # Parent ID
                print(indent + 'Parent ID: ' + str(row[0]))

                # Date Last Checked:
                print(indent + 'Last Checked: ' + str(row[1]))
            elif cmd_args.minimal:
                indent = ''
            else:
                indent = ' ' * 4

            # subdirs
            if cmd_args.expanded:
                print( (' ' * 4) + '[Subdirectories]')
            cursor.execute("SELECT Name,Path_ID FROM '" + 
                           table_names['dirs'] + "' WHERE Parent_ID=?", 
                           (parent_id,) )
            for row in cursor.fetchall():
                if cmd_args.expanded:
                    print(indent + os.path.join(row[0],'') + ' (' + str(row[1])
                          + ')')
                else:
                    print(indent + os.path.join(row[0],''))
                count += 1
            # files
            if cmd_args.expanded:
                print( (' ' * 4) + '[Files]')
            cursor.execute("SELECT Name,File_ID FROM '" + 
                           table_names['files'] + "' WHERE Parent_ID=?", 
                           (parent_id,) )
            for row in cursor.fetchall():
                if cmd_args.expanded:
                    print(indent + row[0] + ' (' + str(row[1]) + ')')
                else:
                    print(indent + row[0])
                count += 1

            # If in minimal mode and there are multiple targets, print a blank 
            # line.
            if cmd_args.minimal and (1 < len(cmd_args.target)):
                print()

    if not cmd_args.minimal:
        print(os.linesep + str(count) + " entries listed." + os.linesep)
    return count

def write_db_fp(sha1, filename):
    """Helper function to write the specified sha1 to the specified filename.
    """

    try:
        fh = io.open(filename, 'wt')
        payload = sha1 + os.linesep
        fh.write(payload)
        fh.close()
    except IOError as e:
        logging.error("IOError({0}): {1}".format(e.errno, e.strerror) +
                      " on file '" + filename + "'")


def check_db():
    """Calculates the fingerprint of the database and checks it against the
    fingerprint in a file with the same name + 'sha1' extension. In the event
    that the fingerprints do not match, the mod times of the two files are
    compared. If the db was modified after the fingerprint file, the database
    is assumed fine and the fingerprint file is updated.
    """

    # For convenience...
    fullname = cmd_args.db

    # Log the file
    logging.info("Fingerprinting database '%s'", fullname)
    
    # Generate fingerprint
    fp_cpu_time = time.clock()
    fp_real_time = time.time()
    db_stat = os.stat(fullname)
    fp = calc_fingerprint(fullname, db_stat.st_size)
    logging.debug("Database '%s' has fingerprint '0x%s'", fullname, fp)
    logging.debug("File '%s' finished in %.4f seconds (%.4f CPU seconds)", 
                  fullname, time.time() - fp_real_time, 
                  time.clock() - fp_cpu_time)

    # Generate fingerprint filename
    fp_file = fullname + ".sha1"

    # Check against fingerprint file
    try:
        # Attempt to open the file. If we can't, assume that we need to create
        # a new one.
        fh = io.open(fp_file, 'rt')

        last_fp = fh.readline().rstrip()
        logging.debug("Previous fingerprint = '0x%s'", last_fp)

        if fp != last_fp:
            logging.debug("Database fingerprint does not match previous " +
                          "fingerprint.")

            # Check modification times.
            fp_stat = os.stat(fp_file)
            if fp_stat.st_mtime < db_stat.st_mtime:
                # Database has been touched since last fingerprint. Update,
                # if appropriate
                if cmd_args.check_only:
                    logging.info("Database '" + fullname + "' newer than " +
                                 "fingerprint file '" + fp_file + "'!")
                else:
                    logging.info("Database '" + fullname + "' newer than " +
                                 "fingerprint file '" + fp_file + 
                                 "'. Updating...")
                    write_db_fp(fp, fp_file)
            else:
                # Looks like something squirrelly is going on. Alert user.
                logging.warning("Database '" + fullname + "' does not match " +
                                "fingerprint file '" + fp_file + 
                                "'. Database could be damaged!")

        else:
            logging.info("Database fingerprint matches previous fingerprint.")

    except OSError as e:
        if e.errno == os.errno.ENOENT:
            if cmd_args.check_only:
                logging.info("Unable to open fingerprint file '" + fp_file + 
                             "'!")
            else:
                logging.info("Unable to open fingerprint file '" + fp_file + 
                             "'. Generating a new one.")
                write_db_fp(fp, fp_file)
        else:
            logging.error( "I/O Error while checking database: " + e.strerr )

########
# Main #
########

if __name__ == '__main__':
    # Start timer
    cpu_start_time = time.clock()
    real_start_time = time.time()

    # Parse command-line arguments
    cmd_args = parse_args()

    # Set up logger
    setup_logger(cmd_args.verbose,cmd_args.debug,cmd_args.log)
    logging.debug('Command-line arguments: %s', vars(cmd_args))

    ok_to_prune = False

    try:
        if cmd_args.subcommand == 'scan':
            if cmd_args.prune:
                ok_to_prune = True
            if cmd_args.check_only:
                ok_to_prune = False
            # Open fingerprint database
            with open_db(cmd_args.db) as db_conn:
                file_stats = gen_file_stats_dict()
                dir_stats = gen_dir_stats_dict()
                
                # Scan each specified root.
                for target in cmd_args.target:
                    # Scan target
                    result = scan_target(target, db_conn)
                    # Update stats
                    file_stats = add_dicts(file_stats, result[1])
                    dir_stats = add_dicts(dir_stats, result[2])
                    # Check for interrupt and break if appropriate
                    if (result[0] != 0):
                        break

            # Dump stats, if appropriate
            print_scan_stats( file_stats, dir_stats )

            # Add a blank line if using progress indicator
            if cmd_args.progress:
                sys.stdout.write('\r\n')

        elif cmd_args.subcommand == 'dupe_files':
            # Open fingerprint database
            with open_db(cmd_args.db) as db_conn:
                check_dupe_files(db_conn)

        elif cmd_args.subcommand == 'dupe_trees':
            # Open fingerprint database
                with open_db(cmd_args.db) as db_conn:
                    check_dupe_trees(db_conn)

        elif cmd_args.subcommand == 'diff':
            # Open fingerprint database
            with open_db(cmd_args.db) as db_conn:
                diff_trees(db_conn, cmd_args.target[0], cmd_args.target[1])

        elif cmd_args.subcommand == 'list':
            # Open fingerprint database
            with open_db(cmd_args.db) as db_conn:
                if( 0 < len(cmd_args.target) ):
                    for target in cmd_args.target:
                        if( target != '*' ):
                            list_db(db_conn, target)
                        else:
                            list_db(db_conn, None)
                else:
                    list_db(db_conn, None)

        if cmd_args.subcommand == 'rm':
            ok_to_prune = not cmd_args.dry_run
            # Open fingerprint database
            with open_db(cmd_args.db) as db_conn:
                del_targets(db_conn)

        elif cmd_args.subcommand == 'checkdb':
            check_db()

    except KeyboardInterrupt:
        # Catch here also, in case it was missed earlier.
        logging.error("Interrupt detected! Aborting")
        pass

    # Fini!
    logging.info('Finished. Total Run Time = %.4f seconds (%.4f CPU seconds)', 
                 time.time() - real_start_time, time.clock() - cpu_start_time)
