#!/usr/bin/perl -w
########################################################################
#                                                                      #
# Started:      14/05/2002                                             #
# Author:       John                                                   #
# Contact:      support@hrcompletemanager.com                          #
# Purpose:      Trawls a directory tree & lists duplicate files        #
#               directly into a batch file to enable an                # 
#               administrator to delete them.                          #
#                                                                      #
# Copyright:    hrcompletemanager.com 2022                            #
#               This is free software. Do with it what you wish. It    #
#               comes with no guarantees or warranties. The  author is #
#               not responsible for anything. If you use it and find   #
#               it useful then a thank you would be nice. If you       #
#               use it to make a lot of money then congratulations.    #
#                                                                      #
# WARNING:  	WE USE THIS PROGRAM A LOT                              #
#               IT DOES MORE OR LESS EXACTLY WHAT WE WANT              #
# 	        	IT MAY NOT BE SUITABLE FOR YOU                         #
# 	        	USE IT AT YOUR OWN RISK                                #
#                                                                      #
########################################################################

=head1 DESCRIPTION

This utility trawls recursively through the path $CLEANITUP_ROOT
and generates a file ($INITIAL_FILE) which contains the following
information about all files present:
    fileName
    fileModifiedDate
    fileSize
    full path and filename

NOTE:
This script was written to complement the backitup.pl script which 
creates files with a distinctive naming convention with the date and 
time at the beginning of the filename. Therefore, if the setting
$BACKITUP is "YES", the start of each filename is stripped off
up to and including the time (ie the last nn.nn_ of the filename).

The name of this file is mangled by concatenating the fileName, 
filemodifiedDate and FileSize into one field (a sort of checksum). The 
mangled Name is sorted in checksum order, duplicates removed and output
to $DELETION_FILE

$DELETE_COMMAND is substituted for first field (checksum) of each line.

This results in a file which contains lines like the following:

REM    SUMMARY INFORMATION
REM    ======================================
REM    Date run:     Wed Dec 10 16:47:11 2003
REM    Total files to delete:      2
REM    Total files to retain:      12
REM
del "c:\trash\spam.txt"
del "c:\trash\spam2.txt"

The sysadmin can browse this file and run it to delete duplicates.

=cut

use strict;
use File::Find;
use IO::File;
use Env;

#=======================================================================
# USER CONFIGURABLE SECTION
# Every setting can be overridden by an environment variable of the same
# name; the literal on each line is the default used when it is not set.
# %ENV is read directly because the variables tied by "use Env" are
# package variables, which these lexical ("my") settings would shadow.
#=======================================================================
my $CLEANITUP_ROOT = defined $ENV{CLEANITUP_ROOT} ? $ENV{CLEANITUP_ROOT}
                   : "s:/";        # Clean everything below this.
my $DEBUG          = defined $ENV{DEBUG} ? $ENV{DEBUG}
                   : "NO";         # "YES" to retain tmp files & comments.
my $COMMENT        = defined $ENV{COMMENT} ? $ENV{COMMENT}
                   : "REM";        # Comment symbol for this OS.
my $FS             = defined $ENV{FS} ? $ENV{FS}
                   : "\\";         # fs symbol for this OS.
my $DELETE_COMMAND = defined $ENV{DELETE_COMMAND} ? $ENV{DELETE_COMMAND}
                   : "del";        # Correct command for this OS.
my $BACKITUP       = defined $ENV{BACKITUP} ? $ENV{BACKITUP}
                   : "YES";        # "YES" if cleanup for backitup.pl &
                                   # will strip leading filename string.
my $INITIAL_FILE   = defined $ENV{INITIAL_FILE} ? $ENV{INITIAL_FILE}
                   : "c:/backitups/initial_file.txt"; # Name of initial file.
my $DELETION_FILE  = defined $ENV{DELETION_FILE} ? $ENV{DELETION_FILE}
                   : "c:/backitups/danger.bat"; # Name of the final file
                                   # (ADMINS RUN THIS TO DELETE DUPLICATES)
#=======================================================================
# END OF USER CONFIGURABLE SECTION
#=======================================================================
# If directory name not passed then use the target from the config section
@ARGV = ($CLEANITUP_ROOT) unless @ARGV;

# $INITIAL_FILE stuff
# -------------------
# Walks every root in @ARGV and writes one line per plain file to
# $INITIAL_FILE in the form  "<checksum field>"|"<full path>".
# When $BACKITUP is "YES" the file is then rewritten with the leading part
# of each checksum field (up to and including the first nn.nn_) removed.

    # Create the $INITIAL_FILE. The bareword handle fhINITIALFILE is kept
    # because createfile_initialfile(), called by find(), prints to it.
    open(fhINITIALFILE, '>', $INITIAL_FILE)
        or die "can't open $INITIAL_FILE: $!";
    find(\&createfile_initialfile, @ARGV);
    close(fhINITIALFILE)
        or die "can't close $INITIAL_FILE: $!";

    # Strip garbage from $INITIAL_FILE (if needed)
    if ($BACKITUP eq "YES") {
        print "\n==>INFO: BACKITUP == \"YES\" so strip garbage from filename.\n";

        my $old = $INITIAL_FILE;
        # Keep the temporary file beside its target so the final rename()
        # never has to cross a filesystem or depend on the current directory.
        my $new = "$INITIAL_FILE.new";

        open(my $in_fh,  '<', $old) or die "can't open $old: $!";
        open(my $out_fh, '>', $new) or die "can't open $new: $!";
        while (my $line = <$in_fh>) {
            # Deleting garbage means removing stuff at front of filename
            # ie all chars from BOL to 1st occurrence of nn.nn_ (time).
            # [^|] keeps the match inside the first field: if the name has
            # no timestamp, a timestamp further along in the path field
            # must not be used, or the whole record would be destroyed.
            $line =~ s/^"[^|]*?\d\d\.\d\d_/"/;
            print {$out_fh} $line     or die "can't write $new: $!";
        }
        close($in_fh)                 or die "can't close $old: $!";
        close($out_fh)                or die "can't close $new: $!";

        # In debug mode keep the unstripped listing as "<name>.tmp".
        if ($DEBUG eq "YES") {
            rename($old, "$old.tmp")  or die "can't rename $old to $old.tmp $!";
        }
        rename($new, $old)            or die "can't rename $new to $old: $!";
        print "\n==>INFO: $INITIAL_FILE created and garbage successfully stripped\n";
    }
    else {
        print "\n==>INFO: $INITIAL_FILE created successfully\n";
    }

# Append header stuff
# ------------------
# Starts a fresh $DELETION_FILE containing only the summary header, every
# line prefixed with $COMMENT. The two "Total files" lines are written
# without numbers here; the totals section further down fills them in once
# the counts are known.
    my $time_stamp = localtime($^T);   # $^T = time this script started

    my $header = join '',
        "$COMMENT    SUMMARY INFORMATION\n",
        "$COMMENT    ======================================\n",
        "$COMMENT    Date run:     $time_stamp\n",
        "$COMMENT    Total files to delete:\n",
        "$COMMENT    Total files to retain:\n",
        "$COMMENT\n";

    open(my $header_fh, '>', $DELETION_FILE)
        or die "Cannot open $DELETION_FILE: $!";
    print {$header_fh} $header
        or die "Cannot write $DELETION_FILE: $!";
    # Buffered write errors only surface at close, so check it as well.
    close($header_fh)
        or die "Cannot close $DELETION_FILE: $!";
    
# ARRAY stuff
# -----------
# Reads $INITIAL_FILE into memory and sorts it so that lines sharing the
# same first field (the checksum) sit next to each other. The first line of
# each run is the copy that is kept; every following line with the same
# checksum gets a "$DELETE_COMMAND <path>" line appended to $DELETION_FILE.
# With $DEBUG eq "YES" the kept paths are appended too, as comments.
#
# $count_deletes and $count_retains are left at file scope because the
# totals section below reads them.
#

open(my $initial_fh, '<', $INITIAL_FILE)
    or die "can't open $INITIAL_FILE: $!\n";
my @array = <$initial_fh>;
close($initial_fh);
my @sorted = sort @array;

my $count_deletes = 0;
my $count_retains = 0;
my $previous_name;              # checksum field of the preceding line

# Open the output once for the whole pass instead of once per line.
open(my $delete_fh, '>>', $DELETION_FILE)
    or die "Cannot open $DELETION_FILE: $!";

foreach my $line (@sorted) {
    # Limit of 2: everything after the first pipe belongs to the path.
    # The path field keeps the line's trailing newline, so the print
    # statements below need not add one.
    my ($nextname, $nextpath) = split(/\|/, $line, 2);

    # A line without a pipe has no path; writing a bare delete command
    # for it would be meaningless, so report it and move on.
    if (!defined $nextpath) {
        warn "==>WARNING: skipping malformed line in $INITIAL_FILE: $line";
        next;
    }

    if (defined $previous_name && $previous_name eq $nextname) {
        # Same checksum as the line before: this one is a duplicate.
        print {$delete_fh} "$DELETE_COMMAND $nextpath"
            or die "Cannot write $DELETION_FILE: $!";
        $count_deletes++;
    }
    else {
        # First occurrence of this checksum: the file is retained.
        # If $DEBUG = YES then include retained files with comments
        if ($DEBUG eq "YES") {
            print {$delete_fh} "$COMMENT $nextpath"
                or die "Cannot write $DELETION_FILE: $!";
        }
        $count_retains++;
    }
    $previous_name = $nextname;
}

close($delete_fh)
    or die "Cannot close $DELETION_FILE: $!";
# Putting in the totals stuff
# ---------------------------
# Rewrites $DELETION_FILE through a temporary copy, appending
# $count_deletes and $count_retains to the two "Total files" header lines
# and turning every forward slash into $FS so the paths suit the target OS.
#
my $old = $DELETION_FILE;
# Keep the temporary file beside its target so the final rename() never
# has to cross a filesystem or depend on the current directory.
my $new = "$DELETION_FILE.new";

open(my $totals_in,  '<', $old)   or die "can't open $old: $!";
open(my $totals_out, '>', $new)   or die "can't open $new: $!";
while (my $line = <$totals_in>) {
    $line =~ s/Total files to delete:/Total files to delete:      $count_deletes/;
    $line =~ s/Total files to retain:/Total files to retain:      $count_retains/;
    # $FS is interpolated into the replacement as literal text.
    $line =~ s/\//$FS/g;
    print {$totals_out} $line     or die "can't write $new: $!";
}
close($totals_in)                 or die "can't close $old: $!";
close($totals_out)                or die "can't close $new: $!";

# In debug mode keep the version without totals as "<name>.tmp".
if ($DEBUG eq "YES") {
    rename($old, "$old.tmp")      or die "can't rename $old to $old.tmp $!";
}
rename($new, $old)                or die "can't rename $new to $old: $!";

# START OF SUBS
#-------------------------------------------------
sub createfile_initialfile {
    # File::Find "wanted" callback: $_ holds the entry being visited and
    # $File::Find::name its full path. For each plain file one line is
    # written to the already-open handle fhINITIALFILE:
    #
    #     "<name>/<mtime>/<size>"|"<full path>"
    #
    # Field 1 is the mangled checksum (filename, modified date, filesize);
    # field 2 is the full path. Both are wrapped in quotes and separated
    # by a pipe.
    #
    # The "/" between the checksum parts keeps them unambiguous. Joined
    # with nothing in between, a zero-byte file (whose size used to print
    # as an empty string) could produce the same key as a different file,
    # and that file would then be listed for deletion.
    return unless -f $_;

    # "_" reuses the stat data fetched by the -f test above, so each file
    # is stat'ed only once. Element 7 is the size, element 9 the mtime.
    my ($size, $mtime) = (stat _)[7, 9];

    my $checksum = join '/', $_, $mtime, $size;
    print fhINITIALFILE '"', $checksum, '"|"', $File::Find::name, "\"\n"
        or die "can't write initial file: $!";
    return;
}
#-------------------------------------------------
# END OF SUB
