#include "dclean.h"
#include "recordio.h"

char buffer[512]; // shared scratch buffer for sprintf-style message formatting

//-----------------------------------------------------------
// FUNCTION main:
//    Entry point. Lists every file under the master and the copy
//    directory, sorts both lists and then searches them for duplicates.
// PARAMETER USAGE :
//    argv[1] : the master directory
//    argv[2] : the copy directory
// FUNCTION CALLED :
//    void clean_txt();
//    int find_directories(char *file_path, char *out_name);
//    int sortfile(char *inputfile, char *outputfile);
//    void find_same();
//-----------------------------------------------------------
int main(int argc, char *argv[])
{
    if (argc != 3) {
        static const char usage[] = "usage: dclean directory_master directory_copy\n";
        write(1, usage, sizeof usage - 1);
        return EXIT_FAILURE;
    }

    char *master = argv[1]; // read in the arguments
    char *copy = argv[2];

    clean_txt(); // start from a clean slate

    // list every file below the master directory, then sort the list by filesize
    if (find_directories(master, "dclean-master.txt") != 0 ||
        sortfile("dclean-master.txt", "dclean-master.txt") != 0) {
        fprintf(stderr, "dclean: could not build the file list for %s\n", master);
        return EXIT_FAILURE;
    }

    // same again for the copy directory
    if (find_directories(copy, "dclean-copy.txt") != 0 ||
        sortfile("dclean-copy.txt", "dclean-copy.txt") != 0) {
        fprintf(stderr, "dclean: could not build the file list for %s\n", copy);
        return EXIT_FAILURE;
    }

    find_same(); // find which files are the same
    return EXIT_SUCCESS;
}

//-----------------------------------------------------------
// FUNCTION clean_txt:
//    Removes the work files left behind by a previous run so the
//    program always starts with fresh lists. A file that does not
//    exist is not an error; any other failure is reported on stderr
//    and the remaining files are still attempted.
// PARAMETER USAGE :
//    none
// FUNCTION CALLED :
//    none
//-----------------------------------------------------------
void clean_txt()
{
    // NOTE(review): the ".rinx." files are presumably index files kept
    // next to the lists by the record I/O layer - confirm.
    static const char *const stale_files[] = {
        "dclean-master.txt",
        "dclean-copy.txt",
        ".rinx.dclean-master.txt",
        ".rinx.dclean-copy.txt",
    };
    const size_t count = sizeof stale_files / sizeof stale_files[0];

    for (size_t i = 0; i < count; i++) {
        // unlink() directly instead of spawning "rm": no child process,
        // no pipe to clean up and no noise when the file is simply absent
        if (unlink(stale_files[i]) != 0 && errno != ENOENT) {
            fprintf(stderr, "dclean: cannot remove %s: %s\n",
                    stale_files[i], strerror(errno));
        }
    }
}

//-----------------------------------------------------------
// FUNCTION find_same:
//    Searches through the Master and Copy files and checks to see if the files listed are the same using identical()
// PARAMETER USAGE :
//    none
// FUNCTION CALLED :
//    int rio_open(const char *pathname, int flags, mode_t mode);
//    int rio_lseek(int fd, int offset, int whence);
//    void * rio_read(int fd, int * return_value);
//    int identical(char * file1, char * file2);
//-----------------------------------------------------------
static int put_bytes(int fd, const char *data, size_t len);
static int put_rm_command(int fd, const char *path);

void find_same()
{
    // strtok() needs a NUL-terminated delimiter set: '*' separates the
    // filesize from the filepath and '\n' ends the record
    static const char delims[] = "*\n";
    static const char shebang[] = "#!/bin/sh\n";

    int fd = rio_open("dclean-master.txt", O_RDONLY, 0644); // open the list of master files
    if (fd < 0) {
        fprintf(stderr, "%s\n", strerror(EIO));
        exit(EXIT_FAILURE);
    }

    int fdcp = rio_open("dclean-copy.txt", O_RDONLY, 0644); // open the list of copy files
    if (fdcp < 0) {
        fprintf(stderr, "%s\n", strerror(EIO));
        exit(EXIT_FAILURE);
    }

    int fdsh = open("clean-duplicates.sh", O_RDWR | O_CREAT | O_TRUNC, 0644); // open the shell script
    if (fdsh < 0) {
        fprintf(stderr, "%s\n", strerror(errno));
        exit(EXIT_FAILURE);
    }

    // write this to the top so the .sh works
    if (put_bytes(fdsh, shebang, sizeof shebang - 1) < 0) {
        fprintf(stderr, "%s\n", strerror(errno));
        exit(EXIT_FAILURE);
    }

    // NOTE(review): the loops below treat the value returned for SEEK_END
    // as the number of records in the list - confirm against recordio.
    int eol = rio_lseek(fd, 0, SEEK_END);
    int eolcp = rio_lseek(fdcp, 0, SEEK_END);
    if (eol < 0 || eolcp < 0) {
        fprintf(stderr, "%s\n", strerror(EIO));
        exit(EXIT_FAILURE);
    }
    rio_lseek(fd, 0, SEEK_SET); // seek back to the beginning
    rio_lseek(fdcp, 0, SEEK_SET);

    for (int i = 0; i < eol; i++) { // loop through all files in the master list
        int ir = i; // rio_read hands a value back through this argument
        char *rc = rio_read(fd, &ir);
        if (rc == NULL) {
            continue; // nothing was read for this record
        }

        strtok(rc, delims);                       // first field: the filesize (not needed here)
        char *master_path = strtok(NULL, delims); // second field: the filepath
        if (master_path == NULL) {
            free(rc); // malformed record, skip it
            continue;
        }

        for (int j = 0; j < eolcp; j++) { // loop through all files in the copy list
            int jr = j;
            char *rccp = rio_read(fdcp, &jr);
            if (rccp == NULL) {
                continue;
            }

            // master_path was fully extracted above, so restarting strtok()
            // on the copy record does not disturb it
            strtok(rccp, delims);
            char *copy_path = strtok(NULL, delims);

            if (copy_path != NULL && identical(master_path, copy_path) == 1) {
                // identical: schedule the copy for removal
                if (put_rm_command(fdsh, copy_path) < 0) {
                    fprintf(stderr, "%s\n", strerror(errno));
                    exit(EXIT_FAILURE);
                }
            }
            free(rccp); // each record comes back in its own buffer
        }

        rio_lseek(fdcp, 0, SEEK_SET); // rewind the copy list for the next master file
        free(rc);
    }

    if (close(fdsh) < 0) {
        fprintf(stderr, "%s\n", strerror(errno));
        exit(EXIT_FAILURE);
    }
    // NOTE(review): fd and fdcp are left open because no matching close
    // routine for rio_open is visible here - add it if recordio has one.
}

// Writes all len bytes of data to fd, retrying after short writes and
// signal interruptions. Returns 0 on success, -1 on error (errno is set).
static int put_bytes(int fd, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t written = write(fd, data, len);
        if (written < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        data += written;
        len -= (size_t)written;
    }
    return 0;
}

// Appends one "rm -- '<path>'" line to the script on fd. The path is wrapped
// in single quotes (an embedded ' becomes '\'') so spaces and shell
// metacharacters in a file name cannot break or hijack the script.
// Returns 0 on success, -1 on a write error.
static int put_rm_command(int fd, const char *path)
{
    static const char prefix[] = "rm -- '";
    static const char escaped_quote[] = "'\\''";
    static const char suffix[] = "'\n";

    if (put_bytes(fd, prefix, sizeof prefix - 1) < 0) {
        return -1;
    }

    const char *run = path; // start of the pending run of ordinary characters
    for (const char *p = path;; p++) {
        if (*p != '\'' && *p != '\0') {
            continue;
        }
        if (put_bytes(fd, run, (size_t)(p - run)) < 0) {
            return -1;
        }
        if (*p == '\0') {
            break;
        }
        if (put_bytes(fd, escaped_quote, sizeof escaped_quote - 1) < 0) {
            return -1;
        }
        run = p + 1;
    }

    return put_bytes(fd, suffix, sizeof suffix - 1);
}

//-----------------------------------------------------------
// FUNCTION sortfile:
//    Sorts the inputfile numerically using the "sort" utility.
//    The result is written with "sort -o", which is safe even when
//    inputfile and outputfile name the same file.
// PARAMETER USAGE :
//    char * inputfile : the file for input
//    char * outputfile : the file for output
// RETURNS :
//    0 when sort ran and exited successfully, -1 otherwise
// FUNCTION CALLED :
//    none
//-----------------------------------------------------------
int sortfile(char *inputfile, char *outputfile)
{
    // equivalent to: sort -n -o outputfile -- inputfile
    char *const sort_argv[] = {"sort", "-n", "-o", outputfile, "--", inputfile, NULL};

#ifdef DEBUG
    snprintf(buffer, sizeof buffer, "Sorting File [%s]\n", inputfile);
    write(1, buffer, strlen(buffer));
#endif

    pid_t pid = fork();
    if (pid < 0) { // fork failed
        static const char msg[] = "Fork Failed!\n";
        write(1, msg, sizeof msg - 1);
        exit(-1);
    }

    if (pid == 0) { // child: becomes the sort process
        execvp(sort_argv[0], sort_argv);
        // only reached when exec failed; _exit() so the child neither runs
        // the rest of the parent's program nor flushes its stdio buffers
        fprintf(stderr, "sort: %s\n", strerror(errno));
        _exit(127);
    }

    // parent: wait for this specific child, retrying if a signal interrupts
    int status = 0;
    pid_t waited;
    do {
        waited = waitpid(pid, &status, 0);
    } while (waited < 0 && errno == EINTR);

    if (waited < 0) {
        return -1;
    }
    return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : -1;
}

//-----------------------------------------------------------
// FUNCTION find_directories:
//    Runs "iodir" to generate the .txt files
//
// PARAMETER USAGE :
//    char * file_path : the directory handed to iodir
//    char * out_name : the file iodir is told to write its list to
// RETURNS :
//    always 0
// FUNCTION CALLED :
//    void run_external_process(int fds[2], const char** args);
//-----------------------------------------------------------
int find_directories(char *file_path, char *out_name)
{
    const char *iodir_argv[4] = {"iodir", file_path, out_name, NULL}; // construct arguments
    int fds[2] = {-1, -1}; // filled in by run_external_process

#ifdef DEBUG
    snprintf(buffer, sizeof buffer, "Generating Directory File [%s] For path: [%s]\n", out_name, file_path);
    write(1, buffer, strlen(buffer));
#endif

    run_external_process(fds, iodir_argv); // generate the list using iodir

    // the child's stdout is not needed here, but the read end of the pipe
    // is still open and would leak if it were not closed
    if (fds[0] >= 0) {
        close(fds[0]);
    }
    return 0;
}

//-----------------------------------------------------------
// FUNCTION identical:
//    Compares the contents of two files byte by byte.
//    The comparison is done in-process instead of through "diff":
//    an unreadable or missing file is then reported as an error
//    rather than being mistaken for "no differences", and no pipe
//    or child process is needed per comparison.
// PARAMETER USAGE :
//    char * file1 : "master" file
//    char * file2 : "copy" file
// RETURNS :
//    1 when the contents are identical, 0 when they differ,
//    -1 when either file could not be opened or read
// FUNCTION CALLED :
//    none
//-----------------------------------------------------------
int identical(char *file1, char *file2)
{
    enum { CHUNK_SIZE = 4096 };
    unsigned char left[CHUNK_SIZE];
    unsigned char right[CHUNK_SIZE];
    int verdict = -1;

    if (file1 == NULL || file2 == NULL) {
        return -1;
    }

    FILE *fa = fopen(file1, "rb");
    if (fa == NULL) {
        return -1;
    }
    FILE *fb = fopen(file2, "rb");
    if (fb == NULL) {
        fclose(fa);
        return -1;
    }

    for (;;) {
        // fread() only comes up short at end-of-file or on error, so equal
        // counts mean both files are still in step
        size_t got_left = fread(left, 1, sizeof left, fa);
        size_t got_right = fread(right, 1, sizeof right, fb);

        if (ferror(fa) || ferror(fb)) {
            verdict = -1; // read error
            break;
        }
        if (got_left != got_right || memcmp(left, right, got_left) != 0) {
            verdict = 0; // different length or different bytes
            break;
        }
        if (got_left == 0) {
            verdict = 1; // both hit end-of-file without a mismatch
            break;
        }
    }

    fclose(fa);
    fclose(fb);

#ifdef DEBUG
    if (verdict == 1) {
        snprintf(buffer, sizeof buffer, "\"%s\" is a copy of \"%s\"\n", file2, file1);
    } else {
        snprintf(buffer, sizeof buffer, "\"%s\" and \"%s\" differ\n", file1, file2);
    }
    write(1, buffer, strlen(buffer));
#endif

    return verdict;
}

//-----------------------------------------------------------
// FUNCTION run_external_process:
//    Runs child process and returns the child's output to the parent
// PARAMETER
// USAGE :
//    int fds[2] : receives the pipe; on return fds[0] is the open read
//                 end carrying the child's stdout (the caller must
//                 close it) and fds[1] has already been closed
//    const char** args : NULL-terminated argument vector for execvp;
//                 args[0] is the program to run
// NOTES :
//    The child is waited for BEFORE anything reads the pipe, so a
//    command that prints more than the pipe can hold blocks forever.
//    Only use this for commands with little or no stdout output.
//    The process exits if the pipe or the fork cannot be created.
// FUNCTION CALLED :
//    none
//-----------------------------------------------------------
void run_external_process(int fds[2], const char **args)
{
    if (pipe(fds) < 0) { // create the pipe that will carry the child's stdout
        fprintf(stderr, "Pipe Failed: %s\n", strerror(errno));
        exit(-1);
    }

    pid_t pid = fork();
    if (pid < 0) { // fork failed
        static const char msg[] = "Fork Failed!\n";
        close(fds[0]);
        close(fds[1]);
        write(1, msg, sizeof msg - 1);
        exit(-1);
    }

    if (pid == 0) { // child process
        close(fds[0]); // the child never reads from the pipe

        // dup2() targets fd 1 explicitly; close(1)+dup() would pick fd 0
        // instead whenever stdin happens to be closed
        if (dup2(fds[1], STDOUT_FILENO) < 0) {
            _exit(127);
        }
        if (fds[1] != STDOUT_FILENO) {
            close(fds[1]);
        }

        // execvp does not modify the strings; the cast only adapts the
        // const-qualified vector to its historical prototype
        execvp(args[0], (char *const *)args);

        // only reached when exec failed: report it and leave with a
        // non-zero status, without flushing the parent's stdio buffers
        fprintf(stderr, "%s: %s\n", args[0], strerror(errno));
        _exit(127);
    }

    // parent process
    close(fds[1]); // keep only the read end; the child owns the write end

    int status = 0;
    while (waitpid(pid, &status, 0) < 0 && errno == EINTR) {
        // interrupted by a signal before the child finished: wait again
    }
}