275 lines
9.2 KiB
C
275 lines
9.2 KiB
C
|
#include "dclean.h"
|
||
|
#include "recordio.h"
|
||
|
|
||
|
// File-wide scratch buffer: messages are formatted into it with sprintf and then
// emitted with write(). It is a fixed 512 bytes, so formatted output must fit.
char buffer[512];
|
||
|
|
||
|
int main(int argc, char *argv[])
|
||
|
{
|
||
|
if (argc != 3) {
|
||
|
sprintf(buffer, "usage: dclean directory_master directory_copy\n");
|
||
|
write(1, buffer, strlen(buffer));
|
||
|
return EXIT_FAILURE;
|
||
|
}
|
||
|
char *master = argv[1]; // read in the arguments
|
||
|
char *copy = argv[2];
|
||
|
|
||
|
clean_txt(); // clean the directory
|
||
|
find_directories(master, "dclean-master.txt"); // find all files in the master directory
|
||
|
sortfile("dclean-master.txt", "dclean-master.txt"); // sort the file by filesize
|
||
|
find_directories(copy, "dclean-copy.txt"); // find all of the files in the copy directory
|
||
|
sortfile("dclean-copy.txt", "dclean-copy.txt"); // sort the file by filesize
|
||
|
find_same(); // find which files are the same
|
||
|
}
|
||
|
|
||
|
//-----------------------------------------------------------
// FUNCTION clean_txt:
//    Removes the listing files (and their .rinx index companions) that a
//    previous run may have left behind, by running "rm <file>" for each one.
// PARAMETER USAGE :
//    none
// FUNCTION CALLED :
//    void run_external_process(int fds[2], const char** args);
//-----------------------------------------------------------
void clean_txt()
{
    static const char *const stale[] = {
        "dclean-master.txt",
        "dclean-copy.txt",
        ".rinx.dclean-master.txt",
        ".rinx.dclean-copy.txt",
    };

    for (size_t k = 0; k < sizeof(stale) / sizeof(stale[0]); k++) {
        const char *args[3] = {"rm", stale[k], NULL};
        int fds[2] = {-1, -1}; // the pipe's output is never read here

        run_external_process(fds, args);

        // run_external_process leaves the read end open for the caller;
        // close it so each rm does not leak a descriptor.
        if (fds[0] >= 0) {
            close(fds[0]);
        }
    }
}
|
||
|
|
||
|
// Writes all of `text` to `fd`, retrying on short writes and EINTR; prints the
// error and terminates the program if the write fails.
static void emit_or_die(int fd, const char *text)
{
    size_t left = strlen(text);

    while (left > 0) {
        ssize_t n = write(fd, text, left);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            fprintf(stderr, "%s\n", strerror(errno));
            exit(EXIT_FAILURE);
        }
        text += n;
        left -= (size_t)n;
    }
}

//-----------------------------------------------------------
// FUNCTION find_same:
//    Walks every record of dclean-master.txt against every record of
//    dclean-copy.txt, asks identical() whether the two paths hold the same
//    content, and writes an "rm <copy path>" line to clean-duplicates.sh for
//    each match. Records are "<size>*<path>\n"; only the path is used.
// PARAMETER USAGE :
//    none
// FUNCTION CALLED :
//    int rio_open(const char *pathname, int flags, mode_t mode);
//    int rio_lseek(int fd, int offset, int whence);
//    void * rio_read(int fd, int * return_value);
//    int identical(char * file1, char * file2);
// NOTE(review): paths are written to the script unquoted, so a path that
//    contains spaces or shell metacharacters produces a broken rm line.
// NOTE(review): the two rio descriptors are not closed because no rio close
//    routine is visible from this file - confirm against recordio.h.
//-----------------------------------------------------------
void find_same()
{
    // strtok needs a NUL-terminated delimiter string ('*' and '\n').
    const char delims[] = "*\n";
    int i, j, ir, jr;

    int fd = rio_open("dclean-master.txt", O_RDONLY, 0644); // open the list of master files
    if (fd < 0) {
        fprintf(stderr, "%s\n", strerror(EIO));
        exit(EXIT_FAILURE);
    }

    int fdcp = rio_open("dclean-copy.txt", O_RDONLY, 0644); // open the list of copy files
    if (fdcp < 0) {
        fprintf(stderr, "%s\n", strerror(EIO));
        exit(EXIT_FAILURE);
    }

    int fdsh = open("clean-duplicates.sh", O_RDWR | O_CREAT | O_TRUNC, 0644); // open the shell script
    if (fdsh < 0) {
        fprintf(stderr, "%s\n", strerror(errno));
        exit(EXIT_FAILURE);
    }

    emit_or_die(fdsh, "#!/bin/sh\n"); // write this to the top so the .sh works

    int eol = rio_lseek(fd, 0, SEEK_END);     // upper bound for the master loop
    int eolcp = rio_lseek(fdcp, 0, SEEK_END); // upper bound for the copy loop
    rio_lseek(fd, 0, SEEK_SET);               // seek back to the beginning
    rio_lseek(fdcp, 0, SEEK_SET);

    for (i = 0; i < eol; i++) { // loop through all files in the master file
        ir = i; // rio_read hands a value back through &ir, so pass a copy of i
        char *rc = rio_read(fd, &ir);
        if (rc == NULL) {
            continue; // nothing was read for this record
        }

        strtok(rc, delims);                 // first token is the filesize (unused)
        char *token = strtok(NULL, delims); // second token is the filepath
        if (token == NULL) {
            free(rc); // malformed record without a path
            continue;
        }

        // The inner strtok calls reset strtok's internal state, which is fine
        // because `token` already points into rc.
        for (j = 0; j < eolcp; j++) { // loop through all files in the copy file
            jr = j;
            char *rccp = rio_read(fdcp, &jr);
            if (rccp == NULL) {
                continue;
            }

            strtok(rccp, delims);                 // filesize (unused)
            char *tokencp = strtok(NULL, delims); // filepath

            if (tokencp != NULL && identical(token, tokencp) == 1) {
                // identical content: schedule the copy for removal
                emit_or_die(fdsh, "rm ");
                emit_or_die(fdsh, tokencp);
                emit_or_die(fdsh, "\n");
            }

            free(rccp); // release the record before reading the next one
        }

        rio_lseek(fdcp, 0, SEEK_SET); // rewind so the next master record sees every copy record
        free(rc);
    }

    if (close(fdsh) < 0) { // make sure the script actually reached the disk
        fprintf(stderr, "%s\n", strerror(errno));
        exit(EXIT_FAILURE);
    }
}
|
||
|
|
||
|
//-----------------------------------------------------------
// FUNCTION sortfile:
//    Runs "sort -n inputfile" in a child process with its stdout redirected
//    to outputfile (equivalent to: sort -n inputfile > outputfile, except that
//    outputfile is NOT truncated first, which is what allows main to pass the
//    same name for input and output).
// PARAMETER USAGE :
//    char * inputfile  : the file for input
//    char * outputfile : the file for output (created if missing)
// RETURNS :
//    0 when sort ran and exited with status 0, -1 otherwise.
//    Terminates the program if fork fails.
// FUNCTION CALLED :
//    none
//-----------------------------------------------------------
int sortfile(char *inputfile, char *outputfile)
{
    char *const sort_argv[] = {"sort", "-n", inputfile, NULL}; // construct arguments

#ifdef DEBUG
    snprintf(buffer, sizeof(buffer), "Sorting File [%s]\n", inputfile);
    write(1, buffer, strlen(buffer));
#endif

    pid_t pid = fork();

    if (pid < 0) { // fork failed: nothing sensible can continue
        static const char msg[] = "Fork Failed!\n";
        write(1, msg, sizeof(msg) - 1);
        exit(-1);
    }

    if (pid == 0) { // child process
        int fd = open(outputfile, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
        if (fd < 0) {
            fprintf(stderr, "Open (outputfile) Failed because of: %s\n", strerror(errno));
            // Must leave the child here: returning would let a second copy
            // of the program keep running the caller's code.
            _exit(EXIT_FAILURE);
        }
        if (fd != STDOUT_FILENO) {
            if (dup2(fd, STDOUT_FILENO) < 0) { // make stdout go to the file
                _exit(EXIT_FAILURE);
            }
            close(fd);
        }
        execvp(sort_argv[0], sort_argv); // only returns on failure
        _exit(127);                      // report "could not exec" to the parent
    }

    // parent process: wait for this specific child, retrying if interrupted
    int status = 0;
    pid_t reaped;
    do {
        reaped = waitpid(pid, &status, 0);
    } while (reaped < 0 && errno == EINTR);

    if (reaped != pid || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
        return -1;
    }
    return 0;
}
|
||
|
|
||
|
//-----------------------------------------------------------
// FUNCTION find_directories:
//    Runs "iodir file_path out_name" to generate the listing file out_name
//    for the directory file_path.
// PARAMETER USAGE :
//    char * file_path : the directory to scan
//    char * out_name  : the listing file iodir is asked to produce
// RETURNS :
//    always 0 (run_external_process does not report the child's status)
// FUNCTION CALLED :
//    void run_external_process(int fds[2], const char** args);
//-----------------------------------------------------------
int find_directories(char *file_path, char *out_name)
{
    const char *args[4] = {"iodir", file_path, out_name, NULL}; // construct arguments
    int fds[2] = {-1, -1}; // filled in by run_external_process

#ifdef DEBUG
    snprintf(buffer, sizeof(buffer), "Generating Directory File [%s] For path: [%s]\n", out_name, file_path);
    write(1, buffer, strlen(buffer));
#endif

    run_external_process(fds, args); // generate the listing using iodir

    // iodir's stdout is not needed; close the read end that
    // run_external_process left open so it is not leaked.
    if (fds[0] >= 0) {
        close(fds[0]);
    }
    return 0;
}
|
||
|
|
||
|
//-----------------------------------------------------------
// FUNCTION identical:
//    Runs "diff file1 file2" and treats an empty stdout as "same content".
// PARAMETER USAGE :
//    char * file1 : "master" file
//    char * file2 : "copy" file
// RETURNS :
//    1 when diff printed nothing, 0 when it printed something,
//    -1 when diff's output could not be read
// FUNCTION CALLED :
//    void run_external_process(int fds[2], const char** args);
// NOTE(review): only stdout is inspected. diff reports problems such as a
//    missing file on stderr, so such a pair also yields an empty stdout and is
//    reported as identical - confirm whether callers can hit that case.
//-----------------------------------------------------------
int identical(char *file1, char *file2)
{
    const char *args[4] = {"diff", file1, file2, NULL}; // construct arguments
    int fds[2] = {-1, -1}; // filled in by run_external_process
    char probe;            // a single byte is enough to tell "no output" from "some output"

    run_external_process(fds, args); // run diff (returns after the child has finished)

    ssize_t size = read(fds[0], &probe, 1); // read from the pipe

    // The read end belongs to us; without this every comparison leaks a
    // descriptor and the process soon runs out of them.
    if (fds[0] >= 0) {
        close(fds[0]);
    }

    if (size < 0) {
        return -1; // could not read diff's output
    }

    if (size == 0) { // diff prints nothing when the files are the same
#ifdef DEBUG
        snprintf(buffer, sizeof(buffer), "\"%s\" is a copy of \"%s\"\n", file2, file1);
        write(1, buffer, strlen(buffer));
#endif
        return 1; // they are identical
    }

#ifdef DEBUG
    snprintf(buffer, sizeof(buffer), "\"%s\" and \"%s\" differ\n", file1, file2);
    write(1, buffer, strlen(buffer));
#endif
    return 0; // they are different
}
|
||
|
|
||
|
//-----------------------------------------------------------
// FUNCTION run_external_process:
//    Creates a pipe, runs args[0] (looked up via PATH) in a child whose stdout
//    is the pipe's write end, and waits for the child to finish.
//    On return fds[0] is the open read end holding whatever the child
//    printed; the CALLER must close it. fds[1] has already been closed.
// PARAMETER USAGE :
//    int fds[2]        : receives the pipe descriptors
//    const char** args : NULL-terminated argument vector for execvp
// FUNCTION CALLED :
//    none
// NOTE(review): the child is waited for before anything reads the pipe, so a
//    child that prints more than the pipe can buffer will block forever.
//-----------------------------------------------------------
void run_external_process(int fds[2], const char **args)
{
    if (pipe(fds) < 0) { // without a pipe the caller would read garbage descriptors
        static const char pipe_msg[] = "Pipe Failed!\n";
        write(1, pipe_msg, sizeof(pipe_msg) - 1);
        exit(-1);
    }

    pid_t pid = fork();

    if (pid < 0) { // error checking
        static const char fork_msg[] = "Fork Failed!\n";
        write(1, fork_msg, sizeof(fork_msg) - 1);
        exit(-1);
    }

    if (pid == 0) { // child process
        close(fds[0]); // the child never reads from the pipe
        if (fds[1] != STDOUT_FILENO) {
            // dup2 targets fd 1 explicitly; close(1)+dup() would pick fd 0
            // instead if stdin happened to be closed.
            if (dup2(fds[1], STDOUT_FILENO) < 0) {
                _exit(127);
            }
            close(fds[1]);
        }
        // execvp does not modify the strings; the cast only adapts const-ness.
        execvp(args[0], (char *const *)args);
        _exit(127); // exec failed: leave without running the parent's atexit/stdio cleanup
    }

    // parent process
    close(fds[1]); // keep only the read end so reads see EOF once the child is gone

    int status = 0;
    while (waitpid(pid, &status, 0) < 0 && errno == EINTR) {
        // interrupted by a signal: wait again
    }
}
|