
275 lines
9.2 KiB
Raw Permalink Normal View History

2022-09-03 23:35:42 -04:00
#include "dclean.h"
#include "recordio.h"

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
// Shared scratch buffer: messages are formatted into it with sprintf and then
// emitted with write(). It is a single global, so each use overwrites the last.
char buffer[512]; // character buffer for sprintf
// FUNCTION main:
//   Usage: dclean directory_master directory_copy
//   Lists the files under both directories, sorts each listing with
//   sortfile(), then lets find_same() compare the two listings.
// Returns:
//   0 on success, 1 on a usage error or when a listing/sorting step fails.
int main(int argc, char *argv[])
{
    if (argc != 3) {
        static const char usage[] = "usage: dclean directory_master directory_copy\n";
        write(1, usage, sizeof usage - 1);
        return 1; // argv[1] / argv[2] must not be touched without both arguments
    }

    char *master = argv[1]; // read in the arguments
    char *copy = argv[2];

    clean_txt(); // remove listing files left behind by an earlier run

    // Build and sort the master listing; the sort is done in place.
    if (find_directories(master, "dclean-master.txt") != 0 ||
        sortfile("dclean-master.txt", "dclean-master.txt") != 0) {
        return 1;
    }

    // Same for the copy directory.
    if (find_directories(copy, "dclean-copy.txt") != 0 ||
        sortfile("dclean-copy.txt", "dclean-copy.txt") != 0) {
        return 1;
    }

    find_same(); // find which files are the same
    return 0;
}
// FUNCTION clean_txt:
//   Removes the listing files from a previous run so the program starts from
//   a clean state. The ".rinx." files are removed alongside the listings
//   (presumably index files kept by the recordio layer -- confirm).
// Input:
//   none
// Output:
//   none; a file that does not exist is not an error, any other failure is
//   reported on stderr and the remaining files are still attempted.
void clean_txt(void)
{
    static const char *const stale[] = {
        "dclean-master.txt",
        "dclean-copy.txt",
        ".rinx.dclean-master.txt",
        ".rinx.dclean-copy.txt",
    };

    // unlink() does the same job as spawning "rm" once per file, without the
    // child processes and without leaving a pipe descriptor open per call.
    for (size_t i = 0; i < sizeof stale / sizeof stale[0]; i++) {
        if (unlink(stale[i]) != 0 && errno != ENOENT) {
            fprintf(stderr, "dclean: cannot remove %s: %s\n", stale[i], strerror(errno));
        }
    }
}
// Writes all `len` bytes of `data` to `fd`, retrying short writes and EINTR.
// Returns 0 on success, -1 on a write error (errno is left set by write()).
static int dclean_write_all(int fd, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t n = write(fd, data, len);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        data += n;
        len -= (size_t)n;
    }
    return 0;
}

// FUNCTION find_same:
//   Walks every entry of dclean-master.txt against every entry of
//   dclean-copy.txt and asks identical() whether the two paths hold the same
//   content. Each matching copy path is written to clean-duplicates.sh as an
//   "rm <path>" line below a "#!/bin/sh" header.
// Input:
//   none (reads dclean-master.txt and dclean-copy.txt)
// Output:
//   none (creates/truncates clean-duplicates.sh); failures go to stderr.
// Uses:
//   int rio_open(const char *pathname, int flags, mode_t mode);
//   int rio_lseek(int fd, int offset, int whence);
//   void * rio_read(int fd, int * return_value);
//   int identical(char * file1, char * file2);
// NOTE(review): paths are written into the script unquoted, so a path with
//   spaces or shell metacharacters will not survive -- consider quoting.
// NOTE(review): the two rio descriptors are never released here because no
//   rio close routine is visible from this file -- confirm against recordio.h.
void find_same(void)
{
    // Each listing line is "<size>*<path>\n". This must be a NUL-terminated
    // string: strtok() scans the delimiter set until it finds the terminator.
    static const char delims[] = "*\n";
    static const char header[] = "#!/bin/sh\n";

    int fd = rio_open("dclean-master.txt", O_RDONLY, 0644); // list of master files
    if (fd < 0) {
        fprintf(stderr, "%s\n", strerror(EIO));
        return; // nothing to compare without the master listing
    }

    int fdcp = rio_open("dclean-copy.txt", O_RDONLY, 0644); // list of copy files
    if (fdcp < 0) {
        fprintf(stderr, "%s\n", strerror(EIO));
        return;
    }

    int fdsh = open("clean-duplicates.sh", O_RDWR | O_CREAT | O_TRUNC, 0644); // the shell script
    if (fdsh < 0) {
        fprintf(stderr, "%s\n", strerror(errno));
        return;
    }

    // The interpreter line has to come first so the generated script runs.
    if (dclean_write_all(fdsh, header, sizeof header - 1) < 0) {
        fprintf(stderr, "%s\n", strerror(errno));
        close(fdsh);
        return;
    }

    // Seeking to the end yields the loop bound for each listing (presumably a
    // record count rather than a byte offset -- confirm against recordio).
    int eol = rio_lseek(fd, 0, SEEK_END);
    int eolcp = rio_lseek(fdcp, 0, SEEK_END);
    rio_lseek(fd, 0, SEEK_SET); // seek back to the beginning
    rio_lseek(fdcp, 0, SEEK_SET);

    for (int i = 0; i < eol; i++) {
        int ir = i; // rio_read hands a value back through this argument
        char *rc = rio_read(fd, &ir);
        if (rc == NULL) {
            continue; // nothing was read for this entry
        }

        // First token is the file size, second is the path. The master path
        // stays valid while strtok() is reused below because it points into
        // rc and no further strtok(NULL, ...) call is made on rc.
        char *token = (strtok(rc, delims) != NULL) ? strtok(NULL, delims) : NULL;

        for (int j = 0; token != NULL && j < eolcp; j++) {
            int jr = j;
            char *rccp = rio_read(fdcp, &jr);
            if (rccp == NULL) {
                continue;
            }

            char *tokencp = (strtok(rccp, delims) != NULL) ? strtok(NULL, delims) : NULL;

            if (tokencp != NULL && identical(token, tokencp) == 1) {
                // Identical content: schedule the copy for removal.
                if (dclean_write_all(fdsh, "rm ", strlen("rm ")) < 0 ||
                    dclean_write_all(fdsh, tokencp, strlen(tokencp)) < 0 ||
                    dclean_write_all(fdsh, "\n", strlen("\n")) < 0) {
                    fprintf(stderr, "%s\n", strerror(errno));
                }
            }

            free(rccp); // release this copy entry before reading the next
        }

        // Rewind the copy listing so the next master entry sees all of it.
        rio_lseek(fdcp, 0, SEEK_SET);
        free(rc);
    }

    if (close(fdsh) < 0) {
        fprintf(stderr, "%s\n", strerror(errno));
    }
}
// FUNCTION sortfile:
//   Sorts inputfile numerically by running "sort -n inputfile" in a child
//   process whose standard output is redirected to outputfile
//   (equivalent to: sort -n inputfile > outputfile, minus the truncation).
// Input:
//   char * inputfile  : the file to sort
//   char * outputfile : the file that receives the sorted lines; created with
//                       owner read/write permission if it does not exist
// Output:
//   0 when sort ran and exited successfully, -1 otherwise.
int sortfile(char *inputfile, char *outputfile)
{
    if (inputfile == NULL || outputfile == NULL) {
        return -1;
    }

    const char *a[4] = {"sort", "-n", inputfile, NULL}; // construct arguments

#ifdef DEBUG
    snprintf(buffer, sizeof buffer, "Sorting File [%s]\n", inputfile);
    write(1, buffer, strlen(buffer));
#endif

    pid_t pid = fork();
    if (pid < 0) {
        static const char msg[] = "Fork Failed!\n";
        write(1, msg, sizeof msg - 1);
        return -1;
    }

    if (pid == 0) {
        // Child. It must never return into the caller's code: leave only
        // through exec or _exit, otherwise two copies of the program run on.
        //
        // O_TRUNC is deliberately absent: callers pass the same path for
        // input and output, and truncating here would empty the input before
        // sort has read it.
        int fd = open(outputfile, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
        if (fd < 0) {
            fprintf(stderr, "Open (outputfile) Failed because of: %s\n", strerror(errno));
            _exit(1);
        }
        if (dup2(fd, STDOUT_FILENO) < 0) { // make stdout go to the file
            _exit(1);
        }
        if (fd != STDOUT_FILENO) {
            close(fd);
        }
        // execvp never modifies the strings; the cast only drops the
        // element-level const that its prototype cannot express.
        execvp(a[0], (char *const *)a);
        _exit(127); // only reached when sort could not be started
    }

    // Parent: wait for this particular child, retrying if interrupted.
    int status = 0;
    pid_t done;
    do {
        done = waitpid(pid, &status, 0);
    } while (done < 0 && errno == EINTR);

    if (done != pid || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
        return -1;
    }
    return 0;
}
// FUNCTION find_directories:
//   Runs "iodir file_path out_name" to generate the listing file out_name
//   for the directory file_path.
// Input:
//   char * file_path : the directory to list
//   char * out_name  : the listing file iodir is asked to produce
// Output:
//   0 once iodir has been run, -1 if an argument is missing.
//   NOTE(review): iodir's exit status is not available through
//   run_external_process, so a failing iodir still yields 0 here.
// Uses:
//   void run_external_process(int fds[2], const char** args);
int find_directories(char *file_path, char *out_name)
{
    if (file_path == NULL || out_name == NULL) {
        return -1;
    }

    const char *a[4] = {"iodir", file_path, out_name, NULL}; // construct arguments
    int fds[2] = {-1, -1}; // stays -1 if the pipe is never created

#ifdef DEBUG
    snprintf(buffer, sizeof buffer, "Generating Directory File [%s] For path: [%s]\n", out_name, file_path);
    write(1, buffer, strlen(buffer));
#endif

    run_external_process(fds, a); // generate the files using iodir

    // iodir's stdout is not consumed here; release the read end so that each
    // call does not leave a descriptor open.
    if (fds[0] >= 0) {
        close(fds[0]);
    }
    return 0;
}
// FUNCTION identical:
//   Runs "diff file1 file2" and decides from diff's standard output whether
//   the two files are the same: no output means identical.
// Input:
//   char * file1 : "master" file
//   char * file2 : "copy" file
// Output:
//   1 when diff printed nothing, 0 when it printed something,
//   -1 when an argument is missing or diff's output could not be read.
// Uses:
//   void run_external_process(int fds[2], const char** args);
// NOTE(review): a diff that fails outright (e.g. a missing file) reports on
//   stderr and leaves stdout empty, which is indistinguishable here from
//   "identical"; the exit status is not exposed by run_external_process.
int identical(char *file1, char *file2)
{
    if (file1 == NULL || file2 == NULL) {
        return -1;
    }

    const char *a[4] = {"diff", file1, file2, NULL}; // construct arguments
    int fds[2] = {-1, -1}; // stays -1 if the pipe is never created
    char probe;            // one byte is enough to know whether diff printed anything
    ssize_t size;

    run_external_process(fds, a); // run diff

    do {
        size = read(fds[0], &probe, 1); // read from the pipe
    } while (size < 0 && errno == EINTR);

    // This runs once per pair of files, so the read end must be released
    // every time or the process runs out of descriptors.
    if (fds[0] >= 0) {
        close(fds[0]);
    }

    if (size < 0) {
        return -1; // could not read diff's output
    }

    if (size == 0) {
#ifdef DEBUG
        snprintf(buffer, sizeof buffer, "\"%s\" is a copy of \"%s\"\n", file2, file1);
        write(1, buffer, strlen(buffer));
#endif
        return 1; // they are identical
    }

#ifdef DEBUG
    snprintf(buffer, sizeof buffer, "\"%s\" and \"%s\" differ\n", file1, file2);
    write(1, buffer, strlen(buffer));
#endif
    return 0; // they are different
}
// FUNCTION run_external_process:
//   Runs args[0] (looked up on PATH) with the argument vector args in a child
//   process whose standard output is sent into a pipe, and waits for the
//   child to finish.
// Input:
//   int fds[2]        : receives the pipe; on return fds[0] is the read end
//                       holding the child's output and fds[1] is -1. The
//                       caller owns fds[0] and must close it.
//   const char** args : NULL-terminated argument vector for execvp
// Output:
//   none; if the pipe or the child cannot be created both fds are set to -1.
// NOTE(review): the child is waited for before anyone reads the pipe, so a
//   child that prints more than the pipe can hold will block and this call
//   will not return.
void run_external_process(int fds[2], const char **args)
{
    static const char fork_failed[] = "Fork Failed!\n";

    if (pipe(fds) < 0) {
        fds[0] = -1;
        fds[1] = -1;
        return;
    }

    pid_t pid = fork();

    if (pid == 0) {
        // Child: stdout becomes the write end of the pipe.
        close(fds[0]);
        if (dup2(fds[1], STDOUT_FILENO) < 0) {
            _exit(127);
        }
        if (fds[1] != STDOUT_FILENO) {
            close(fds[1]); // stdout already refers to the pipe
        }
        // execvp never modifies the strings; the cast only drops the
        // element-level const that its prototype cannot express.
        execvp(args[0], (char *const *)args);
        _exit(127); // only reached when the program could not be started
    }

    if (pid < 0) {
        write(1, fork_failed, sizeof fork_failed - 1);
        // Close both ends: a caller reading fds[0] while this process still
        // held the write end would wait forever for end-of-file.
        close(fds[0]);
        close(fds[1]);
        fds[0] = -1;
        fds[1] = -1;
        return;
    }

    // Parent: drop the write end so a reader sees end-of-file once the
    // child's output has been consumed.
    close(fds[1]);
    fds[1] = -1;

    int status;
    while (waitpid(pid, &status, 0) < 0 && errno == EINTR) {
        // interrupted by a signal: wait again for the same child
    }
}