// vim: set ts=2 sw=2 et tw=80:

/*
 * Assignment 1 - File Splitter
 * Claudio Maggioni
 *
 * This code was written by using documentation on man(3) and cplusplus.com (C
 * standard library section).
 *
 * The flag VERBOSE_ERRORS, if defined, activates proper error message handling
 * and printing on stderr. This is disabled to comply with tests.
 *
 * Exit statuses: 64 for command line misuse, 1-6 for failures of the split
 * subcommand, 7-13 for failures of the merge subcommand, 0 on success.
 */

/*
 * NOTE(review): the header names were missing from the reviewed copy of this
 * file; these are the standard headers the code below requires.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum {
  /*
   * Maximum number of part files expected, as defined by the assignment. It is
   * only used to size the buffers holding part file names. Names are always
   * formatted with bounds checking, so a size parameter small enough to
   * produce more parts than the buffer can name makes split fail cleanly
   * instead of overflowing.
   */
  MAX_SPLIT = 1024,

  /*
   * Size of the byte buffer used by both split and merge to move file
   * contents. Increase or decrease at will.
   */
  BUFFER_SIZE = 1024
};

int digits(int x);
int usage(const char *s);
unsigned char checksum(unsigned char start, const unsigned char *buffer,
                       long size);
int split(const char *filename, long max_size);
int merge(char *index, const char *merged);

/*
 * Print `what` followed by the description of errno on stderr, but only if
 * VERBOSE_ERRORS is enabled. Must be called right after the failing call,
 * before anything else can overwrite errno.
 */
static void report_errno(const char *what) {
#ifdef VERBOSE_ERRORS
  perror(what);
#else
  (void) what;
#endif
}

/*
 * Print a plain message on stderr if VERBOSE_ERRORS is enabled. Used for
 * failures that are not described by errno.
 */
static void report(const char *what) {
#ifdef VERBOSE_ERRORS
  fprintf(stderr, "%s\n", what);
#else
  (void) what;
#endif
}

/*
 * Compute an upper bound on the number of decimal digits of x by counting its
 * base 8 digits (plus one). Used only to compute the maximum length of part
 * file names. Returns 1 for 0.
 *
 * The value is handled as unsigned so that the right shift is well defined
 * and the loop terminates for every argument, negative ones included.
 */
int digits(int x) {
  unsigned int rest = (unsigned int) x;
  int count = 1;

  while (rest != 0) {
    count++;
    rest >>= 3;
  }
  return count;
}

/*
 * Print the usage message on stderr if VERBOSE_ERRORS is enabled. `s` is the
 * program name to show. Always returns 64, the exit status for command line
 * misuse.
 */
int usage(const char *s) {
#ifdef VERBOSE_ERRORS
  fprintf(stderr, "Usage: %s split [filename] [max-size]\n"
                  " %s merge [index] [merged]\n", s, s);
#else
  (void) s;
#endif
  return 64;
}

/*
 * Entry point: dispatch to the split or merge subcommand. Returns the exit
 * status of the subcommand, or 64 if the command line is not valid.
 */
int main(int argc, char **const argv) {
  const char *const self = argc > 0 ? argv[0] : "splitter";

  if (argc != 4) {
    return usage(self);
  }

  if (strcmp(argv[1], "split") == 0) {
    char *end = NULL;

    errno = 0;
    const long max = strtol(argv[3], &end, 10);
    // Reject empty input, trailing garbage, out of range and non-positive
    // sizes alike: none of them is a usable part size.
    if (end == argv[3] || *end != '\0' || errno == ERANGE || max <= 0) {
      return usage(self);
    }
    return split(argv[2], max);
  }

  if (strcmp(argv[1], "merge") == 0) {
    return merge(argv[2], argv[3]);
  }

  return usage(self);
}

/*
 * Compute checksum on part files by using XOR. An initial value must be given
 * in order to support computation of the checksum in chunks: pass 0x00 for the
 * first chunk and the previous result for the following ones.
 */
unsigned char checksum(unsigned char start, const unsigned char *buffer,
                       long size) {
  unsigned char sum = start;

  for (long i = 0; i < size; i++) {
    sum ^= buffer[i];
  }
  return sum;
}

/*
 * Copy up to max_size bytes from source into a new file named part_name.
 *
 * The file is created (failing if it already exists) as soon as there is data
 * for it, or even without data when force_create is set, which is how an empty
 * source still yields one empty part.
 *
 * On return *sum holds the XOR checksum of the copied bytes, *created tells
 * whether the part file was created and *at_eof whether the end of source was
 * reached. Returns 0 on success, or the split exit status 3 (read error), 4
 * (cannot create part) or 5 (write error). The part file is always closed
 * before returning.
 */
static int write_part(FILE *source, const char *part_name, long max_size,
                      int force_create, unsigned char *sum, int *created,
                      int *at_eof) {
  unsigned char buffer[BUFFER_SIZE];
  FILE *part_file = NULL;
  int status = 0;
  long remaining = max_size;

  *sum = 0x00;
  *created = 0;
  *at_eof = 0;

  while (remaining > 0) {
    const size_t want = remaining > BUFFER_SIZE ? (size_t) BUFFER_SIZE
                                                : (size_t) remaining;
    const size_t got = fread(buffer, 1, want, source);

    if (ferror(source)) {
      report_errno("Cannot read source file");
      status = 3;
      break;
    }

    if (part_file == NULL && (got > 0 || force_create)) {
      part_file = fopen(part_name, "wbx");
      if (part_file == NULL) {
        report_errno("Cannot create part file");
        return 4;
      }
      *created = 1;
    }

    if (got > 0) {
      *sum = checksum(*sum, buffer, (long) got);
      // Any short write is a failure, not just a write of zero bytes.
      if (fwrite(buffer, 1, got, part_file) != got) {
        report_errno("Cannot write on part file");
        status = 5;
        break;
      }
      remaining -= (long) got;
    }

    // A short read without an error means the source is exhausted. The part
    // written so far is still complete and must be recorded by the caller.
    if (got < want) {
      *at_eof = 1;
      break;
    }
  }

  // fclose flushes buffered data, so its failure is a write failure too.
  if (part_file != NULL && fclose(part_file) != 0 && status == 0) {
    report_errno("Cannot write on part file");
    status = 5;
  }
  return status;
}

/*
 * Entirety of the split subcommand: split `filename` in parts of at most
 * max_size bytes named `<filename>.partN` (N starting from 1) and write
 * `<filename>.index`, which lists one "<part name> <checksum>" line per part.
 * Neither the index nor the parts may already exist.
 *
 * Returns the exit status code: 0 on success, 1 if the source cannot be
 * opened, 2 if the index cannot be created, 3 on read errors, 4 if a part
 * cannot be created (or named), 5 on part write errors, 6 on index write
 * errors and 64 if max_size is not positive.
 */
int split(const char *filename, long max_size) {
  if (max_size <= 0) {
    return 64;
  }

  const size_t fnlen = strlen(filename);

  FILE *source = fopen(filename, "rb");
  if (source == NULL) {
    report_errno("Cannot open source file");
    return 1;
  }

  // sizeof of a string literal already accounts for the terminator
  char index_name[fnlen + sizeof ".index"];
  snprintf(index_name, sizeof index_name, "%s.index", filename);

  FILE *index = fopen(index_name, "wx");
  if (index == NULL) {
    report_errno("Cannot create index file");
    fclose(source);
    return 2;
  }

  // Compute the maximum part name length and allocate a VLA to match that
  char part_name[fnlen + strlen(".part") + (size_t) digits(MAX_SPLIT) + 1];
  int status = 0;
  int at_eof = 0;

  for (int part = 1; status == 0 && !at_eof; part++) {
    const int len = snprintf(part_name, sizeof part_name, "%s.part%d",
                             filename, part);
    if (len < 0 || (size_t) len >= sizeof part_name) {
      report("Too many part files");
      status = 4;
      break;
    }

    unsigned char sum = 0x00;
    int created = 0;

    // Only the first part is created unconditionally.
    status = write_part(source, part_name, max_size, part == 1, &sum,
                        &created, &at_eof);
    if (status != 0 || !created) {
      break;
    }

    if (fprintf(index, "%s %02x\n", part_name, (unsigned int) sum) < 0) {
      report("Cannot update index file");
      status = 6;
    }
  }

  fclose(source);
  if (fclose(index) != 0 && status == 0) {
    report_errno("Cannot update index file");
    status = 6;
  }
  return status;
}

/*
 * Append the whole content of the file named part_name to merged, storing the
 * XOR checksum of the copied bytes in *check.
 *
 * Returns 0 on success, or the merge exit status 9 (cannot open part), 10
 * (read error) or 11 (write error on merged). The part is always closed before
 * returning.
 */
static int append_part(const char *part_name, FILE *merged,
                       unsigned char *check) {
  FILE *part = fopen(part_name, "rb");
  if (part == NULL) {
    report_errno("Cannot open part");
    return 9;
  }

  unsigned char buffer[BUFFER_SIZE];
  int status = 0;

  *check = 0x00;
  for (;;) {
    const size_t got = fread(buffer, 1, sizeof buffer, part);

    if (ferror(part)) {
      report_errno("Cannot read part");
      status = 10;
      break;
    }
    if (got == 0) {
      break;
    }

    *check = checksum(*check, buffer, (long) got);
    if (fwrite(buffer, 1, got, merged) != got) {
      report_errno("Cannot wrtite on merged");
      status = 11;
      break;
    }
  }

  fclose(part);
  return status;
}

/*
 * Entirety of the merge subcommand: read the index file n_index, concatenate
 * the parts it lists in the new file n_merged (which may not already exist)
 * and verify the checksum of each part against the one stored in the index.
 *
 * When a checksum does not match, the 1-based number of the offending part is
 * printed on stdout.
 *
 * Returns the exit status code: 0 on success, 7 if the index cannot be opened,
 * 8 if the merged file cannot be created, 9 if a part cannot be opened, 10 on
 * part read errors, 11 on write errors, 12 on checksum mismatch and 13 if the
 * index cannot be read or is malformed.
 */
int merge(char *n_index, const char *n_merged) {
  FILE *index = fopen(n_index, "r");
  if (index == NULL) {
    report_errno("Cannot open index file");
    return 7;
  }

  FILE *merged = fopen(n_merged, "wbx");
  if (merged == NULL) {
    report_errno("Cannot create merged file");
    fclose(index);
    return 8;
  }

  // Allocate enough memory to store a full index line: the path of a part,
  // the checksum and the line terminator. We don't care about the .part
  // suffix since the index file name contains .index which is already longer.
  // part_name is as large as the line so that "%s" can never overflow it.
  char line[strlen(n_index) + (size_t) digits(MAX_SPLIT) + 16];
  char part_name[sizeof line];

  int status = 0;
  unsigned int n_part = 1;

  while (fgets(line, (int) sizeof line, index) != NULL) {
    if (strchr(line, '\n') == NULL) {
      // No newline: either this is the last line, or the line did not fit.
      const int next = fgetc(index);
      if (next != EOF && next != '\n') {
        report("Malformed index file");
        status = 13;
        break;
      }
    }

    unsigned int sum = 0;
    const int fields = sscanf(line, "%s %x", part_name, &sum);
    if (fields == EOF) {
      continue;  // blank line
    }
    if (fields != 2) {
      report("Malformed index file");
      status = 13;
      break;
    }

    unsigned char check = 0x00;
    status = append_part(part_name, merged, &check);
    if (status != 0) {
      break;
    }

    if ((unsigned char) sum != check) {
      report("Checksums differ for part");
      printf("%u\n", n_part);
      status = 12;
      break;
    }
    n_part++;
  }

  if (status == 0 && ferror(index)) {
    report_errno("Cannot read index file");
    status = 13;
  }

  fclose(index);
  if (fclose(merged) != 0 && status == 0) {
    report_errno("Cannot wrtite on merged");
    status = 11;
  }
  return status;
}