2019-10-18 07:43:32 +00:00
|
|
|
// vim: set ts=2 sw=2 et tw=80:
|
2019-10-23 14:03:59 +00:00
|
|
|
/*
 * Assignment 1 - File Splitter
 * Claudio Maggioni
 *
 * This code was written using the documentation in man(3) and on
 * cplusplus.com (C standard library section).
 *
 * The flag VERBOSE_ERRORS, if defined, enables descriptive error messages on
 * stderr. It is left undefined by default to comply with the tests.
 */
|
2019-10-18 07:43:32 +00:00
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2019-10-23 14:03:59 +00:00
|
|
|
#include <libgen.h>
|
|
|
|
|
|
|
|
/*
 * Maximum number of part files expected, as defined by the assignment. The
 * limit is not checked at run time: buffers sized from it are only guaranteed
 * to be large enough while the number of part files stays within it.
 */
|
|
|
|
const unsigned int MAX_SPLIT = 1024; /* advisory only: never enforced at run time */
|
|
|
|
|
|
|
|
/*
 * Size of the byte buffer used by split to read the source file in chunks.
 * Increase or decrease at will.
 */
|
2019-10-18 07:43:32 +00:00
|
|
|
const int BUFFER_SIZE = 1024; /* in bytes */
|
|
|
|
|
2019-10-23 14:03:59 +00:00
|
|
|
/*
 * Forward declarations of the helpers and subcommands defined further down.
 * Every prototype spells out its parameters: an empty list such as `usage()`
 * is not a prototype before C23 and means `(void)` from C23 on, which would
 * clash with the one-argument definition.
 */
int digits(int x);
int usage(const char* s);
int split(const char* filename, long max_size);
int merge(char* index, const char* merged);
|
|
|
|
|
|
|
|
/**
 * Compute an approximate figure for the number of decimal digits of x by
 * counting its octal digits (an approximation of log base 8, cheaper than
 * log base 10) and adding one. The result is therefore never smaller than the
 * real number of decimal digits. Used only to compute the maximum length of
 * part file names.
 *
 * x is reinterpreted as unsigned, so a negative argument is treated as the
 * corresponding large unsigned value instead of shifting a negative number.
 *
 * Returns 1 for 0, and (number of octal digits + 1) otherwise.
 */
int digits(int x) {
  unsigned int rest = (unsigned int) x;
  int count = 1;

  // Drop one octal digit (3 bits) per step until nothing is left
  while (rest != 0) {
    rest >>= 3;
    count++;
  }

  return count;
}
|
2019-10-18 07:43:32 +00:00
|
|
|
|
2019-10-23 14:03:59 +00:00
|
|
|
/*
 * Print the usage message on stderr if VERBOSE_ERRORS is defined; otherwise
 * print nothing.
 *
 * s: program name shown in the message (main passes argv[0]).
 *
 * Always returns 64, which main uses as the exit status for a malformed
 * command line.
 */
int usage(const char* s) {
#ifdef VERBOSE_ERRORS
  fprintf(stderr, "Usage: %s split [filename] [max-size]\n"
                  " %s merge [index] [merged]\n", s, s);
#else
  (void) s; // unused when messages are compiled out
#endif
  return 64;
}
|
|
|
|
|
|
|
|
/*
 * Program entry point. Expects exactly three arguments after the program
 * name:
 *
 *   split [filename] [max-size]   -> split(filename, max-size)
 *   merge [index] [merged]        -> merge(index, merged)
 *
 * max-size must be a complete, strictly positive decimal number: inputs with
 * no digits or with trailing garbage (e.g. "12abc") are rejected.
 *
 * Returns the status of the chosen subcommand, or the value of usage() when
 * the command line is malformed.
 */
int main(int argc, char** const argv) {
  if (argc != 4) return usage(argv[0]);

  if (strcmp(argv[1], "split") == 0) {
    char* end = NULL;
    const long max = strtol(argv[3], &end, 10);

    // end == argv[3]: no digits consumed; *end != '\0': trailing garbage
    if (end == argv[3] || *end != '\0' || max <= 0) {
      return usage(argv[0]);
    }
    return split(argv[2], max);
  }

  if (strcmp(argv[1], "merge") == 0) {
    return merge(argv[2], argv[3]);
  }

  return usage(argv[0]);
}
|
|
|
|
|
2019-10-23 14:03:59 +00:00
|
|
|
/*
 * Compute an 8-bit checksum by XOR-ing together the bytes of a buffer.
 *
 * start:  checksum accumulated so far; pass 0x00 for the first chunk and the
 *         previous return value for every following chunk, so that data can
 *         be checksummed piece by piece.
 * buffer: bytes to fold into the checksum.
 * size:   number of bytes to read from buffer; when it is zero or negative
 *         the buffer is not read at all.
 *
 * Returns start XOR-ed with each of the first `size` bytes of buffer.
 */
unsigned char checksum(unsigned char start,
                       const unsigned char* buffer, long size) {
  unsigned char sum = start;
  for (long i = 0; i < size; i++) {
    sum ^= buffer[i];
  }
  return sum;
}
|
|
|
|
|
2019-10-23 14:03:59 +00:00
|
|
|
/*
|
|
|
|
* Entire of split subcommand. Returns the exit status code.
|
|
|
|
*/
|
2019-10-18 07:43:32 +00:00
|
|
|
int split(const char* filename, long max_size) {
|
|
|
|
FILE* source = fopen(filename, "r");
|
|
|
|
if (source == NULL) {
|
2019-10-20 12:38:25 +00:00
|
|
|
#ifdef VERBOSE_ERRORS
|
2019-10-18 07:43:32 +00:00
|
|
|
perror("Cannot open source file");
|
2019-10-20 12:38:25 +00:00
|
|
|
#endif
|
2019-10-18 07:43:32 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
const int fnlen = strlen(filename);
|
2019-10-23 14:03:59 +00:00
|
|
|
|
|
|
|
// Compute the mamimum filename length and allocate a VLA to match that
|
|
|
|
char index_name[fnlen + strlen(".part") + digits(MAX_SPLIT) + 1];
|
|
|
|
|
2019-10-18 07:43:32 +00:00
|
|
|
sprintf(index_name, "%s.index", filename);
|
|
|
|
FILE* index = fopen(index_name, "wx");
|
|
|
|
|
|
|
|
if (index == NULL) {
|
2019-10-20 12:38:25 +00:00
|
|
|
#ifdef VERBOSE_ERRORS
|
2019-10-18 07:43:32 +00:00
|
|
|
perror("Cannot create index file");
|
2019-10-20 12:38:25 +00:00
|
|
|
#endif
|
2019-10-18 07:43:32 +00:00
|
|
|
fclose(source);
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
char part_name[fnlen + 10];
|
|
|
|
unsigned char buffer[BUFFER_SIZE];
|
|
|
|
|
|
|
|
for(int part = 1; !feof(source); part++) {
|
|
|
|
// assign file name to part
|
|
|
|
sprintf(part_name, "%s.part%d", filename, part);
|
|
|
|
|
2019-10-20 12:38:25 +00:00
|
|
|
FILE* part_file = NULL;
|
2019-10-18 07:43:32 +00:00
|
|
|
unsigned char sum = 0x00;
|
|
|
|
|
2019-10-23 14:03:59 +00:00
|
|
|
int first = part == 1;
|
|
|
|
for (long i = max_size; i > 0 && (!feof(source) || first);) {
|
|
|
|
|
2019-10-18 07:43:32 +00:00
|
|
|
const long to_read = i > BUFFER_SIZE ? BUFFER_SIZE : i;
|
|
|
|
const long bytes = fread(buffer, 1, to_read, source);
|
|
|
|
|
2019-10-20 12:38:25 +00:00
|
|
|
if (ferror(source)) {
|
|
|
|
#ifdef VERBOSE_ERRORS
|
2019-10-18 07:43:32 +00:00
|
|
|
perror("Cannot read source file");
|
2019-10-20 12:38:25 +00:00
|
|
|
#endif
|
2019-10-18 07:43:32 +00:00
|
|
|
fclose(source);
|
|
|
|
fclose(index);
|
|
|
|
return 3;
|
2019-10-23 14:03:59 +00:00
|
|
|
} else if (bytes == 0 && part > 1) {
|
2019-10-20 12:38:25 +00:00
|
|
|
goto all_read;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (part_file == NULL) {
|
|
|
|
part_file = fopen(part_name, "wx");
|
|
|
|
|
|
|
|
if (part_file == NULL) {
|
|
|
|
#ifdef VERBOSE_ERRORS
|
|
|
|
perror("Cannot create part file");
|
|
|
|
#endif
|
|
|
|
fclose(source);
|
|
|
|
fclose(index);
|
|
|
|
return 4;
|
|
|
|
}
|
2019-10-18 07:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
sum = checksum(sum, buffer, bytes);
|
|
|
|
|
|
|
|
const long w_bytes = fwrite(buffer, 1, bytes, part_file);
|
|
|
|
|
2019-10-23 14:03:59 +00:00
|
|
|
if (w_bytes == 0 && !first) {
|
2019-10-20 12:38:25 +00:00
|
|
|
#ifdef VERBOSE_ERRORS
|
2019-10-18 07:43:32 +00:00
|
|
|
perror("Cannot write on part file");
|
2019-10-20 12:38:25 +00:00
|
|
|
#endif
|
2019-10-18 07:43:32 +00:00
|
|
|
fclose(source);
|
|
|
|
fclose(index);
|
|
|
|
fclose(part_file);
|
|
|
|
return 5;
|
|
|
|
}
|
|
|
|
|
|
|
|
i -= bytes;
|
2019-10-23 14:03:59 +00:00
|
|
|
first = 0;
|
2019-10-18 07:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (fprintf(index, "%s %02x\n", part_name, sum) < 0) {
|
2019-10-20 12:38:25 +00:00
|
|
|
#ifdef VERBOSE_ERRORS
|
2019-10-18 07:43:32 +00:00
|
|
|
fprintf(stderr, "Cannot update index file");
|
2019-10-20 12:38:25 +00:00
|
|
|
#endif
|
2019-10-18 07:43:32 +00:00
|
|
|
fclose(source);
|
|
|
|
fclose(index);
|
|
|
|
fclose(part_file);
|
|
|
|
return 6;
|
|
|
|
}
|
|
|
|
|
|
|
|
fclose(part_file);
|
|
|
|
}
|
|
|
|
|
2019-10-20 12:38:25 +00:00
|
|
|
all_read:
|
2019-10-18 07:43:32 +00:00
|
|
|
fclose(source);
|
|
|
|
fclose(index);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-10-23 14:03:59 +00:00
|
|
|
/*
 * Entry point of the merge subcommand.
 *
 * Reads the index file n_index, whose entries have the form
 * "<part name> <checksum in hex>", and appends the content of every listed
 * part, in order, to the new file n_merged (which must not exist yet). The
 * XOR checksum of each part is recomputed and compared with the low 8 bits of
 * the value stored in the index; on the first mismatch the 1-based number of
 * the offending part is printed on stdout and merging stops.
 *
 * Part names are whitespace-delimited and limited to 4095 characters.
 *
 * Returns the exit status code:
 *   0  success
 *   7  cannot open the index file, or the index is unreadable or malformed
 *   8  cannot create the merged file
 *   9  cannot open a part file
 *   10 cannot read a part file
 *   11 cannot write (or flush) the merged file
 *   12 checksum mismatch
 */
int merge(char* n_index, const char* n_merged) {
  FILE* index = fopen(n_index, "r");
  if (index == NULL) {
#ifdef VERBOSE_ERRORS
    perror("Cannot open index file");
#endif
    return 7;
  }

  FILE* merged = fopen(n_merged, "wbx");
  if (merged == NULL) {
#ifdef VERBOSE_ERRORS
    perror("Cannot create merged file");
#endif
    fclose(index);
    return 8;
  }

  char part_name[4096]; // keep in sync with the %4095s width below
  unsigned char buffer[1024];
  FILE* part = NULL;
  unsigned int n_part = 1;
  unsigned int sum = 0;
  int status = 0;
  int matched;

  // The field width bounds the name so a long entry cannot overflow part_name
  while ((matched = fscanf(index, "%4095s %x", part_name, &sum)) == 2) {
    part = fopen(part_name, "rb");
    if (part == NULL) {
#ifdef VERBOSE_ERRORS
      perror("Cannot open part");
#endif
      status = 9;
      goto cleanup;
    }

    unsigned char check = 0x00;

    for (;;) {
      const size_t bytes = fread(buffer, 1, sizeof buffer, part);

      if (ferror(part)) {
#ifdef VERBOSE_ERRORS
        perror("Cannot read part");
#endif
        status = 10;
        goto cleanup;
      }

      if (bytes == 0) {
        break; // end of this part
      }

      check = checksum(check, buffer, (long) bytes);

      if (fwrite(buffer, 1, bytes, merged) != bytes) {
#ifdef VERBOSE_ERRORS
        perror("Cannot write on merged");
#endif
        status = 11;
        goto cleanup;
      }
    }

    fclose(part);
    part = NULL;

    if ((unsigned char) sum != check) {
#ifdef VERBOSE_ERRORS
      // errno is unrelated to a checksum mismatch, so perror is not used
      fprintf(stderr, "Checksums differ for part\n");
#endif
      printf("%u\n", n_part);
      status = 12;
      goto cleanup;
    }

    n_part++;
  }

  // A clean end of the index makes fscanf return EOF without a stream error;
  // anything else is a truncated or malformed entry
  if (matched != EOF || ferror(index)) {
#ifdef VERBOSE_ERRORS
    fprintf(stderr, "Cannot parse index file\n");
#endif
    status = 7;
  }

cleanup:
  if (part != NULL) {
    fclose(part);
  }
  fclose(index);
  // fclose flushes buffered data, so a failure here is a write failure
  if (fclose(merged) != 0 && status == 0) {
#ifdef VERBOSE_ERRORS
    perror("Cannot write on merged");
#endif
    status = 11;
  }
  return status;
}
|