This repository has been archived on 2021-10-31. You can view files and clone it, but cannot push or open issues or pull requests.
sys_prog/splitter/splitter.c

285 lines
6.3 KiB
C

// vim: set ts=2 sw=2 et tw=80:
/*
* Assignment 1 - File Splitter
* Claudio Maggioni
*
* This code was written by using documentation on man(3) and cplusplus.com (C
* standard library section).
*
* The flag VERBOSE_ERRORS, if defined, activates proper error message handling
* and printing on stderr. This is disabled to comply with tests.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libgen.h>
/*
 * Maximum number of part files expected, as defined by the assignment. Giving
 * a size parameter so small that more part files than this are produced may
 * cause buffer overflows while handling part file names.
 */
const unsigned int MAX_SPLIT = 1024;
/*
 * Size in bytes of the buffer used to read file contents in chunks. Increase
 * or decrease at will.
 */
const int BUFFER_SIZE = 1024;
/*
 * Forward declarations of the helpers and subcommands defined later in this
 * file. Every declaration is a full prototype so that the compiler checks the
 * arguments at each call site.
 */
int digits(int x);
int usage(const char* s);
int split(const char* filename, long max_size);
int merge(char* index, const char* merged);
/**
 * Compute an approximate (never too small) figure of the number of decimal
 * digits in x by counting its 3-bit groups, i.e. its base 8 digits, which is
 * cheaper than computing log_10. Used only to compute the maximum length of
 * part filenames.
 *
 * The result is one more than the number of base 8 digits of x, and 1 when x
 * is 0. x is treated as an unsigned bit pattern so that the right shift is
 * well defined and the loop terminates for negative values too.
 */
int digits(int x) {
  unsigned int rest = (unsigned int) x;
  int count = 1;
  while (rest != 0) {
    count++;
    rest >>= 3;
  }
  return count;
}
/*
 * Report incorrect command-line usage.
 *
 * When VERBOSE_ERRORS is defined, a usage message naming the program as `s`
 * is printed on stderr; otherwise nothing is printed.
 *
 * Returns 64, the exit status the program uses for usage errors (the value
 * conventionally named EX_USAGE in <sysexits.h>).
 *
 * Defined as an ordinary external function: a bare `inline` definition would
 * only link when a separate non-inline declaration is also visible.
 */
int usage(const char* s) {
#ifdef VERBOSE_ERRORS
  fprintf(stderr, "Usage: %s split [filename] [max-size]\n"
      " %s merge [index] [merged]\n", s, s);
#else
  (void) s; // only needed for the verbose message
#endif
  return 64;
}
/*
 * Program entry point. Expects exactly three arguments after the program
 * name:
 *
 *   split [filename] [max-size]
 *   merge [index] [merged]
 *
 * Returns the status of the selected subcommand, or the status of usage()
 * when the arguments are not valid. [max-size] must be a strictly positive
 * decimal number with no trailing characters.
 */
int main(int argc, char** const argv) {
  if (argc != 4) {
    return usage(argv[0]);
  }

  const char* command = argv[1];

  if (strcmp(command, "split") == 0) {
    char* end = NULL;
    const long max = strtol(argv[3], &end, 10);
    // Reject empty input, trailing garbage such as "10abc" and sizes that are
    // not strictly positive.
    if (end == argv[3] || *end != '\0' || max <= 0) {
      return usage(argv[0]);
    }
    return split(argv[2], max);
  }

  if (strcmp(command, "merge") == 0) {
    return merge(argv[2], argv[3]);
  }

  return usage(argv[0]);
}
/*
 * XOR checksum over `size` bytes of `buffer`, seeded with `start`. Feeding the
 * result of one call as the seed of the next allows a checksum to be computed
 * over data that arrives in several chunks. A size that is zero or negative
 * leaves the seed unchanged.
 */
unsigned char checksum(unsigned char start,
    const unsigned char* buffer, long size) {
  unsigned char acc = start;
  const unsigned char* cursor = buffer;
  long left = size;
  while (left > 0) {
    acc ^= *cursor;
    cursor++;
    left--;
  }
  return acc;
}
/*
 * Entry point of the split subcommand.
 *
 * Splits the file `filename` into consecutive part files named
 * "<filename>.part<N>" (N starting at 1), each holding at most `max_size`
 * bytes, and writes "<filename>.index" with one line per part made of the
 * part name and the XOR checksum of its contents as two hex digits. An empty
 * source still produces one empty part. No output file may already exist.
 *
 * Returns the exit status code:
 *   0  success
 *   1  cannot open the source file
 *   2  cannot create the index file
 *   3  cannot read the source file
 *   4  cannot create a part file
 *   5  cannot write a part file
 *   6  cannot update the index file
 *   64 max_size is not strictly positive
 */
int split(const char* filename, long max_size) {
  if (max_size <= 0) {
    // Nothing sensible can be produced; same status as a usage error.
    return 64;
  }

  FILE* source = fopen(filename, "rb");
  if (source == NULL) {
#ifdef VERBOSE_ERRORS
    perror("Cannot open source file");
#endif
    return 1;
  }

  const size_t fnlen = strlen(filename);

  // sizeof of a string literal already counts the terminating NUL
  char index_name[fnlen + sizeof ".index"];
  snprintf(index_name, sizeof index_name, "%s.index", filename);

  FILE* index = fopen(index_name, "wx");
  if (index == NULL) {
#ifdef VERBOSE_ERRORS
    perror("Cannot create index file");
#endif
    fclose(source);
    return 2;
  }

  // 3 characters per byte are enough for the decimal form of any int, so the
  // part name fits no matter how many parts are produced.
  char part_name[fnlen + sizeof ".part" + 3 * sizeof(int)];
  unsigned char buffer[BUFFER_SIZE];
  FILE* part_file = NULL;
  int status = 0;

  for (int part = 1; ; part++) {
    snprintf(part_name, sizeof part_name, "%s.part%d", filename, part);
    unsigned char sum = 0x00;
    long remaining = max_size;

    while (remaining > 0) {
      const size_t want = remaining > BUFFER_SIZE
        ? (size_t) BUFFER_SIZE : (size_t) remaining;
      const size_t got = fread(buffer, 1, want, source);
      if (ferror(source)) {
#ifdef VERBOSE_ERRORS
        perror("Cannot read source file");
#endif
        status = 3;
        break;
      }
      if (got == 0) {
        break; // end of the source file
      }

      // Create the part lazily so that no empty trailing part is produced
      // when the source size is a multiple of max_size.
      if (part_file == NULL) {
        part_file = fopen(part_name, "wbx");
        if (part_file == NULL) {
#ifdef VERBOSE_ERRORS
          perror("Cannot create part file");
#endif
          status = 4;
          break;
        }
      }

      sum = checksum(sum, buffer, (long) got);
      if (fwrite(buffer, 1, got, part_file) != got) {
#ifdef VERBOSE_ERRORS
        perror("Cannot write on part file");
#endif
        status = 5;
        break;
      }
      remaining -= (long) got;
    }

    if (status != 0) {
      break;
    }

    if (part_file == NULL) {
      if (part > 1) {
        break; // the previous part consumed the last byte of the source
      }
      // Empty source: still emit a single empty part and its index entry.
      part_file = fopen(part_name, "wbx");
      if (part_file == NULL) {
#ifdef VERBOSE_ERRORS
        perror("Cannot create part file");
#endif
        status = 4;
        break;
      }
    }

    if (fprintf(index, "%s %02x\n", part_name, sum) < 0) {
#ifdef VERBOSE_ERRORS
      fprintf(stderr, "Cannot update index file");
#endif
      status = 6;
      break;
    }

    // fclose flushes buffered data, so its failure is a write failure.
    FILE* finished = part_file;
    part_file = NULL;
    if (fclose(finished) != 0) {
#ifdef VERBOSE_ERRORS
      perror("Cannot write on part file");
#endif
      status = 5;
      break;
    }

    if (feof(source)) {
      break;
    }
  }

  if (part_file != NULL) {
    fclose(part_file);
  }
  fclose(source);
  if (fclose(index) != 0 && status == 0) {
#ifdef VERBOSE_ERRORS
    perror("Cannot update index file");
#endif
    status = 6;
  }
  return status;
}
/*
 * Split one index line in place into its part name and checksum.
 *
 * `line` must have no trailing newline. The checksum is the hexadecimal token
 * after the last space; everything before that space is the part name, so
 * names containing spaces are accepted. On success `line` is truncated to the
 * part name, *sum receives the checksum and 0 is returned. Returns -1 when
 * the line is malformed.
 */
static int parse_index_entry(char* line, unsigned int* sum) {
  char* sep = strrchr(line, ' ');
  if (sep == NULL || sep == line) {
    return -1;
  }
  char* end = NULL;
  const unsigned long value = strtoul(sep + 1, &end, 16);
  if (end == sep + 1 || *end != '\0') {
    return -1;
  }
  *sep = '\0';
  *sum = (unsigned int) value;
  return 0;
}

/*
 * Append the contents of the part file `part_name` to `merged`, folding every
 * byte into *check. Returns 0 on success, 9 if the part cannot be opened, 10
 * if it cannot be read and 11 if `merged` cannot be written.
 */
static int append_part(const char* part_name, FILE* merged,
    unsigned char* check) {
  enum { CHUNK_SIZE = 1024 };

  FILE* part = fopen(part_name, "rb");
  if (part == NULL) {
#ifdef VERBOSE_ERRORS
    perror("Cannot open part");
#endif
    return 9;
  }

  unsigned char buffer[CHUNK_SIZE];
  int status = 0;
  for (;;) {
    const size_t got = fread(buffer, 1, sizeof buffer, part);
    if (ferror(part)) {
#ifdef VERBOSE_ERRORS
      perror("Cannot read part");
#endif
      status = 10;
      break;
    }
    if (got == 0) {
      break; // end of this part
    }
    *check = checksum(*check, buffer, (long) got);
    if (fwrite(buffer, 1, got, merged) != got) {
#ifdef VERBOSE_ERRORS
      perror("Cannot write on merged");
#endif
      status = 11;
      break;
    }
  }
  fclose(part);
  return status;
}

/*
 * Entry point of the merge subcommand.
 *
 * Reads the index file `n_index`, one "<part name> <hex checksum>" entry per
 * line, and appends every listed part, in order, to the new file `n_merged`,
 * which must not exist yet. After each part is appended its XOR checksum is
 * compared with the low byte of the value stored in the index; on a mismatch
 * the 1-based number of the offending part is printed on stdout. Blank lines
 * are skipped; an index without entries yields an empty merged file.
 *
 * Returns the exit status code:
 *   0  success
 *   7  cannot open the index file
 *   8  cannot create the merged file
 *   9  cannot open a part
 *   10 cannot read a part
 *   11 cannot write the merged file
 *   12 checksum mismatch
 *   13 index unreadable or containing a malformed or overlong entry
 */
int merge(char* n_index, const char* n_merged) {
  FILE* index = fopen(n_index, "r");
  if (index == NULL) {
#ifdef VERBOSE_ERRORS
    perror("Cannot open index file");
#endif
    return 7;
  }

  FILE* merged = fopen(n_merged, "wbx");
  if (merged == NULL) {
#ifdef VERBOSE_ERRORS
    perror("Cannot create merged file");
#endif
    fclose(index);
    return 8;
  }

  // Entries are read with a bounded fgets so that an index file with
  // unexpected contents cannot overflow the buffer.
  char line[8192];
  unsigned int n_part = 1;
  int status = 0;

  while (fgets(line, sizeof line, index) != NULL) {
    size_t len = strlen(line);
    if (len == sizeof line - 1 && line[len - 1] != '\n') {
      // The entry did not fit in the buffer.
#ifdef VERBOSE_ERRORS
      fprintf(stderr, "Index entry too long\n");
#endif
      status = 13;
      break;
    }
    while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
      line[--len] = '\0';
    }
    if (len == 0) {
      continue; // blank line
    }

    unsigned int sum = 0;
    if (parse_index_entry(line, &sum) != 0) {
#ifdef VERBOSE_ERRORS
      fprintf(stderr, "Malformed index entry\n");
#endif
      status = 13;
      break;
    }

    unsigned char check = 0x00;
    status = append_part(line, merged, &check);
    if (status != 0) {
      break;
    }

    if ((unsigned char) sum != check) {
#ifdef VERBOSE_ERRORS
      // Not an errno condition, so perror would print an unrelated message.
      fprintf(stderr, "Checksums differ for part %u\n", n_part);
#endif
      printf("%u\n", n_part);
      status = 12;
      break;
    }
    n_part++;
  }

  if (status == 0 && ferror(index)) {
#ifdef VERBOSE_ERRORS
    perror("Cannot read index file");
#endif
    status = 13;
  }

  fclose(index);
  // fclose flushes buffered data, so its failure is a write failure.
  if (fclose(merged) != 0 && status == 0) {
#ifdef VERBOSE_ERRORS
    perror("Cannot write on merged");
#endif
    status = 11;
  }
  return status;
}