From 4fff8ef2f348aaf8bb3b1ba3a18064c13dc7cf28 Mon Sep 17 00:00:00 2001 From: "Felix A. Croes" <felix@dworkin.nl> Date: Mon, 20 Sep 2021 16:47:04 +0200 Subject: [PATCH] Document Archive.hh. --- src/Archive.hh | 241 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 170 insertions(+), 71 deletions(-) diff --git a/src/Archive.hh b/src/Archive.hh index 58d26c8..8c3b70e 100644 --- a/src/Archive.hh +++ b/src/Archive.hh @@ -18,15 +18,21 @@ #define A_BUFSIZE (1024 * 1024) #define A_BLOCKSIZE ((size_t) 8192) +/* + * libarchive for iRODS + */ class Archive { struct Data { - rsComm_t *rsComm; - const char *name; - const char *resource; - int index; - char buf[A_BUFSIZE]; + rsComm_t *rsComm; /* iRODS context */ + const char *name; /* name of file to open */ + const char *resource; /* resource to create the file on */ + int index; /* file index */ + char buf[A_BUFSIZE]; /* buffer for reading */ }; + /* + * archive constructor + */ Archive(struct archive *archive, Data *data, bool creating, json_t *list, size_t dataSize, std::string &path, std::string &collection, const char *resc, std::string indexString) : @@ -52,6 +58,10 @@ public: struct archive *a; Data *data; + /* + * Create archive, determine format and compression mode based on + * the name: archive.tar, archive.zip, archive.tar.gz + */ a = archive_write_new(); if (a == NULL) { return NULL; @@ -72,6 +82,9 @@ public: return NULL; } + /* + * archive was created, call the constructor + */ return new Archive(a, data, true, json_array(), 0, path, collection, resc, ""); } @@ -90,6 +103,9 @@ public: std::string origin; Archive *archive; + /* + * open any archive + */ a = archive_read_new(); if (a == NULL) { return NULL; @@ -107,13 +123,18 @@ public: return NULL; } - // read INDEX.json + /* + * the archive must have INDEX.json as its first entry + */ if (strcmp(archive_entry_pathname(entry), "INDEX.json") != 0) { delete data; archive_read_free(a); return NULL; } + /* + * retrieve and load INDEX.json + */ size = 
(size_t) archive_entry_size(entry); buf = new char[size + 1]; buf[size] = '\0'; @@ -125,19 +146,27 @@ public: return NULL; } - // get list of items from json + /* + * obtain list of items from INDEX.json + */ origin = json_string_value(json_object_get(json, "collection")); size = (size_t) json_integer_value(json_object_get(json, "size")); list = json_object_get(json, "items"); json_incref(list); json_decref(json); + /* + * safe to call the constructor + */ archive = new Archive(a, data, false, list, size, path, origin, resc, buf); delete buf; return archive; } + /* + * destruct archive, cleaning up if needed + */ ~Archive() { if (archive != NULL) { if (creating) { @@ -151,6 +180,10 @@ public: delete data; } + /* + * Add a DataObj to an archive. It will be added to the index at first, + * the actual archive will be created when construct() is called. + */ void addDataObj(std::string name, size_t size, time_t created, time_t modified, std::string owner, std::string zone, std::string checksum, json_t *attributes) { @@ -178,6 +211,10 @@ public: } } + /* + * Add a collection to an archive. It will be added to the index at first, + * the actual archive will be created when construct() is called. 
+ */ void addColl(std::string name, time_t created, time_t modified, std::string owner, std::string zone, json_t *attributes) { json_t *json; @@ -195,63 +232,18 @@ public: json_array_append_new(list, json); } - std::string indexItems() { - return indexString; - } - - size_t size() { - return dataSize; - } - - json_t *nextItem() { - // get next item (potentially skipping current) from archive - if (archive_read_next_header(archive, &entry) == ARCHIVE_OK) { - return json_array_get(list, index++); - } else { - return NULL; - } - } - - int extractItem(std::string filename) { - // extract current object - if (archive_entry_filetype(entry) == AE_IFDIR) { - collInp_t collCreateInp; - - memset(&collCreateInp, '\0', sizeof(collInp_t)); - rstrcpy(collCreateInp.collName, filename.c_str(), MAX_NAME_LEN); - return rsCollCreate(data->rsComm, &collCreateInp); - } else { - char buf[A_BUFSIZE]; - int fd, status; - la_ssize_t len; - - fd = _creat(data->rsComm, filename.c_str(), data->resource); - if (fd < 0) { - return fd; - } - while ((len=archive_read_data(archive, buf, sizeof(buf))) > 0) { - status = _write(data->rsComm, fd, buf, (size_t) len); - if (status < 0) { - _close(data->rsComm, fd); - return status; - } - } - if (len < 0) { - _close(data->rsComm, fd); - return SYS_TAR_EXTRACT_ALL_ERR; - } - return _close(data->rsComm, fd); - } - - return 0; - } - + /* + * construct an archive from the index, return status + */ int construct() { if (creating) { json_t *json; char *str; la_ssize_t len; + /* + * first entry, INDEX.json + */ json = json_object(); json_object_set_new(json, "collection", json_string(origin.c_str())); @@ -274,6 +266,9 @@ public: } free(str); + /* + * now add the DataObjs and collections + */ for (index = 0; index < json_array_size(list); index++) { const char *filename; int fd; @@ -288,12 +283,18 @@ public: archive_entry_set_mtime(entry, mtime, 0); if (strcmp(json_string_value(json_object_get(json, "type")), "coll") == 0) { + /* + * collection + */ 
archive_entry_set_filetype(entry, AE_IFDIR); archive_entry_set_perm(entry, 0750); if (archive_write_header(archive, entry) < 0) { return SYS_TAR_APPEND_ERR; } } else { + /* + * DataObj + */ archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_perm(entry, 0600); size = (size_t) json_integer_value(json_object_get(json, @@ -329,9 +330,80 @@ public: return 0; } + /* + * return INDEX.json as a string + */ + std::string indexItems() { + return indexString; + } + + /* + * return size in blocks of items once extracted + */ + size_t size() { + return dataSize; + } + + /* + * get metadata of next item (potentially skipping current) from archive + */ + json_t *nextItem() { + if (archive_read_next_header(archive, &entry) == ARCHIVE_OK) { + return json_array_get(list, index++); + } else { + return NULL; + } + } + + /* + * extract current item under the given filename + */ + int extractItem(std::string filename) { + if (archive_entry_filetype(entry) == AE_IFDIR) { + collInp_t collCreateInp; + + /* + * collection + */ + memset(&collCreateInp, '\0', sizeof(collInp_t)); + rstrcpy(collCreateInp.collName, filename.c_str(), MAX_NAME_LEN); + addKeyVal(&collCreateInp.condInput, RECURSIVE_OPR__KW, ""); + return rsCollCreate(data->rsComm, &collCreateInp); + } else { + char buf[A_BUFSIZE]; + int fd, status; + la_ssize_t len; + + /* + * DataObj + */ + fd = _creat(data->rsComm, filename.c_str(), data->resource); + if (fd < 0) { + return fd; + } + while ((len=archive_read_data(archive, buf, sizeof(buf))) > 0) { + status = _write(data->rsComm, fd, buf, (size_t) len); + if (status < 0) { + _close(data->rsComm, fd); + return status; + } + } + if (len < 0) { + _close(data->rsComm, fd); + return SYS_TAR_EXTRACT_ALL_ERR; + } + return _close(data->rsComm, fd); + } + + return 0; + } + private: - static int _creat(rsComm_t *rsComm, const char *name, - const char *resource) { + /* + * create an iRODS DataObj + */ + static int _creat(rsComm_t *rsComm, const char *name, const char *resource) + 
{ dataObjInp_t input; memset(&input, '\0', sizeof(dataObjInp_t)); @@ -344,6 +416,9 @@ private: return rsDataObjCreate(rsComm, &input); } + /* + * open an iRODS DataObj + */ static int _open(rsComm_t *rsComm, const char *name) { dataObjInp_t input; @@ -353,6 +428,9 @@ private: return rsDataObjOpen(rsComm, &input); } + /* + * read an iRODS DataObj + */ static int _read(rsComm_t *rsComm, int index, void *buf, size_t len) { openedDataObjInp_t input; bytesBuf_t rbuf; @@ -365,6 +443,9 @@ private: return rsDataObjRead(rsComm, &input, &rbuf); } + /* + * write to an iRODS DataObj + */ static int _write(rsComm_t *rsComm, int index, const void *buf, size_t len) { openedDataObjInp_t input; @@ -378,6 +459,9 @@ private: return rsDataObjWrite(rsComm, &input, &wbuf); } + /* + * close an iRODS DataObj + */ static int _close(rsComm_t *rsComm, int index) { openedDataObjInp_t input; @@ -386,6 +470,9 @@ private: return rsDataObjClose(rsComm, &input); } + /* + * libarchive wrapper for _creat() + */ static int a_creat(struct archive *a, void *data) { Data *d; @@ -394,6 +481,9 @@ private: return (d->index >= 0) ? ARCHIVE_OK : ARCHIVE_FATAL; } + /* + * libarchive wrapper for _open() + */ static int a_open(struct archive *a, void *data) { Data *d; @@ -402,6 +492,9 @@ private: return (d->index >= 0) ? ARCHIVE_OK : ARCHIVE_FATAL; } + /* + * libarchive wrapper for _read() + */ static la_ssize_t a_read(struct archive *a, void *data, const void **buf) { Data *d; la_ssize_t status; @@ -416,8 +509,11 @@ private: } } + /* + * libarchive wrapper for _write() + */ static la_ssize_t a_write(struct archive *a, void *data, const void *buf, - size_t size) { + size_t size) { Data *d; la_ssize_t status; @@ -430,6 +526,9 @@ private: } } + /* + * libarchive wrapper for _close() + */ static int a_close(struct archive *a, void *data) { Data *d; @@ -437,14 +536,14 @@ private: return (d->index < 0 || _close(d->rsComm, d->index) < 0) ? 
-1 : 0; } - struct archive *archive; // libarchive reference - struct archive_entry *entry;// archive entry - Data *data; - bool creating; // new archive? - json_t *list; // list of items - size_t index; // item index - size_t dataSize; // total size of archived data objects - std::string path; // path of archive - std::string origin; // original collection - std::string indexString; + struct archive *archive; /* libarchive reference */ + struct archive_entry *entry; /* archive entry */ + Data *data; /* context data */ + bool creating; /* new archive? */ + json_t *list; /* list of items */ + size_t index; /* index of current item */ + size_t dataSize; /* total size of archived DataObjs */ + std::string path; /* path of archive */ + std::string origin; /* original collection */ + std::string indexString; /* index as a string */ }; -- GitLab