From 4fff8ef2f348aaf8bb3b1ba3a18064c13dc7cf28 Mon Sep 17 00:00:00 2001
From: "Felix A. Croes" <felix@dworkin.nl>
Date: Mon, 20 Sep 2021 16:47:04 +0200
Subject: [PATCH] Document Archive.hh.

---
 src/Archive.hh | 241 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 170 insertions(+), 71 deletions(-)

diff --git a/src/Archive.hh b/src/Archive.hh
index 58d26c8..8c3b70e 100644
--- a/src/Archive.hh
+++ b/src/Archive.hh
@@ -18,15 +18,21 @@
 #define A_BUFSIZE       (1024 * 1024)
 #define A_BLOCKSIZE     ((size_t) 8192)
 
+/*
+ * libarchive for iRODS
+ */
 class Archive {
     struct Data {
-        rsComm_t *rsComm;
-        const char *name;
-        const char *resource;
-        int index;
-        char buf[A_BUFSIZE];
+        rsComm_t *rsComm;       /* iRODS context */
+        const char *name;       /* name of file to open */
+        const char *resource;   /* resource to create the file on */
+        int index;              /* file index */
+        char buf[A_BUFSIZE];    /* buffer for reading */
     };
 
+    /*
+     * archive constructor
+     */
     Archive(struct archive *archive, Data *data, bool creating, json_t *list,
             size_t dataSize, std::string &path, std::string &collection,
             const char *resc, std::string indexString) :
@@ -52,6 +58,10 @@ public:
         struct archive *a;
         Data *data;
 
+        /*
+         * Create archive, determine format and compression mode based on
+         * the name: archive.tar, archive.zip, archive.tar.gz
+         */
         a = archive_write_new();
         if (a == NULL) {
             return NULL;
@@ -72,6 +82,9 @@ public:
             return NULL;
         }
 
+        /*
+         * archive was created, call the constructor
+         */
         return new Archive(a, data, true, json_array(), 0, path, collection,
                            resc, "");
     }
@@ -90,6 +103,9 @@ public:
         std::string origin;
         Archive *archive;
 
+        /*
+         * open any archive
+         */
         a = archive_read_new();
         if (a == NULL) {
             return NULL;
@@ -107,13 +123,18 @@ public:
             return NULL;
         }
 
-        // read INDEX.json
+        /*
+         * the archive must have INDEX.json as its first entry
+         */
         if (strcmp(archive_entry_pathname(entry), "INDEX.json") != 0) {
             delete data;
             archive_read_free(a);
             return NULL;
         }
 
+        /*
+         * retrieve and load INDEX.json
+         */
         size = (size_t) archive_entry_size(entry);
         buf = new char[size + 1];
         buf[size] = '\0';
@@ -125,19 +146,27 @@ public:
             return NULL;
         }
 
-        // get list of items from json
+        /*
+         * obtain list of items from INDEX.json
+         */
         origin = json_string_value(json_object_get(json, "collection"));
         size = (size_t) json_integer_value(json_object_get(json, "size"));
         list = json_object_get(json, "items");
         json_incref(list);
         json_decref(json);
 
+        /*
+         * safe to call the constructor
+         */
         archive = new Archive(a, data, false, list, size, path, origin, resc,
                               buf);
         delete buf;
         return archive;
     }
 
+    /*
+     * destruct archive, cleaning up if needed
+     */
     ~Archive() {
         if (archive != NULL) {
             if (creating) {
@@ -151,6 +180,10 @@ public:
         delete data;
     }
 
+    /*
+     * Add a DataObj to an archive.  It is only added to the index at first;
+     * the actual archive will be created when construct() is called.
+     */
     void addDataObj(std::string name, size_t size, time_t created,
                     time_t modified, std::string owner, std::string zone,
                     std::string checksum, json_t *attributes) {
@@ -178,6 +211,10 @@ public:
         }
     }
 
+    /*
+     * Add a collection to an archive.  It is only added to the index at first;
+     * the actual archive will be created when construct() is called.
+     */
     void addColl(std::string name, time_t created, time_t modified,
                  std::string owner, std::string zone, json_t *attributes) {
         json_t *json;
@@ -195,63 +232,18 @@ public:
         json_array_append_new(list, json);
     }
 
-    std::string indexItems() {
-        return indexString;
-    }
-
-    size_t size() {
-        return dataSize;
-    }
-
-    json_t *nextItem() {
-        // get next item (potentially skipping current) from archive
-        if (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
-            return json_array_get(list, index++);
-        } else {
-            return NULL;
-        }
-    }
-
-    int extractItem(std::string filename) {
-        // extract current object
-        if (archive_entry_filetype(entry) == AE_IFDIR) {
-            collInp_t collCreateInp;
-
-            memset(&collCreateInp, '\0', sizeof(collInp_t));
-            rstrcpy(collCreateInp.collName, filename.c_str(), MAX_NAME_LEN);
-            return rsCollCreate(data->rsComm, &collCreateInp);
-        } else {
-            char buf[A_BUFSIZE];
-            int fd, status;
-            la_ssize_t len;
-
-            fd = _creat(data->rsComm, filename.c_str(), data->resource);
-            if (fd < 0) {
-                return fd;
-            }
-            while ((len=archive_read_data(archive, buf, sizeof(buf))) > 0) {
-                status = _write(data->rsComm, fd, buf, (size_t) len);
-                if (status < 0) {
-                    _close(data->rsComm, fd);
-                    return status;
-                }
-            }
-            if (len < 0) {
-                _close(data->rsComm, fd);
-                return SYS_TAR_EXTRACT_ALL_ERR;
-            }
-            return _close(data->rsComm, fd);
-        }
-
-        return 0;
-    }
-
+    /*
+     * construct an archive from the index, return status
+     */
     int construct() {
         if (creating) {
             json_t *json;
             char *str;
             la_ssize_t len;
 
+            /*
+             * first entry, INDEX.json
+             */
             json = json_object();
             json_object_set_new(json, "collection",
                                 json_string(origin.c_str()));
@@ -274,6 +266,9 @@ public:
             }
             free(str);
 
+            /*
+             * now add the DataObjs and collections
+             */
             for (index = 0; index < json_array_size(list); index++) {
                 const char *filename;
                 int fd;
@@ -288,12 +283,18 @@ public:
                 archive_entry_set_mtime(entry, mtime, 0);
                 if (strcmp(json_string_value(json_object_get(json, "type")),
                            "coll") == 0) {
+                    /*
+                     * collection
+                     */
                     archive_entry_set_filetype(entry, AE_IFDIR);
                     archive_entry_set_perm(entry, 0750);
                     if (archive_write_header(archive, entry) < 0) {
                         return SYS_TAR_APPEND_ERR;
                     }
                 } else {
+                    /*
+                     * DataObj
+                     */
                     archive_entry_set_filetype(entry, AE_IFREG);
                     archive_entry_set_perm(entry, 0600);
                     size = (size_t) json_integer_value(json_object_get(json,
@@ -329,9 +330,80 @@ public:
         return 0;
     }
 
+    /*
+     * return INDEX.json as a string
+     */
+    std::string indexItems() {
+        return indexString;
+    }
+
+    /*
+     * return size in blocks of items once extracted
+     */
+    size_t size() {
+        return dataSize;
+    }
+
+    /*
+     * get metadata of next item (potentially skipping current) from archive
+     */
+    json_t *nextItem() {
+        if (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
+            return json_array_get(list, index++);
+        } else {
+            return NULL;
+        }
+    }
+
+    /*
+     * extract current item under the given filename
+     */
+    int extractItem(std::string filename) {
+        if (archive_entry_filetype(entry) == AE_IFDIR) {
+            collInp_t collCreateInp;
+
+            /*
+             * collection
+             */
+            memset(&collCreateInp, '\0', sizeof(collInp_t));
+            rstrcpy(collCreateInp.collName, filename.c_str(), MAX_NAME_LEN);
+            addKeyVal(&collCreateInp.condInput, RECURSIVE_OPR__KW, "");
+            return rsCollCreate(data->rsComm, &collCreateInp);
+        } else {
+            char buf[A_BUFSIZE];
+            int fd, status;
+            la_ssize_t len;
+
+            /*
+             * DataObj
+             */
+            fd = _creat(data->rsComm, filename.c_str(), data->resource);
+            if (fd < 0) {
+                return fd;
+            }
+            while ((len=archive_read_data(archive, buf, sizeof(buf))) > 0) {
+                status = _write(data->rsComm, fd, buf, (size_t) len);
+                if (status < 0) {
+                    _close(data->rsComm, fd);
+                    return status;
+                }
+            }
+            if (len < 0) {
+                _close(data->rsComm, fd);
+                return SYS_TAR_EXTRACT_ALL_ERR;
+            }
+            return _close(data->rsComm, fd);
+        }
+
+        return 0;
+    }
+
 private:
-    static int _creat(rsComm_t *rsComm, const char *name,
-                      const char *resource) {
+    /*
+     * create an iRODS DataObj
+     */
+    static int _creat(rsComm_t *rsComm, const char *name, const char *resource)
+    {
         dataObjInp_t input;
 
         memset(&input, '\0', sizeof(dataObjInp_t));
@@ -344,6 +416,9 @@ private:
         return rsDataObjCreate(rsComm, &input);
     }
 
+    /*
+     * open an iRODS DataObj
+     */
     static int _open(rsComm_t *rsComm, const char *name) {
         dataObjInp_t input;
 
@@ -353,6 +428,9 @@ private:
         return rsDataObjOpen(rsComm, &input);
     }
 
+    /*
+     * read an iRODS DataObj
+     */
     static int _read(rsComm_t *rsComm, int index, void *buf, size_t len) {
         openedDataObjInp_t input;
         bytesBuf_t rbuf;
@@ -365,6 +443,9 @@ private:
         return rsDataObjRead(rsComm, &input, &rbuf);
     }
 
+    /*
+     * write to an iRODS DataObj
+     */
     static int _write(rsComm_t *rsComm, int index, const void *buf, size_t len)
     {
         openedDataObjInp_t input;
@@ -378,6 +459,9 @@ private:
         return rsDataObjWrite(rsComm, &input, &wbuf);
     }
 
+    /*
+     * close an iRODS DataObj
+     */
     static int _close(rsComm_t *rsComm, int index) {
         openedDataObjInp_t input;
 
@@ -386,6 +470,9 @@ private:
         return rsDataObjClose(rsComm, &input);
     }
 
+    /*
+     * libarchive wrapper for _creat()
+     */
     static int a_creat(struct archive *a, void *data) {
         Data *d;
 
@@ -394,6 +481,9 @@ private:
         return (d->index >= 0) ? ARCHIVE_OK : ARCHIVE_FATAL;
     }
 
+    /*
+     * libarchive wrapper for _open()
+     */
     static int a_open(struct archive *a, void *data) {
         Data *d;
 
@@ -402,6 +492,9 @@ private:
         return (d->index >= 0) ? ARCHIVE_OK : ARCHIVE_FATAL;
     }
 
+    /*
+     * libarchive wrapper for _read()
+     */
     static la_ssize_t a_read(struct archive *a, void *data, const void **buf) {
         Data *d;
         la_ssize_t status;
@@ -416,8 +509,11 @@ private:
         }
     }
 
+    /*
+     * libarchive wrapper for _write()
+     */
     static la_ssize_t a_write(struct archive *a, void *data, const void *buf,
-                                     size_t size) {
+                              size_t size) {
         Data *d;
         la_ssize_t status;
 
@@ -430,6 +526,9 @@ private:
         }
     }
 
+    /*
+     * libarchive wrapper for _close()
+     */
     static int a_close(struct archive *a, void *data) {
         Data *d;
 
@@ -437,14 +536,14 @@ private:
         return (d->index < 0 || _close(d->rsComm, d->index) < 0) ? -1 : 0;
     }
 
-    struct archive *archive;    // libarchive reference
-    struct archive_entry *entry;// archive entry
-    Data *data;
-    bool creating;              // new archive?
-    json_t *list;               // list of items
-    size_t index;               // item index
-    size_t dataSize;            // total size of archived data objects
-    std::string path;           // path of archive
-    std::string origin;         // original collection
-    std::string indexString;
+    struct archive *archive;            /* libarchive reference */
+    struct archive_entry *entry;        /* archive entry */
+    Data *data;                         /* context data */
+    bool creating;                      /* new archive? */
+    json_t *list;                       /* list of items */
+    size_t index;                       /* index of current item */
+    size_t dataSize;                    /* total size of archived DataObjs */
+    std::string path;                   /* path of archive */
+    std::string origin;                 /* original collection */
+    std::string indexString;            /* index as a string */
 };
-- 
GitLab