From a8dd1530b08624e28d1e34a666a723ebb740507f Mon Sep 17 00:00:00 2001
From: Sven Warris <sven@ubuntu>
Date: Thu, 15 Jun 2023 00:54:39 +0200
Subject: [PATCH] Full processing of F500 ISA JSON to FAIRDOM

---
 f500/collecting/F500.py    |   2 +
 f500/collecting/Fairdom.py | 100 +++++++++++++++++++++++++++++++++++--
 2 files changed, 97 insertions(+), 5 deletions(-)

diff --git a/f500/collecting/F500.py b/f500/collecting/F500.py
index e98fc91..bf95228 100644
--- a/f500/collecting/F500.py
+++ b/f500/collecting/F500.py
@@ -133,6 +133,8 @@ class F500:
                                help='Formal name of the organism, for example "Solanum tuberosum"')
         my_parser_upload.add_argument('-p', '--project', required = True,
                                help='Project ID in the FAIRDOM-seek platform')
+        my_parser_upload.add_argument('-s', '--sample_type', required = True,
+                               help='Sample_type ID in the FAIRDOM-seek platform')
 
              
         # Execute the parse_args() method
diff --git a/f500/collecting/Fairdom.py b/f500/collecting/Fairdom.py
index 6c4512b..c4456db 100644
--- a/f500/collecting/Fairdom.py
+++ b/f500/collecting/Fairdom.py
@@ -62,14 +62,61 @@ class Fairdom:
         assayJSON['data']['relationships'] = {}
         assayJSON['data']['relationships']['study'] = {}
         assayJSON['data']['relationships']['study']['data'] = {'id' : studyID, 'type' : 'studies'}
+        assayJSON['data']['relationships']['organisms'] = {}
+        assayJSON['data']['relationships']['organisms']['data'] = [{'id' : str(self.args.organism), 'type' : 'organisms'}]
         return assayJSON
     
+    def createDataFileJSON(self, data_file):
+        """Return the request body for a new 'data_files' entry titled data_file.filename, linked to self.args.project."""
+        # Every data file is registered with the same hard-coded blob URL; only the file name varies.
+        blob = {'url': 'https://www.wur.nl/upload/854757ab-168f-46d7-b415-f8b501eebaa5_WUR_RGB_standard_2021-site.svg',
+                'original_filename': data_file.filename,
+                'content-type': 'image/svg+xml'}
+        project_ref = {'id': self.args.project, 'type': 'projects'}
+        attributes = {'title': data_file.filename, 'content_blobs': [blob]}
+        return {'data': {
+            'type': 'data_files',
+            'attributes': attributes,
+            'relationships': {'projects': {'data': [project_ref]}},
+        }}
+    
+    def addSampleToAssayJSON(self, sampleID, assayJSON):
+        """Append a reference to sample sampleID to the assay's 'samples' relationship, creating the list on first use."""
+        relationships = assayJSON['data']['relationships']
+        sample_refs = relationships.setdefault('samples', {'data': []})
+        sample_refs['data'].append({'id': sampleID, 'type': 'samples'})
+
+    def addDataFileToAssayJSON(self, data_fileID, assayJSON):
+        """Register data file data_fileID under the assay's 'data_files' relationship (mutates assayJSON in place)."""
+        links = assayJSON['data']['relationships']
+        links.setdefault('data_files', {'data': []})
+        links['data_files']['data'].append({'id': data_fileID, 'type': 'data_files'})
+
+    def addDataFilesToSampleJSON(self, assayJSON, sampleJSON):
+        """Append the assay's data-file references (dicts with 'id'/'type') to the sample's 'data_files' relationship; an assay without data files adds nothing."""
+        assay_refs = assayJSON['data']['relationships'].get('data_files', {}).get('data', [])
+        sampleJSON['data']['relationships'].setdefault('data_files', {'data': []})['data'].extend(assay_refs)
+
+    
+    def createSampleJSON(self, sample):
+        """Return the request body for a new 'samples' entry named sample.name, typed by self.args.sample_type."""
+        # The sample name is used twice: as the title and as the 'PotID' attribute.
+        attributes = {'title': sample.name, 'attribute_map': {'PotID': sample.name}}
+        relationships = {
+            'projects': {'data': [{'id': self.args.project, 'type': 'projects'}]},
+            'sample_type': {'data': {'id': self.args.sample_type, 'type': 'sample_types'}},
+        }
+        return {'data': {
+            'type': 'samples',
+            'attributes': attributes,
+            'relationships': relationships,
+        }}
+    
     def upload(self):
         # create investigation
         investigationJSON = self.createInvestigationJSON()
         self.logger.info("Creating investigation in FAIRDOM at {}".format(self.args.URL))
         r = self.session.post(self.args.URL + '/investigations', json=investigationJSON)
-        r.raise_for_status()
         if r.status_code == 201 or r.status_code == 200:
             investigationID = r.json()['data']['id']
             self.logger.info("Investigation id {} created. Status: {}".format(investigationID, r.status_code))
@@ -78,9 +125,10 @@ class Fairdom:
             exit(1)
     
         for study in self.investigation.studies:
+            self.currentAssays = {}
+            self.samples = {}
             studyJSON = self.createStudyJSON(study, investigationID)
             r = self.session.post(self.args.URL + '/studies', json=studyJSON)
-            r.raise_for_status()
             if r.status_code == 201 or r.status_code == 200:
                 studyID = r.json()['data']['id']
                 self.logger.info("Study id {} created. Status: {}".format(studyID, r.status_code))
@@ -88,10 +136,52 @@ class Fairdom:
                 self.logger.error("Could not create new study, error code {}".format(r.status_code))
                 exit(1)
             for assay in study.assays:
-                assayJSON = self.createAssayJSON(assay, studyID)
-                self.logger.info(assayJSON)
+                self.datafiles = {}
+                # Assays have non-unique names for now
+                assay.filename = assay.filename.split("T")[0] # only per day for now
+                if assay.filename not in self.currentAssays:
+                    self.currentAssays[assay.filename] = self.createAssayJSON(assay, studyID)
+                assayJSON = self.currentAssays[assay.filename]
+                # create and add data files
+                for data_file in assay.data_files:
+                    if data_file.filename not in self.datafiles:
+                        self.datafiles[data_file.filename] = self.createDataFileJSON(data_file)
+                        r = self.session.post(self.args.URL + '/data_files', json=self.datafiles[data_file.filename])
+                        if r.status_code == 201 or r.status_code == 200:
+                            data_fileID = r.json()['data']['id']
+                            self.logger.info("Data file id {} created ({}). Status: {}".format(data_fileID, data_file.filename, r.status_code))
+                        else: 
+                            self.logger.error("Could not create new data file, error code {}".format(r.status_code))
+                            exit(1)
+                        self.datafiles[data_file.filename]['id'] = data_fileID
+                    # Read the id from the cache: data_fileID is only assigned when a file is newly posted, so it is stale for a repeated filename.
+                    self.addDataFileToAssayJSON(self.datafiles[data_file.filename]['id'], assayJSON)
+                        
+                # create / register sample
+                for sample in assay.samples:
+                    if sample.name not in self.samples:
+                        self.samples[sample.name] = self.createSampleJSON(sample)
+                    
+                    sampleJSON = self.samples[sample.name]
+                    self.addDataFilesToSampleJSON(assayJSON, sampleJSON)
+            for assay in study.assays:
+                for sample in assay.samples:
+                    if 'id' not in self.samples[sample.name]:                
+                        r = self.session.post(self.args.URL + '/samples', json=self.samples[sample.name])
+                        if r.status_code == 201 or r.status_code == 200:
+                            sampleID = r.json()['data']['id']
+                            self.samples[sample.name]['id'] = sampleID
+                            self.logger.info("Sample id {} created ({}). Status: {}".format(sampleID, sample.name, r.status_code))
+                        else: 
+                            self.logger.error("Could not create new sample, error code {}".format(r.status_code))
+                            exit(1)
+                    sampleID = self.samples[sample.name]['id']
+                    self.addSampleToAssayJSON(sampleID, self.currentAssays[assay.filename] )
+
+            for assay in self.currentAssays:
+                assayJSON = self.currentAssays[assay]
+                #self.logger.info(assayJSON)
                 r = self.session.post(self.args.URL + '/assays', json=assayJSON)
-                r.raise_for_status()
                 if r.status_code == 201 or r.status_code == 200:
                     assayID = r.json()['data']['id']
                     self.logger.info("Assay id {} created. Status: {}".format(assayID, r.status_code))
-- 
GitLab