From a8dd1530b08624e28d1e34a666a723ebb740507f Mon Sep 17 00:00:00 2001 From: Sven Warris <sven@ubuntu> Date: Thu, 15 Jun 2023 00:54:39 +0200 Subject: [PATCH] Full processing of F500 ISA JSON to FAIRDOM --- f500/collecting/F500.py | 2 + f500/collecting/Fairdom.py | 100 +++++++++++++++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 5 deletions(-) diff --git a/f500/collecting/F500.py b/f500/collecting/F500.py index e98fc91..bf95228 100644 --- a/f500/collecting/F500.py +++ b/f500/collecting/F500.py @@ -133,6 +133,8 @@ class F500: help='Formal name of the organism, for example "Solanum tuberosum"') my_parser_upload.add_argument('-p', '--project', required = True, help='Project ID in the FAIRDOM-seek platform') + my_parser_upload.add_argument('-s', '--sample_type', required = True, + help='Sample_type ID in the FAIRDOM-seek platform') # Execute the parse_args() method diff --git a/f500/collecting/Fairdom.py b/f500/collecting/Fairdom.py index 6c4512b..c4456db 100644 --- a/f500/collecting/Fairdom.py +++ b/f500/collecting/Fairdom.py @@ -62,14 +62,61 @@ class Fairdom: assayJSON['data']['relationships'] = {} assayJSON['data']['relationships']['study'] = {} assayJSON['data']['relationships']['study']['data'] = {'id' : studyID, 'type' : 'studies'} + assayJSON['data']['relationships']['organisms'] = {} + assayJSON['data']['relationships']['organisms']['data'] = [{'id' : str(self.args.organism), 'type' : 'organisms'}] return assayJSON + def createDataFileJSON(self, data_file): + data_fileJSON = {} + data_fileJSON['data'] = {} + data_fileJSON['data']['type'] = 'data_files' + data_fileJSON['data']['attributes'] = {} + data_fileJSON['data']['attributes']['title'] = data_file.filename + data_fileJSON['data']['attributes']['content_blobs'] = [{'url': 'https://www.wur.nl/upload/854757ab-168f-46d7-b415-f8b501eebaa5_WUR_RGB_standard_2021-site.svg', + 'original_filename': data_file.filename, + 'content-type': 'image/svg+xml'}] + data_fileJSON['data']['relationships'] = {} + data_fileJSON['data']['relationships']['projects'] = {} + data_fileJSON['data']['relationships']['projects']['data'] = [{'id' : self.args.project , 'type' : 'projects'}] + return data_fileJSON + + def addSampleToAssayJSON(self, sampleID, assayJSON): + if 'samples' not in assayJSON['data']['relationships']: + assayJSON['data']['relationships']['samples'] = {} + assayJSON['data']['relationships']['samples']['data'] = [] + assayJSON['data']['relationships']['samples']['data'].append({'id': sampleID, 'type': 'samples'}) + + def addDataFileToAssayJSON(self, data_fileID, assayJSON): + if 'data_files' not in assayJSON['data']['relationships']: + assayJSON['data']['relationships']['data_files'] = {} + assayJSON['data']['relationships']['data_files']['data'] = [] + assayJSON['data']['relationships']['data_files']['data'].append({'id': data_fileID, 'type': 'data_files'}) + + def addDataFilesToSampleJSON(self, assayJSON, sampleJSON): + if 'data_files' not in sampleJSON['data']['relationships']: + sampleJSON['data']['relationships']['data_files'] = [] + sampleJSON['data']['relationships']['data_files'].extend(assayJSON['data']['relationships']['data_files']) + + + def createSampleJSON(self, sample): + sampleJSON = {} + sampleJSON['data'] = {} + sampleJSON['data']['type'] = 'samples' + sampleJSON['data']['attributes'] = {} + sampleJSON['data']['attributes']['title'] = sample.name + sampleJSON['data']['attributes']['attribute_map'] = {'PotID' : sample.name} + sampleJSON['data']['relationships'] = {} + sampleJSON['data']['relationships']['projects'] = {} + sampleJSON['data']['relationships']['projects']['data'] = [{'id' : self.args.project, 'type' : 'projects'}] + sampleJSON['data']['relationships']['sample_type'] = {} + sampleJSON['data']['relationships']['sample_type']['data'] = {'id' : self.args.sample_type, 'type' : 'sample_types'} + return sampleJSON + def upload(self): # create investigation investigationJSON = self.createInvestigationJSON() self.logger.info("Creating investigation in FAIRDOM at {}".format(self.args.URL)) r = self.session.post(self.args.URL + '/investigations', json=investigationJSON) - r.raise_for_status() if r.status_code == 201 or r.status_code == 200: investigationID = r.json()['data']['id'] self.logger.info("Investigation id {} created. Status: {}".format(investigationID, r.status_code)) @@ -78,9 +125,10 @@ class Fairdom: exit(1) for study in self.investigation.studies: + self.currentAssays = {} + self.samples = {} studyJSON = self.createStudyJSON(study, investigationID) r = self.session.post(self.args.URL + '/studies', json=studyJSON) - r.raise_for_status() if r.status_code == 201 or r.status_code == 200: studyID = r.json()['data']['id'] self.logger.info("Study id {} created. Status: {}".format(studyID, r.status_code)) @@ -88,10 +136,52 @@ class Fairdom: self.logger.error("Could not create new study, error code {}".format(r.status_code)) exit(1) for assay in study.assays: - assayJSON = self.createAssayJSON(assay, studyID) - self.logger.info(assayJSON) + self.datafiles = {} + # Assays have none-unique names for now + assay.filename = assay.filename.split("T")[0] # only per day for now + if assay.filename not in self.currentAssays: + self.currentAssays[assay.filename] = self.createAssayJSON(assay, studyID) + assayJSON = self.currentAssays[assay.filename] + # create add data files + for data_file in assay.data_files: + if data_file.filename not in self.datafiles: + self.datafiles[data_file.filename] = self.createDataFileJSON(data_file) + r = self.session.post(self.args.URL + '/data_files', json=self.datafiles[data_file.filename]) + if r.status_code == 201 or r.status_code == 200: + data_fileID = r.json()['data']['id'] + self.logger.info("Data file id {} created ({}). Status: {}".format(data_fileID, data_file.filename, r.status_code)) + else: + self.logger.error("Could not create new data file, error code {}".format(r.status_code)) + exit(1) + self.datafiles[data_file.filename]['id'] = data_fileID + data_fileJSON = self.datafiles[data_file.filename] + self.addDataFileToAssayJSON(data_fileID, assayJSON) + + # create / register sample + for sample in assay.samples: + if sample.name not in self.samples: + self.samples[sample.name] = self.createSampleJSON(sample) + + sampleJSON = self.samples[sample.name] + self.addDataFilesToSampleJSON(assayJSON, sampleJSON) + for assay in study.assays: + for sample in assay.samples: + if 'id' not in self.samples[sample.name]: + r = self.session.post(self.args.URL + '/samples', json=self.samples[sample.name]) + if r.status_code == 201 or r.status_code == 200: + sampleID = r.json()['data']['id'] + self.samples[sample.name]['id'] = sampleID + self.logger.info("Sample id {} created ({}). Status: {}".format(sampleID, sample.name, r.status_code)) + else: + self.logger.error("Could not create new sample, error code {}".format(r.status_code)) + exit(1) + sampleID = self.samples[sample.name]['id'] + self.addSampleToAssayJSON(sampleID, self.currentAssays[assay.filename] ) + + for assay in self.currentAssays: + assayJSON = self.currentAssays[assay] + #self.logger.info(assayJSON) r = self.session.post(self.args.URL + '/assays', json=assayJSON) - r.raise_for_status() if r.status_code == 201 or r.status_code == 200: assayID = r.json()['data']['id'] self.logger.info("Assay id {} created. Status: {}".format(assayID, r.status_code)) -- GitLab