Skip to content
Snippets Groups Projects
Commit fd39af12 authored by Jasper Koehorst
Browse files

error handling improved

parent 210c88d7
Branches
No related tags found
No related merge requests found
......@@ -65,7 +65,8 @@ while True:
for index, identifier in enumerate(record['IdList']):
folder = make_folder(identifier)
path = folder + "/" + identifier + ".xml"
if os.path.exists(path):
continue
if not os.path.exists(folder):
os.makedirs(folder)
identifiers.add(int(identifier))
......@@ -76,5 +77,5 @@ while True:
results = Parallel(n_jobs=num_cores)(delayed(processInput)(i) for i in identifiers)
# Stop when finished
sys.exit(0)
except TerminatedWorkerError as e:
except Exception as e:
pass
......@@ -5,10 +5,6 @@ import re
import xmltodict
import xlsxwriter
# GLOBAL PARAMETERS
MINIMUM_NUMBER_OF_SPOTS = 10000
def myprint(d):
"""
Prints key, value for a nested dictionary
......@@ -43,6 +39,9 @@ def machine_filter(doc):
:return:
"""
if 'EXPERIMENT_PACKAGE' not in doc['EXPERIMENT_PACKAGE_SET']: return False
if 'ILLUMINA' not in doc['EXPERIMENT_PACKAGE_SET']['EXPERIMENT_PACKAGE']['EXPERIMENT']['PLATFORM'].keys():
print('Platform is not illumina')
return False
......@@ -159,7 +158,8 @@ def setup_study(content):
study['StudyIdentifier'] = study_object['@accession']
study['alias'] = study_object['@alias']
study['StudyTitle'] = study_object['DESCRIPTOR']['STUDY_TITLE']
study['StudyDescription'] = study_object['DESCRIPTOR']['STUDY_ABSTRACT']
if 'STUDY_ABSTRACT' in study_object['DESCRIPTOR']:
study['StudyDescription'] = study_object['DESCRIPTOR']['STUDY_ABSTRACT']
if 'STUDY_TYPE' in study_object['DESCRIPTOR']:
study['type'] = study_object['DESCRIPTOR']['STUDY_TYPE']['@existing_study_type']
......@@ -229,10 +229,11 @@ def setup_observation_unit(content):
def setup_assay(content):
assay = {}
assay['SequencingCenter'] = 'Unknown'
assay['SequencingDate'] = 'Unknown'
assay['IsolationProtocol'] = 'Unknown'
assay['Facility'] = 'Unknown'
assay['Method'] = 'Unknown'
assay['Date'] = 'Unknown'
assay['target_subfragment'] = ""
run = content['RUN_SET']['RUN']
assay['AssayIdentifier'] = run['@accession']
assay['alias'] = run['@alias']
......@@ -243,7 +244,7 @@ def setup_assay(content):
assay['published'] = run['@published']
assay['SampleIdentifier'] = content['SAMPLE']['@accession']
assay['AssayTitle'] = 'Automatic assay title from SRA ' + assay['AssayIdentifier']
assay['AssayName'] = 'Automatic assay title from SRA ' + assay['AssayIdentifier']
assay['AssayDescription'] = 'Automatic assay description from SRA ' + assay['AssayIdentifier']
# Files
sra_file = run['SRAFiles']['SRAFile']
......@@ -308,7 +309,7 @@ def create_xlsx(pickle_list):
assay_keys = set()
# Create a workbook and add a worksheet.
workbook = xlsxwriter.Workbook('sra.xlsx')
workbook = xlsxwriter.Workbook(EXCEL_FILE)
project_worksheet = workbook.add_worksheet(name="Project")
investigation_worksheet = workbook.add_worksheet(name="Investigation")
study_worksheet = workbook.add_worksheet(name="Study")
......@@ -518,6 +519,7 @@ def lookup_creation(doc, pickle_file):
# if content['RUN_SET']['RUN']['Statistics']['@nreads'] != "2":
# return None
project = setup_project(content)
investigation = setup_investigation(content)
study = setup_study(content)
......@@ -535,22 +537,26 @@ def lookup_creation(doc, pickle_file):
pickle.dump(assay, pickle_file)
pickle_file.close()
def selection(content):
    """
    Selection function based on criteria set by the developer.

    Only substring/membership tests (``in``) are applied to the input.

    :param content: text of one record to inspect (the caller passes the
        full contents of an XML file as a string)
    :return: True when the record should be processed, False otherwise
    """
    if "ERP119217" in content:
        print("Match detected")
    # The empty string is a substring of every str, so for text input this
    # branch always fires: filtering is switched off and every record is
    # accepted.
    if "" in content:
        print("Filter disabled")
        return True
    # Example filters kept for reference; uncomment to re-enable one.
    # if "ERP119217" in content:
    #     print("Match detected")
    #     return True
    # if "PRJNA527973" in content or "PRJNA517152" in content:
    #     return True
    # if "soil" not in content:
    #     return False
    # Only reachable when ``content`` is not a str (e.g. a container that
    # holds no empty-string element).
    print("Did not pass the selection filter")
    return False
......@@ -564,22 +570,21 @@ def main():
for (dirpath, dirnames, filenames) in os.walk(path):
listOfFiles = set([os.path.join(dirpath, file) for file in filenames])
for index, elem in enumerate(listOfFiles):
if not elem.endswith("xml"): continue
for elem in listOfFiles:
if not elem.endswith("xml"):
print("element not xml")
continue
with open(elem) as fd:
content = fd.read()
if selection(content):
doc = xmltodict.parse(content)
status = machine_filter(doc)
if not status:
if not machine_filter(doc):
print("Not passing machine filter", elem)
continue
status = paired_check(doc)
if not status:
if not paired_check(doc):
print("Not passing paired check", elem)
continue
# Create lookup file
......@@ -589,14 +594,16 @@ def main():
os.remove(pickle_file)
if not os.path.exists(pickle_file):
# print("Creating pickle")
print("Creating pickle")
lookup_creation(doc, pickle_file)
else:
pass
pickle_list.add(pickle_file)
# Create EXCEL file
create_xlsx(pickle_list)
# GLOBAL PARAMETERS
# MINIMUM_NUMBER_OF_SPOTS = 10000
# Path of the workbook written by create_xlsx(). Kept at module level rather
# than inside the __main__ guard so the name is also defined when this module
# is imported instead of being run as a script.
EXCEL_FILE = 'sra.xlsx'

if __name__ == '__main__':
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment