Commit 86236acc authored by Hans van den Heuvel's avatar Hans van den Heuvel
Browse files

Fixed issues, implemented version numbering.

parent 01b7d82e
......@@ -3,4 +3,6 @@
# User-specific files
# Dirs
__pycache__/
\ No newline at end of file
__pycache__/
Output/
Build/
#!/usr/bin/python
__version_info__ = ('1', '0', '0')
__version__ = '.'.join(__version_info__)
#############################################################################
# Phase 0. Initialization
# Doing stuff like parsing arguments, and reading the files.
......@@ -12,8 +16,7 @@ import os
# Small utility to format a text as a Markdown hyperlink pointing to itself :-)
def print_as_link(text):
return '[{text}]({text})'.format(text=text)
return f'[{text}]({text})'
# These are the files we work with
# Create list
......@@ -22,7 +25,8 @@ dataset = dataconversion.DataSet(
+ ' Biometris, Wageningen University and Research.',
description='Converts the EFSA CAPEG database Excel sheet into MCRA '
+ 'effects and assessment groups.',
epilog='For example: use %(prog)s -v -x for a verbose example.')
epilog='For example: use %(prog)s -v -x for a verbose example.',
version = __version__)
#
#
dataset.add(
......@@ -75,14 +79,22 @@ dataset.init()
capeg = dataset.capeg.sheet
# First, we need to clean up the table
# Remove all CasNumbers with na
capeg.drop(capeg.loc[capeg['casNumber'] == 'na'].index, inplace=True)
# capeg.drop(capeg.loc[capeg['casNumber'] == 'na'].index, inplace=True)
# Change all CasNumbers na into 7440-50-8
capeg['casNumber'].replace('na', '7440-50-8', inplace=True)
# Remove all obscure CasNumbers
# capeg.drop(capeg.loc[capeg['casNumber'].str.contains('[\(\)/]', regex=True)].index, inplace=True)
# capeg.drop(capeg.loc[capeg['casNumber']
# .str.contains('[\(\)/]', regex=True)].index, inplace=True)
# Max length of strings (second argument)
max_len = slice(0,99)
# FIRST The effects table
# Add the fields for the effects table
capeg.mcra.addcolumn(
{'idEffect', 'Name', 'Description', 'Reference', 'targetL1'})
{'idEffect', 'Name', 'Description', 'Reference', 'targetL1', 'AcuteChronic'})
# Create extra column for proper CAS1 names
capeg['targetL1'] = capeg['target_CAG1'].str.split().str[0].str.strip()
capeg['targetL1'].replace('Bones', 'Skeleton', inplace=True)
......@@ -91,19 +103,23 @@ capeg['targetL1'].replace('Bone', 'Bone marrow', inplace=True)
# Create tempcopy
capeg2 = capeg.copy(deep=True)
# Fill the idEffectA and idEffectC, e.g. L1-Liver-Acute
capeg['AcuteChronic'] = 'Acute'
capeg['idEffect'] = 'L1-' + \
capeg['targetL1'].str.split().str[0].str.strip() + '-Acute'
capeg['targetL1'].str.split().str[0].str.strip() + '-' + \
capeg['AcuteChronic']
capeg2['AcuteChronic'] = 'Chronic'
capeg2['idEffect'] = 'L1-' + \
capeg2['targetL1'].str.split().str[0].str.strip() + '-Chronic'
capeg2['targetL1'].str.split().str[0].str.strip() + '-' + \
capeg2['AcuteChronic']
# Description
capeg['Description'] = 'Acute adverse effects on ' + \
capeg['Description'] = capeg['AcuteChronic'] + ' adverse effects on ' + \
capeg['targetL1'].str.lower() + '.'
capeg2['Description'] = 'Chronic adverse effects on ' + \
capeg2['Description'] = capeg2['AcuteChronic'] + ' adverse effects on ' + \
capeg2['targetL1'].str.lower() + '.'
# Combine the sheets, append the second after the first
capeg = capeg.append(capeg2, ignore_index=True)
# Set the name
capeg['Name'] = capeg['targetL1']
capeg['Name'] = capeg['targetL1'].str[max_len]
# Set the reference
capeg['Reference'] = ''
......@@ -128,11 +144,12 @@ capeg.mcra.addcolumn(['id', 'Name', 'Description', 'Reference'])
capeg['id'] = 'AG1-' + \
capeg['idEffect'].str.split('-').str[1:].str.join('-')
# Name
capeg['Name'] = 'CAG ' + capeg['targetL1'].str.lower()
capeg['Name'] = ('AG Level 1 ' + capeg['targetL1'].str.title() + \
' ' + capeg['AcuteChronic']).str[max_len]
# Description
capeg['Description'] = \
'Cummulative assesment group for adverse effects on ' + \
capeg['targetL1'].str.lower() + '.'
'Cummulative assesment group for ' + capeg['AcuteChronic'].str.lower() + \
' adverse effects on ' + capeg['targetL1'].str.lower() + '.'
# Reference
capeg['Reference'] = 'https://doi.org/10.2903/sp.efsa.2012.EN-269'
......@@ -154,7 +171,7 @@ capeg.mcra.addcolumn(['idSubstance', 'Name', 'Description'])
# ID
capeg['idSubstance'] = capeg['casNumber']
# Name
capeg['Name'] = capeg['chemicalName']
capeg['Name'] = capeg['chemicalName'].str[max_len]
# Done, now wrap this table up
substances_header = ['idSubstance', 'Name', 'Description',
......@@ -183,4 +200,4 @@ dataset.agm.sheet = capeg[agm_header].drop_duplicates()
dataset.agm.sheet.mcra.keepcolumn(agm_header)
# DONE
dataset.close(file_report=True)
dataset.close(file_report=True, version = __version__)
# Ignore everything in this directory
*
# Except this file
!.gitignore
\ No newline at end of file
!.gitignore
# The default Excel file
!capeg_20210706_13492613.xls
\ No newline at end of file
......@@ -15,6 +15,10 @@ import math
import sys
import textwrap
import getpass
import re
__version_info__ = ('0', '9', '0')
__version__ = '.'.join(__version_info__)
# For debugging purposes
# from objbrowser import browse
......@@ -251,10 +255,8 @@ class DataSheet:
report = ''
if self.direction == 'Input' or \
(self.direction == 'Output' and self.closed):
report += '* {dir} file: {file}\n'.format(
dir=self.direction, file=os.path.split(self.file.path)[1])
report += textwrap.indent(
'* [{path}]({path})\n'.format(path=self.file.path), PY_INDENT)
filename = os.path.split(self.file.path)[1]
report += f'* {self.direction} file: [{filename}]({filename})\n'
report += textwrap.indent(
'* {props}\n'.format(props=self.properties), PY_INDENT)
report += textwrap.indent(
......@@ -360,44 +362,61 @@ class DataSheet:
# # Save report
# with open(self.file.reportpath, 'w+') as f:
# f.write(self.report)
print('Output file: {file}; {props}'.format(
file=self.file.path,
props=self.properties))
if '-v' in sys.argv or '--verbosity' in sys.argv:
print(f'Output file: {self.file.path}; {self.properties}')
class DataSet:
def __init__(self, opening=None, description=None, epilog=None):
def __init__(self, opening=None, description=None,
epilog=None, version=False):
self.args = None
self.list = []
# Whether or not to create a zip file
self.zip = None
# The report for the entire dataset
self.report = ''
self.runtime = datetime.now().strftime('%H:%M:%S, %d %b %Y')
self.runcommand = ' '.join(sys.argv)
self.runarguments = ' '.join(sys.argv[1:])
self.scriptname = os.path.split(sys.argv[0])[1]
md5_hash = hashlib.md5()
with open(sys.argv[0], "rb") as f:
# Read and update hash in chunks of 4K
for byte_block in iter(lambda: f.read(4096), b""):
md5_hash.update(byte_block)
self.scripthash = md5_hash.hexdigest()
m=re.match('(.*)\-(?P<noun>.*)\.py', self.scriptname)
if m:
self.scriptnoun = m.group('noun')
else:
self.scriptnoun = self.scriptname.replace('.py','')
self.runuser = getpass.getuser()
self.parser = ArgumentParser(
description=description, epilog=epilog)
report = 'Output\\Report.md'
self.parser.add_argument(
'-r', '--report', nargs='?',
const='Output\\Report.md',
const=report,
default=report,
help='Creates a report file (default: %(const)s).')
# The verbosity argument will accept: -v, or -vv, -vvv etc.
# Set default to 1, so that basic output will always appear.
self.parser.add_argument(
'-v', '--verbosity', help="Show verbose output",
action="count", default=1)
self.parser.add_argument(
'-x', '--example', action='store_const', const='Example',
help='Uses input files from the %(const)s subdir.')
action="count", default=0)
# self.parser.add_argument(
# '-x', '--example', action='store_const', const='Example',
# help='Uses input files from the %(const)s subdir.')
if not version:
version = __version__
zip = f'Build\\{self.scriptnoun}.{version}.zip'
self.parser.add_argument(
'-z', '--zip', nargs='?', const='Output\\Output.zip',
'-z', '--zip', nargs='?', const=zip, default=zip,
help='Creates a zip file %(const)s containing all output.' +
' (default: %(const)s).')
if '-v' in sys.argv or '--verbosity' in sys.argv:
print(opening)
self.list = []
# Whether or not to create a zip file
self.zip = None
# The report for the entire dataset
self.report = ''
self.runtime = datetime.now().strftime('%H:%M:%S, %d %b %Y')
self.runcommand = ' '.join(sys.argv)
self.runarguments = ' '.join(sys.argv[1:])
self.scriptname = os.path.split(sys.argv[0])[1]
self.runuser = getpass.getuser()
# It is useful to be able to iterate over all the datasheets.
# Basically, avoid using .list. in all DataSet references.
......@@ -475,13 +494,8 @@ class DataSet:
datasetfilename = getattr(self.args, datasetname+'_file')
if dataset.direction == 'Input':
if self.args.example:
dataset.file.suggest(
datasetfilename,
force_dir=self.args.example)
else:
dataset.file.suggest(
datasetfilename)
dataset.file.suggest(
datasetfilename)
if urlparse(dataset.file.suggested).netloc:
if (not dataset.file.exist) \
or ((dataset.checksum is not None)
......@@ -522,11 +536,22 @@ class DataSet:
base, ext = os.path.splitext(datasetfilename)
dataset.file.suggest(datasetfilename)
dataset.update_properties()
if self.args.zip:
# Create a zip file containing everything
basezip, extzip = os.path.splitext(self.args.zip)
# Setting self.zip indicates creating a zip file
self.zip = basezip+'.zip'
os.makedirs(os.path.dirname(
os.path.abspath(dataset.file.path)), exist_ok=True)
# Make sure we can create the report
os.makedirs(os.path.dirname(
os.path.abspath(self.args.report)), exist_ok=True)
# Always create a zip file containing everything
# First make sure the directory exists
zippath = os.path.dirname(os.path.abspath(self.args.zip))
os.makedirs(zippath, exist_ok=True)
basezip, extzip = os.path.splitext(self.args.zip)
# Setting self.zip indicates creating a zip file
self.zip = basezip+'.zip'
def save(self):
for data in self:
......@@ -534,7 +559,7 @@ class DataSet:
if not data.closed:
data.close(auto_report=False, also_save=True)
def close(self, file_report=False, save=True):
def close(self, file_report=False, save=True, version = False):
'''
Method to close the dataset.
Most importantly save files.
......@@ -544,12 +569,23 @@ class DataSet:
report_content += f'* Script: {self.scriptname}\n'
report_content += textwrap.indent(
f'* Command line: {self.runcommand}\n', PY_INDENT)
report_content += textwrap.indent(
f'* Filename: {self.scriptname}\n', PY_INDENT)
report_content += textwrap.indent(
f'* Arguments: {self.runarguments}\n', PY_INDENT)
report_content += textwrap.indent(
f'* Hash: {self.scripthash}\n', PY_INDENT)
report_content += textwrap.indent(
f'* Executed at: {self.runtime}\n', PY_INDENT)
report_content += textwrap.indent(
f'* Executed by: {self.runuser}\n', PY_INDENT)
if version:
report_content += textwrap.indent(
f'* Version: {version}\n', PY_INDENT)
report_content += textwrap.indent(
f'* Depends upon module: {__name__}\n', PY_INDENT)
report_content += textwrap.indent(
f'* With version: {__version__}\n', 2*PY_INDENT)
for data in self:
if data.direction == 'Input':
report_content += data.get_report()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment