Commit 9cf72293 authored by Hans van den Heuvel
Browse files

Added references to output.

parent 0c46b68b
......@@ -7,6 +7,7 @@ import mcra
import pandas as pd
from datetime import datetime
import textwrap
import os
# Small utility to create hyperlink to hyperlink :-)
......@@ -100,6 +101,11 @@ dataset.add(
default_name='Report.xlsx',
default_dir='Output')
#
dataset.add(
name='references',
default_name='References.csv',
default_dir='Output')
#
#############################################################################
......@@ -117,6 +123,12 @@ dataset.verbose(1, 'Input file : {file}; {version}; {props}'.format(
props=dataset.efsa.properties,
version=efsa_version))
#
# Also reading the ProcStudies Evaluation
efsa_procstudies = pd.read_excel(
dataset.efsa.file.path, sheet_name=1)
# ... and the References
dataset.references.sheet = pd.read_excel(
dataset.efsa.file.path, sheet_name=3)
#############################################################################
# Phase 2. Processing the data.
......@@ -231,6 +243,21 @@ if dataset.food_composition.sheet is not None:
efsa_combined['idFoodProcessed'].str.contains('-')),
'idFoodProcessed'] = efsa_combined['idFromFood']
# Attach the study references read from the ProcStudies sheet to the
# combined table: one comma-separated string per 'Matrix FoodEx2 Code'.
# Cast every column to str so the ", ".join below only receives strings.
# NOTE(review): missing values presumably become the literal text 'nan'
# after this cast and would then show up in the joined references - confirm.
efsa_procstudies = efsa_procstudies.astype('str')
# Unique (code, reference) pairs, ordered by reference.
refs = efsa_procstudies.groupby(
['Matrix FoodEx2 Code', 'Study Reference']
).size().reset_index().sort_values(by=['Study Reference'])
# Keep only the two key columns; the count column from size() is not needed.
refs = refs[['Matrix FoodEx2 Code', 'Study Reference']]
# Collapse to one row per code, joining its references with ", ".
refs = refs.groupby(['Matrix FoodEx2 Code']).agg(
lambda column: ", ".join(column))
efsa_combined = efsa_combined.merge(
# Left join with the aggregated study references,
# so rows without a matching code are kept.
refs,
left_on='Matrix FoodEx2 Code', right_on='Matrix FoodEx2 Code',
how='left').assign()
# NOTE(review): .assign() is called without arguments, so it adds no columns.
#############################################################################
# Phase 3. Exporting the data.
# Seems obvious what to do here.
......@@ -239,19 +266,20 @@ header = ['idProcessingType', 'idSubstance', 'SubstanceName',
'idFoodProcessed', 'idFoodUnProcessed', 'FoodUnprocessedName',
'Nominal', 'Upper', 'NominalUncertaintyUpper',
'UpperUncertaintyUpper', 'KeyFacets Interpreted',
'Matrix Code Interpreted', 'MCRA_ProcessingType_Description']
'Matrix Code Interpreted', 'MCRA_ProcessingType_Description',
'Study Reference']
dataset.processing_factor.sheet = efsa_combined[
(efsa_combined['FCToProcType'].notna() |
efsa_combined['FXToProcType'].notna()) &
efsa_combined['idSubstance'].notna()][header]
#
# Writing output file
dataset.processing_factor.save()
dataset.processing_factor.close()
# In case of debugging, just dump the sheet we've been working on.
if dataset.args.verbosity > 3:
efsa_combined.mcra.dump('.\Output\dump.xlsx')
efsa_combined.mcra.dump(os.path.join(
dataset.report.file.directory, 'dump.xlsx'))
#############################################################################
# Phase 4. Report about the data.
......@@ -303,7 +331,8 @@ header = ['Matrix FoodEx2 Code', 'Matrix Code Interpreted', 'Matrix Code',
dataset.report.sheet = report_sheet[header]
#
# We also need some further text reporting:
# Let's make a new column of the combination of 'idSubstance' and 'idFoodUnProcessed'
# Let's make a new column of the combination
# of 'idSubstance' and 'idFoodUnProcessed'
mismatch_table = efsa_combined[
(efsa_combined['FCToProcType'].notna() |
efsa_combined['FXToProcType'].notna()) &
......@@ -315,12 +344,12 @@ mismatch_table = mismatch_table.mcra.join(
double_types = mismatch_table.groupby(
['idProcessingType', 'idSubstanceFoodProc'],
as_index=False).agg(
{'idSubstance' : 'first',
'idFoodUnProcessed' : 'first',
'FoodUnprocessedName' : 'first',
'KeyFacets Interpreted' : 'first',
'Matrix Code Interpreted' : 'first',
'MCRA_ProcessingType_Description' : 'first'}).drop(
{'idSubstance': 'first',
'idFoodUnProcessed': 'first',
'FoodUnprocessedName': 'first',
'KeyFacets Interpreted': 'first',
'Matrix Code Interpreted': 'first',
'MCRA_ProcessingType_Description': 'first'}).drop(
'idSubstanceFoodProc', axis=1)
......@@ -396,12 +425,16 @@ Substance conversion duplicates
'''+double_types.to_markdown(showindex=False)+r'''
'''
dataset.references.close()
dataset.report.save()
# Save this also to the dataset sheet.
with pd.ExcelWriter(dataset.report.file.path, mode='a') as writer:
double_types.to_excel(writer,
index=False,sheet_name='Substances')
dataset.report.close(auto_report=False)
double_types.to_excel(
writer,
index=False,
sheet_name='Substances')
dataset.report.close(auto_report=False, also_save=False)
dataset.close()
......@@ -77,15 +77,14 @@ class McraAccessor:
'''
# Because of the SettingWithCopyWarning, this is done in a somewhat cumbersome way
df = pd.DataFrame()
df[[join_left,join_right]] = self._obj[[join_left,join_right]]
df[[join_left, join_right]] = self._obj[[join_left, join_right]]
df[name] = df.loc[:, (join_left, join_right)].apply(
lambda x: sep.join(x.dropna()), axis=1)
df = df.drop([join_left,join_right], axis=1)
df = df.drop([join_left, join_right], axis=1)
# Not ideal yet, but slightly better than it used to be....
self._obj = self._obj.merge(df, left_index=True, right_index=True)
return self._obj
def dump(self, filename):
'''
For debugging purposes, to dump a file from memory a bit more easily
......@@ -116,6 +115,7 @@ class DataFile:
self.default_base = os.path.splitext(self.default_name)[0]
self.default_dir = default_dir
self.path = None
self.directory = None
self.reportpath = None
self.zippath = None
self.suggested = None
......@@ -202,9 +202,11 @@ class DataFile:
self.default_dir, self.suggested)
else:
self.path = self.suggested
head, tail = os.path.split(self.path)
if force_dir is not None:
head, tail = os.path.split(self.path)
self.path = os.path.join(force_dir, tail)
head = force_dir
self.path = os.path.join(head, tail)
self.directory = head
base, ext = os.path.splitext(self.path)
self.reportpath = base+'.md'
self.extension = ext
......@@ -326,12 +328,14 @@ class DataSheet:
**kwargs)
self.update_properties()
def close(self, auto_report=True):
def close(self, auto_report=True, also_save=True):
'''
If auto_report is False, no report on the object will be made.
If the report contains no content, it will not be created as file.
If however, you added something to the report, it WILL be created.
'''
if also_save:
self.save()
self.file.update()
self.update_properties()
if auto_report:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment