Biometris / MCRA.DataConversionTools · Commit 9cf72293

Authored Apr 15, 2020 by Hans van den Heuvel
Added references to output.
Parent: 0c46b68b
Changes: 2 files
Convert-EUProcessingFactorsDB/Convert-EUProcessingFactorsDB.py
```
@@ -7,6 +7,7 @@ import mcra
import pandas as pd
from datetime import datetime
import textwrap
import os
# Small utility to create hyperlink to hyperlink :-)
```
```
@@ -100,6 +101,11 @@ dataset.add(
    default_name='Report.xlsx',
    default_dir='Output')
#
dataset.add(
    name='references',
    default_name='References.csv',
    default_dir='Output')
#
#############################################################################
```
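For readers new to the `mcra` helper module: `dataset.add(...)` registers a named output entry with a default file name and directory, and later in this script the entry's `.sheet` attribute is assigned a DataFrame before it is saved and closed. The toy class below only illustrates that pattern; it is not the actual `mcra` implementation, and the class, path, and column values are invented:

```python
import os
import pandas as pd

class OutputEntry:
    '''Toy stand-in: holds a sheet (DataFrame) plus a default name and directory.'''
    def __init__(self, default_name, default_dir):
        self.path = os.path.join(default_dir, default_name)
        self.sheet = None

    def save(self):
        # Create the output directory if needed, then write the sheet.
        os.makedirs(os.path.dirname(self.path), exist_ok=True)
        self.sheet.to_csv(self.path, index=False)

references = OutputEntry('References.csv', 'Output')
references.sheet = pd.DataFrame(
    {'Matrix FoodEx2 Code': ['A000L'], 'Study Reference': ['Smith 2001']})
references.save()  # writes Output/References.csv
```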
```
@@ -117,6 +123,12 @@ dataset.verbose(1, 'Input file : {file}; {version}; {props}'.format(
    props=dataset.efsa.properties, version=efsa_version))
#
# Also reading the ProcStudies Evaluation
efsa_procstudies = pd.read_excel(dataset.efsa.file.path, sheet_name=1)
# ... and the References
dataset.references.sheet = pd.read_excel(dataset.efsa.file.path, sheet_name=3)
#############################################################################
# Phase 2. Processing the data.
```
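The two new reads pull extra worksheets out of the same EFSA workbook by position: when `sheet_name` is an integer, pandas counts from zero, so `1` is the second sheet (the ProcStudies evaluation) and `3` is the fourth (the references). A minimal illustration with a placeholder file name:

```python
import pandas as pd

# 'efsa_database.xlsx' is a placeholder for dataset.efsa.file.path.
procstudies = pd.read_excel('efsa_database.xlsx', sheet_name=1)  # second sheet
references = pd.read_excel('efsa_database.xlsx', sheet_name=3)   # fourth sheet
print(procstudies.shape, references.shape)
```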
```
@@ -231,6 +243,21 @@ if dataset.food_composition.sheet is not None:
        efsa_combined['idFoodProcessed'].str.contains('-')),
    'idFoodProcessed'] = efsa_combined['idFromFood']
# We also have to add the references to the file.
efsa_procstudies = efsa_procstudies.astype('str')
refs = efsa_procstudies.groupby(
    ['Matrix FoodEx2 Code', 'Study Reference']).size().reset_index(
    ).sort_values(by=['Study Reference'])
refs = refs[['Matrix FoodEx2 Code', 'Study Reference']]
refs = refs.groupby(['Matrix FoodEx2 Code']).agg(
    lambda column: ", ".join(column))
efsa_combined = efsa_combined.merge(
    # Left join with processing type sheet,
    refs,
    left_on='Matrix FoodEx2 Code',
    right_on='Matrix FoodEx2 Code',
    how='left').assign()
#############################################################################
# Phase 3. Exporting the data.
# Seems obvious what to do here.
```
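The block above collapses all study references for a given FoodEx2 code into one comma-separated string and then left-joins that onto the combined table, so every processing-factor row carries its references. A small self-contained sketch of the same pandas pattern on invented data (the `drop_duplicates()` step plays the role of the `groupby(...).size().reset_index()` deduplication above):

```python
import pandas as pd

# Toy stand-in for efsa_procstudies: several studies per FoodEx2 code.
procstudies = pd.DataFrame({
    'Matrix FoodEx2 Code': ['A000L', 'A000L', 'A001M'],
    'Study Reference': ['Smith 2001', 'Jones 2005', 'Smith 2001'],
})

# One comma-separated reference string per code.
refs = (procstudies[['Matrix FoodEx2 Code', 'Study Reference']]
        .drop_duplicates()
        .groupby('Matrix FoodEx2 Code')
        .agg(lambda column: ", ".join(column))
        .reset_index())

# Left-join onto the main table; codes without studies get NaN.
combined = pd.DataFrame({'Matrix FoodEx2 Code': ['A000L', 'A001M', 'A002X']})
combined = combined.merge(refs, on='Matrix FoodEx2 Code', how='left')
print(combined)
# A000L -> "Smith 2001, Jones 2005", A001M -> "Smith 2001", A002X -> NaN
```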
```
@@ -239,19 +266,20 @@ header = ['idProcessingType', 'idSubstance', 'SubstanceName',
          'idFoodProcessed', 'idFoodUnProcessed', 'FoodUnprocessedName',
          'Nominal', 'Upper', 'NominalUncertaintyUpper', 'UpperUncertaintyUpper',
          'KeyFacets Interpreted',
-         'Matrix Code Interpreted', 'MCRA_ProcessingType_Description']
+         'Matrix Code Interpreted', 'MCRA_ProcessingType_Description',
+         'Study Reference']
dataset.processing_factor.sheet = efsa_combined[
    (efsa_combined['FCToProcType'].notna()
     | efsa_combined['FXToProcType'].notna())
    & efsa_combined['idSubstance'].notna()][header]
#
# Writing output file
dataset.processing_factor.save()
dataset.processing_factor.close()
# In case of debugging, just dump the sheet we've been working on.
if dataset.args.verbosity > 3:
-    efsa_combined.mcra.dump('.\Output\dump.xlsx')
+    efsa_combined.mcra.dump(os.path.join(
+        dataset.report.file.directory, 'dump.xlsx'))
#############################################################################
# Phase 4. Report about the data.
```
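The export filter above keeps only rows that map to a processing type (via either the FC or the FX route) and that have a substance id. A small illustration of that `notna()`/`|` mask pattern, with column names borrowed from the diff and values invented:

```python
import pandas as pd

df = pd.DataFrame({
    'FCToProcType': ['PT01', None, None],
    'FXToProcType': [None, 'PT02', None],
    'idSubstance': ['S1', 'S2', 'S3'],
})

# Keep rows where at least one mapping exists and idSubstance is present.
mask = (df['FCToProcType'].notna() | df['FXToProcType'].notna()) \
    & df['idSubstance'].notna()
print(df[mask])  # rows 0 and 1 survive; row 2 has no mapping at all
```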
```
@@ -303,7 +331,8 @@ header = ['Matrix FoodEx2 Code', 'Matrix Code Interpreted', 'Matrix Code',
dataset.report.sheet = report_sheet[header]
#
# We also need some further text reporting:
-# Let's make a new column of the combination of 'idSubstance' and 'idFoodUnProcessed'
+# Let's make a new column of the combination
+# of 'idSubstance' and 'idFoodUnProcessed'
mismatch_table = efsa_combined[
    (efsa_combined['FCToProcType'].notna()
     | efsa_combined['FXToProcType'].notna()) &
```
```
@@ -315,12 +344,12 @@ mismatch_table = mismatch_table.mcra.join(
double_types = mismatch_table.groupby(
    ['idProcessingType', 'idSubstanceFoodProc'], as_index=False).agg(
    {'idSubstance': 'first',
     'idFoodUnProcessed': 'first',
     'FoodUnprocessedName': 'first',
     'KeyFacets Interpreted': 'first',
     'Matrix Code Interpreted': 'first',
     'MCRA_ProcessingType_Description': 'first'}).drop(
    'idSubstanceFoodProc', axis=1)
```
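The duplicate report above groups on the processing type plus the combined substance/food key, keeps the first value of each descriptive column, and then drops the helper key. A toy version of that `agg({...: 'first'})` / `drop` pattern on invented data:

```python
import pandas as pd

df = pd.DataFrame({
    'idProcessingType': ['PT01', 'PT01', 'PT02'],
    'idSubstanceFoodProc': ['S1-F1', 'S1-F1', 'S2-F2'],
    'idSubstance': ['S1', 'S1', 'S2'],
    'FoodUnprocessedName': ['Apple', 'Apple', 'Wheat'],
})

double_types = (df.groupby(['idProcessingType', 'idSubstanceFoodProc'],
                           as_index=False)
                  .agg({'idSubstance': 'first',
                        'FoodUnprocessedName': 'first'})
                  .drop('idSubstanceFoodProc', axis=1))
print(double_types)
# idProcessingType idSubstance FoodUnprocessedName
#             PT01          S1               Apple
#             PT02          S2               Wheat
```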
```
@@ -396,12 +425,16 @@ Substance conversion duplicates
''' + double_types.to_markdown(showindex=False) + r'''
'''
dataset.references.close()
dataset.report.save()
# Save this also to the dataset sheet.
with pd.ExcelWriter(dataset.report.file.path, mode='a') as writer:
    double_types.to_excel(writer, index=False, sheet_name='Substances')
-dataset.report.close(auto_report=False)
+dataset.report.close(auto_report=False, also_save=False)
dataset.close()
```
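Appending the `Substances` sheet relies on pandas' append mode: `pd.ExcelWriter(..., mode='a')` opens an existing `.xlsx` workbook through the openpyxl engine and adds a new sheet to it, which is why the script saves the report first and then closes it with `also_save=False`. A minimal self-contained sketch (the file name is illustrative, and openpyxl must be installed):

```python
import pandas as pd

report_path = 'Report.xlsx'  # placeholder for dataset.report.file.path
# The workbook must already exist before it can be appended to.
pd.DataFrame({'note': ['report body']}).to_excel(report_path, index=False)

double_types = pd.DataFrame({'idSubstance': ['S1'], 'idFoodUnProcessed': ['F1']})

# mode='a' re-opens the existing workbook and adds the extra sheet.
with pd.ExcelWriter(report_path, mode='a') as writer:
    double_types.to_excel(writer, index=False, sheet_name='Substances')
```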
Convert-EUProcessingFactorsDB/mcra.py
```
@@ -77,15 +77,14 @@ class McraAccessor:
        '''
        # Due to the SettingWithCopyWarning we do it a bit cumbersome
        df = pd.DataFrame()
        df[[join_left, join_right]] = self._obj[[join_left, join_right]]
        df[name] = df.loc[:, (join_left, join_right)].apply(
            lambda x: sep.join(x.dropna()), axis=1)
        df = df.drop([join_left, join_right], axis=1)
        # Not ideal yet, but slightly better than it used to be....
        self._obj = self._obj.merge(df, left_index=True, right_index=True)
        return self._obj

    def dump(self, filename):
        '''
        For debugging purposes, to dump a file from memory a bit more easily
```
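The accessor builds a helper key by concatenating two columns row-wise, skipping missing values, and then merges the result back on the index. The same idea on a plain DataFrame, without the accessor and with invented values:

```python
import pandas as pd

df = pd.DataFrame({'idSubstance': ['S1', 'S2', None],
                   'idFoodUnProcessed': ['F1', None, 'F3']})

sep = '-'
# Combine the two columns per row, dropping NaN so 'S2' stays 'S2', not 'S2-nan'.
df['idSubstanceFoodProc'] = df[['idSubstance', 'idFoodUnProcessed']].apply(
    lambda x: sep.join(x.dropna()), axis=1)
print(df['idSubstanceFoodProc'].tolist())  # ['S1-F1', 'S2', 'F3']
```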
```
@@ -116,6 +115,7 @@ class DataFile:
        self.default_base = os.path.splitext(self.default_name)[0]
        self.default_dir = default_dir
        self.path = None
        self.directory = None
        self.reportpath = None
        self.zippath = None
        self.suggested = None
```
```
@@ -202,9 +202,11 @@ class DataFile:
                self.default_dir, self.suggested)
        else:
            self.path = self.suggested
+       head, tail = os.path.split(self.path)
        if force_dir is not None:
-           head, tail = os.path.split(self.path)
-           self.path = os.path.join(force_dir, tail)
+           head = force_dir
+           self.path = os.path.join(head, tail)
+       self.directory = head
        base, ext = os.path.splitext(self.path)
        self.reportpath = base + '.md'
        self.extension = ext
```
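The reworked logic splits the chosen path once, optionally swaps in `force_dir`, and records the directory for later use (the main script reads `dataset.report.file.directory` when dumping). A worked example of what those `os.path` calls return, with illustrative paths and the separator shown POSIX-style:

```python
import os

path = os.path.join('Output', 'Report.xlsx')
head, tail = os.path.split(path)        # ('Output', 'Report.xlsx')

force_dir = 'Elsewhere'                 # pretend a forced output dir was given
if force_dir is not None:
    head = force_dir
path = os.path.join(head, tail)         # 'Elsewhere/Report.xlsx'
directory = head

base, ext = os.path.splitext(path)
reportpath = base + '.md'               # 'Elsewhere/Report.md'
print(path, directory, reportpath, ext)
```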
```
@@ -326,12 +328,14 @@ class DataSheet:
            **kwargs)
        self.update_properties()

-   def close(self, auto_report=True):
+   def close(self, auto_report=True, also_save=True):
        '''
        If auto_report is False, no report on the object will be made.
        If the report contains no content, it will not be created as file.
        If however, you added something to the report, it WILL be created.
        '''
+       if also_save:
+           self.save()
        self.file.update()
        self.update_properties()
        if auto_report:
```
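With the extra keyword the caller decides whether `close()` should still write the sheet, which is what lets the main script append the `Substances` sheet itself and then close the report with `also_save=False`. The toy class below sketches only the new control flow; it is not the real `mcra.DataSheet`:

```python
class ToySheet:
    '''Toy illustration of the close(auto_report, also_save) control flow.'''
    def save(self):
        print('saving sheet')

    def report(self):
        print('writing report')

    def close(self, auto_report=True, also_save=True):
        if also_save:
            self.save()
        if auto_report:
            self.report()

sheet = ToySheet()
sheet.close()                                    # saves and reports
sheet.close(auto_report=False, also_save=False)  # neither; caller handled both
```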