Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Biometris
MCRA.DataConversionTools
Commits
e7dfcb81
Commit
e7dfcb81
authored
Apr 05, 2020
by
Hans van den Heuvel
Browse files
Better solution for splitting and combining columns.
parent
56e82f7b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Convert-EUProcessingFactorsDB/Convert-EUProcessingFactorsDB.py
View file @
e7dfcb81
...
...
@@ -143,8 +143,7 @@ efsa_combined = dataset.efsa.sheet.merge(
right_on
=
[
'FromFX'
,
'FXToRpc'
],
how
=
'left'
).
assign
(
)
# First let's copy the columns which we want in the output unaltered
# So this is a copy FROM FIELD : TO FIELD
# First let's copy the columns which we want in the output unaltered so far
efsa_combined
.
mcra
.
copycolumn
({
'ParamName Active Substance'
:
'SubstanceName'
,
'Matrix Code'
:
'idFoodUnProcessed'
,
'Raw Primary Commodity'
:
'FoodUnprocessedName'
,
...
...
@@ -195,8 +194,7 @@ else:
###############################################
# Request by Waldo, please also add the description of the Processing Type.
# So, again, a left join :-)
# Also add the description of the Processing Type.
efsa_combined
=
efsa_combined
.
merge
(
# Left join with processing type sheet,
dataset
.
processing_type
.
sheet
,
...
...
@@ -222,15 +220,9 @@ if dataset.food_composition.sheet is not None:
fcs
=
dataset
.
food_composition
.
sheet
[(
dataset
.
food_composition
.
sheet
[
'idToFood'
].
str
.
startswith
(
'P'
)
&
dataset
.
food_composition
.
sheet
[
'idFromFood'
].
str
.
contains
(
'-'
))]
# Now split the first column
fs
=
pd
.
DataFrame
()
# Bit of a mess, to combine again.
fs
[[
'idFromFood-Left'
,
'idFromFood-Right'
]]
=
\
fcs
[
'idFromFood'
].
str
.
rsplit
(
'-'
,
n
=
1
,
expand
=
True
)
fcs
=
fcs
.
merge
(
fs
,
left_index
=
True
,
right_index
=
True
)
# New column is properly joined now
fcs
[
'idToFood-PC'
]
=
fcs
.
loc
[:,
(
'idToFood'
,
'idFromFood-Right'
)].
apply
(
lambda
x
:
'-'
.
join
(
x
.
dropna
()),
axis
=
1
)
fcs
[(
fcs
[
'idToFood'
].
str
.
startswith
(
'P'
)
&
fcs
[
'idFromFood'
].
str
.
contains
(
'-'
))].
assign
()
fcs
=
fcs
.
mcra
.
splitjoin
(
name
=
'idToFood-PC'
,
split
=
'idFromFood'
,
join
=
'idToFood'
)
# Finally a left join to combine
efsa_combined
=
efsa_combined
.
merge
(
# Left join with processing type sheet,
...
...
Convert-EUProcessingFactorsDB/mcra.py
View file @
e7dfcb81
...
...
@@ -50,6 +50,30 @@ class McraAccessor:
for
col
in
columnnames
:
self
.
_obj
[
col
]
=
''
def splitjoin(self, name, split, join,
              split_sep='-', right_split=True, join_sep='-'):
    '''
    Splits a column, and then joins the result with another column.

    The values of column `split` are split once, from the right, on
    `split_sep`. One of the two pieces is then glued to the value of
    column `join` (the `join` value comes first) using `join_sep`, and
    the result is stored in a new column called `name`.

    Parameters:
        name:        name of the column to create.
        split:       name of the column whose values are split.
        join:        name of the column whose values are put in front.
        split_sep:   separator used for the split (default '-').
        right_split: if True use the piece after the last `split_sep`,
                     otherwise the piece before the last `split_sep`.
        join_sep:    separator placed between the two parts (default '-').

    Missing parts are left out: when a `split` value contains no
    `split_sep` and `right_split` is True there is no right piece, so
    the new value is just the `join` value (without a separator).

    Returns a new DataFrame: the wrapped frame plus column `name`.
    The wrapped frame itself is not modified.
    '''
    # At most one split, counted from the right; yields a list per row.
    pieces = self._obj[split].str.rsplit(split_sep, n=1)
    # .str[i] yields NaN when the requested piece does not exist.
    chosen = pieces.str[1] if right_split else pieces.str[0]

    # Row by row, glue the available (non-missing) parts together.
    combined = [
        join_sep.join(part for part in (head, tail) if pd.notna(part))
        for head, tail in zip(self._obj[join], chosen)
    ]

    # assign() returns a copy, which avoids the SettingWithCopyWarning
    # and leaves self._obj pointing at the frame this accessor wraps.
    # The values are assigned by position, so a non-unique index does
    # not multiply rows the way an index-on-index merge would.
    return self._obj.assign(**{name: combined})
class
DataFile
:
'''
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment