Commit e7dfcb81 authored by Hans van den Heuvel's avatar Hans van den Heuvel
Browse files

Better solution for splitting and combining columns.

parent 56e82f7b
......@@ -143,8 +143,7 @@ efsa_combined = dataset.efsa.sheet.merge(
right_on=['FromFX', 'FXToRpc'], how='left').assign(
)
# First let's copy the columns which we want in the output unaltered
# So this is a copy FROM FIELD : TO FIELD
# First let's copy the columns which we want in the output unaltered so far
efsa_combined.mcra.copycolumn({'ParamName Active Substance': 'SubstanceName',
'Matrix Code': 'idFoodUnProcessed',
'Raw Primary Commodity': 'FoodUnprocessedName',
......@@ -195,8 +194,7 @@ else:
###############################################
# Request by Waldo, please also add the description of the Processing Type.
# So, again, a left join :-)
# Also add the description of the Processing Type.
efsa_combined = efsa_combined.merge(
# Left join with processing type sheet,
dataset.processing_type.sheet,
......@@ -222,15 +220,9 @@ if dataset.food_composition.sheet is not None:
fcs = dataset.food_composition.sheet[(
dataset.food_composition.sheet['idToFood'].str.startswith('P') &
dataset.food_composition.sheet['idFromFood'].str.contains('-'))]
# Now split the first column
fs = pd.DataFrame()
# Bit of a mess, to combine again.
fs[['idFromFood-Left', 'idFromFood-Right']] = \
fcs['idFromFood'].str.rsplit('-', n=1, expand=True)
fcs = fcs.merge(fs, left_index=True, right_index=True)
# The new column is now properly joined
fcs['idToFood-PC'] = fcs.loc[:, ('idToFood', 'idFromFood-Right')].apply(
lambda x: '-'.join(x.dropna()), axis=1)
fcs[(fcs['idToFood'].str.startswith('P') & fcs['idFromFood'].str.contains('-'))].assign()
fcs=fcs.mcra.splitjoin(name='idToFood-PC', split='idFromFood', join='idToFood')
# Finally a left join to combine
efsa_combined = efsa_combined.merge(
# Left join with processing type sheet,
......
......@@ -50,6 +50,30 @@ class McraAccessor:
for col in columnnames:
self._obj[col] = ''
def splitjoin(self, name, split, join,
              split_sep='-', right_split=True, join_sep='-'):
    '''
    Splits a column, and then joins the result with another column.

    The values of column `split` are split once on `split_sep`, counting
    from the right. One of the two pieces is then glued behind the value
    of column `join`, separated by `join_sep`, and stored in column `name`.

    Parameters:
        name: label of the column that receives the combined values.
        split: label of the (string) column to split.
        join: label of the (string) column whose value comes first.
        split_sep: separator used for splitting.
        right_split: if True use the piece to the right of the last
            separator, otherwise the piece to the left of it.
        join_sep: separator placed between the two joined values.

    Returns:
        A new DataFrame: a copy of the wrapped frame with column `name`
        added (or overwritten). The wrapped frame itself is not modified.

    Missing values (a missing `join` value, or a `split` value without
    separator when the right piece is requested) are left out, so no
    dangling separator appears in the result.
    '''
    frame = self._obj
    # Split only once, from the right, so at most two pieces result.
    pieces = frame[split].str.rsplit(split_sep, n=1)
    piece = pieces.str[1] if right_split else pieces.str[0]
    # Combine row by row on position; both series stem from the same frame,
    # so no index alignment is needed (and duplicate index labels are safe).
    combined = [
        join_sep.join(value for value in pair if pd.notna(value))
        for pair in zip(frame[join], piece)
    ]
    # Work on an explicit copy to stay clear of SettingWithCopyWarning
    # when the wrapped frame is itself a slice of another frame.
    result = frame.copy()
    result[name] = combined
    return result
class DataFile:
'''
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment