Commit 92cfb8ce authored by Roelofsen, Hans

Improved reporting on requested species

parent f5988ea4
@@ -21,7 +21,7 @@ cv_precp_src = "EObs_v200e_rr_5yrmean"
cv_temp_dir = cv_precp_dir
cv_temp_src = "EObs_v200e_tg_5yrmean"
sp_req_src = r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\DOREN-2020-11-30.xlsx'
sp_req_sheet, column, skip = 'Soorten', 'wetenschappelijke_naam', 0
sp_req_sheet, column, skip = 'Soorten', 'wetenschappelijke naam', 1
# sp_req_src = r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\2020-09-17 uniek soorten per habitat (2).xlsx'
testing = False
@@ -30,7 +30,7 @@ doren = dc.Doren(header_src=param_header_src, sp_src=param_sp_src)
doren.initiate(sample=testing)
doren.apply_requirements('req1', 'req2', 'req3', 'req4', 'req8', 'req9', 'req10',
aoi_src=None if testing else param_aoi_src, dem_src=param_dem_src)
doren.get_requested_species(xls=sp_req_src, sheet=sp_req_sheet, col=column, skip=skip, simplify_names=False)
doren.get_requested_species(xls=sp_req_src, sheet=sp_req_sheet, col=column, skip=skip, simplify_names=True)
doren.add_covar(covar_dir=cv_cntr_dir, covar_src=cv_cntr_src, covar_name='country', raster=False, column='SOV_A3')
doren.add_covar(covar_dir=cv_soil_dir, covar_src=cv_soil_src, covar_name='soil_type', nominal=True)
doren.add_yearly_covar(covar_dir=cv_precp_dir, covar_src_basename=cv_precp_src, covar_name='five_yearly_precip')
@@ -52,8 +52,8 @@ doren.write_stuff('species_single_file')
doren.write_stuff('plot_covars_file')
doren.write_stuff('report')
with open(r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_{}.pkl'.
format(doren.timestamp), 'wb') as handle:
pickle.dump(doren, handle, protocol=pickle.HIGHEST_PROTOCOL)
if not testing:
with open(r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_{}.pkl'.
format(doren.timestamp), 'wb') as handle:
pickle.dump(doren, handle, protocol=pickle.HIGHEST_PROTOCOL)
@@ -20,4 +20,4 @@ echo $SLURM_ARRAY_TASK_ID
# Run
cd /home/WUR/roelo008/projs/doren_2019
python run_species.py $SLURM_ARRAY_TASK_ID 50 doren_20201201.pkl
\ No newline at end of file
python run_species.py $SLURM_ARRAY_TASK_ID 50 doren_20201202.pkl
\ No newline at end of file
@@ -18,7 +18,7 @@ n_batches = int(args.n_batches)
# Recover the Pickled doren object
pkl_src = os.path.join('./pkl_src/', args.pkl)
# pkl_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_20201130.pkl'
# pkl_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_20201201.pkl'
with open(pkl_src, 'rb') as handle:
doren = pickle.load(handle)
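run_species.py is invoked once per SLURM array task with the task index, a batch count of 50 and the pickle name (see the job script above). The batching itself is not shown in this diff; a hypothetical sketch, assuming the task index selects one contiguous slice of the requested species recovered from the pickle:

```python
# Hypothetical sketch only: not code from this commit. Assumes the array task
# index is used to pick one contiguous slice of doren.req_sp.
import math

def species_for_batch(req_sp, batch_id, n_batches):
    """Return the requested species handled by 0-based array task `batch_id`."""
    batch_size = math.ceil(len(req_sp) / n_batches)
    return req_sp[batch_id * batch_size:(batch_id + 1) * batch_size]

# e.g. species_for_batch(doren.req_sp, batch_id=3, n_batches=50)
```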
@@ -84,7 +84,10 @@ class Doren:
colnames_n = ['plot_obs_id', 'taxonomy', 'taxon_group', 'taxon_group_id', 'turboveg2_concept',
'matched_concept', 'match', 'layer', 'cover_perc', 'cover_code']
species.rename(columns=dict(zip(list(species), colnames_n)), inplace=True)
species['species_name_hdr'] = species.loc[:, species_col].astype(str).apply(do.strip_leading_quote)
# Simplify species name to remove subspecies etc
setattr(self, 'eva_species_col', species_col)
species['species_name_hdr'] = species.loc[:, self.eva_species_col].astype(str).apply(do.strip_leading_quote)
species['species_name_hdr'] = species.species_name_hdr.apply(do.simplify_species)
self.spec = species
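The do.strip_leading_quote and do.simplify_species helpers used above are defined elsewhere in the repository. A minimal sketch of their assumed behaviour (drop a stray leading quote, then keep only genus and epithet so subspecies and author suffixes fold into the parent species):

```python
# Minimal sketch of the assumed behaviour of the do.* helpers; the real
# implementations are not part of this diff and may differ.
def strip_leading_quote(name: str) -> str:
    """Remove a leading quote character sometimes present in exported names."""
    return name.lstrip('"\'')

def simplify_species(name: str) -> str:
    """Keep only 'Genus epithet', dropping subspecies, variety and author parts."""
    parts = name.split()
    return ' '.join(parts[:2]) if len(parts) >= 2 else name

# simplify_species('Calluna vulgaris subsp. vulgaris') -> 'Calluna vulgaris'
```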
@@ -108,7 +111,8 @@ class Doren:
'Date of recording', 'Dataset', 'EUNIS_Old', 'EUNIS_New'])
self.report += 'Starting @ {0} with {1} EVA headers containing {2} unique ' \
'species.\n\n'.format(self.timestamp_full, eva.shape[0], len(self.species))
'species (based on column {3} after simplification).\n\n' \
.format(self.timestamp_full, eva.shape[0], len(self.species), self.eva_species_col)
try:
eva.set_index('PlotObservationID', drop=False, verify_integrity=True, inplace=True)
@@ -167,16 +171,21 @@ class Doren:
# Read Excel file with requested species and drop empty rows
reqs = pd.read_excel(xls, sheet_name=sheet, skiprows=skip)
msg1 = 'Read {0} species requested for processing (source: "{1}", ' \
'sheet: "{2}", column: "{3}").\n'.format(reqs.shape[0], xls, sheet, col)
pre = reqs.shape[0]
reqs.dropna(subset=[col], inplace=True, axis=0)
reqs.drop_duplicates(subset=[col], inplace=True)
post = reqs.shape[0]
if post < pre:
msg1 += 'Dropping {0} duplicates and/or NAs, remaining: {1}\n'.format(pre-post, reqs.shape[0])
# Optionally simplify species names
if simplify_names:
reqs.loc[:, 'species_name_hdr'] = reqs.loc[:, col].astype(str).apply(do.strip_leading_quote)
reqs.loc[:, 'species_name_hdr'] = reqs.species_name_hdr.astype(str).apply(do.simplify_species)
# Indicate if species names are matched in EVA database
# reqs.loc[:, 'species_in_EVA'] = reqs.loc[:, col].apply(lambda x: x in self.species)
# NOTE: self.species may change as a result of adding covariables!
reqs.loc[:, col] = reqs.loc[:, 'species_name_hdr']
# Attach as df
self.request_df = reqs
@@ -186,6 +195,14 @@ class Doren:
self.req_sp = list(req_sp_set)
self.req_found = self.species.intersection(req_sp_set)  # self.species is the set of simplified EVA matched_concept names
self.req_not_found = req_sp_set.difference(self.species)
if simplify_names:
msg2 = 'Simplifying requested species names to non-subspecies, with {0} remaining.\n'.format(len(req_sp_set))
else:
msg2 = 'Now considering {0} species for processing\n'.format(len(self.req_sp))
msg3 = ' {0} match (simplified) species in DOREN EVA column {1}\n'.format(len(self.req_found),
self.eva_species_col)
msg4 = ' {0} do not match (simplified) species in DOREN EVA column {1}\n'.format(len(self.req_not_found),
self.eva_species_col)
# List alternatives by looking for not-found-species in the 'turboveg2_concept' column instead of
# 'matched_concept'.
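The found/not-found split that feeds the new msg2 to msg4 report lines is plain set arithmetic between the (simplified) requested names and the (simplified) EVA species names; a self-contained illustration with invented names:

```python
# Illustration only (toy names): the intersection/difference logic behind the report.
eva_species = {'Calluna vulgaris', 'Erica tetralix', 'Molinia caerulea'}
requested = {'Calluna vulgaris', 'Erica cinerea'}

req_found = eva_species.intersection(requested)    # {'Calluna vulgaris'}
req_not_found = requested.difference(eva_species)  # {'Erica cinerea'}
print('{0} matching, {1} not matching'.format(len(req_found), len(req_not_found)))
```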
@@ -199,14 +216,10 @@ class Doren:
container.append(dat)
self.req_not_found_df = pd.concat(container)
msg1 = '\nRead {0} species requested for processing (source: {1}, sheet: {2}, column {3}), ' \
'of which {4} match a species in EVA ("matched_concept") and {5} do not (but matching ' \
'"turboveg2_concepts" given in brackets): \n'\
.format(len(self.req_sp), os.path.abspath(xls), sheet, col, len(self.req_found), len(self.req_not_found))
msg2 = self.req_not_found_df.sort_index().to_csv(sep='\t')
self.report += msg1 + msg2 + '\n'
msg5 = self.req_not_found_df.sort_index().to_csv(sep='\t')
self.report += msg1 + msg2 + msg3 + msg4 + msg5 + '\n'
if self.verbose:
print(msg1, msg2)
print(msg1, msg2, msg3, msg4, msg5)
def rename(self, old_name, new_name):
"""
@@ -396,7 +409,7 @@ class Doren:
self.eva.dropna(axis=0, subset=['totN_kmol_ha'], how='any', inplace=True)
self.update_status(covar=['totN_kmol_ha'])
msg = '\nAdded NDep from POSCH: {0} rows remaining.\n'.format(self.eva.shape[0])
msg = '\nAdded NDep from POSCH: {0} rows remaining.\n\n'.format(self.eva.shape[0])
self.report += msg
if self.verbose:
print(msg)
@@ -519,10 +532,14 @@ class Doren:
set(eunis_new2bos.keys()),
set(eunis_old2structuur.keys()),
set(eunis_old2bos.keys())]))
msg1 = 'Adding mapping from EUNIS type to structure type.\n'
if len(missing) > 0:
msg = 'EUNIS codes without a mapping to vegetation type:\n'
msg2 = '\n'.join('{0}: {1}'.format(a, b) for a, b in enumerate(list(missing)))
self.report += msg + msg2 + '\n'
msg2 = 'EUNIS codes without a mapping to vegetation type:\n'
msg3 = '\n'.join('{0}: {1}'.format(a, b) for a, b in enumerate(list(missing)))
self.report += msg1 + msg2 + msg3 + '\n'
else:
self.report += msg1
# Where to use old and new EUNIS?
use_new_indx = self.eva.loc[self.eva.eunis_src == 'eunis_new'].index
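The missing set reported above contains EUNIS codes that occur in the plots but are absent from all four lookup dictionaries; a compact illustration of that check with invented codes:

```python
# Toy check: plot EUNIS codes that none of the lookup dictionaries can translate.
observed = {'F4.1', 'X99', '?'}
lookups = [{'F4.1': 'heath', '?': '?'}, {'E1.2': 'grassland', '?': '?'}]
missing = observed.difference(set().union(*(set(d) for d in lookups)))
print(missing)  # {'X99'}
```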
@@ -25,3 +25,14 @@ assert piv.drop(piv.loc[piv.stype_count == 1].index).empty
piv = pd.pivot_table(data=eunis_old, index='EUNIS_OLD', columns='type', values='hoog_laag', aggfunc='count')
piv['stype_count'] = piv.notna().sum(axis=1)
assert piv.drop(piv.loc[piv.stype_count == 1].index).empty
# test from class
eunis_new.fillna(value='?', inplace=True)
eunis_old.fillna(value='?', inplace=True)
# mappings from old/new eunis types to Wieger Wamelink categories. Also include "?" as key
eunis_new2structuur = {**dict(zip(eunis_new.eunis_code, eunis_new.type)), **{'?': '?'}}
eunis_new2bos = {**dict(zip(eunis_new.eunis_code, eunis_new.hoog_laag)), **{'?': '?'}}
eunis_old2structuur = {**dict(zip(eunis_old.EUNIS_OLD, eunis_old.type)), **{'?': '?'}}
eunis_old2bos = {**dict(zip(eunis_old.EUNIS_OLD, eunis_old.hoog_laag)), **{'?': '?'}}
\ No newline at end of file
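The `{**dict(zip(...)), **{'?': '?'}}` merges above build lookups from EUNIS code to Wieger Wamelink category in which '?' maps to itself, so plots whose EUNIS code was filled with '?' pass through without a KeyError. A toy example of the pattern (codes and categories invented):

```python
# Toy example of the merged-dict lookup pattern with a '?' fallback key.
import pandas as pd

eunis_new = pd.DataFrame({'eunis_code': ['F4.1', 'E1.2'], 'type': ['heath', 'grassland']})
eunis_new2structuur = {**dict(zip(eunis_new.eunis_code, eunis_new.type)), **{'?': '?'}}

codes = pd.Series(['F4.1', '?', 'E1.2'])
print(codes.map(eunis_new2structuur).tolist())  # ['heath', '?', 'grassland']
```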