Commit bb987536 authored by Roelofsen, Hans
Browse files

include updates to EUNIS types

parent 38b2912d
'''
Script to verify delivery of EUNIS vegetation types by Stephan H, november 2020
'''
import pandas as pd
import pickle
# Input locations: the EVA header export, the delivered EUNIS table and a
# previously compiled Doren pickle.
eva_headers = r'W:\PROJECTS\doren19\a_brondata\EVA\EVA_Doren_header.csv'
eva_eunis = r'\\wur\dfs-root\PROJECTS\doren19\a_brondata\covariables\EUNIS\EVA-EUNIS.csv'
pkl_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_20201028.pkl'

# NOTE(review): `doren` is not referenced again in this script; the load is
# kept so the script still fails early when the pickle is unavailable.
with open(pkl_src, 'rb') as handle:
    doren = pickle.load(handle)

# Both tables are tab separated and indexed on PlotObservationID; the first
# two rows of the header export are skipped.
headers = pd.read_csv(eva_headers, sep='\t', skiprows=[0, 1], index_col='PlotObservationID')
eunis = pd.read_csv(eva_eunis, sep='\t', index_col='PlotObservationID')

# Rename the EUNIS columns positionally to the names below.
colnames_n = [
    'plot_id', 'releve_nr', 'date_of_recording', 'expert_system',
    'longitude', 'latitude', 'location_uncertainty_m', 'dataset', 'regime',
]
column_mapping = dict(zip(eunis.columns, colnames_n))
eunis.rename(columns=column_mapping, inplace=True)

# Report the size of both tables.
n_headers = len(headers.index)
n_eunis = len(eunis.index)
print(f'EVA headers: {n_headers}')
print(f'EUNIS headers: {n_eunis}')

# Test 1: every plot in the EVA headers must also occur in the EUNIS table.
t1 = "all EVA headers are present in the EUNIS database"
missing_ids = headers.index.difference(eunis.index)
if not missing_ids.empty:
    print(f'{t1}: failed')
    print(f'There are {len(missing_ids)} EVA headers that are not present in the EUNIS db')
    # Put the offending header rows on the clipboard for inspection.
    headers.loc[missing_ids].to_clipboard(sep=';')
else:
    print(f'{t1}: passed')
......@@ -4,6 +4,8 @@ import pickle
from utils import doren_classes as dc
# os.environ['GDAL_DATA'] = r'C:\Users\roelo008\Miniconda3\envs\doren\Library\share\gdal'
os.environ['PROJ_LIB'] = r'c:\Users\roelo008\Miniconda3\envs\doren\\Library\\share\\proj'
os.environ['GDAL_DATA'] = r'c:\Users\roelo008\Miniconda3\envs\doren\\Library\\share'
param_header_src = r'W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20201118\EVA_Doren_header.csv'
param_sp_src = r'W:\PROJECTS\Doren19\a_brondata\EVA\delivery_201909\EVA_Doren_species.csv'
......@@ -18,16 +20,17 @@ cv_precp_dir = r'w:\PROJECTS\Doren19\a_brondata\covariables\EObs\2_compiled'
cv_precp_src = "EObs_v200e_rr_5yrmean"
cv_temp_dir = cv_precp_dir
cv_temp_src = "EObs_v200e_tg_5yrmean"
sp_req_src = r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\2020-09-17 uniek soorten per habitat.xlsx'
sp_req_sheet, column = 'PGO-1', 'newsoort'
sp_req_src = r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\soortenlijsten\soortenlijst te draaien 30-11-2020.xlsx'
sp_req_sheet, column = 'soortenlijst opgeschoond', 'Hans_EU_schoon'
# sp_req_src = r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\2020-09-17 uniek soorten per habitat (2).xlsx'
testing = True
testing = False
doren = dc.Doren(header_src=param_header_src, sp_src=param_sp_src)
doren.initiate(sample=testing)
doren.apply_requirements('req1', 'req2', 'req3', 'req4', 'req8', 'req9', 'req10',
aoi_src=None if testing else param_aoi_src, dem_src=param_dem_src)
doren.get_requested_species(xls=sp_req_src, sheet=sp_req_sheet, col=column, simplify_names=False)
doren.add_covar(covar_dir=cv_cntr_dir, covar_src=cv_cntr_src, covar_name='country', raster=False, column='SOV_A3')
doren.add_covar(covar_dir=cv_soil_dir, covar_src=cv_soil_src, covar_name='soil_type', nominal=True)
doren.add_yearly_covar(covar_dir=cv_precp_dir, covar_src_basename=cv_precp_src, covar_name='five_yearly_precip')
......@@ -35,21 +38,22 @@ doren.add_yearly_covar(covar_dir=cv_temp_dir, covar_src_basename=cv_temp_src, co
doren.add_eunis()
doren.add_posch(posch_src_dir=param_posch)
doren.get_requested_species(xls=sp_req_src, sheet=sp_req_sheet, col=column, simplify_names=False)
doren.test()
'''
sp = 'Holcus lanatus'
doren.select_plts_w_species(species_name=sp)
for structuurtype in doren.structuurtypes:
doren.nearest_positive_queryd(col='structuurtype', val=structuurtype)
doren.get_bedekking_selected_sp()
doren.write_stuff('plot_covars_file')
doren.write_stuff('species_single_file')
'''
doren.write_stuff('species_list')
doren.write_stuff('typische_soorten')
doren.write_stuff('report')
doren.write_stuff('headers_shp')
# doren.write_stuff('headers_shp')
with open(r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_{}.pkl'.
format(doren.timestamp), 'wb') as handle:
......
......@@ -20,4 +20,4 @@ echo $SLURM_ARRAY_TASK_ID
# Run
cd /home/WUR/roelo008/projs/doren_2019
python run_species.py $SLURM_ARRAY_TASK_ID 50 doren_20201121.pkl
\ No newline at end of file
python run_species.py $SLURM_ARRAY_TASK_ID 50 doren_20201130.pkl
\ No newline at end of file
......@@ -48,4 +48,4 @@ for i, j in enumerate(sel, start=1):
except OSError:
continue
doren.write_stuff('report')
\ No newline at end of file
doren.write_stuff('report')
......@@ -106,6 +106,7 @@ class Doren:
memory_map=True, quotechar='"',
usecols=['PlotObservationID', 'TV2 relevé number', 'Country','Longitude', 'Latitude',
'Date of recording', 'Dataset', 'EUNIS_Old', 'EUNIS_New'])
try:
eva.set_index('PlotObservationID', drop=False, verify_integrity=True, inplace=True)
except ValueError:
......@@ -158,7 +159,7 @@ class Doren:
# Read csv file with requested species and drop empty rows
reqs = pd.read_excel(xls, sheet_name=sheet)
reqs.dropna(subset=[col], inplace=True)
reqs.dropna(subset=[col], inplace=True, axis=0)
# Optionally simplify species names
if simplify_names:
......@@ -371,15 +372,14 @@ class Doren:
self.eva['tot_N_kmol_ha_v'] = self.eva.loc[:, 'totN_kg_ha_v'].divide(14)
# Total NDep differentiated according to plot Open/Forest classification
if hasattr(self.eva, 'openbos'):
f = self.eva.loc[self.eva.openbos == 'bos'].index # Forest
v = self.eva.loc[self.eva.openbos == 'x'].index # Open vegetation
if hasattr(self.eva, 'hooglaag'):
f = self.eva.loc[self.eva.hooglaag == 'hoog'].index # Forest
v = self.eva.loc[self.eva.hooglaag == 'laag'].index # Open vegetation
x = self.eva.index.difference(f.union(v)) # Anders/niet bekend
self.eva.loc[f, 'tot_N_kmol_ha'] = self.eva.loc[f, 'tot_N_kmol_ha_f']
self.eva.loc[v, 'tot_N_kmol_ha'] = self.eva.loc[v, 'tot_N_kmol_ha_v']
self.eva.loc[x, 'tot_N_kmol_ha'] = self.eva.loc[x, 'tot_N_kmol_ha_v'] # Gebruik Open Veg waneer onduidelijk
# TODO: Checken met WW
else:
print('Cannot assign NDep data differenntiated to structuurtype. ')
......@@ -484,23 +484,24 @@ class Doren:
Add doorvertaling van EUNIS type to self.eva, based on vertalingstabellen in Teams
EUNIS --> Structuurtype
EUNIS --> Open/Bos onderscheid
Doorvertaling is onderscheidend naar oude/nieuwe EUNIS_code
:return: extra column to self.eva: eunis_sh
Doorvertaling is onderscheidend naar oude/nieuwe EUNIS_code, gebaseerd op self.eva.eunis_src kolom.
:return: self.eva.eunis_structuurtype
self.eva.openbos
"""
# Read naming conventions and downstream classification of EUNIS types
eunis_new_cats = pd.read_excel(r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\EUNIS\EUNIS-classification.xlsx',
sheet_name='EUNIS')
sheet_name='eindlijst opgeschoond')
eunis_old_cats = pd.read_excel(r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\EUNIS\Selectie_oude_EUNIS_typecodes.xlsx',
sheet_name='Sheet1')
sheet_name='eindlijst opgeschoond')
eunis_new_cats.fillna(value='?', inplace=True)
eunis_old_cats.fillna(value='?', inplace=True)
# mappings from old/new eunis types to Wieger Wamelink categories. Also include "?" as key
eunis_new2structuur = {**dict(zip(eunis_new_cats.eunis_code_new, eunis_new_cats.type)), **{'?': '?'}}
eunis_new2bos = {**dict(zip(eunis_new_cats.eunis_code_new, eunis_new_cats.bos_open)), **{'?': '?'}}
eunis_new2structuur = {**dict(zip(eunis_new_cats.eunis_code, eunis_new_cats.type)), **{'?': '?'}}
eunis_new2bos = {**dict(zip(eunis_new_cats.eunis_code, eunis_new_cats.hoog_laag)), **{'?': '?'}}
eunis_old2structuur = {**dict(zip(eunis_old_cats.EUNIS_OLD, eunis_old_cats.type)), **{'?': '?'}}
eunis_old2bos = {**dict(zip(eunis_old_cats.EUNIS_OLD, eunis_old_cats.bos_open)), **{'?': '?'}}
eunis_old2bos = {**dict(zip(eunis_old_cats.EUNIS_OLD, eunis_old_cats.hoog_laag)), **{'?': '?'}}
# Verify that all provided EUNIS codes are covered in one of the translation dictionaries.
missing = set(self.eva.eunis_code).difference(set().union(*[set(eunis_new2structuur.keys()),
......@@ -517,10 +518,10 @@ class Doren:
use_old_indx = self.eva.loc[self.eva.eunis_src == 'eunis_old'].index
self.eva.loc[use_new_indx, 'structuurtype'] = self.eva.loc[use_new_indx, 'eunis_code'].map(eunis_new2structuur)
self.eva.loc[use_old_indx, 'structuurtype'] = self.eva.loc[use_old_indx, 'eunis_code'].map(eunis_old2structuur)
self.eva.loc[use_new_indx, 'openbos'] = self.eva.loc[use_new_indx, 'eunis_code'].map(eunis_new2bos)
self.eva.loc[use_old_indx, 'openbos'] = self.eva.loc[use_old_indx, 'eunis_code'].map(eunis_old2bos)
self.eva.loc[use_new_indx, 'hooglaag'] = self.eva.loc[use_new_indx, 'eunis_code'].map(eunis_new2bos)
self.eva.loc[use_old_indx, 'hooglaag'] = self.eva.loc[use_old_indx, 'eunis_code'].map(eunis_old2bos)
self.eva.structuurtype.fillna('?', inplace=True)
self.eva.openbos.fillna('?', inplace=True)
self.eva.hooglaag.fillna('?', inplace=True)
# Append unique structuurtypes as a list
self.structuurtypes = [x for x in set(self.eva.structuurtype) if x != '?']
......@@ -548,7 +549,7 @@ class Doren:
"""header data of plots containing the species"""
self.sel_species = species_name
self.sel_species_ascii = self.sel_species.replace('ë', 'e').encode('ascii', 'replace')
self.sel_species_ascii = self.sel_species.replace('ë', 'e').encode('ascii', 'replace').decode('UTF-8')
self.positive_plots = set(self.spec.plot_obs_id[self.spec.species_name_hdr == species_name])
self.negative_plots = self.eva.index.difference(self.positive_plots)
......@@ -847,7 +848,7 @@ class Doren:
os.mkdir(pg_dir)
out_cols = ['plot_obs_id', 'tot_N_kmol_ha', 'soil_type_label', 'country_label', 'five_yearly_precip',
'five_yearly_temp', 'eunis_code', 'structuurtype', 'openbos']
'five_yearly_temp', 'eunis_code', 'structuurtype', 'hooglaag']
for col in out_cols:
assert hasattr(self.eva, col), 'Cannot write due to missing column {}'.format(col)
out_df = self.eva.loc[:, out_cols]
......@@ -961,7 +962,7 @@ class Doren:
self.report += 'Testing dataset integrity\n'
# Check for NAs
for check_col in self.status['covars'] + ['plot_obs_id', 'date_of_recording', 'structuurtype', 'eunis_old',
'eunis_new', 'eunis_code', 'openbos', 'plot_coordinates_wgs84']:
'eunis_new', 'eunis_code', 'hooglaag', 'plot_coordinates_wgs84']:
nas = self.eva.loc[:, check_col].isna()
if any(nas):
msg = '{0} -- {1} NA values found (index: {2})\n'.format(check_col, sum(nas), self.eva.loc[nas].index)
......@@ -992,6 +993,16 @@ class Doren:
spec_plots)
self.report += msg
# Assert 1:1 doorvertaling EUNIS types naar structuurtypes
piv = pd.pivot_table(data=self.eva, index='eunis_code', columns='structuurtype', values='plot_obs_id',
aggfunc='count')
piv['stype_count'] = piv.notna().sum(axis=1)
sel = piv.drop(piv.loc[piv.stype_count == 1].index)
if sel.empty:
self.report += 'Alle EUNIS typen vertalen 1:1 door naar een structuurtype.\n'
else:
self.report += 'Sommige EUNIS typen vertalen door naar > 1 structuurtype\n'
self.report += sel.to_csv(sep='\t')
"""
Verify that EUNIS types are translated 1:1 to structuurtypen
HDR 30/11/2020
"""
import pandas as pd
def _count_missing(table):
    """Return the number of NA cells in *table*, ignoring the 'tweede type' column."""
    return table.drop('tweede type', axis=1).isna().sum(axis=0).sum()


def _codes_without_single_type(table, code_col):
    """Return pivot rows for codes that do not map to exactly one 'type'.

    The table is pivoted with *code_col* as index and 'type' as columns,
    counting 'hoog_laag' entries. Each non-NA cell in a row is one
    (code, type) combination present in the table, so a row with a
    ``stype_count`` other than 1 breaks the 1:1 translation.

    :param table: translation table with the columns *code_col*, 'type' and 'hoog_laag'
    :param code_col: name of the column holding the EUNIS code
    :return: the offending pivot rows; empty when the translation is 1:1
    """
    piv = pd.pivot_table(data=table, index=code_col, columns='type', values='hoog_laag', aggfunc='count')
    piv['stype_count'] = piv.notna().sum(axis=1)
    return piv.loc[piv.stype_count != 1]


eunis_new = pd.read_excel(
    r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\EUNIS\EUNIS-classification.xlsx',
    sheet_name='eindlijst opgeschoond')
eunis_old = pd.read_excel(
    r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\EUNIS\Selectie_oude_EUNIS_typecodes.xlsx',
    sheet_name='eindlijst opgeschoond')

# (label, table, code column) for the new and old EUNIS translation tables.
_tables = (('new', eunis_new, 'eunis_code'), ('old', eunis_old, 'EUNIS_OLD'))

# Explicit raises are used instead of `assert` so the checks still run under
# `python -O` and report what went wrong.

# NO NAs
for _label, _table, _ in _tables:
    _n_missing = _count_missing(_table)
    if _n_missing != 0:
        raise AssertionError('{0} NA values found in the {1} EUNIS translation table'.format(_n_missing, _label))

# NEW and OLD EUNIS code to Structuurtype
for _label, _table, _code_col in _tables:
    _offending = _codes_without_single_type(_table, _code_col)
    if not _offending.empty:
        raise AssertionError('{0} EUNIS codes that do not translate to exactly one structuurtype: {1}'.format(
            _label, list(_offending.index)))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment