Skip to content
Snippets Groups Projects
Commit 7e36f4fd authored by roelo008's avatar roelo008
Browse files

Added country as covariable

parent 572d3719
Branches
No related tags found
No related merge requests found
......@@ -6,11 +6,11 @@ doren = dc.Doren(header_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projec
doren.initiate(sample=False)
doren.apply_requirements('req1', 'req2', 'req3', 'req8', 'req9', 'req10',
doren.apply_requirements('req1', 'req2', 'req3', 'req4', 'req8', 'req9', 'req10',
aoi_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\geodata\eva_aoi_fin_3035.shp',
dem_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\DEM\DTM_3035.tif')
doren.add_posch(posch_src_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\POSCH_dep')
doren.add_posch(posch_src_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\POSCH_dep\v2')
doren.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\soil\b_processed',
covar_src='WRBLEV1_laea.tif', covar_name='soil_type', nominal=True)
......@@ -18,18 +18,22 @@ doren.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects
doren.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\DEM',
covar_src='DTM_3035.tif', covar_name='elevation')
doren.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\b_geodata\eur_cntrs_3035',
covar_src='esri_eur_cntrs_3035.shp', covar_name='country', raster=False, column='CNTRYNAME')
doren.add_yearly_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled',
covar_src_basename="EObs_v200e_rr_5yrmean", covar_name='5_yearly_precip')
doren.add_yearly_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled',
covar_src_basename="EObs_v200e_tg_5yrmean", covar_name='5_yearly_temp')
doren.write_stuff('report')
doren.filter_by_buffer(species_name='Calluna vulgaris', buffer_size=50000)
doren.write_stuff('buffer_png')
doren.write_stuff('headers_PG')
doren.filter_by_buffer(species_name='Rumex acetosa', buffer_size=25000)
doren.write_stuff('buffer_png')
doren.write_stuff('headers_PG')
doren.write_stuff('report')
......@@ -27,7 +27,8 @@ class Doren:
self.eva = None # holder for header df
self.spec = None # holder for species df
self.species = None # all unique species and their frequency
self.species_nrs = None # dictionary mapping species name to number
self.species2nr = None # dictionary mapping species name to number
self.nr2species = None # reversed dictionary
self.positive_plots = None # holder for df with plots containing a certain species
self.nearby_plots = None # holder for df with plots within Xm buffer of self.positive_plots
self.buffer_gdf = None # holder for geodataframe w. buffers
......@@ -51,6 +52,20 @@ class Doren:
:return: self.eva and self species populated with pandas dataframes
"""
# Read EVA species data
species = pd.read_csv(self.sp_src, sep='\t')
colnames_n = ['plot_obs_id', 'taxonomy', 'taxon_group', 'taxon_group_id', 'turboveg2_concept',
'matched_concept', 'match', 'layer', 'cover_perc', 'cover_code', 'species_name_hdr']
species.rename(columns=dict(zip(list(species), colnames_n)), inplace=True)
species['species_name_hdr'] = species.turboveg2_concept.apply(do.strip_leading_quote)
species['species_name_hdr'] = species.species_name_hdr.apply(do.simplify_species)
self.spec = species
# Unique species
self.species = set(self.spec.species_name_hdr)
self.species2nr = {v: i for i, v in enumerate(pd.Series(list(self.species)).sort_values())}
self.nr2species = {v: k for k, v in self.species2nr.items()}
# Read EVA Header data
eva = pd.read_csv(self.header_src, comment='#', sep='\t', low_memory=False, index_col='PlotObservationID')
if sample:
......@@ -67,16 +82,6 @@ class Doren:
self.eva = gp.GeoDataFrame(eva, geometry='plot_coordinates_3035', crs={"init": "epsg:3035"})
del eva
# Read EVA species data
species = pd.read_csv(self.sp_src, sep='\t')
colnames_n = ['plot_obs_id', 'taxonomy', 'taxon_group', 'taxon_group_id', 'turboveg2_concept',
'matched_concept', 'match', 'layer', 'cover_perc', 'cover_code', 'species_name_hdr']
species.rename(columns=dict(zip(list(species), colnames_n)), inplace=True)
species['species_name_hdr'] = species.turboveg2_concept.apply(do.strip_leading_quote)
species['species_name_hdr'] = species.species_name_hdr.apply(do.simplify_species)
self.species_nrs = {v: i for i,v in enumerate(species.species_name_hdr)}
self.spec = species
self.update_status()
self.report += 'Starting @ {0} with {1} EVA headers containing {2} unique ' \
'species.\n\n'.format(self.timestamp_full, self.status['n_plots'], self.status['n_species'])
......@@ -92,7 +97,7 @@ class Doren:
'''Apply wich requirements?'''
requirements = [req for req in reqs]
# Additional data sources. TODO: double check if provided if corresponding requirement is requested
# Additional data sources.
aoi_src = kwargs.get('aoi_src', None) # AOI as provided or None
dem_src = kwargs.get('dem_src', None) # DEM source
......@@ -113,25 +118,35 @@ class Doren:
possible_requirements = {}
if 'req1' in requirements:
possible_requirements['req1'] = self.eva.loc[self.eva.latitude.isna(), :].index
if 'req2' in requirements:
possible_requirements['req2'] = self.eva.loc[self.eva.longitude.isna(), :].index
if 'req3' in requirements:
possible_requirements['req3'] = self.eva.index.difference(gp.sjoin(left_df=self.eva, right_df=do.get_aoi(aoi_src), how='inner',
possible_requirements['req3'] = self.eva.index.difference(gp.sjoin(left_df=self.eva,
right_df=do.get_aoi(aoi_src), how='inner',
op='within', lsuffix='eva_', rsuffix='aoi_').index)
if 'req4' in requirements:
possible_requirements['req4'] = do.get_raster_vals(coords=self.eva.plot_coordinates_3035.loc[self.eva.longitude.notna() &
self.eva.latitude.notna()],
rast_src=dem_src).query('vals.isna() or vals > 500').index
rast_src=dem_src).query('vals.isna() or vals > 500').index
if 'req5' in requirements:
possible_requirements['req5'] = self.eva.loc[self.eva.altitude_m.isna(), :].index,
if 'req6' in requirements:
possible_requirements['req6'] = self.eva.loc[(self.eva['eunis'] == '?'), :].index
if 'req7' in requirements:
possible_requirements['req7'] = self.eva.loc[self.eva.eunis.isna(), :].index
if 'req8' in requirements:
possible_requirements["req8"] = self.eva.loc[self.eva.date_of_recording.isna(), :].index
if 'req9' in requirements:
possible_requirements["req9"] = self.eva.loc[self.eva['year'] < 1950, :].index
if 'req10' in requirements:
possible_requirements["req10"] = self.eva.index.difference(set(self.spec.plot_obs_id))
......@@ -156,9 +171,9 @@ class Doren:
'''
try:
nh3 = pd.read_csv(os.path.join(posch_src_dir, 'NH3-20200331.csv'), sep=',', comment='!',
nh3 = pd.read_csv(os.path.join(posch_src_dir, 'NH3-20200505.csv'), sep=',', comment='!',
index_col='plot_obs_id')
nox = pd.read_csv(os.path.join(posch_src_dir, 'NOx-20200331.csv'), sep=',', comment='!',
nox = pd.read_csv(os.path.join(posch_src_dir, 'NOx-20200505.csv'), sep=',', comment='!',
index_col='plot_obs_id')
except OSError:
print('Ndeposition data not found in {0}'.format(posch_src_dir))
......@@ -176,44 +191,57 @@ class Doren:
c2 = mapper['plot_obs_id'].isin(nh3.index)
c1 = mapper['year'].isin(nh3.columns)
mapper = mapper.loc[c1 & c2]
self.eva.loc[c2 & c1, 'nh3_mg/m2'] = nh3.lookup(mapper['plot_obs_id'], mapper['year'])
self.eva['nh3_mg/m2'] = self.eva['nh3_mg/m2'].fillna(-99)
self.eva.loc[c2 & c1, 'nh3_mg_m2'] = nh3.lookup(mapper['plot_obs_id'], mapper['year'])
# NOx
mapper = self.eva.assign(year=('year_' + self.eva['year'].astype(str)), plot_obs_id=self.eva.index)
c2 = mapper['plot_obs_id'].isin(nox.index)
c1 = mapper['year'].isin(nox.columns)
mapper = mapper.loc[c1 & c2]
self.eva.loc[c2 & c1, 'nox_mg/m2'] = nox.lookup(mapper['plot_obs_id'], mapper['year'])
self.eva['nox_mg/m2'] = self.eva['nox_mg/m2'].fillna(-99)
self.eva.loc[c2 & c1, 'nox_mg_m2'] = nox.lookup(mapper['plot_obs_id'], mapper['year'])
self.eva.dropna(axis=0, subset=['nh3_mg_m2', 'nox_mg_m2'], how='any', inplace=True)
# Totals in various units'''
self.eva['totN_mg/m2'] = self.eva.loc[:, ["nh3_mg/m2", "nox_mg/m2"]].sum(axis=1)
self.eva['totN_kg/ha'] = self.eva.loc[:, 'totN_mg/m2'].divide(100)
self.eva['totN_mol/ha'] = self.eva.loc[:, 'totN_kg/ha'].divide(14)
self.eva['totN_mg_m2'] = self.eva.loc[:, ["nh3_mg_m2", "nox_mg_m2"]].sum(axis=1)
self.eva['totN_kg_ha'] = self.eva.loc[:, 'totN_mg_m2'].divide(100)
self.eva['totN_mol_ha'] = self.eva.loc[:, 'totN_kg_ha'].divide(14)
self.update_status()
self.report += 'Added NDep from POSCH: {0} rows remaining.\n'.format(self.eva.shape[0])
def add_covar(self, covar_dir, covar_src, covar_name, nominal=False, keep_all=False):
'''
def add_covar(self, covar_dir, covar_src, covar_name, nominal=False, keep_all=False, raster=True, **kwargs):
"""
Sample values from a geospatial raster and add as new column
:param covar_dir: directory containing the raster
:param covar_src: *.tif geospatial raster with CRS assumed to be EPSG::3035
:param covar_name: name as to apepear in the column
:param nominal: boolean: nominal raster or not, default = False
:param keep_all: boolean: join how = left if True else 'inner'
:param raster: boolean, if True covar comes from a Raster datasource, if False from polygon. Def True
:return: updated self.eva with column covar name
'''
"""
# Get raster values based on 3035 easting-northing
cov_df = do.get_raster_vals(coords=self.eva.plot_coordinates_3035,
rast_src=os.path.join(covar_dir, covar_src), nominal=nominal)
if not os.path.exists(os.path.join(covar_dir, covar_src)):
raise OSError('Covariable does not exist: {0}'.format(os.path.join(covar_dir, covar_src)))
# translate raster values to categories if nominal
if nominal:
nominal_dict = do.get_raster_nominals(covar_name)
cov_df['label'] = cov_df.vals.map(nominal_dict)
if raster:
# Get raster values based on 3035 easting-northing
cov_df = do.get_raster_vals(coords=self.eva.plot_coordinates_3035,
rast_src=os.path.join(covar_dir, covar_src), nominal=nominal)
# translate raster values to categories if nominal
if nominal:
nominal_dict = do.get_raster_nominals(covar_name)
cov_df['label'] = cov_df.vals.map(nominal_dict)
else:
# Get the values from a column in a polygon shapefile
column = kwargs.get('column', None)
covar_gdf = gp.read_file(os.path.join(covar_dir, covar_src))
joined = gp.sjoin(left_df=self.eva, right_df=covar_gdf, how='left', op='within', lsuffix='eva_',
rsuffix='covar_')
cov_df = pd.DataFrame(data={'label': joined.loc[:, column]}, index=self.eva.index)
# drop NA, rename columns to reflect covariable name
cov_df.dropna(axis=0, how='any', inplace=True)
......@@ -280,9 +308,13 @@ class Doren:
nearby_plots = gp.sjoin(left_df=self.eva.loc[plots_wo_species], right_df=buff_gdf, op='within',
how='inner').index
msg = 'Found {0} plots w/o {1} within {2}m radius of {3} ' \
'positive plots containing\n\n'.format(len(nearby_plots), species_name, buffer_size,
len(plots_w_species), species_name)
self.report += msg
if self.verbose:
print('Found {0} plots w/o {1} within {2}m radius of {3} '
'positive plots'.format(len(nearby_plots), species_name, buffer_size, len(plots_w_species)))
print(msg)
self.buffer_gdf = buff_gdf
self.positive_plots = plots_w_species
......@@ -309,11 +341,14 @@ class Doren:
csv_out_name = '{0}_{1}_Species_Inventory.csv'.format(self.basename, self.timestamp)
sp_inv = pd.pivot_table(data=self.spec, index='species_name_hdr', values='plot_obs_id', aggfunc='count')
sp_inv['species_nr'] = sp_inv.index.maps(self.species_nrs).tolist()
sp_inv['species_nr'] = sp_inv.index.map(self.species2nr).tolist()
sp_inv.rename({'plot_obs_id': 'count'}, inplace=True)
sp_inv.sort_values(by='plob_obs_id', ascending=False).to_csv(os.path.join(csv_out_dir, csv_out_name),
sep=';', index=True)
self.report += 'Written Species list 2 file: {0}\n\n'.format(os.path.join(csv_out_dir, csv_out_name))
elif what == 'report':
'''Write processing report'''
......@@ -328,6 +363,8 @@ class Doren:
f.write(source)
f.write(footer)
self.report += 'Written processing report to file: {0}\n\n'.format(os.path.join(out_dir,report_name))
elif what == 'eva_headers':
csv_out_dir = os.path.join(out_dir, 'csv')
......@@ -338,11 +375,32 @@ class Doren:
self.eva.drop(labels=['plot_coordinates_wgs84', 'plot_coordinates_3035', "plot_point_wgs84"], axis=1). \
to_csv(os.path.join(out_dir, headers_name), sep=';')
# TODO: headers to Shapefile
self.report += 'Written EVA headers to file: {0}\n\n'.format(os.path.join(csv_out_dir, headers_name))
elif what == 'headers_ndep':
elif what == 'headers_PG':
'''Write headers with NDep specifically for Paul G'''
# TODO
pg_dir = os.path.join(out_dir, 'pg_input')
if not os.path.isdir(pg_dir):
os.mkdir(pg_dir)
pg_out_name_1 = '{0}_{1}_PGInput_NDep_{2}_{3}.txt'.format(self.basename, self.timestamp,
self.buffer_species, self.buffer_size)
pg_out_name_2 = '{0}_{1}_PGInput_Species_{2}_{3}.txt'.format(self.basename, self.timestamp,
self.buffer_species, self.buffer_size)
# First file with plot headers and NDep, sorted by plot header ID
self.eva.loc[self.positive_plots.union(self.nearby_plots),
['totN_mol_ha']].sort_index(axis=0, ascending=True)\
.to_csv(os.path.join(pg_dir, pg_out_name_1), sep='\t', index=True)
sp_sel = self.spec.loc[self.spec.plot_obs_id.isin(list(self.positive_plots.union(self.nearby_plots))),
['plot_obs_id', 'species_name_hdr']]
sp_sel['species_nr'] = sp_sel.species_name_hdr.map(self.species2nr).tolist()
sp_sel.loc[:, ['plot_obs_id', 'species_nr']].sort_values(by='species_nr', ascending=True)\
.to_csv(os.path.join(pg_dir, pg_out_name_2), sep='\t', index=False)
self.report += 'Written FORTRAN input to files: {0} {1}\n\n'.format(os.path.join(pg_dir, pg_out_name_1),
os.path.join(pg_dir, pg_out_name_2))
elif what == 'headers_shp':
......@@ -354,6 +412,8 @@ class Doren:
self.eva.drop(labels=['plot_coordinates_wgs84', 'plot_point_wgs84'], axis=1) \
.to_file(os.path.join(shp_out_dir, shp_out_name))
self.report += 'Written EVA headers to Shapefile: {0}\n\n'.format(os.path.join(shp_out_dir, shp_out_name))
elif what == 'buffer_png':
'''Draw map of Europe with positive plots, buffers and nearby plots'''
......@@ -397,6 +457,8 @@ class Doren:
fig.savefig(os.path.join(png_out_dir, png_out_name))
plt.close()
self.report += 'Written Buffer image to : {0}\n\n'.format(os.path.join(png_out_dir, png_out_name))
else:
print('{0} is not a valid reporting request'.format(what))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment