Commit 7e36f4fd authored by roelo008's avatar roelo008
Browse files

Added country as covariable

parent 572d3719
......@@ -6,11 +6,11 @@ doren = dc.Doren(header_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projec
doren.initiate(sample=False)
doren.apply_requirements('req1', 'req2', 'req3', 'req8', 'req9', 'req10',
doren.apply_requirements('req1', 'req2', 'req3', 'req4', 'req8', 'req9', 'req10',
aoi_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\geodata\eva_aoi_fin_3035.shp',
dem_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\DEM\DTM_3035.tif')
doren.add_posch(posch_src_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\POSCH_dep')
doren.add_posch(posch_src_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\POSCH_dep\v2')
doren.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\soil\b_processed',
covar_src='WRBLEV1_laea.tif', covar_name='soil_type', nominal=True)
......@@ -18,18 +18,22 @@ doren.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects
doren.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\DEM',
covar_src='DTM_3035.tif', covar_name='elevation')
doren.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\b_geodata\eur_cntrs_3035',
covar_src='esri_eur_cntrs_3035.shp', covar_name='country', raster=False, column='CNTRYNAME')
doren.add_yearly_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled',
covar_src_basename="EObs_v200e_rr_5yrmean", covar_name='5_yearly_precip')
doren.add_yearly_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled',
covar_src_basename="EObs_v200e_tg_5yrmean", covar_name='5_yearly_temp')
doren.write_stuff('report')
doren.filter_by_buffer(species_name='Calluna vulgaris', buffer_size=50000)
doren.write_stuff('buffer_png')
doren.write_stuff('headers_PG')
doren.filter_by_buffer(species_name='Rumex acetosa', buffer_size=25000)
doren.write_stuff('buffer_png')
doren.write_stuff('headers_PG')
doren.write_stuff('report')
......@@ -27,7 +27,8 @@ class Doren:
self.eva = None # holder for header df
self.spec = None # holder for species df
self.species = None # all unique species and their frequency
self.species_nrs = None # dictionary mapping species name to number
self.species2nr = None # dictionary mapping species name to number
self.nr2species = None # reversed dictionary
self.positive_plots = None # holder for df with plots containing a certain species
self.nearby_plots = None # holder for df with plots within Xm buffer of self.positive_plots
self.buffer_gdf = None # holder for geodataframe w. buffers
......@@ -51,6 +52,20 @@ class Doren:
:return: self.eva and self species populated with pandas dataframes
"""
# Read EVA species data
species = pd.read_csv(self.sp_src, sep='\t')
colnames_n = ['plot_obs_id', 'taxonomy', 'taxon_group', 'taxon_group_id', 'turboveg2_concept',
'matched_concept', 'match', 'layer', 'cover_perc', 'cover_code', 'species_name_hdr']
species.rename(columns=dict(zip(list(species), colnames_n)), inplace=True)
species['species_name_hdr'] = species.turboveg2_concept.apply(do.strip_leading_quote)
species['species_name_hdr'] = species.species_name_hdr.apply(do.simplify_species)
self.spec = species
# Unique species
self.species = set(self.spec.species_name_hdr)
self.species2nr = {v: i for i, v in enumerate(pd.Series(list(self.species)).sort_values())}
self.nr2species = {v: k for k, v in self.species2nr.items()}
# Read EVA Header data
eva = pd.read_csv(self.header_src, comment='#', sep='\t', low_memory=False, index_col='PlotObservationID')
if sample:
......@@ -67,16 +82,6 @@ class Doren:
self.eva = gp.GeoDataFrame(eva, geometry='plot_coordinates_3035', crs={"init": "epsg:3035"})
del eva
# Read EVA species data
species = pd.read_csv(self.sp_src, sep='\t')
colnames_n = ['plot_obs_id', 'taxonomy', 'taxon_group', 'taxon_group_id', 'turboveg2_concept',
'matched_concept', 'match', 'layer', 'cover_perc', 'cover_code', 'species_name_hdr']
species.rename(columns=dict(zip(list(species), colnames_n)), inplace=True)
species['species_name_hdr'] = species.turboveg2_concept.apply(do.strip_leading_quote)
species['species_name_hdr'] = species.species_name_hdr.apply(do.simplify_species)
self.species_nrs = {v: i for i,v in enumerate(species.species_name_hdr)}
self.spec = species
self.update_status()
self.report += 'Starting @ {0} with {1} EVA headers containing {2} unique ' \
'species.\n\n'.format(self.timestamp_full, self.status['n_plots'], self.status['n_species'])
......@@ -92,7 +97,7 @@ class Doren:
'''Apply wich requirements?'''
requirements = [req for req in reqs]
# Additional data sources. TODO: double check if provided if corresponding requirement is requested
# Additional data sources.
aoi_src = kwargs.get('aoi_src', None) # AOI as provided or None
dem_src = kwargs.get('dem_src', None) # DEM source
......@@ -113,25 +118,35 @@ class Doren:
possible_requirements = {}
if 'req1' in requirements:
possible_requirements['req1'] = self.eva.loc[self.eva.latitude.isna(), :].index
if 'req2' in requirements:
possible_requirements['req2'] = self.eva.loc[self.eva.longitude.isna(), :].index
if 'req3' in requirements:
possible_requirements['req3'] = self.eva.index.difference(gp.sjoin(left_df=self.eva, right_df=do.get_aoi(aoi_src), how='inner',
possible_requirements['req3'] = self.eva.index.difference(gp.sjoin(left_df=self.eva,
right_df=do.get_aoi(aoi_src), how='inner',
op='within', lsuffix='eva_', rsuffix='aoi_').index)
if 'req4' in requirements:
possible_requirements['req4'] = do.get_raster_vals(coords=self.eva.plot_coordinates_3035.loc[self.eva.longitude.notna() &
self.eva.latitude.notna()],
rast_src=dem_src).query('vals.isna() or vals > 500').index
rast_src=dem_src).query('vals.isna() or vals > 500').index
if 'req5' in requirements:
possible_requirements['req5'] = self.eva.loc[self.eva.altitude_m.isna(), :].index,
if 'req6' in requirements:
possible_requirements['req6'] = self.eva.loc[(self.eva['eunis'] == '?'), :].index
if 'req7' in requirements:
possible_requirements['req7'] = self.eva.loc[self.eva.eunis.isna(), :].index
if 'req8' in requirements:
possible_requirements["req8"] = self.eva.loc[self.eva.date_of_recording.isna(), :].index
if 'req9' in requirements:
possible_requirements["req9"] = self.eva.loc[self.eva['year'] < 1950, :].index
if 'req10' in requirements:
possible_requirements["req10"] = self.eva.index.difference(set(self.spec.plot_obs_id))
......@@ -156,9 +171,9 @@ class Doren:
'''
try:
nh3 = pd.read_csv(os.path.join(posch_src_dir, 'NH3-20200331.csv'), sep=',', comment='!',
nh3 = pd.read_csv(os.path.join(posch_src_dir, 'NH3-20200505.csv'), sep=',', comment='!',
index_col='plot_obs_id')
nox = pd.read_csv(os.path.join(posch_src_dir, 'NOx-20200331.csv'), sep=',', comment='!',
nox = pd.read_csv(os.path.join(posch_src_dir, 'NOx-20200505.csv'), sep=',', comment='!',
index_col='plot_obs_id')
except OSError:
print('Ndeposition data not found in {0}'.format(posch_src_dir))
......@@ -176,44 +191,57 @@ class Doren:
c2 = mapper['plot_obs_id'].isin(nh3.index)
c1 = mapper['year'].isin(nh3.columns)
mapper = mapper.loc[c1 & c2]
self.eva.loc[c2 & c1, 'nh3_mg/m2'] = nh3.lookup(mapper['plot_obs_id'], mapper['year'])
self.eva['nh3_mg/m2'] = self.eva['nh3_mg/m2'].fillna(-99)
self.eva.loc[c2 & c1, 'nh3_mg_m2'] = nh3.lookup(mapper['plot_obs_id'], mapper['year'])
# NOx
mapper = self.eva.assign(year=('year_' + self.eva['year'].astype(str)), plot_obs_id=self.eva.index)
c2 = mapper['plot_obs_id'].isin(nox.index)
c1 = mapper['year'].isin(nox.columns)
mapper = mapper.loc[c1 & c2]
self.eva.loc[c2 & c1, 'nox_mg/m2'] = nox.lookup(mapper['plot_obs_id'], mapper['year'])
self.eva['nox_mg/m2'] = self.eva['nox_mg/m2'].fillna(-99)
self.eva.loc[c2 & c1, 'nox_mg_m2'] = nox.lookup(mapper['plot_obs_id'], mapper['year'])
self.eva.dropna(axis=0, subset=['nh3_mg_m2', 'nox_mg_m2'], how='any', inplace=True)
# Totals in various units'''
self.eva['totN_mg/m2'] = self.eva.loc[:, ["nh3_mg/m2", "nox_mg/m2"]].sum(axis=1)
self.eva['totN_kg/ha'] = self.eva.loc[:, 'totN_mg/m2'].divide(100)
self.eva['totN_mol/ha'] = self.eva.loc[:, 'totN_kg/ha'].divide(14)
self.eva['totN_mg_m2'] = self.eva.loc[:, ["nh3_mg_m2", "nox_mg_m2"]].sum(axis=1)
self.eva['totN_kg_ha'] = self.eva.loc[:, 'totN_mg_m2'].divide(100)
self.eva['totN_mol_ha'] = self.eva.loc[:, 'totN_kg_ha'].divide(14)
self.update_status()
self.report += 'Added NDep from POSCH: {0} rows remaining.\n'.format(self.eva.shape[0])
def add_covar(self, covar_dir, covar_src, covar_name, nominal=False, keep_all=False):
'''
def add_covar(self, covar_dir, covar_src, covar_name, nominal=False, keep_all=False, raster=True, **kwargs):
"""
Sample values from a geospatial raster and add as new column
:param covar_dir: directory containing the raster
:param covar_src: *.tif geospatial raster with CRS assumed to be EPSG::3035
:param covar_name: name as to apepear in the column
:param nominal: boolean: nominal raster or not, default = False
:param keep_all: boolean: join how = left if True else 'inner'
:param raster: boolean, if True covar comes from a Raster datasource, if False from polygon. Def True
:return: updated self.eva with column covar name
'''
"""
# Get raster values based on 3035 easting-northing
cov_df = do.get_raster_vals(coords=self.eva.plot_coordinates_3035,
rast_src=os.path.join(covar_dir, covar_src), nominal=nominal)
if not os.path.exists(os.path.join(covar_dir, covar_src)):
raise OSError('Covariable does not exist: {0}'.format(os.path.join(covar_dir, covar_src)))
# translate raster values to categories if nominal
if nominal:
nominal_dict = do.get_raster_nominals(covar_name)
cov_df['label'] = cov_df.vals.map(nominal_dict)
if raster:
# Get raster values based on 3035 easting-northing
cov_df = do.get_raster_vals(coords=self.eva.plot_coordinates_3035,
rast_src=os.path.join(covar_dir, covar_src), nominal=nominal)
# translate raster values to categories if nominal
if nominal:
nominal_dict = do.get_raster_nominals(covar_name)
cov_df['label'] = cov_df.vals.map(nominal_dict)
else:
# Get the values from a column in a polygon shapefile
column = kwargs.get('column', None)
covar_gdf = gp.read_file(os.path.join(covar_dir, covar_src))
joined = gp.sjoin(left_df=self.eva, right_df=covar_gdf, how='left', op='within', lsuffix='eva_',
rsuffix='covar_')
cov_df = pd.DataFrame(data={'label': joined.loc[:, column]}, index=self.eva.index)
# drop NA, rename columns to reflect covariable name
cov_df.dropna(axis=0, how='any', inplace=True)
......@@ -280,9 +308,13 @@ class Doren:
nearby_plots = gp.sjoin(left_df=self.eva.loc[plots_wo_species], right_df=buff_gdf, op='within',
how='inner').index
msg = 'Found {0} plots w/o {1} within {2}m radius of {3} ' \
'positive plots containing\n\n'.format(len(nearby_plots), species_name, buffer_size,
len(plots_w_species), species_name)
self.report += msg
if self.verbose:
print('Found {0} plots w/o {1} within {2}m radius of {3} '
'positive plots'.format(len(nearby_plots), species_name, buffer_size, len(plots_w_species)))
print(msg)
self.buffer_gdf = buff_gdf
self.positive_plots = plots_w_species
......@@ -309,11 +341,14 @@ class Doren:
csv_out_name = '{0}_{1}_Species_Inventory.csv'.format(self.basename, self.timestamp)
sp_inv = pd.pivot_table(data=self.spec, index='species_name_hdr', values='plot_obs_id', aggfunc='count')
sp_inv['species_nr'] = sp_inv.index.maps(self.species_nrs).tolist()
sp_inv['species_nr'] = sp_inv.index.map(self.species2nr).tolist()
sp_inv.rename({'plot_obs_id': 'count'}, inplace=True)
sp_inv.sort_values(by='plob_obs_id', ascending=False).to_csv(os.path.join(csv_out_dir, csv_out_name),
sep=';', index=True)
self.report += 'Written Species list 2 file: {0}\n\n'.format(os.path.join(csv_out_dir, csv_out_name))
elif what == 'report':
'''Write processing report'''
......@@ -328,6 +363,8 @@ class Doren:
f.write(source)
f.write(footer)
self.report += 'Written processing report to file: {0}\n\n'.format(os.path.join(out_dir,report_name))
elif what == 'eva_headers':
csv_out_dir = os.path.join(out_dir, 'csv')
......@@ -338,11 +375,32 @@ class Doren:
self.eva.drop(labels=['plot_coordinates_wgs84', 'plot_coordinates_3035', "plot_point_wgs84"], axis=1). \
to_csv(os.path.join(out_dir, headers_name), sep=';')
# TODO: headers to Shapefile
self.report += 'Written EVA headers to file: {0}\n\n'.format(os.path.join(csv_out_dir, headers_name))
elif what == 'headers_ndep':
elif what == 'headers_PG':
'''Write headers with NDep specifically for Paul G'''
# TODO
pg_dir = os.path.join(out_dir, 'pg_input')
if not os.path.isdir(pg_dir):
os.mkdir(pg_dir)
pg_out_name_1 = '{0}_{1}_PGInput_NDep_{2}_{3}.txt'.format(self.basename, self.timestamp,
self.buffer_species, self.buffer_size)
pg_out_name_2 = '{0}_{1}_PGInput_Species_{2}_{3}.txt'.format(self.basename, self.timestamp,
self.buffer_species, self.buffer_size)
# First file with plot headers and NDep, sorted by plot header ID
self.eva.loc[self.positive_plots.union(self.nearby_plots),
['totN_mol_ha']].sort_index(axis=0, ascending=True)\
.to_csv(os.path.join(pg_dir, pg_out_name_1), sep='\t', index=True)
sp_sel = self.spec.loc[self.spec.plot_obs_id.isin(list(self.positive_plots.union(self.nearby_plots))),
['plot_obs_id', 'species_name_hdr']]
sp_sel['species_nr'] = sp_sel.species_name_hdr.map(self.species2nr).tolist()
sp_sel.loc[:, ['plot_obs_id', 'species_nr']].sort_values(by='species_nr', ascending=True)\
.to_csv(os.path.join(pg_dir, pg_out_name_2), sep='\t', index=False)
self.report += 'Written FORTRAN input to files: {0} {1}\n\n'.format(os.path.join(pg_dir, pg_out_name_1),
os.path.join(pg_dir, pg_out_name_2))
elif what == 'headers_shp':
......@@ -354,6 +412,8 @@ class Doren:
self.eva.drop(labels=['plot_coordinates_wgs84', 'plot_point_wgs84'], axis=1) \
.to_file(os.path.join(shp_out_dir, shp_out_name))
self.report += 'Written EVA headers to Shapefile: {0}\n\n'.format(os.path.join(shp_out_dir, shp_out_name))
elif what == 'buffer_png':
'''Draw map of Europe with positive plots, buffers and nearby plots'''
......@@ -397,6 +457,8 @@ class Doren:
fig.savefig(os.path.join(png_out_dir, png_out_name))
plt.close()
self.report += 'Written Buffer image to : {0}\n\n'.format(os.path.join(png_out_dir, png_out_name))
else:
print('{0} is not a valid reporting request'.format(what))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment