Commit d7f3f8c1 authored by roelo008's avatar roelo008
Browse files

More classs methods

parent 8c4cb55e
from utils import doren_classes as dc
eva = dc.Doren(header_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\EVA\EVA_Doren_header.csv',
sp_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\EVA\EVA_Doren_species.csv',
verbose=True)
eva.initiate(sample=True)
eva.apply_requirements('req1', 'req2', 'req10')
eva.add_posch(posch_src_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\POSCH_dep')
eva.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\soil\b_processed',
covar_src='WRBLEV1_laea.tif', covar_name='soil_type', nominal=True)
eva.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\DEM',
covar_src='DTM_3035.tif', covar_name='elevation')
eva.add_yearly_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled',
covar_src_basename="EObs_v200e_rr_5yrmean", covar_name='5_yearly_temp')
eva.add_yearly_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled',
covar_src_basename="EObs_v200e_tg_5yrmean", covar_name='5_yearly_precip')
eva.filter_by_buffer(species_name='Rumex acetosa', buffer_size=50000)
doren = dc.Doren(header_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\EVA\EVA_Doren_header.csv',
sp_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\EVA\EVA_Doren_species.csv',
verbose=True)
doren.initiate(sample=True)
doren.apply_requirements('req1', 'req2', 'req3', 'req4', 'req8', 'req9', 'req10',
aoi_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\geodata\eva_aoi_fin_3035.shp',
dem_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\DEM\DTM_3035.tif')
doren.add_posch(posch_src_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\POSCH_dep')
doren.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\soil\b_processed',
covar_src='WRBLEV1_laea.tif', covar_name='soil_type', nominal=True)
doren.add_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\DEM',
covar_src='DTM_3035.tif', covar_name='elevation')
doren.add_yearly_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled',
covar_src_basename="EObs_v200e_rr_5yrmean", covar_name='5_yearly_precip')
doren.add_yearly_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled',
covar_src_basename="EObs_v200e_tg_5yrmean", covar_name='5_yearly_temp')
doren.report('report')
doren.filter_by_buffer(species_name='Calluna vulgaris', buffer_size=50000)
doren.report('buffer_png')
doren.filter_by_buffer(species_name='Rumex acetosa', buffer_size=25000)
doren.report('buffer_png')
......@@ -349,8 +349,8 @@ def get_raster_vals(coords, rast_src, nominal=False):
"""
if not os.path.isfile(rast_src):
print('{0} does not exists as valid rastersource'.format(rast_src))
raise
raise Exception('{0} does not exists as valid rastersource'.format(rast_src))
interpolation = 'nearest' if nominal else 'bilinear'
......
......@@ -8,7 +8,8 @@ import pandas as pd
import numpy as np
import geopandas as gp
import datetime
import pickle
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import shapely
from utils import doren as do
......@@ -25,13 +26,24 @@ class Doren:
self.sp_src = sp_src # path to EVA species source data
self.eva = None # holder for header df
self.spec = None # holder for species df
self.species = None # all unique species and their frequency
self.species_nrs = None # dictionary mapping species name to number
self.positive_plots = None # holder for df with plots containing a certain species
self.nearby_plots = None # holder for df with plots within Xm buffer of self.positive_plots
self.species = set() # all unique species
self.status = {'n_plots': 0, 'columns': [], 'n_species': 0} # relevant stats
self.report = '' # tracking everything in a report
self.out_dir = '' # where to write stuff?
self.buffer_gdf = None # holder for geodataframe w. buffers
self.buffer_species = None # holder for a species
self.buffer_size = None # holder for buffer size
self.status = {'n_plots': 0, # relevant stats
'columns': [],
'n_species': 0}
self.basename = 'DOREN' # basename for all output
self.verbose = verbose # boolean, report on progress?
self.report = '' # tracking everything in a report
self.base_out_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\scratch'
self.background_shp = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\geodata\europe_3035.shp'
self.timestamp = datetime.datetime.now().strftime("%Y%m%d")
self.timestamp_full = datetime.datetime.now().strftime("%Y%m%d%H%M")
self.aoi_shp = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\geodata\eva_aoi_fin_3035.shp'
def initiate(self, sample=False):
"""
......@@ -48,8 +60,9 @@ class Doren:
eva.longitude = pd.to_numeric(eva.longitude, errors='coerce', downcast='float')
eva['eunis_top'] = eva.apply(lambda row: do.first_letter(row.eunis), axis=1)
eva['veg_type'] = eva.apply(lambda row: do.eunis_to_veg(row.eunis), axis=1)
eva['year'] = eva.apply(lambda row: do.int_year(row.date_of_recording), axis=1)
eva['year'] = eva.date_of_recording.fillna(0).astype(np.uint16)
eva['plot_coordinates_wgs84'] = list(zip(eva.longitude, eva.latitude))
eva['plot_point_wgs84'] = [shapely.geometry.Point(x, y) for (x, y) in eva.plot_coordinates_wgs84]
eva['plot_coordinates_3035'] = eva.loc[:, 'plot_coordinates_wgs84'].apply(do.lon_lat_2_east_north)
self.eva = gp.GeoDataFrame(eva, geometry='plot_coordinates_3035', crs={"init": "epsg:3035"})
del eva
......@@ -61,23 +74,26 @@ class Doren:
species.rename(columns=dict(zip(list(species), colnames_n)), inplace=True)
species['species_name_hdr'] = species.turboveg2_concept.apply(do.strip_leading_quote)
species['species_name_hdr'] = species.species_name_hdr.apply(do.simplify_species)
self.species_nrs = {v: i for i,v in enumerate(species.species_name_hdr)}
self.spec = species
self.update_status()
self.report += 'Starting @ {0} with {1} EVA headers containing {2} unique ' \
'species.\n\n'.format(self.timestamp_full, self.status['n_plots', self.status['n_species']])
def apply_requirements(self, *reqs, **kwargs):
"""
Filter down EVA headers by applying one or more requirements
:param reqs: one or more requirements are 'reqX'
:param requirements: one or more requirements are 'reqX'
:param kwargs: source to additional files, if needed
:return: updated self.eva
"""
'''Apply wich requirements?'''
reqs = [req for req in reqs]
requirements = [req for req in reqs]
# Additional data sources. TODO: double check if provided if corresponding requirement is requested
aoi_src = kwargs.get('aoi_src', None) # AOI as provided or else whole world
aoi_src = kwargs.get('aoi_src', None) # AOI as provided or None
dem_src = kwargs.get('dem_src', None) # DEM source
'''Description of requirements'''
......@@ -94,34 +110,33 @@ class Doren:
"Req 10: plot has species inventory"]))
'''Which rows do NOT meet requirements?'''
req1 = self.eva.loc[self.eva.latitude.isna(), :].index
req2 = self.eva.loc[self.eva.longitude.isna(), :].index
if "req3" in reqs:
req3 = self.eva.index.difference(gp.sjoin(left_df=self.eva, right_df=do.get_aoi(aoi_src), how='inner',
op='within', lsuffix='eva_', rsuffix='aoi_').index)
if 'req4' in reqs:
req4 = do.get_raster_vals(coords=self.eva.plot_coordinates_3035.loc[self.eva.index.difference(req1.union(req2))],
rast_src=dem_src).query('vals.isna() or vals > 500').index
req5 = self.eva.loc[self.eva.altitude_m.isna(), :].index
req6 = self.eva.loc[(self.eva['eunis'] == '?'), :].index
req7 = self.eva.loc[self.eva.eunis.isna(), :].index
req8 = self.eva.loc[self.eva.date_of_recording.isna(), :].index
req9 = self.eva.loc[self.eva['year'] < 1950, :].index
req10 = self.eva.index.difference(set(self.spec.plot_obs_id))
possible_requirements = {
'req1': self.eva.loc[self.eva.latitude.isna(), :].index,
'req2': self.eva.loc[self.eva.longitude.isna(), :].index,
'req3': self.eva.index.difference(gp.sjoin(left_df=self.eva, right_df=do.get_aoi(aoi_src), how='inner',
op='within', lsuffix='eva_', rsuffix='aoi_').index),
'req4': do.get_raster_vals(coords=self.eva.plot_coordinates_3035.loc[self.eva.longitude.notna() &
self.eva.longitude.notna()],
rast_src=dem_src).query('vals.isna() or vals > 500').index,
'req5': self.eva.loc[self.eva.altitude_m.isna(), :].index,
'req6': self.eva.loc[(self.eva['eunis'] == '?'), :].index,
'req7': self.eva.loc[self.eva.eunis.isna(), :].index,
"req8": self.eva.loc[self.eva.date_of_recording.isna(), :].index,
'req9': self.eva.loc[self.eva['year'] < 1950, :].index,
"req10": self.eva.index.difference(set(self.spec.plot_obs_id))}
'''Create union of rows that DO NOT meet requirements and drop rows'''
drop_rows = set().union(*[eval(x) for x in reqs])
drop_rows = set().union(*[possible_requirements[x] for x in requirements])
self.eva.drop(labels=list(drop_rows), inplace=True)
self.update_status()
'''Messages on requirements'''
msgs = [do.query_msg(x, y) for x, y in zip([req_descs[x] for x in reqs], [eval(x) for x in reqs])]
footer = 'Union between requirements {0} gives {1} rows that DO NOT meet requirements\n'.format(', '.join(reqs),
len(drop_rows))
msgs = [do.query_msg(x, y) for x, y in zip([req_descs[x] for x in requirements],
[possible_requirements[x] for x in requirements])]
self.report += 'Applying requirements as follows: \n'
self.report += ''.join(msgs)
self.report += footer
self.prune_species()
self.update_status()
self.report += 'Union between requirements {0} gives {1} rows that DO NOT meet requirements. ' \
'Remaining: {2}\n\n'.format(', '.join(requirements), len(drop_rows), self.status['n_plots'])
def add_posch(self, posch_src_dir):
'''
......@@ -167,12 +182,11 @@ class Doren:
self.eva['totN_kg/ha'] = self.eva.loc[:, 'totN_mg/m2'].divide(100)
self.eva['totN_mol/ha'] = self.eva.loc[:, 'totN_kg/ha'].divide(14)
# TODO: update self.log
self.prune_species()
self.update_status()
self.report += 'Added NDep from POSCH: {0} rows remaining.\n'.format(self.eva.shape[0])
def add_covar(self, covar_dir, covar_src, covar_name, nominal=False, keep_all=False, **kwargs):
def add_covar(self, covar_dir, covar_src, covar_name, nominal=False, keep_all=False):
'''
Sample values from a geospatial raster and add as new column
:param covar_dir: directory containing the raster
......@@ -183,15 +197,9 @@ class Doren:
:return: updated self.eva with column covar name
'''
query = kwargs.get("query", 'year > 1900') # retrieve query if provided, else default to generic query
try:
self.eva.query(query)
except KeyError:
print('{0} is not a valid query, try quering one of columns {1}'.format(query, list(self.eva)))
# Get raster values based on 3035 easting-northing
cov_df = do.get_raster_vals(coords=self.eva.query(query).plot_coordinates_3035,
cov_df = do.get_raster_vals(coords=self.eva.plot_coordinates_3035,
rast_src=os.path.join(covar_dir, covar_src), nominal=nominal)
# translate raster values to categories if nominal
......@@ -205,35 +213,49 @@ class Doren:
[covar_name + '_{0}'.format(x) for x in cov_df.columns.tolist()])), inplace=True)
# join to self.eva
self.eva.join(other=cov_df, how='left' if keep_all else 'inner')
self.eva = self.eva.join(other=cov_df, how='left' if keep_all else 'inner')
msg = 'Covariable {0} based on {1} yields {2} values\n'.format(covar_name, covar_src, cov_df.shape[0])
if self.verbose:
print(msg)
# TODO: update self.log
self.update_status()
self.report += 'Added Covariable {0} from {1} with {2} plots remaining.\n'.format(covar_name, covar_src,
self.status['n_plots'])
def add_yearly_covar(self, covar_dir, covar_src_basename, covar_name, nominal=False):
"""
Add a covariable column to self.eva that is differentiated per year
:param covar_dir:
:param covar_src_basename: basename of yearly rasters. Assumed: <covar_src_basename>_<year>.tif
:param covar_src_basename: basename of yearly rasters. Assumed: <covar_src_basename><year>.tif
:param covar_name:
:param nominal:
:return:
"""
# get all years present in the EVA header dataframe
years = set(self.eva.year)
years = set(self.eva.year.astype(np.uint16))
# new col
self.eva['covar_name'] = None
# loop through years and add covariable for each. Make sure to retain all rows
for year in years:
self.add_covar(covar_dir=covar_dir, covar_src='{0}_{1}.tif'.format(covar_src_basename, year),
covar_name=covar_name, nomimal=nominal, keep_all=True, query='year == year')
src = os.path.join(covar_dir, '{0}{1}.tif'.format(covar_src_basename, year))
sel_eva = self.eva.loc[self.eva['year'] == year, :].index
vals = do.get_raster_vals(coords=self.eva.loc[sel_eva, 'plot_point_wgs84'],
rast_src=src, nominal=nominal)
# Q&D solution: I know the temperature and precipitation rasters are in WGS84!
self.eva.loc[sel_eva, covar_name] = vals.vals
self.eva.dropna(axis=0, subset=[covar_name], how='any', inplace=True)
self.update_status()
self.prune_species()
self.report += 'Added Covariable {0} from {1} with {2} plots remaining.\n'.format(covar_name, covar_dir,
self.status['n_plots'])
def filter_by_buffer(self, species_name, buffer_size):
"""
......@@ -245,6 +267,9 @@ class Doren:
plots_w_species = set(self.spec.plot_obs_id[self.spec.species_name_hdr == species_name])
plots_wo_species = self.eva.index.difference(plots_w_species)
assert plots_w_species.issubset(self.eva.index)
assert plots_wo_species.issubset(self.eva.index)
"""create buffers around each selected veg plot"""
buffers = [shapely.geometry.Point(p.x, p.y).buffer(buffer_size) for p in self.eva.loc[plots_w_species].geometry]
buff = shapely.ops.unary_union(buffers) # Using shapely directly instead of GeoPandas, neat!
......@@ -253,30 +278,118 @@ class Doren:
nearby_plots = gp.sjoin(left_df=self.eva.loc[plots_wo_species], right_df=buff_gdf, op='within',
how='inner').index
assert set(nearby_plots) <= set(plots_wo_species), 'nearby plots are not a subset of nearby plots :-( '
if self.verbose:
print('Found {0} plots w/o {1} within {2}m radius of {3} '
'positive plots'.format(len(nearby_plots), species_name, buffer_size, len(plots_w_species)))
self.buffer_gdf = buff_gdf
self.positive_plots = plots_w_species
self.nearby_plots = nearby_plots
self.buffer_species = species_name
self.buffer_size = buffer_size
def report(self, what, format):
def report(self, what):
"""
Write any contents to file.
"""
# preliminaries
timestamp_brief = datetime.datetime.now().strftime("%Y%m%d")
timestamp_full = datetime.datetime.now().strftime("%Y%m%d%H%M")
out_dir = os.path.join(self.out_dir, 'Buffers_{0}'.format(timestamp_brief))
out_dir = os.path.join(self.base_out_dir, '{0}_{1}'.format(self.basename, self.timestamp))
if not os.path.isdir(out_dir):
os.mkdir(out_dir)
pass
if what == 'species_list':
'''write species list and frequencies to file'''
csv_out_dir = os.path.join(out_dir, 'csv')
if not os.path.isdir(csv_out_dir):
os.mkdir(csv_out_dir)
csv_out_name = '{0}_{1}_Species_Inventory.csv'.format(self.basename, self.timestamp)
sp_inv = pd.pivot_table(data=self.spec, index='species_name_hdr', values='plot_obs_id', aggfunc='count')
sp_inv['species_nr'] = sp_inv.index.maps(self.species_nrs).tolist()
sp_inv.rename({'plot_obs_id': 'count'}, inplace=True)
sp_inv.sort_values(by='plob_obs_id', ascending=False).to_csv(os.path.join(csv_out_dir, csv_out_name),
sep=';', index=True)
elif what == 'report':
'''Write processing report'''
report_name = '{0}_{1}_processing_report.txt'.format(self.basename, self.timestamp)
header = 'Processing report for {0} created {1}'.format(self.basename, self.timestamp)
footer = '\nMade with Python 3.5 using Pandas by Hans Roelofsen, WEnR team B&B'
source = 'See git for source script: https://git.wur.nl/roelo008/doren_2019'
with open(os.path.join(out_dir, report_name), 'w') as f:
f.write(header)
f.write(self.report)
f.write(source)
f.write(footer)
elif what == 'eva_headers':
csv_out_dir = os.path.join(out_dir, 'csv')
if not os.path.isdir(csv_out_dir):
os.mkdir(csv_out_dir)
headers_name = '{0}_{1}_EVA_headers.csv'.format(self.basename, self.timestamp)
self.eva.drop(labels=['plot_coordinates_wgs84', 'plot_coordinates_3035', "plot_point_wgs84"], axis=1). \
to_csv(os.path.join(out_dir, headers_name), sep=';')
# TODO: headers to Shapefile
elif what == 'headers_ndep':
'''Write headers with NDep specifically for Paul G'''
# TODO
elif what == 'buffer_png':
'''Draw map of Europe with positive plots, buffers and nearby plots'''
png_out_dir = os.path.join(out_dir, 'buffer_png')
if not os.path.isdir(png_out_dir):
os.mkdir(png_out_dir)
png_out_name = '{0}_{1}_{2}_{3}m.png'.format(self.basename, self.timestamp, self.buffer_species,
self.buffer_size)
# read additional shapefiles
background = gp.read_file(self.background_shp)
aoi = gp.read_file(self.aoi_shp)
# set the canvas
fig = plt.figure(figsize=(8, 10))
ax = fig.add_subplot(111)
ax.set(xlim=[2448000, 5427000], ylim=[1884000, 4888000])
ax.set_aspect('equal')
plt.tick_params(axis='both', labelbottom=False, labeltop=False, labelleft=False, labelright=False)
# plot layers
background.plot(ax=ax, facecolor='#729b6f', edgecolor="none") # background countries
aoi.plot(ax=ax, facecolor='#2d2c2c', alpha=0.3, edgecolor="none") # AOI
self.buffer_gdf.boundary.plot(ax=ax, edgecolor='#232323') # buffers
self.eva.loc[self.nearby_plots].plot(ax=ax, marker='o', color='#ff9e17', markersize=2, legend=True)
self.eva.loc[self.positive_plots].plot(ax=ax, marker='o', color='#be2f00', markersize=2)
background.boundary.plot(ax=ax, edgecolor='#365733')
# legend
legend_patches = [mpatches.Patch(label='Resident plots, n={:,}'.format(len(self.positive_plots)),
facecolor='#be2f00'),
mpatches.Patch(label='Nearby plots, n={:,}'.format(len(self.nearby_plots)),
facecolor='#ff9e17'),
mpatches.Patch(label='Buffers. Size={:,}m'.format(self.buffer_size), edgecolor='#232323',
facecolor='none')]
plt.legend(handles=legend_patches, loc='upper left', fontsize='small', frameon=True,
title=self.buffer_species)
# save to png
fig.savefig(os.path.join(png_out_dir, png_out_name))
plt.close()
else:
print('{0} is not a valid reporting request'.format(what))
def update_status(self):
"""
......@@ -284,18 +397,10 @@ class Doren:
:return: updates self.status
"""
self.status.update(n_plots=self.eva.shape[0], columns=self.eva.columns.tolist(), n_species=len(self.species))
self.spec = self.spec.loc[self.spec.plot_obs_id.isin(self.eva.index)] # remove from the species df
self.species = set(self.spec.species_name_hdr)
self.status.update(n_plots=self.eva.shape[0], columns=self.eva.columns.tolist(), n_species=len(self.species))
def prune_species(self):
"""
Prune the species inventory to the plots currently present in the eva headers
:return: reduced species dataframe
"""
species_plot_2_drop = set(self.spec.plot_obs_id).difference(self.eva.index) # plots removed from the header df
self.spec = self.spec.loc[self.spec.plot_obs_id.isin(species_plot_2_drop)] # remove from the species df
def foo(*reqs):
return [x for x in reqs]
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment