Commit 572d3719 authored by roelo008's avatar roelo008
Browse files

Fixed lots of mistakes in the class

parent d7f3f8c1
......@@ -4,9 +4,9 @@ doren = dc.Doren(header_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projec
sp_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\EVA\EVA_Doren_species.csv',
verbose=True)
doren.initiate(sample=True)
doren.initiate(sample=False)
doren.apply_requirements('req1', 'req2', 'req3', 'req4', 'req8', 'req9', 'req10',
doren.apply_requirements('req1', 'req2', 'req3', 'req8', 'req9', 'req10',
aoi_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\geodata\eva_aoi_fin_3035.shp',
dem_src=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\DEM\DTM_3035.tif')
......@@ -24,12 +24,12 @@ doren.add_yearly_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_p
doren.add_yearly_covar(covar_dir=r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled',
covar_src_basename="EObs_v200e_tg_5yrmean", covar_name='5_yearly_temp')
doren.report('report')
doren.write_stuff('report')
doren.filter_by_buffer(species_name='Calluna vulgaris', buffer_size=50000)
doren.report('buffer_png')
doren.write_stuff('buffer_png')
doren.filter_by_buffer(species_name='Rumex acetosa', buffer_size=25000)
doren.report('buffer_png')
doren.write_stuff('buffer_png')
......@@ -71,9 +71,13 @@ def get_aoi(*src):
aoi_diss = aoi_raw.dissolve(by='featurecla')
return gp.GeoDataFrame(crs={"init": "epsg:3035"}, data={'aoi': 1}, index=[1], geometry=aoi_diss.geometry.values)
else:
return gp.GeoDataFrame(crs={"init": "epsg:4326"}, data={'aoi': 1}, index=[1],
return gp.GeoDataFrame(crs={"init": "epsg:3035"}, data={'aoi': 1}, index=[1],
geometry=[Polygon([(-180, 90), (180, 90), (180, -90), (-180, -90)])])
# TODO: convert AOI and world AOI to EPSG3035
'''
Extent in 3035
-3291518.0991032142192125,-6960799.0365917012095451 : 10028135.1418357975780964,6405773.0257125394418836
'''
def first_letter(x):
"""
......@@ -82,10 +86,9 @@ def first_letter(x):
:return: first letter of x
"""
try:
out = x[0]
return x[0]
except TypeError:
out = np.nan
return out
return np.nan
def int_year(x):
......@@ -101,20 +104,6 @@ def int_year(x):
return out
def apply_dict(x, dict):
"""
wrapper function around applying a dictionary
:param x: key
:param dict: dictionary
:return: dict[x] or 'None' if Key Error
"""
try:
out = dict[x]
except KeyError:
out = None
return out
def eunis_to_veg(eunis_in):
# dictionary mapping EUNIS type to veg. type, see E-mail Wieger W 18-09-2019
eunis_dict = {"A25a": "gras", "A25b": "gras", "A25c": "gras", "A25d": "gras", "B11a": "gras", "B11b": "gras",
......@@ -277,38 +266,6 @@ def generate_square_shapes(sq_ids, size):
return gp.GeoDataFrame(crs={"init": 'epsg:3035'}, geometry=shapes, data={'ID': list(u_sq_ids), 'size': str(size)})
def query_eva_for_species(eva_species_db, eva_header_db, species_name):
"""
Query the eva_db for plots containing a species name
:param eva_species_db: EVA plot-species database
:param: eva_header_db: EVA plot-header database
:param species_name: plant species name
:return: eva plot headers of plots containing species_name
"""
'''NO LONGER NEEDED, REPLACED WITH ONELINER 20200409'''
# query for species name
sel_plot_ids = eva_species_db.loc[eva_species_db['species_name_hdr'] == species_name, 'plot_obs_id']
if not sel_plot_ids.empty:
u_plot_ids = set(sel_plot_ids) # unique plots
print('Found {0} plots with species {1}'.format(len(u_plot_ids), species_name))
return eva_header_db.loc[eva_header_db['plot_obs_id'].isin(list(u_plot_ids)), :] # selected header data
else:
raise Exception('Species {0} not found even once in {1}'.format(species_name, eva_species_db))
def query_msg(desc, indx):
"""
Generate reporting mesage for a selection requirement
:param desc: description of the selection requirement (str)
:param indx: pandas df index of rows not compyling to requirement
:return: message
"""
return '{0} - {1}'.format(desc, '{0} rows failed\n'.format(len(indx)))
def get_raster_nominals(cov_name):
"""
Get the nominal categories associated with a covariable raster
......@@ -321,7 +278,7 @@ def get_raster_nominals(cov_name):
elif os.path.basename(os.getcwd()) == 'a_prep':
src = r'../utils/legend_mappings.json'
else:
src = r'c:/apps/proj_code/doren_2019/utils'
src = r'c:/apps/proj_code/doren_2019/utils' # fuck it
with open(src, 'r') as j:
all_mappings = json.load(j)
......@@ -330,8 +287,7 @@ def get_raster_nominals(cov_name):
mapping = all_mappings[cov_name]
out = {}
for k, v in mapping.items():
out[int(k)] = v
out['weg'] = np.nan # to accomodate categories that should not be present in the results
out[int(k)] = np.nan if v == 'weg' else v
return out
except KeyError:
print('No legend mapping available for {0}'.format(cov_name))
......@@ -351,7 +307,6 @@ def get_raster_vals(coords, rast_src, nominal=False):
if not os.path.isfile(rast_src):
raise Exception('{0} does not exists as valid rastersource'.format(rast_src))
interpolation = 'nearest' if nominal else 'bilinear'
if isinstance(coords.iloc[0], str):
......
......@@ -39,7 +39,7 @@ class Doren:
self.basename = 'DOREN' # basename for all output
self.verbose = verbose # boolean, report on progress?
self.report = '' # tracking everything in a report
self.base_out_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\scratch'
self.base_out_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\z_scratch'
self.background_shp = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\geodata\europe_3035.shp'
self.timestamp = datetime.datetime.now().strftime("%Y%m%d")
self.timestamp_full = datetime.datetime.now().strftime("%Y%m%d%H%M")
......@@ -79,7 +79,7 @@ class Doren:
self.update_status()
self.report += 'Starting @ {0} with {1} EVA headers containing {2} unique ' \
'species.\n\n'.format(self.timestamp_full, self.status['n_plots', self.status['n_species']])
'species.\n\n'.format(self.timestamp_full, self.status['n_plots'], self.status['n_species'])
def apply_requirements(self, *reqs, **kwargs):
"""
......@@ -110,20 +110,30 @@ class Doren:
"Req 10: plot has species inventory"]))
'''Which rows do NOT meet requirements?'''
possible_requirements = {
'req1': self.eva.loc[self.eva.latitude.isna(), :].index,
'req2': self.eva.loc[self.eva.longitude.isna(), :].index,
'req3': self.eva.index.difference(gp.sjoin(left_df=self.eva, right_df=do.get_aoi(aoi_src), how='inner',
op='within', lsuffix='eva_', rsuffix='aoi_').index),
'req4': do.get_raster_vals(coords=self.eva.plot_coordinates_3035.loc[self.eva.longitude.notna() &
self.eva.longitude.notna()],
rast_src=dem_src).query('vals.isna() or vals > 500').index,
'req5': self.eva.loc[self.eva.altitude_m.isna(), :].index,
'req6': self.eva.loc[(self.eva['eunis'] == '?'), :].index,
'req7': self.eva.loc[self.eva.eunis.isna(), :].index,
"req8": self.eva.loc[self.eva.date_of_recording.isna(), :].index,
'req9': self.eva.loc[self.eva['year'] < 1950, :].index,
"req10": self.eva.index.difference(set(self.spec.plot_obs_id))}
possible_requirements = {}
if 'req1' in requirements:
possible_requirements['req1'] = self.eva.loc[self.eva.latitude.isna(), :].index
if 'req2' in requirements:
possible_requirements['req2'] = self.eva.loc[self.eva.longitude.isna(), :].index
if 'req3' in requirements:
possible_requirements['req3'] = self.eva.index.difference(gp.sjoin(left_df=self.eva, right_df=do.get_aoi(aoi_src), how='inner',
op='within', lsuffix='eva_', rsuffix='aoi_').index)
if 'req4' in requirements:
possible_requirements['req4'] = do.get_raster_vals(coords=self.eva.plot_coordinates_3035.loc[self.eva.longitude.notna() &
self.eva.latitude.notna()],
rast_src=dem_src).query('vals.isna() or vals > 500').index
if 'req5' in requirements:
possible_requirements['req5'] = self.eva.loc[self.eva.altitude_m.isna(), :].index,
if 'req6' in requirements:
possible_requirements['req6'] = self.eva.loc[(self.eva['eunis'] == '?'), :].index
if 'req7' in requirements:
possible_requirements['req7'] = self.eva.loc[self.eva.eunis.isna(), :].index
if 'req8' in requirements:
possible_requirements["req8"] = self.eva.loc[self.eva.date_of_recording.isna(), :].index
if 'req9' in requirements:
possible_requirements["req9"] = self.eva.loc[self.eva['year'] < 1950, :].index
if 'req10' in requirements:
possible_requirements["req10"] = self.eva.index.difference(set(self.spec.plot_obs_id))
'''Create union of rows that DO NOT meet requirements and drop rows'''
drop_rows = set().union(*[possible_requirements[x] for x in requirements])
......@@ -131,8 +141,8 @@ class Doren:
self.update_status()
'''Messages on requirements'''
msgs = [do.query_msg(x, y) for x, y in zip([req_descs[x] for x in requirements],
[possible_requirements[x] for x in requirements])]
msgs = ['{0} - {1}'.format(x, '{0} rows failed\n'.format(len(y)))
for x, y in zip([req_descs[x] for x in requirements], [possible_requirements[x] for x in requirements])]
self.report += 'Applying requirements as follows: \n'
self.report += ''.join(msgs)
self.report += 'Union between requirements {0} gives {1} rows that DO NOT meet requirements. ' \
......@@ -185,7 +195,6 @@ class Doren:
self.update_status()
self.report += 'Added NDep from POSCH: {0} rows remaining.\n'.format(self.eva.shape[0])
def add_covar(self, covar_dir, covar_src, covar_name, nominal=False, keep_all=False):
'''
Sample values from a geospatial raster and add as new column
......@@ -197,7 +206,6 @@ class Doren:
:return: updated self.eva with column covar name
'''
# Get raster values based on 3035 easting-northing
cov_df = do.get_raster_vals(coords=self.eva.plot_coordinates_3035,
rast_src=os.path.join(covar_dir, covar_src), nominal=nominal)
......@@ -205,7 +213,7 @@ class Doren:
# translate raster values to categories if nominal
if nominal:
nominal_dict = do.get_raster_nominals(covar_name)
cov_df['label'] = cov_df.vals.apply(do.apply_dict, dict=nominal_dict)
cov_df['label'] = cov_df.vals.map(nominal_dict)
# drop NA, rename columns to reflect covariable name
cov_df.dropna(axis=0, how='any', inplace=True)
......@@ -215,15 +223,12 @@ class Doren:
# join to self.eva
self.eva = self.eva.join(other=cov_df, how='left' if keep_all else 'inner')
msg = 'Covariable {0} based on {1} yields {2} values\n'.format(covar_name, covar_src, cov_df.shape[0])
self.update_status()
msg = 'Added Covariable {0} from {1} with {2} plots remaining.\n'.format(covar_name, covar_src,
self.status['n_plots'])
if self.verbose:
print(msg)
self.update_status()
self.report += 'Added Covariable {0} from {1} with {2} plots remaining.\n'.format(covar_name, covar_src,
self.status['n_plots'])
self.report += msg
def add_yearly_covar(self, covar_dir, covar_src_basename, covar_name, nominal=False):
"""
......@@ -234,6 +239,7 @@ class Doren:
:param nominal:
:return:
"""
# get all years present in the EVA header dataframe
years = set(self.eva.year.astype(np.uint16))
......@@ -247,8 +253,6 @@ class Doren:
vals = do.get_raster_vals(coords=self.eva.loc[sel_eva, 'plot_point_wgs84'],
rast_src=src, nominal=nominal)
# Q&D solution: I know the temperature and precipitation rasters are in WGS84!
self.eva.loc[sel_eva, covar_name] = vals.vals
self.eva.dropna(axis=0, subset=[covar_name], how='any', inplace=True)
......@@ -267,9 +271,6 @@ class Doren:
plots_w_species = set(self.spec.plot_obs_id[self.spec.species_name_hdr == species_name])
plots_wo_species = self.eva.index.difference(plots_w_species)
assert plots_w_species.issubset(self.eva.index)
assert plots_wo_species.issubset(self.eva.index)
"""create buffers around each selected veg plot"""
buffers = [shapely.geometry.Point(p.x, p.y).buffer(buffer_size) for p in self.eva.loc[plots_w_species].geometry]
buff = shapely.ops.unary_union(buffers) # Using shapely directly instead of GeoPandas, neat!
......@@ -279,8 +280,6 @@ class Doren:
nearby_plots = gp.sjoin(left_df=self.eva.loc[plots_wo_species], right_df=buff_gdf, op='within',
how='inner').index
assert set(nearby_plots) <= set(plots_wo_species), 'nearby plots are not a subset of nearby plots :-( '
if self.verbose:
print('Found {0} plots w/o {1} within {2}m radius of {3} '
'positive plots'.format(len(nearby_plots), species_name, buffer_size, len(plots_w_species)))
......@@ -291,7 +290,7 @@ class Doren:
self.buffer_species = species_name
self.buffer_size = buffer_size
def report(self, what):
def write_stuff(self, what):
"""
Write any contents to file.
"""
......@@ -345,6 +344,16 @@ class Doren:
'''Write headers with NDep specifically for Paul G'''
# TODO
elif what == 'headers_shp':
shp_out_dir = os.path.join(out_dir, 'shp')
if not os.path.isdir(shp_out_dir):
os.mkdir(shp_out_dir)
shp_out_name = '{0}_{1}_EVA_headers.shp'.format(self.basename, self.timestamp)
self.eva.drop(labels=['plot_coordinates_wgs84', 'plot_point_wgs84'], axis=1) \
.to_file(os.path.join(shp_out_dir, shp_out_name))
elif what == 'buffer_png':
'''Draw map of Europe with positive plots, buffers and nearby plots'''
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment