Commit f91d1b57 authored by Roelofsen, Hans
Browse files

nearest neighbor method

parent 3670c789
......@@ -19,7 +19,7 @@ cv_temp_src = "EObs_v200e_tg_5yrmean"
sp_req_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\z_scratch\soortenlijst.csv'
doren = dc.Doren(header_src=param_header_src, sp_src=param_sp_src)
doren.initiate(sample=True)
doren.initiate(sample=False)
doren.rename(old_name='Oxycoccus palustris', new_name='Vaccinium oxycoccos')
doren.apply_requirements('req1', 'req2', 'req3', 'req4', 'req8', 'req9', 'req10', aoi_src=param_aoi_src,
dem_src=param_dem_src)
......@@ -30,9 +30,18 @@ doren.add_yearly_covar(covar_dir=cv_precp_dir, covar_src_basename=cv_precp_src,
doren.add_yearly_covar(covar_dir=cv_temp_dir, covar_src_basename=cv_temp_src, covar_name='5_yearly_temp')
doren.get_requested_species(src=sp_req_src)
sp = 'Empetrum nigrum'
sp2 = 'Calluna vulgaris'
doren.select_plts_w_species(sp)
doren.nearest_positive()
doren.write_stuff('headers_shp')
doren.write_stuff('species_single_file')
doren.write_stuff('report')
doren.write_stuff('species_list')
doren.write_stuff('typische_soorten')
doren.write_stuff('report')
doren.write_stuff('headers_shp')
with open(r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_{}.pkl'.
......
......@@ -19,6 +19,7 @@ n_batches = int(args.n_batches)
# Recover the Pickled doren object
pkl_src = './pkl_src/doren_20200630.pkl'
pkl_src = './pkl_src/doren_20200630.pkl'
pkl_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_20200630.pkl'
with open(pkl_src, 'rb') as handle:
doren = pickle.load(handle)
......@@ -35,8 +36,30 @@ for i, j in enumerate(sel, start=1):
try:
print(' doing species {0} of {1}: {2}'.format(i, len(sel), sp))
doren.select_plts_w_species(species_name=sp)
doren.filter_by_buffer_around_positive_plots(buffer_size=100000)
doren.nearest_positive()
# doren.filter_by_buffer_around_positive_plots(buffer_size=100000)
doren.write_stuff('species_single_file')
doren.write_stuff('headers_shp')
except OSError:
continue
"""Scratch below"""
# Ad-hoc check of the nearest-neighbour search for a single species.
sp = 'Empetrum nigrum'
doren.select_plts_w_species(sp)
doren.nearest_positive()

# Same search done by hand: negative plots are the origins, positive plots the destinations.
gdA = doren.eva.loc[doren.negative_plots, :]
gdB = doren.eva.loc[doren.positive_plots, ['plot_id', 'plot_coordinates_3035']]
# The frames are bound as gdA/gdB above; the previous call used the undefined names gdbA/gdbB.
# NOTE(review): ckdnearest is called unqualified here, while Doren.nearest_positive reaches it as
# do.ckdnearest - confirm the name is actually imported into this script.
ckdnearest(gdA, gdB)

# Toy example with three origin points and three destination points.
import geopandas as gpd
from shapely.geometry import Point
gpd1 = gpd.GeoDataFrame([['John', 1, Point(1, 1)], ['Smith', 1, Point(2, 2)],
                         ['Soap', 1, Point(0, 2)]],
                        columns=['Name', 'ID', 'geometry'])
gpd2 = gpd.GeoDataFrame([['Work', Point(0, 1.1)], ['Shops', Point(2.5, 2)],
                         ['Home', Point(1, 1.1)]],
                        columns=['Place', 'geometry'])
# NOTE(review): ckdnearest looks up a 'plot_id' column in its second argument; gpd2 has none,
# so this call raises as written.
ckdnearest(gpd1, gpd2)
\ No newline at end of file
......@@ -13,6 +13,8 @@ import re
import shapely
import pandas as pd
from shapely.geometry import Polygon
from scipy.spatial import cKDTree
# set pyproj project library if needed
if not os.environ['PROJ_LIB']:
......@@ -327,17 +329,27 @@ def get_raster_vals(coords, rast_src, nominal=False):
return pd.DataFrame(data={'vals': rast_vals}, index=coords.index)
def nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None):
    """
    Find the nearest point and return the corresponding value from specified column.
    Src: https://automating-gis-processes.github.io/CSC18/lessons/L4/nearest-neighbour.html
    :param row: row holding the origin geometry in column geom1_col
    :param geom_union: geometry handed to shapely.ops.nearest_points as the search target
           (presumably the unary union of df2[geom2_col] - confirm against caller)
    :param df1: not used by this function; kept so existing callers keep working
    :param df2: dataframe with the destination geometries and the column to read the value from
    :param geom1_col: name of the geometry column in row
    :param geom2_col: name of the geometry column in df2
    :param src_column: name of the column in df2 whose value is returned
    :return: value of src_column for the first row of df2 whose geometry equals the nearest point
    """
    # Find the geometry that is closest
    is_nearest = df2[geom2_col] == shapely.ops.nearest_points(row[geom1_col], geom_union)[1]
    # Get the corresponding value from df2 (matching is based on the geometry).
    # .values replaces Series.get_values(), which no longer exists in pandas >= 1.0.
    value = df2[is_nearest][src_column].values[0]
    return value


def ckdnearest(gdA, gdB):
    """
    Nearest neighbour search using Scipy cKDTree.
    Courtesy: https://gis.stackexchange.com/questions/222315/geopandas-find-nearest-point-in-other-dataframe
    :param gdA: geodataframe containing origin points, ie negative plots
    :param gdB: geodataframe containing destination points, ie positive plots. Must have a 'plot_id' column
    :return: tuple (nearest_ids, dist2nearest) of pandas Series, both indexed like gdA:
             nearest_ids (name 'nearestID') holds the plot_id of the nearest point in gdB,
             dist2nearest (name 'dist2nearest') holds the distance to that point divided by 1000
             (ie kilometres if the coordinates are in metres - presumably so, confirm against the CRS)
    :raises ValueError: if gdB holds no points while gdA does
    """
    if len(gdA) == 0:
        # Nothing to look up: hand back empty results instead of failing inside cKDTree.query
        return (pd.Series(gdB['plot_id'].values[:0], index=gdA.index, name='nearestID'),
                pd.Series([], index=gdA.index, name='dist2nearest', dtype=float))
    if len(gdB) == 0:
        raise ValueError('ckdnearest: gdB contains no points, cannot determine a nearest neighbour')

    # (n, 2) coordinate arrays of the origin and destination points
    nA = np.array([(pt.x, pt.y) for pt in gdA.geometry])
    nB = np.array([(pt.x, pt.y) for pt in gdB.geometry])

    btree = cKDTree(nB)
    # idx holds, per origin point, the position in gdB of its nearest destination point
    dist, idx = btree.query(nA, k=1)

    nearest_ids = pd.Series(data=gdB['plot_id'].values[idx], index=gdA.index, name='nearestID')
    dist2nearest = pd.Series(np.divide(dist, 1000), index=gdA.index, name='dist2nearest')
    return nearest_ids, dist2nearest
'''
pattern = re.compile(r' s\.l\.$| subsp\.? | var\.? | aggr\.? | ""| ssp\.? | s\.? | mod\.? | \(')
......
......@@ -457,20 +457,22 @@ class Doren:
def nearest_positive(self):
    """
    Determine distance of each negative plot to nearest positive plot.
    Reads self.positive_plots and self.negative_plots, so a species must have been selected first.
    :return: None. Two columns are written to self.eva:
             column 'dist2nearest' with distance to nearest positive plot (0 for positive plots)
             column 'nearest_id' with plot_id of nearest positive plot (own plot_id for positive plots)
    """

    negatives = self.eva.loc[self.negative_plots, :]
    positives = self.eva.loc[self.positive_plots]

    # KD-tree lookup; both returned series are indexed like the negative plots
    ids, dist = do.ckdnearest(gdA=negatives, gdB=positives)

    self.eva.loc[self.negative_plots, 'dist2nearest'] = dist
    self.eva.loc[self.positive_plots, 'dist2nearest'] = 0
    self.eva.loc[self.negative_plots, 'nearest_id'] = ids
    # A positive plot is its own nearest positive plot
    self.eva.loc[self.positive_plots, 'nearest_id'] = positives['plot_id']

    msg = 'For {0} negative plots, calculated distance to nearest plot with {1}. Shortest distance was {2}, ' \
          'longest {3}\n'.format(len(self.negative_plots), len(self.positive_plots), dist.min(), dist.max())
    self.report += msg
    if self.verbose:
        print(msg)
def write_stuff(self, what, covars=False):
"""
......@@ -609,16 +611,17 @@ class Doren:
return None
# Dedicated directory for the PG output and output filename
pg_out_name = '{0}_NDep_{1}'.format(self.sel_species.replace(' ', '_'), str(int(self.buffer_size / 1000)))
pg_out_name = '{0}_NDep_Nearest'.format(self.sel_species.replace(' ', '_'))
pg_dir = os.path.join(out_dir, 'pg_input')
if not os.path.isdir(pg_dir):
os.mkdir(pg_dir)
# Compile dataframe for writing to file
out_cols = ['plot_id', 'totN_mol_ha', 'soil_type_label', 'country_label', '5_yearly_precip', '5_yearly_temp']
out_df = self.eva.loc[self.positive_plots.union(self.nearby_plots), out_cols]
out_cols = ['plot_id', 'totN_mol_ha', 'soil_type_label', 'country_label', '5_yearly_precip',
'5_yearly_temp', 'dist2nearest', 'nearest_id']
out_df = self.eva.loc[:, out_cols]
out_df.loc[self.positive_plots, 'respons'] = np.uint8(1)
out_df.loc[self.nearby_plots, 'respons'] = np.uint8(0)
out_df.loc[self.negative_plots, 'respons'] = np.uint8(0)
# Write to file and update report
out_df.astype({'respons': 'uint8'}).to_csv(os.path.join(pg_dir, '{}.csv'.format(pg_out_name)), sep=',',
......@@ -708,6 +711,10 @@ class Doren:
self.positive_plots = None
self.negative_plots = None
self.sel_species = None
try:
self.eva.drop(labels=['dist2nearest', 'nearest_id'], axis=1, inplace=True)
except KeyError:
pass
def reset_buffering(self):
# Reset all attributes related to buffering, to prevent plotting data related to species other than assumed
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment