Commit e3288ccf authored by Roelofsen, Hans
Browse files

new eva header file, query nearest method

parent ed39744a
......@@ -5,23 +5,22 @@ from utils import doren_classes as dc
# os.environ['GDAL_DATA'] = r'C:\Users\roelo008\Miniconda3\envs\doren\Library\share\gdal'
param_header_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\EVA\EVA_Doren_header.csv'
param_sp_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\EVA\EVA_Doren_species.csv'
param_aoi_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\geodata\AOI\ne_50m_cntrs_AOI_diss_fin.shp'
param_dem_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\DEM\DTM_3035.tif'
param_header_src = r'W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20201118\EVA_Doren_header.csv'
param_sp_src = r'W:\PROJECTS\Doren19\a_brondata\EVA\delivery_201909\EVA_Doren_species.csv'
param_aoi_src = r'W:\PROJECTS\Doren19\a_brondata\AOI\ne_50m_cntrs_AOI_diss_fin.shp'
param_dem_src = r'W:\PROJECTS\Doren19\a_brondata\covariables\DEM\DTM_3035.tif'
param_posch = r'W:\PROJECTS\Doren19\a_brondata\POSCH_dep\20201012delivery'
cv_soil_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\soil\b_processed'
cv_soil_dir = r'w:\PROJECTS\Doren19\a_brondata\covariables\soil\b_processed'
cv_soil_src = 'WRBLEV1_laea.tif'
cv_cntr_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\geodata\countries'
cv_cntr_dir = r'w:\PROJECTS\Doren19\a_brondata\covariables\countries'
cv_cntr_src = 'ne_50m_cntrs_sel_buff_diss_2_3035.shp'
cv_precp_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled'
cv_precp_dir = r'w:\PROJECTS\Doren19\a_brondata\covariables\EObs\2_compiled'
cv_precp_src = "EObs_v200e_rr_5yrmean"
cv_temp_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\EObs\2_compiled'
cv_temp_dir = cv_precp_dir
cv_temp_src = "EObs_v200e_tg_5yrmean"
sp_req_src = r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\2020-09-17 uniek soorten per habitat.xlsx'
sp_req_sheet, col = 'PGO-1', 'newsoort'
sp_req_sheet, column = 'PGO-1', 'newsoort'
# sp_req_src = r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\2020-09-17 uniek soorten per habitat (2).xlsx'
eunis_plot_source = r'w:\PROJECTS\Doren19\a_brondata\covariables\EUNIS\levering_20201110\EVA_Doren_header.txt'
testing = False
......@@ -34,31 +33,18 @@ doren.add_covar(covar_dir=cv_soil_dir, covar_src=cv_soil_src, covar_name='soil_t
doren.add_yearly_covar(covar_dir=cv_precp_dir, covar_src_basename=cv_precp_src, covar_name='five_yearly_precip')
doren.add_yearly_covar(covar_dir=cv_temp_dir, covar_src_basename=cv_temp_src, covar_name='five_yearly_temp')
doren.add_eunis(eunis_plot_src=eunis_plot_source)
doren.add_eunis()
doren.add_posch(posch_src_dir=param_posch)
doren.get_requested_species(xls=sp_req_src, sheet=sp_req_sheet, col=col, simplify_names=False)
doren.get_requested_species(xls=sp_req_src, sheet=sp_req_sheet, col=column, simplify_names=False)
doren.test()
doren.write_stuff('species_list')
doren.write_stuff('typische_soorten')
doren.write_stuff('report')
doren.write_stuff('headers_shp')
with open(r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_{}.pkl'.
format(doren.timestamp), 'wb') as handle:
format(doren.timestamp), 'wb') as handle:
pickle.dump(doren, handle, protocol=pickle.HIGHEST_PROTOCOL)
'''
pre = dc.Doren(header_src=param_header_src, sp_src=param_sp_src)
pre.initiate(sample=False, species_col='turboveg2_concept')
pre.select_plts_w_species('Sparganium natans')
print(pre.eva.shape)
print(len(pre.positive_plots))
post = dc.Doren(header_src=param_header_src, sp_src=param_sp_src)
post.initiate(sample=False, species_col='matched_concept')
post.select_plts_w_species('Sparganium natans')
print(post.eva.shape)
print(len(post.positive_plots))
'''
......@@ -20,4 +20,4 @@ echo $SLURM_ARRAY_TASK_ID
# Run
cd /home/WUR/roelo008/projs/doren_2019
python run_species.py $SLURM_ARRAY_TASK_ID 50 doren_20201005.pkl
\ No newline at end of file
python run_species.py $SLURM_ARRAY_TASK_ID 50 doren_20201121.pkl
\ No newline at end of file
......@@ -16,24 +16,23 @@ batch_nr = int(args.batch_nr)
n_batches = int(args.n_batches)
# Recover the Pickled doren object
# pkl_src = os.path.join('./pkl_src/', args.pkl)
pkl_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_20201118.pkl'
pkl_src = os.path.join('./pkl_src/', args.pkl)
#pkl_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_20201118.pkl'
with open(pkl_src, 'rb') as handle:
doren = pickle.load(handle)
# doren.base_out_dir = r'./c_out'
doren.base_out_dir = r'./c_out'
# doren.req_sp = None
# sp_req_src = r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\2020-09-17 uniek soorten per habitat (2).xlsx'
# doren.get_requested_species(xls=sp_req_src, sheet='Verdringingssoorten', col='wetenschappelijke naam', simplify_names=False)
structuurtypes = list(set(doren.eva.structuurtype))
structuurtypes = [x for x in list(set(doren.eva.structuurtype)) if x != '?']
# Determine subset of requested species to process
sel = np.array_split(np.arange(0, len(doren.req_sp)), n_batches)[batch_nr].tolist()
print('Commencing processing of {0} species...'.format(len(sel)))
# Process subset of requested species
for i, j in enumerate(sel, start=0):
sp = doren.req_sp[j]
......@@ -41,7 +40,7 @@ for i, j in enumerate(sel, start=0):
print('Doing species {0} of {1}: {2}'.format(i, len(sel), sp))
doren.select_plts_w_species(species_name=sp)
for structuurtype in structuurtypes:
doren.nearest_positive_of_type(col='structuurtype', val=structuurtype)
doren.nearest_positive_queryd(col='structuurtype', val=structuurtype)
doren.get_bedekking_selected_sp()
doren.write_stuff('species_single_file')
if batch_nr == n_batches and i == len(sel):
......
......@@ -26,7 +26,7 @@ def eva_colnames_orig():
"""
:return: list of columns names of the original EVA database
"""
return ['PlotID', 'TV2 relevé number', 'Country', 'Cover abundance scale', 'Date of recording',
return ['PlotObservationID', 'PlotID', 'TV2 relevé number', 'Country', 'Cover abundance scale', 'Date of recording',
'Relevé area (m²)', 'Altitude (m)', 'Aspect (°)', 'Slope (°)', 'Cover total (%)', 'Cover tree layer (%)',
'Cover shrub layer (%)', 'Cover herb layer (%)', 'Cover moss layer (%)', 'Cover lichen layer (%)',
'Cover algae layer (%)', 'Cover litter layer (%)', 'Cover open water (%)', 'Cover bare rock (%)',
......@@ -34,14 +34,15 @@ def eva_colnames_orig():
'Height lowest shrubs (m)', 'Aver. height (high) herbs (cm)', 'Aver. height lowest herbs (cm)',
'Maximum height herbs (cm)', 'Maximum height cryptogams (mm)', 'Mosses identified (y/n)',
'Lichens identified (y/n)', 'Remarks', 'Locality', 'Name association', 'Name alliance',
'Full species list (y/n/?)', 'EUNIS', 'Longitude', 'Latitude', 'Location uncertainty (m)', 'Dataset']
'Full species list (y/n/?)', 'EUNIS', 'Longitude', 'Latitude', 'Location uncertainty (m)', 'Dataset',
'EUNIS_Old', 'EUNIS_New']
def eva_colnames_new():
"""
:return: list of improved colnames for the EVA database
"""
return ['plot_id', 'tv2_releve_nr', 'country', 'cover_abundance_scale', 'date_of_recording',
return ['plot_obs_id', 'plot_id', 'tv2_releve_nr', 'country', 'cover_abundance_scale', 'date_of_recording',
'releve_area_m2', 'altitude_m', 'aspect_deg', 'slope_deg', 'cover_total_perc', 'cover_tree_layer_perc',
'cover_shrub_layer_perc', 'cover_herb_layer_perc', 'cover_moss_layer_perc', 'cover_lichen_layer_perc',
'cover_algae_layer_perc', 'cover_litter_layer_perc', 'cover_open_water_perc', 'cover_bare_rock_perc',
......@@ -49,7 +50,7 @@ def eva_colnames_new():
'avg_height_high_herbs_cm', 'avg_height_lowest_herbs_cm', 'maximum_height_herbs_cm',
'maximum_height_cryptogams_mm', 'mosses_identified_bool', 'lichens_identified_bool', 'remarks',
'locality', 'name_association', 'name_alliance', 'full_species_list_bool', 'eunis', 'longitude',
'latitude', 'location_uncertainty_m', 'dataset']
'latitude', 'location_uncertainty_m', 'dataset', 'eunis_old', 'eunis_new']
def get_aoi(*src):
......@@ -381,14 +382,14 @@ def get_raster_vals(coords, rast_src, nominal=False):
return pd.DataFrame(data={'vals': rast_vals}, index=coords.index)
def ckdnearest(gdA, gdB):
def ckdnearest(gdA, gdB, id_col):
"""
Nearest neighbour search using Scipy cKDTree.
Courtesy: https://gis.stackexchange.com/questions/222315/geopandas-find-nearest-point-in-other-dataframe
:param gdA: geodataframe containing origin points, ie negative plots
:param gdB: geodataframe containing destination points, ie positive plots
:return: gdA with distance to nearest point gdB
:return: series with gdA.index with nearest ID from gdB,
series with gdA.index with distance to nearest gdB
"""
nA = np.array(list(gdA.geometry.apply(lambda x: (x.x, x.y))))
......@@ -396,43 +397,9 @@ def ckdnearest(gdA, gdB):
btree = cKDTree(nB)
dist, idx = btree.query(nA, k=1)
nearest_ids = pd.Series(data=gdB.iloc[idx, gdB.columns.tolist().index('plot_id')].values, index=gdA.index,
nearest_ids = pd.Series(data=gdB.iloc[idx, gdB.columns.tolist().index(id_col)].values, index=gdA.index,
name='nearestID')
dist2nearest = pd.Series(np.divide(dist, 1000), index=gdA.index, name='dist2nearest')
return nearest_ids, dist2nearest
'''
pattern = re.compile(r' s\.l\.$| subsp\.? | var\.? | aggr\.? | ""| ssp\.? | s\.? | mod\.? | \(')
sp1 = 'Rosa spinosissima' # begint met s
sp2 = 'Rosa spinosissima s.l.'
sp3 = 'Rosa spinosissima subsp. HANS'
sp4 = 'Rosa spinosissima subsp HANS'
sp5 = 'Rosa spinosissima var. HANS'
sp6 = 'Rosa spinosissima var HANS'
sp7 = 'Rosa spinosissima aggr. HANS'
sp8 = 'Rosa spinosissima aggr HANS'
sp8 = 'Rosa spinosissima ssp. HANS'
sp9 = 'Rosa spinosissima ssp HANS'
sp10 = 'Rosa spinosissima s. HANS'
sp11 = 'Rosa spinosissima s HANS'
sp12 = 'Rosa spinosissima mod. HANS'
sp12 = 'Rosa spinosissima mod HANS'
sps = [sp1,sp2,sp3,sp4,sp5,sp6,sp7,sp8,sp9,sp10,sp11,sp12]
[re.split(pattern, x) for x in sps]
pattern = re.compile(r'[A-Z]{1}[a-z]{1,} [a-z]{1,}')
import shapely
import geopandas as gp
cities = cities.loc[cities.name.str.isin(['Vatican City', 'Amsterdam', 'Athens', 'Bern', 'Paris', 'Berlin', 'Rome'])]
cities = cities.loc[cities.name.isin(['Vatican City', 'Amsterdam', 'Athens', 'Bern', 'Paris', 'Berlin', 'Rome'])]
gdf2 = gp.GeoDataFrame(data={'name': ['NorthSea', 'Coast'], 'geometry':[shapely.geometry.Point(3, 53), shapely.geometry.Point(1.653, 50.819)]},
index=[1,2])
cities = pd.concat([cities, gdf2])
rast_vals = rast.point_query(cities.geometry, raster.read(1), interpolate='bilinear', affine=raster.affine, nodata=raster.nodata)
print('\n'.join([str(x) for x in rast_vals]))
out = pd.DataFrame(data={'vals': rast_vals}, index=cities.index)
'''
\ No newline at end of file
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment