Commit 24010e2c authored by Roelofsen, Hans's avatar Roelofsen, Hans
Browse files

Reading XLS for requested species list

parent 3d1c3f13
......@@ -20,8 +20,6 @@ sp_req_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\z_scra
doren = dc.Doren(header_src=param_header_src, sp_src=param_sp_src)
doren.initiate(sample=False)
# doren.rename(old_name='Oxycoccus palustris', new_name='Vaccinium oxycoccos')
doren.apply_requirements('req1', 'req2', 'req3', 'req4', 'req8', 'req9', 'req10', aoi_src=param_aoi_src,
dem_src=param_dem_src)
doren.add_covar(covar_dir=cv_cntr_dir, covar_src=cv_cntr_src, covar_name='country', raster=False, column='SOVEREIGNT')
......@@ -29,7 +27,9 @@ doren.add_posch(posch_src_dir=param_posch)
doren.add_covar(covar_dir=cv_soil_dir, covar_src=cv_soil_src, covar_name='soil_type', nominal=True)
doren.add_yearly_covar(covar_dir=cv_precp_dir, covar_src_basename=cv_precp_src, covar_name='5_yearly_precip')
doren.add_yearly_covar(covar_dir=cv_temp_dir, covar_src_basename=cv_temp_src, covar_name='5_yearly_temp')
doren.get_requested_species(src=sp_req_src)
doren.get_requested_species(xls=r'c:\Users\roelo008\Wageningen University & Research\DOREN - General\2020-09-17 uniek soorten per habitat.xlsx',
sheet='PGO-1', col='newsoort', simplify_names=False)
doren.write_stuff('species_list')
doren.write_stuff('typische_soorten')
......@@ -39,3 +39,5 @@ doren.write_stuff('headers_shp')
with open(r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_{}.pkl'.
format(doren.timestamp), 'wb') as handle:
pickle.dump(doren, handle, protocol=pickle.HIGHEST_PROTOCOL)
name: doren
channels:
- conda-forge
- defaults
dependencies:
- affine=2.3.0=py_0
- attrs=20.2.0=pyh9f0ad1d_0
- blas=1.0=mkl
- boost-cpp=1.74.0=hd4e6614_0
- boto3=1.9.88=py_0
- botocore=1.12.253=py_0
- bzip2=1.0.8=he774522_3
- ca-certificates=2020.6.20=hecda079_0
- certifi=2018.8.24=py35_1001
- click=7.1.2=pyh9f0ad1d_0
- click-plugins=1.1.1=py_0
- cligj=0.5.0=py_0
- curl=7.71.1=h4b64cdc_6
- cycler=0.10.0=py_2
- docutils=0.14=py35_1
- expat=2.2.9=h33f27b4_2
- fiona=1.7.13=py35_0
- freetype=2.10.2=hd328e21_0
- freexl=1.0.5=hd288d7e_1002
- gdal=2.2.2=py35h202a4aa_1
- geopandas=0.6.3=py_0
- geos=3.6.2=h9ef7328_2
- hdf4=4.2.13=hf8e6fe8_1003
- hdf5=1.8.18=hcf527f2_1
- icc_rt=2019.0.0=h0cc432a_1
- icu=64.2=he025d50_1
- intel-openmp=2019.4=245
- jmespath=0.10.0=pyh9f0ad1d_0
- jpeg=9d=he774522_0
- kealib=1.4.7=vc14_3
- kiwisolver=1.0.1=py35he980bc4_2
- krb5=1.17.1=hc04afaa_3
- libcurl=7.71.1=h4b64cdc_6
- libgdal=2.2.2=h309aa3f_1
- libiconv=1.16=he774522_0
- libkml=1.3.0=he9e54da_1012
- libnetcdf=4.4.1.1=hf30bd8e_8
- libpng=1.6.37=ha81a0f5_2
- libpq=12.3=hd9aa61d_0
- libspatialindex=1.9.3=he025d50_3
- libspatialite=4.3.0a=h582a839_1023
- libssh2=1.9.0=hb06d900_5
- libtiff=4.1.0=h885aae3_6
- libxml2=2.9.10=h1006b36_2
- lz4-c=1.9.2=h62dcd97_2
- matplotlib=3.0.0=py35hd159220_0
- mkl=2018.0.3=1
- mkl_fft=1.0.9=py35_0
- mkl_random=1.0.1=py35_0
- munch=2.5.0=py_0
- numpy=1.15.2=py35ha559c80_0
- numpy-base=1.15.2=py35h8128ebf_0
- openjpeg=2.3.1=h57dd2e7_3
- openssl=1.1.1g=he774522_1
- pandas=0.23.4=py35h830ac7b_0
- pip=10.0.1=py35_0
- proj4=4.9.3=hfa6e2cd_9
- pyparsing=2.4.7=pyh9f0ad1d_0
- pyproj=1.9.5.1=py35haf3c679_5
- pyqt=5.9.2=py35h6538335_2
- python=3.5.6=he025d50_0
- python-dateutil=2.8.1=py_0
- pytz=2020.1=pyh9f0ad1d_0
- qt=5.9.7=h506e8af_3
- rasterio=0.36.0=py35h49e1f75_1
- rasterstats=0.14.0=py_0
- rtree=0.8.3=py35_0
- s3transfer=0.1.13=py35_1
- scipy=1.1.0=py35h4f6bf74_1
- setuptools=40.2.0=py35_0
- shapely=1.6.4=py35hc90234e_1
- simplejson=3.16.1=py35hfa6e2cd_0
- sip=4.19.8=py35h6538335_1000
- six=1.15.0=pyh9f0ad1d_0
- snuggs=1.4.7=py_0
- sqlite=3.33.0=he774522_0
- tbb=2020.1=he980bc4_0
- tbb4py=2019.1=py35he980bc4_1000
- tk=8.6.10=he774522_0
- tornado=5.1.1=py35hfa6e2cd_0
- urllib3=1.20=py35_1
- vc=14.1=h0510ff6_4
- vs2015_runtime=14.16.27012=hf0eaf9b_3
- wheel=0.35.1=py_0
- wincertstore=0.2=py35hfebbdb8_0
- xerces-c=3.2.3=ha925a31_1
- xlrd=1.2.0=pyh9f0ad1d_1
- xz=5.2.5=h62dcd97_1
- zlib=1.2.11=h62dcd97_1009
- zstd=1.4.5=h1f3a1b7_2
prefix: C:\Users\roelo008\Miniconda3\envs\doren
......@@ -117,36 +117,41 @@ class Doren:
self.report += 'Starting @ {0} with {1} EVA headers containing {2} unique ' \
'species.\n\n'.format(self.timestamp_full, self.status['n_plots'], self.status['n_species'])
def get_requested_species(self, src, **kwargs):
def get_requested_species(self, xls, sheet, col, simplify_names=False):
"""
read list of species requested for processing
:param src: file path to source document
:param kwargs:
:return:
:param xls: file path to Excel sheet
:param sheet: sheet name
:param col: column name
:return: self.req_sp: all requested species
self.req_found: subset of above, found
self.req_not_found: subset of above, not found
"""
# Read Excel sheet with requested species ('typische soorten')
reqs = pd.read_csv(src)
reqs.dropna(subset=['wteneschappelijke_soortnaam'], inplace=True)
reqs = pd.read_excel(xls, sheet_name=sheet)
reqs.dropna(subset=[col], inplace=True)
# Simplify suggested species names
reqs.loc[:, 'species_name_hdr'] = reqs.wteneschappelijke_soortnaam.astype(str).apply(do.strip_leading_quote)
reqs.loc[:, 'species_name_hdr'] = reqs.species_name_hdr.astype(str).apply(do.simplify_species)
if simplify_names:
reqs.loc[:, 'species_name_hdr'] = reqs.loc[:, col].astype(str).apply(do.strip_leading_quote)
reqs.loc[:, 'species_name_hdr'] = reqs.species_name_hdr.astype(str).apply(do.simplify_species)
# Indicate if species names are matched in EVA database
reqs.loc[:, 'species_in_EVA'] = reqs.species_name_hdr.apply(lambda x: x in self.species)
reqs.loc[:, 'species_in_EVA'] = reqs.loc[:, col].apply(lambda x: x in self.species)
# NOTE: self.species may change as a result of adding covariables!
# Attach as df
self.request_df = reqs
# Update attributes
req_sp_set = set(self.request_df.species_name_hdr)
req_sp_set = set(self.request_df.loc[:, col])
self.req_sp = list(req_sp_set)
self.req_found = self.species.intersection(req_sp_set)
self.req_not_found = req_sp_set.difference(self.species)
# List alternatives
# List alternatives by looking for not-found-species in the 'turboveg2_concept' column instead of
# 'matched_concept'.
container = []
for sp in self.req_not_found:
alternatives = self.spec.loc[self.spec.turboveg2_concept == sp, 'matched_concept']
......@@ -424,7 +429,7 @@ class Doren:
self.positive_plots = set(self.spec.plot_obs_id[self.spec.species_name_hdr == species_name])
self.negative_plots = self.eva.index.difference(self.positive_plots)
def filter_by_buffer_around_positive_plots(self, buffer_size: object) -> object:
def filter_by_buffer_around_positive_plots(self, buffer_size):
"""
Filter plots by buffering X meter around self.positive plots
:returns self.buffer_gdf: geodataframe with polygon geometry of all buffers *buffer_size* around
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment