Commit 3a6f90ac authored by roelo008
Browse files

update to requested species attribute

parent 24cc4c5f
......@@ -91,9 +91,9 @@ def int_year(x):
return out
def eunis_to_veg(eunis_in):
def eunis_to_veg_dict():
# dictionary mapping EUNIS type to veg. type, see E-mail Wieger W 18-09-2019
eunis_dict = {"A25a": "gras", "A25b": "gras", "A25c": "gras", "A25d": "gras", "B11a": "gras", "B11b": "gras",
return {"A25a": "gras", "A25b": "gras", "A25c": "gras", "A25d": "gras", "B11a": "gras", "B11b": "gras",
"B13a": "gras", "B13b": "gras", "B14a": "gras", "B14b": "gras", "B14b!,IE16": "gras", "B14c": "gras",
"B14c!,E11a": "gras", "B14c!,E12b": "gras", "B15a": "heide", "B15b": "heide", "B15b,B16a": "heide",
"B16a": "heide", "B21a": "gras", "B31a": "gras", "B31b": "gras", "B34a": "gras", "C11a": "gras",
......@@ -152,12 +152,11 @@ def eunis_to_veg(eunis_in):
"F42,F61a,F61b": "heide", "F42,F61b": "heide", "F42,F74a": "heide", "F42,G3A": "heide",
"F51": "struweel", "F51,F53": "struweel", "F51,F53,F61a": "struweel", "F51,F54": "struweel",
"F51,F55": "struweel", "F51,F61a": "struweel", "F51,F61b": "struweel", "F51,F62": "struweel",
"F51,F74a": "struweel", "F51,F93": "struweel", "F51,G22": "struweel", "F51,G24": "struweel", "F51,G37": "struweel", "F53": "struweel", "F53,F61a": "struweel", "F53,G24": "struweel", "F54": "struweel", "F54,G13": "struweel", "F54,G17a": "struweel", "F54,G1C": "struweel", "F54,G21": "struweel", "F55": "struweel", "F55,G24": "struweel", "F61a": "struweel", "F61a,F61b": "struweel", "F61a,F67": "struweel", "F61a,F74a": "struweel", "F61a,G24": "struweel", "F61b": "struweel", "F62": "struweel", "F62,F73": "struweel", "F62,G24": "struweel", "F62,G37": "struweel", "F67": "struweel", "F67,F68a": "struweel", "F68a": "struweel", "F68b": "struweel", "F68b,F73,F94": "struweel", "F68c": "struweel", "F71": "struweel", "F73": "struweel", "F73,F74c": "struweel", "F73,F94": "struweel", "F73,G24": "struweel", "F73,G37": "struweel", "F74a": "struweel", "F74b": "struweel", "F74b,F74c": "struweel", "F74c": "struweel", "F81": "struweel", "F91": "struweel", "F92": "struweel", "F92,G13": "struweel", "F93": "struweel", "F93,G13": "struweel", "F94": "struweel", "Fa": "struweel", "Fb": "struweel", "G": "bos", "G11": "bos", "G11,G12b": "bos", "G11,G14": "bos", "G12a": "bos", "G12a,G12b": "bos", "G12a,G13": "bos", "G12a,G22": "bos", "G12a,G3Db": "bos", "G12b": "bos", "G12b,G17a": "bos", "G12b,G18": "bos", "G12b,G26": "bos", "G13": "bos", "G13,G17a": "bos", "G13,G1Ba": "bos", "G13,G1C": "bos", "G13,G22": "bos", "G13,G25a": "bos", "G14": "bos", "G14,G15": "bos", "G14,G16b": "bos", "G14,G18": "bos", "G14,G19a": "bos", "G14,G1Ab": "bos", "G14,G1C": "bos", "G14,G3B": "bos", "G14,G3Da": "bos", "G14,G3Db": "bos", "G15": "bos", "G15,G19a": "bos", "G15,G19b": "bos", "G15,G3A": "bos", "G15,G3Da": "bos", "G15,G3Db": "bos", "G16a": "bos", "G16a,G17a": "bos", "G16a,G1Aa": "bos", "G16a,G1Ab": "bos", "G16b": "bos", "G17a": "bos", "G17a,G18": "bos", "G17a,G1Aa": "bos", "G17a,G22": "bos", "G17a,G24": "bos", "G17a,G37": "bos", "G17b": "bos", "G18": "bos", "G19a": "bos", "G19a!,G3A": "bos", "G19a,G3A": "bos", 
"G19a-,G3Db": "bos", "G19b": "bos", "G1Aa": "bos", "G1Aa,G1Ab": "bos", "G1Aa,G1C": "bos", "G1Aa,G31b": "bos", "G1Aa,G34a": "bos", "G1Aa,G37": "bos", "G1Aa,G3A": "bos", "G1Aa,G3B": "bos", "G1Aa,G3F2": "bos", "G1Ab": "bos", "G1Ab,G22": "bos", "G1Ba": "bos", "G1C": "bos", "G1C,G22": "bos", "G1C,I1": "bos", "G21": "bos", "G21!,G24": "bos", "G21!,G37": "bos", "G21,G22": "bos", "G21,G24": "bos", "G22": "bos", "G22,G37": "bos", "G24": "bos", "G24,G25a": "bos", "G24,G39b": "bos", "G25a": "bos", "G26": "bos", "G28": "bos", "G31a": "bos", "G31a,G3F2": "bos", "G31b": "bos", "G31c": "bos", "G32": "bos", "G32,G3Da": "bos", "G32,G3F2": "bos", "G34a": "bos", "G34b": "bos", "G34b,G34c": "bos", "G34b,G3B": "bos", "G34b,G3F2": "bos", "G34c": "bos", "G34c,G37": "bos", "G34c,G3F2": "bos", "G36": "bos", "G37": "bos", "G37,G39b": "bos", "G39a": "bos", "G39b": "bos", "G3A": "bos", "G3A,G3C": "bos", "G3B": "bos", "G3C": "bos", "G3Da": "bos", "G3Da,G3Db": "bos", "G3Da,G3F2": "bos", "G3Db": "bos", "G3F2": "bos", "H": "gras", "H21": "gras", "H22": "gras", "H23": "gras", "H24": "gras", "H25": "gras", "H26a": "gras", "H26b": "gras", "H26c": "gras", "H31a": "gras", "H31b": "gras", "H31c": "gras", "H31d": "gras", "H32a": "gras", "H32b": "gras", "H32c": "gras", "H32d": "gras", "H32f": "gras", "H34": "gras", "H51b": "gras", "I1": "gras", "I1,IE16": "gras", "I14": "gras", "IE16": "gras", "IE1E": "gras", "IE28": "gras", "IE51": "gras"}
try:
out = eunis_dict[eunis_in]
except KeyError:
out = 'other'
return out
"F51,F74a": "struweel", "F51,F93": "struweel", "F51,G22": "struweel", "F51,G24": "struweel",
"F51,G37": "struweel", "F53": "struweel", "F53,F61a": "struweel", "F53,G24": "struweel", "F54": "struweel",
"F54,G13": "struweel", "F54,G17a": "struweel", "F54,G1C": "struweel", "F54,G21": "struweel",
"F55": "struweel", "F55,G24": "struweel", "F61a": "struweel", "F61a,F61b": "struweel",
"F61a,F67": "struweel", "F61a,F74a": "struweel", "F61a,G24": "struweel", "F61b": "struweel", "F62": "struweel", "F62,F73": "struweel", "F62,G24": "struweel", "F62,G37": "struweel", "F67": "struweel", "F67,F68a": "struweel", "F68a": "struweel", "F68b": "struweel", "F68b,F73,F94": "struweel", "F68c": "struweel", "F71": "struweel", "F73": "struweel", "F73,F74c": "struweel", "F73,F94": "struweel", "F73,G24": "struweel", "F73,G37": "struweel", "F74a": "struweel", "F74b": "struweel", "F74b,F74c": "struweel", "F74c": "struweel", "F81": "struweel", "F91": "struweel", "F92": "struweel", "F92,G13": "struweel", "F93": "struweel", "F93,G13": "struweel", "F94": "struweel", "Fa": "struweel", "Fb": "struweel", "G": "bos", "G11": "bos", "G11,G12b": "bos", "G11,G14": "bos", "G12a": "bos", "G12a,G12b": "bos", "G12a,G13": "bos", "G12a,G22": "bos", "G12a,G3Db": "bos", "G12b": "bos", "G12b,G17a": "bos", "G12b,G18": "bos", "G12b,G26": "bos", "G13": "bos", "G13,G17a": "bos", "G13,G1Ba": "bos", "G13,G1C": "bos", "G13,G22": "bos", "G13,G25a": "bos", "G14": "bos", "G14,G15": "bos", "G14,G16b": "bos", "G14,G18": "bos", "G14,G19a": "bos", "G14,G1Ab": "bos", "G14,G1C": "bos", "G14,G3B": "bos", "G14,G3Da": "bos", "G14,G3Db": "bos", "G15": "bos", "G15,G19a": "bos", "G15,G19b": "bos", "G15,G3A": "bos", "G15,G3Da": "bos", "G15,G3Db": "bos", "G16a": "bos", "G16a,G17a": "bos", "G16a,G1Aa": "bos", "G16a,G1Ab": "bos", "G16b": "bos", "G17a": "bos", "G17a,G18": "bos", "G17a,G1Aa": "bos", "G17a,G22": "bos", "G17a,G24": "bos", "G17a,G37": "bos", "G17b": "bos", "G18": "bos", "G19a": "bos", "G19a!,G3A": "bos", "G19a,G3A": "bos", "G19a-,G3Db": "bos", "G19b": "bos", "G1Aa": "bos", "G1Aa,G1Ab": "bos", "G1Aa,G1C": "bos", "G1Aa,G31b": "bos", "G1Aa,G34a": "bos", "G1Aa,G37": "bos", "G1Aa,G3A": "bos", "G1Aa,G3B": "bos", "G1Aa,G3F2": "bos", "G1Ab": "bos", "G1Ab,G22": "bos", "G1Ba": "bos", "G1C": "bos", "G1C,G22": "bos", "G1C,I1": "bos", "G21": "bos", "G21!,G24": "bos", "G21!,G37": "bos", "G21,G22": "bos", "G21,G24": 
"bos", "G22": "bos", "G22,G37": "bos", "G24": "bos", "G24,G25a": "bos", "G24,G39b": "bos", "G25a": "bos", "G26": "bos", "G28": "bos", "G31a": "bos", "G31a,G3F2": "bos", "G31b": "bos", "G31c": "bos", "G32": "bos", "G32,G3Da": "bos", "G32,G3F2": "bos", "G34a": "bos", "G34b": "bos", "G34b,G34c": "bos", "G34b,G3B": "bos", "G34b,G3F2": "bos", "G34c": "bos", "G34c,G37": "bos", "G34c,G3F2": "bos", "G36": "bos", "G37": "bos", "G37,G39b": "bos", "G39a": "bos", "G39b": "bos", "G3A": "bos", "G3A,G3C": "bos", "G3B": "bos", "G3C": "bos", "G3Da": "bos", "G3Da,G3Db": "bos", "G3Da,G3F2": "bos", "G3Db": "bos", "G3F2": "bos", "H": "gras", "H21": "gras", "H22": "gras", "H23": "gras", "H24": "gras", "H25": "gras", "H26a": "gras", "H26b": "gras", "H26c": "gras", "H31a": "gras", "H31b": "gras", "H31c": "gras", "H31d": "gras", "H32a": "gras", "H32b": "gras", "H32c": "gras", "H32d": "gras", "H32f": "gras", "H34": "gras", "H51b": "gras", "I1": "gras", "I1,IE16": "gras", "I14": "gras", "IE16": "gras", "IE1E": "gras", "IE28": "gras", "IE51": "gras"}
def simplify_species(species_name):
......@@ -167,12 +166,12 @@ def simplify_species(species_name):
:param species_name:
:return: species_name minus subsp or other flags
"""
pattern = re.compile(' subsp?. | var?. | aggr?. | ""| mod?. | \(')
pattern = re.compile(r's.l.$| subsp.? | var.? | aggr.? | ""| ssp.? ?| s. ?| mod.? ?| \(')
if re.search(pattern, species_name):
out = re.split(pattern, species_name)[0]
else:
out = species_name
return out
return out.strip()
def strip_leading_quote(species_name):
......
......@@ -61,7 +61,7 @@ class Doren:
def initiate(self, sample=False):
"""
Read source data, sanitize column names and fix some datatypes
:return: self.eva and self species populated with pandas dataframes
:return: self.eva and self.species populated with (geo)dataframes
"""
# Read EVA species data
......@@ -82,6 +82,7 @@ class Doren:
if sample:
# Always use first 4 rows (for header and comments) plus 500 random rows
userows = [0, 1, 2, 3] + sorted(random.sample(range(4, 1357330), 500))
eva = pd.read_csv(self.header_src, comment='#', sep='\t', low_memory=False, index_col='PlotObservationID',
skiprows=lambda x: x not in userows if sample else None, memory_map=True,
usecols=['PlotObservationID', 'PlotID', 'TV2 relevé number', 'Country','Longitude', 'Latitude',
......@@ -90,12 +91,13 @@ class Doren:
eva.rename(columns=dict(zip(do.eva_colnames_orig(), do.eva_colnames_new())), inplace=True)
eva.longitude = pd.to_numeric(eva.longitude, errors='coerce', downcast='float')
eva['eunis_top'] = eva.apply(lambda row: do.first_letter(row.eunis), axis=1)
eva['veg_type'] = eva.apply(lambda row: do.eunis_to_veg(row.eunis), axis=1)
eva['eunis_top'] = eva.eunis.apply(do.first_letter)
eva['veg_type'] = eva.eunis.map(do.eunis_to_veg_dict())
eva.veg_type.fillna('other', inplace=True)
eva['year'] = eva.date_of_recording.fillna(0).astype(np.uint16)
eva['plot_coordinates_wgs84'] = list(zip(eva.longitude, eva.latitude))
eva['plot_point_wgs84'] = [shapely.geometry.Point(x, y) for (x, y) in eva.plot_coordinates_wgs84]
eva['plot_coordinates_3035'] = eva.loc[:, 'plot_coordinates_wgs84'].apply(do.lon_lat_2_east_north)
eva['plot_coordinates_3035'] = eva.plot_coordinates_wgs84.apply(do.lon_lat_2_east_north)
self.eva = gp.GeoDataFrame(eva, geometry='plot_coordinates_3035', crs={"init": "epsg:3035"})
del eva
......@@ -112,8 +114,19 @@ class Doren:
"""
# assumed to be CSV sheet for now
src = pd.read_csv(src)
self.requested_species = list(set(src.wteneschappelijke_soortnaam))
reqs = pd.read_csv(src)
reqs.loc[:, 'foo'] = reqs.wteneschappelijke_soortnaam.astype(str).apply(do.strip_leading_quote)
reqs.loc[:, 'foo'] = reqs.foo.astype(str).apply(do.simplify_species)
requested_species = set(reqs.foo)
match = self.species.intersection(requested_species)
non_match = requested_species.difference(self.species)
self.requested_species = list(requested_species)
msg = 'Read {0} species requested for processing, of which {1} match a species in EVA and {2} do not.'.format(
len(self.requested_species), len(match), len(non_match))
self.report += msg
if self.verbose:
print(msg)
def apply_requirements(self, *reqs, **kwargs):
"""
......@@ -143,7 +156,7 @@ class Doren:
"Req 9: <date_of_recording> is jonger dan 1950",
"Req 10: plot has species inventory"]))
'''Which rows do NOT meet requirements?'''
'''Determine which rows do not meet the requirements'''
possible_requirements = {}
if 'req1' in requirements:
possible_requirements['req1'] = self.eva.loc[self.eva.latitude.isna(), :].index
......@@ -267,7 +280,7 @@ class Doren:
else:
# Get the values from a column in a polygon shapefile
column = kwargs.get('column', None)
column = kwargs.get('column', Exception('Provide column keyword argument'))
covar_gdf = gp.read_file(os.path.join(covar_dir, covar_src))
joined = gp.sjoin(left_df=self.eva, right_df=covar_gdf, how='left', op='within', lsuffix='eva_',
rsuffix='covar_')
......@@ -327,7 +340,9 @@ class Doren:
self.negative_plots: plot IDs of all plots not containing species_name
"""
if species_name not in self.species:
msg = 'Failed to buffer for species {0}, not found in EVA database\n'.format(species_name)
msg = ' species {0} not found in EVA database\n'.format(species_name)
if self.verbose:
print(msg)
self.report += msg
raise OSError(msg)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment