Skip to content
Snippets Groups Projects
Commit c1872321 authored by Roelofsen, Hans's avatar Roelofsen, Hans
Browse files

other small updates

parent 5f806341
No related branches found
No related tags found
No related merge requests found
import os
import json
import re
import pandas as pd
import numpy as np
import geopandas as gp
......@@ -47,12 +49,14 @@ def ckdnearest(gdA: gp.GeoDataFrame, gdB: gp.GeoDataFrame):
class DorenPlots:
def __init__(self, source_data: dict, prepared_data: dict):
self.species_df = pd.read_csv(
source_data["eva_species"],
sep="\t",
prepared_data["eva_species"],
sep=",",
usecols=[0, 5, 8],
header=0,
names=["plot_obs_id", "matched_concept", "cover_percentage"],
comment='#',
)
# Add simplified species name
......@@ -70,9 +74,22 @@ class DorenPlots:
self.preparation_menu = json.load(f)
self.preparation_menu['prepared_headers'] = prepared_data['prepared_headers']
def get_plots_inventory_for_species(self, species_identifier: str) -> pd.DataFrame:
    """
    Return all species records that belong to the requested species.

    A record is selected when its 'matched_concept' equals the requested name exactly, or when
    its 'matched_concept_simplified' equals the requested name with any trailing taxonomic
    qualifier (s.l., sl., var, var., agg, agg., aggr, aggr., sensu) and everything after it removed.

    :param species_identifier: species name, optionally followed by a qualifier such as 'agg.'
    :return: the matching rows of self.species_df
    :raises ValueError: if no record matches the requested species
    """
    # A qualifier only counts as a whole, whitespace-delimited token with literal dots.
    # Without the escapes and boundaries, 's.l.' matches "sple" in "Asplenium" and 'var'
    # matches inside "varia", which truncates ordinary species names.
    qualifier = re.compile(r"\s+(?:s\.l\.|sl\.|var\.?|aggr?\.?|sensu)(?=\s|$)")
    simplified_name = qualifier.split(species_identifier, maxsplit=1)[0].strip()

    species = self.species_df
    # Boolean masks instead of an interpolated query string: names containing a quote
    # character would otherwise produce an invalid query expression.
    hits = species.loc[
        (species["matched_concept"] == species_identifier)
        | (species["matched_concept_simplified"] == simplified_name)
    ]
    if hits.empty:
        raise ValueError(f"No plots found for {species_identifier}")

    # Reports the number of records, and in brackets the number of distinct plots.
    print(f' found {hits.shape[0]} ({len(set(hits.plot_obs_id))}) plots for {species_identifier}')
    return hits
def identify_headers_with_species(
self,
species_identifier: str,
shout: bool=False
):
"""
Mark headers in dataframe which contain a species. Adds or overwrites column 'has_species'
......@@ -82,18 +99,30 @@ class DorenPlots:
:return: dataframe with headers
"""
plots = set(
# Match requested species name directly on matched concept column
plots1 = set(
self.species_df.query(
f"matched_concept == '{species_identifier}'"
).plot_obs_id
)
# Simplify requested species name and search on simplified matched_concept column
pattern = re.compile(" s\.l\.| sl\. | var | var\. |agg\. | agg | aggr\. | aggr | sensu ")
plots2 = set(self.species_df.query(
f"matched_concept_simplified == '{re.split(pattern, species_identifier)[0].strip()}'"
).plot_obs_id)
self.header_gdf = self.header_gdf.assign(
has_species=self.header_gdf.index.isin(plots)
has_species=self.header_gdf.index.isin(plots1.union(plots2))
)
if self.header_gdf.has_species.sum() == 0:
raise ValueError(f"No plots found for {species_identifier}")
else:
if shout:
print(f'# Found {len(plots1)} plots directly and {len(plots2)} indirectly for {species_identifier}')
def identify_headers_of_structuurtype(self, strucuurtype: str):
""" "
Mark headers in dataframe which belong to the requested structuurtype
......
This diff is collapsed.
......@@ -134,6 +134,7 @@ FILTERS = {
# Locations of the prepared CSV tables, keyed by table name.
# NOTE(review): presumably passed as the `prepared_data` argument of DorenPlots - confirm against caller.
PREPARED_DATA = {
# Prepared header table.
"prepared_headers": r"w:\projects\DOREN22\b_prepareddata\eva_headers\20230222-1254_eva_headers.csv",
# Prepared species table; shares the 20230222-1254 file-name prefix with the header table.
"eva_species": r'W:\PROJECTS\DOREN22\b_prepareddata\eva_headers\20230222-1254_eva_species.csv'
}
# Base output directory.
# NOTE(review): this is a raw string, so each "\\" stays as two backslashes in the path,
# unlike the single separators used in PREPARED_DATA - confirm this is intended.
BASE_OUT_DIRECTORY = r"w:\PROJECTS\DOREN22\\b_prepareddata\\eva_headers"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment