Skip to content
Snippets Groups Projects
Commit 60a6d2ff authored by Roelofsen, Hans's avatar Roelofsen, Hans
Browse files

updates to header selection process

parent 9a99935f
No related branches found
No related tags found
No related merge requests found
......@@ -2,87 +2,39 @@ import datetime
import os
import preparation_headers as prep
# from src import preparation_headers as prep
print(f'Starting {datetime.datetime.now().strftime("%d %b %Y %H:%M:%S")}')
BASE_OUT_DIRECTORY = r"w:\PROJECTS\DOREN22\\b_prepareddata\\eva_headers"
SAMPLE = True
BASENAME = 'eva_headers'
df, msg1 = prep.get_headers()
df, msg2 = prep.add_xy_3035(df)
df, msg3 = prep.filter_to_years(df, earliest_year=1950)
df, msg4 = prep.filter_to_species_lists(df)
df, msg5 = prep.filter_to_aoi(df)
df, msg6 = prep.filter_to_elevation(df, max_elevation=500)
df, msg7 = prep.add_country(df)
df, msg8 = prep.add_nh3_max(df)
df, msg9 = prep.add_nox_max(df)
df, msg11 = prep.add_n_totals(df)
df, msg12 = prep.add_soil_type(df)
df, msg13 = prep.add_yearly_precipitation(df)
df, msg14 = prep.add_yearly_temperature(df)
df, msg15 = prep.to_single_eunis(df)
df, msg16 = prep.overwrite_eunis(
df,
src=r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Calluna_Avenula.csv",
src_col="PlotObservationID",
target_eunis="S42",
)
df, msg17 = prep.overwrite_eunis(
df,
src=r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Calluna_Molinea.csv",
src_col="PlotObservationID",
target_eunis="S42",
)
df, msg18 = prep.overwrite_eunis(
df,
src=r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Empetrum_Avenula.csv",
src_col="PlotObservationID",
target_eunis="S42",
)
df, msg19 = prep.overwrite_eunis(
df,
src=r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Empetrum_Molinea.csv",
src_col="PlotObservationID",
target_eunis="S42",
)
df, msg20 = prep.overwrite_eunis(
df,
src=r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Erica_Avenula.csv",
src_col="PlotObservationID",
target_eunis="S41",
)
df, msg21 = prep.overwrite_eunis(
df,
src=r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Erica_Molinea.csv",
src_col="PlotObservationID",
target_eunis="S41",
)
report = f'Starting {datetime.datetime.now().strftime("%d %b %Y %H:%M:%S")}'
print(report)
df, report = prep.get_headers(msg=report)
if SAMPLE:
df, report = prep.sample(df=df, msg=report, n=500)
df, report = prep.add_xy_3035(df, msg=report)
df, report = prep.filter_to_years(df, msg=report)
df, report = prep.filter_to_species_lists(df, msg=report)
df, report = prep.filter_to_aoi(df, msg=report)
df, report = prep.filter_to_elevation(df, msg=report)
df, report = prep.add_country(df, msg=report)
df, report = prep.add_nh3_max(df, msg=report)
df, report = prep.add_nox_max(df, msg=report)
df, report = prep.add_n_totals(df, msg=report)
df, report = prep.add_soil_type(df, msg=report)
df, report = prep.add_yearly_precipitation(df, msg=report)
df, report = prep.add_yearly_temperature(df, msg=report)
df, report = prep.to_single_eunis(df, msg=report)
df, report = prep.overwrite_eunis(gdf=df, msg=report)
ts = datetime.datetime.now().strftime("%Y%m%d-%H%M")
base_out = r"w:\PROJECTS\DOREN22\\b_prepareddata"
df.drop("xy_3035", axis=1).to_csv(
os.path.join(base_out, f"eva_headers_{ts}.csv"), sep=",", index=True
os.path.join(BASE_OUT_DIRECTORY, f"{ts}_{BASENAME}.csv"), sep=",", index=True
)
with open(os.path.join(base_out, f"eva_haders_{ts}.txt"), "w") as f:
for m in [
msg1,
msg3,
msg4,
msg5,
msg6,
msg7,
msg8,
msg9,
msg11,
msg12,
msg13,
msg14,
msg15,
msg16,
msg17,
msg18,
msg19,
msg20,
msg21,
]:
f.write(f"{m}\n")
df.to_file(os.path.join(base_out, f'eva_headers_{ts}.shp'))
with open(os.path.join(BASE_OUT_DIRECTORY, f"{ts}_{BASENAME}.txt"), "w") as f:
f.write(report)
df.to_file(os.path.join(BASE_OUT_DIRECTORY, f'{ts}_{BASENAME}.shp'))
print(f'Done @ {datetime.datetime.now().strftime("%d %b %Y %H:%M:%S")}!')
import numpy as np
import geopandas as gp
import os
import pandas as pd
import shapely
def lonlat(y: int, x: int) -> (float, float):
    """
    Convert row, col in the EMEP 50km grid to the lon-lat of the cell centre.

    :param y: column number in the EMEP grid
    :param x: row number in the EMEP grid
    :return: (longitude, latitude) in decimal degrees
    """
    # EMEP grid constants — pole_x/pole_y are presumably the grid coordinates
    # of the projection pole, lat0 the true latitude (60 deg) of the
    # polar-stereographic projection; TODO confirm against EMEP grid docs.
    pole_x, pole_y = 8, 110
    cell_km = 50
    earth_radius_km = 6370
    lat0 = np.pi / 3
    rotation_deg = -32

    # Map-scale factor at the pole, expressed in grid cells.
    scale = (earth_radius_km / cell_km) * (1 + np.sin(lat0))
    # Distance from the pole, in grid cells.
    dist = np.hypot(x - pole_x, y - pole_y)

    latitude = 90 - (360 / np.pi) * np.arctan(dist / scale)
    longitude = rotation_deg + (180 / np.pi) * np.arctan((x - pole_x) / (pole_y - y))
    return longitude, latitude
def to_rowcol(longitude: float, latitude: float) -> (int, int):
    """
    Convert a lon-lat position (decimal degrees) to fractional (x, y)
    coordinates in the EMEP 50km grid. Inverse of ``lonlat``.

    :param longitude: longitude in decimal degrees
    :param latitude: latitude in decimal degrees
    :return: (x, y) grid coordinates, as floats
    """
    # EMEP grid constants, identical to those used in lonlat().
    xpol = 8
    ypol = 110
    d = 50
    latitude_0 = np.divide(np.pi, 3)  # true latitude of the projection (rad)
    R = 6370
    M = np.multiply(np.divide(R, d), np.add(1, np.sin(latitude_0)))
    delta_zero = -32  # grid rotation (deg); was undefined here (NameError)

    # BUG FIXES vs the original:
    #  - delta_zero was never defined, so the function raised NameError;
    #  - the inputs were overwritten by hardcoded constants (166.43 / 40.65);
    #  - degree-valued inputs were fed directly into the radian trigonometry.
    lat_rad = np.radians(latitude)
    theta = np.radians(longitude - delta_zero)

    # Distance from the pole in grid cells: inverse of
    # latitude = 90 - (360 / pi) * arctan(r / M)  (see lonlat).
    r = M * np.tan(np.subtract(np.divide(np.pi, 4), np.divide(lat_rad, 2)))

    x = xpol + r * np.sin(theta)
    y = ypol - r * np.cos(theta)
    return x, y
# Build a deduplicated EMEP grid and export the valid cell centres as points.
full = gp.read_file(r'w:\PROJECTS\DOREN22\a_sourcedata\EMEP\emep_grid\EMEP_GRID_FULL.shp')
# Deduplicate grid cells on their centre coordinates.
full = full.assign(latlon=full.apply(lambda row: f'{row.long}_{row.lat}', axis=1)).drop_duplicates(subset='latlon')
valid = pd.read_csv(r'w:\PROJECTS\DOREN22\a_sourcedata\EMEP\emep_grid\EMEPgrid.csv', sep=',').rename(columns={'long center': 'long',
                                                                                                             'lat center': 'lat'})
# Cell centres as a point layer in the CSV's native lon-lat coordinates.
gdf = gp.GeoDataFrame(valid, geometry=valid.apply(lambda row: shapely.geometry.Point(row.long, row.lat), axis=1))
gdf.to_file(r'w:\PROJECTS\DOREN22\a_sourcedata\EMEP\emep_grid\EMEPgrid_pnt.shp')
valid['latlon'] = valid.apply(lambda row: f'{np.round(row.long, 2)}_{np.round(row.lat, 2)}', axis=1)
# BUG FIX: the function defined above is `lonlat`; `to_lonlat` does not exist
# and the original line raised NameError.
lonlat(31, 1)
\ No newline at end of file
......@@ -7,9 +7,58 @@ import rasterstats
import rasterio as rio
import numpy as np
# Paths to the external source datasets used by the header-preparation steps;
# the keys are referenced as default arguments of the prep functions below.
SOURCE_DATA = {
    'eva_headers': r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20201118\EVA_Doren_header.csv",
    'eva_species': r"w:\PROJECTS\Doren19\a_brondata\EVA\delivery_201909\EVA_Doren_species.csv",
    'aoi': r"W:\PROJECTS\Doren19\a_brondata\AOI\ne_50m_cntrs_AOI_diss_fin.shp",
    'elevation': r"W:\PROJECTS\Doren19\a_brondata\covariables\DEM\DTM_3035.tif",
    'soil_map': r"w:\PROJECTS\Doren19\a_brondata\covariables\soil\b_processed\WRBLEV1_laea.tif",
    'country_map': r"w:\PROJECTS\Doren19\a_brondata\covariables\countries\ne_50m_cntrs_sel_buff_diss_2_3035.shp",
    'temperature_directory': r"w:\PROJECTS\Doren19\a_brondata\covariables\EObs\2_compiled\epsg3035",
    'temperature_basename': r"EObs_v200e_tg_5yrmean",
    'precipitation_directory': r"w:\PROJECTS\Doren19\a_brondata\covariables\EObs\2_compiled\epsg3035",
    'precipitation_basename': r"EObs_v200e_rr_5yrmean",
    'nh3_averaged': r"w:\PROJECTS\DOREN22\b_prepareddata\EMEP_averaged\max_20221230\NH3_a_30-20221230.csv",
    'nox_averaged': r"w:\PROJECTS\DOREN22\b_prepareddata\EMEP_averaged\max_20221230\NOx_a_30-20221230.csv",
}

# CSVs listing PlotObservationIDs whose EUNIS code must be overwritten with
# the given target code (consumed by overwrite_eunis).
EUNIS_OVERWRITE = {
    r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Calluna_Avenula.csv": 'S42',
    r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Calluna_Molinea.csv": 'S42',
    r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Empetrum_Avenula.csv": 'S42',
    r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Empetrum_Molinea.csv": 'S42',
    r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Erica_Avenula.csv": 'S41',
    r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20210112\Erica_Molinea.csv": 'S41',
}

# Row filters applied to the EVA headers.
FILTERS = {
    'earliest_year': 1950,      # drop plots recorded before this year
    'latest_year': 2020,        # upper bound of the deposition year columns
    'maximum_elevation': 500,   # metres; drop plots above this elevation
}
def sample(
    df: pd.DataFrame,
    msg: str,
    n: int,
    random_state=None,
) -> (pd.DataFrame, str):
    """
    Return a random sample of *n* rows from the dataframe.

    :param df: dataframe to sample from
    :param msg: running report; the update line is appended to it
    :param n: number of rows to sample
    :param random_state: optional seed forwarded to ``DataFrame.sample`` for
        reproducible sampling; None (the default) keeps the original
        non-deterministic behaviour
    :return: (sampled dataframe, updated report string)
    """
    update = f'Random sampling to {n} records.'
    print(update)
    return df.sample(n, random_state=random_state), f"{msg}\n{update}"
def get_headers(
headers_src: str = r"W:\PROJECTS\Doren19\a_brondata\EVA\delivery_20201118\EVA_Doren_header.csv",
msg: str,
headers_src: str = SOURCE_DATA['eva_headers'],
) -> (pd.DataFrame, str):
"""
Read EVA header file, using selected columns and dropping rows with NA in selected columns
......@@ -17,8 +66,6 @@ def get_headers(
:return: pandas dataframe
"""
df = (
pd.read_csv(
headers_src,
......@@ -62,12 +109,14 @@ def get_headers(
.dropna(axis=0, how="all", subset=["eunis_old", "eunis_new"])
.astype({"date_of_recording": int})
)
msg = f"Reading headers from file {headers_src}, {df.shape[0]:,} remaining after dropping NAs in latitude, longitude and date fields and (eunis_old AND eunis_new)."
print(msg)
return (df, msg)
update = f"Reading headers from file {headers_src}, {df.shape[0]:,} remaining after dropping NAs in latitude, longitude and date fields and (eunis_old AND eunis_new)."
print(update)
return (df, f"{msg}\n{update}")
def filter_to_years(df: pd.DataFrame, earliest_year) -> (pd.DataFrame, str):
def filter_to_years(
df: pd.DataFrame, msg: str, earliest_year=FILTERS['earliest_year'],
) -> (pd.DataFrame, str):
"""
Restrict header data frame to plots not older than a year
:param df:
......@@ -76,14 +125,15 @@ def filter_to_years(df: pd.DataFrame, earliest_year) -> (pd.DataFrame, str):
"""
pre = df.shape[0]
df = df.query(f"date_of_recording >= {earliest_year}")
msg = f"Filtering {pre:,} headers for not older than {earliest_year}, {df.shape[0]:,} remaining."
print(msg)
return (df, msg)
update = f"Filtering {pre:,} headers for not older than {earliest_year}, {df.shape[0]:,} remaining."
print(update)
return (df, f"{msg}\n{update}")
def filter_to_species_lists(
df: pd.DataFrame,
species_src=r"w:\PROJECTS\Doren19\a_brondata\EVA\delivery_201909\EVA_Doren_species.csv",
msg: str,
species_src=SOURCE_DATA['eva_species'],
) -> (pd.DataFrame, str):
"""
Restrict the dataframe to plots which have a corresponding species inventory
......@@ -105,14 +155,16 @@ def filter_to_species_lists(
)
)
]
msg = f"Filtering {pre:,} headers for having a valid species list in {species_src}, {df.shape[0]:,} remaining."
print(msg)
return (df, msg)
update = f"Filtering {pre:,} headers for having a valid species list in {species_src}, {df.shape[0]:,} remaining."
print(update)
return (df, f"{msg}\n{update}")
def filter_to_aoi(
gdf, aoi_src=r"W:\PROJECTS\Doren19\a_brondata\AOI\ne_50m_cntrs_AOI_diss_fin.shp"
):
gdf: gp.GeoDataFrame,
msg: str,
aoi_src=SOURCE_DATA['aoi'],
) -> (gp.GeoDataFrame, str):
"""
Filter EVA plots to within AOI
:param gdf: EVA gdf
......@@ -127,15 +179,16 @@ def filter_to_aoi(
left_df=gdf, right_df=gp.read_file(aoi_src), how="inner", predicate="within"
).index
]
msg = f"Filtering {pre:,} headers to AOI {aoi_src}, {out.shape[0]:,} remaining."
print(msg)
return (out, msg)
update = f"Filtering {pre:,} headers to AOI {aoi_src}, {out.shape[0]:,} remaining."
print(update)
return (out, f"{msg}\n{update}")
def filter_to_elevation(
gdf: gp.GeoDataFrame,
elevation_src=r"W:\PROJECTS\Doren19\a_brondata\covariables\DEM\DTM_3035.tif",
max_elevation=500,
msg: str,
elevation_src=SOURCE_DATA['elevation'],
max_elevation=FILTERS['maximum_elevation'],
) -> (gp.GeoDataFrame, str):
dem = rio.open(elevation_src)
......@@ -149,12 +202,12 @@ def filter_to_elevation(
affine=dem.transform,
)
gdf = gdf.loc[gdf.elevation <= max_elevation, :]
msg = f"Filtering {pre:,} headers for elevation below {max_elevation}m based on {elevation_src}, with {gdf.shape[0]:,} headers remaining."
print(msg)
return (gdf, msg)
update = f"Filtering {pre:,} headers for elevation below {max_elevation}m based on {elevation_src}, with {gdf.shape[0]:,} headers remaining."
print(update)
return (gdf, f"{msg}\n{update}")
def add_xy_3035(df: pd.DataFrame) -> (gp.GeoDataFrame, str):
def add_xy_3035(df: pd.DataFrame, msg: str) -> (gp.GeoDataFrame, str):
"""
Add EPSG3035 easting northing coordinates as shapely point
:param df:
......@@ -168,42 +221,18 @@ def add_xy_3035(df: pd.DataFrame) -> (gp.GeoDataFrame, str):
df["xy_3035"] = [Point(x, y) for (x, y) in zip(eastings, northings)]
# Return
msg = f"Adding EPSG3035 coordinates for {df.shape[0]:,} headers"
print(msg)
return (gp.GeoDataFrame(df, geometry="xy_3035", crs="epsg:3035"), msg)
def add_country(
    gdf: gp.GeoDataFrame,
    country_src=r"w:\PROJECTS\Doren19\a_brondata\covariables\countries\ne_50m_cntrs_sel_buff_diss_2_3035.shp",
    drop_na=True,
) -> (gp.GeoDataFrame, str):
    """
    Add a country code column to the EVA header GeoDataFrame via a spatial
    join ('within') with a countries shapefile.

    :param gdf: EVA headers as a GeoDataFrame
    :param country_src: path to the countries shapefile; its SOV_A3 attribute
        becomes the 'country' column
    :param drop_na: remove headers with NA for country, i.e. headers that fall
        outside every country polygon
    :return: (geodataframe with 'country' column, log message)
    """
    # Left over from an earlier variant that mapped ISO3 codes to full names:
    # category_map = pd.read_csv(r'w:\PROJECTS\Doren19\a_brondata\covariables\countries\iso_country_codes.csv',
    #                            index_col='ISO3', sep=';', encoding='cp1252').Country_name.to_dict()
    countries = gp.read_file(country_src)
    # Left join keeps every header; SOV_A3 is NaN where no polygon matches.
    out = gdf.sjoin(
        countries.loc[:, ["geometry", "SOV_A3"]], how="left", predicate="within"
    ).rename(columns={"SOV_A3": "country"}).drop(labels='index_right', axis=1)
    if drop_na:
        out = out.dropna(subset=["country"])
    msg = f"Adding country information from {country_src} to {gdf.shape[0]:,} headers, with {out.shape[0]:,} headers remaining."
    print(msg)
    return out, msg
update = f"Adding EPSG3035 coordinates for {df.shape[0]:,} headers"
print(update)
return (
gp.GeoDataFrame(df, geometry="xy_3035", crs="epsg:3035"),
f"{msg}\n{update}",
)
def add_soil_type(
gdf: gp.GeoDataFrame,
soil_src=r"w:\PROJECTS\Doren19\a_brondata\covariables\soil\b_processed\WRBLEV1_laea.tif",
msg: str,
soil_src=SOURCE_DATA['soil_map'],
drop_na=True,
) -> (gp.GeoDataFrame, str):
"""
......@@ -236,15 +265,49 @@ def add_soil_type(
if drop_na:
gdf = gdf.dropna(subset=["soil_type"])
msg = f"Adding soil type from {soil_src} to {pre:,} headers, with {gdf.shape[0]:,} headers remaining."
print(msg)
return gdf, msg
update = f"Adding soil type from {soil_src} to {pre:,} headers, with {gdf.shape[0]:,} headers remaining."
print(update)
return (gdf, f"{msg}\n{update}")
def add_country(
    gdf: gp.GeoDataFrame,
    msg: str,
    country_src=SOURCE_DATA['country_map'],
    drop_na=True,
) -> (gp.GeoDataFrame, str):
    """
    Attach a country code to every EVA header via a spatial join ('within')
    with a countries shapefile.

    :param gdf: EVA headers as a GeoDataFrame
    :param msg: running report; the update line is appended to it
    :param country_src: path to the countries shapefile; its SOV_A3 attribute
        becomes the 'country' column
    :param drop_na: when True, drop headers that fall outside every country
        polygon (NA country)
    :return: (geodataframe with 'country' column, updated report string)
    """
    countries = gp.read_file(country_src)
    # Left join keeps every header; SOV_A3 is NaN where no polygon matches.
    joined = gdf.sjoin(
        countries.loc[:, ["geometry", "SOV_A3"]], how="left", predicate="within"
    )
    joined = joined.rename(columns={"SOV_A3": "country"})
    joined = joined.drop(labels="index_right", axis=1)
    if drop_na:
        joined = joined.dropna(subset=["country"])
    update = f"Adding country information from {country_src} to {gdf.shape[0]:,} headers, with {joined.shape[0]:,} headers remaining."
    print(update)
    return (joined, f"{msg}\n{update}")
def add_yearly_temperature(
gdf: gp.GeoDataFrame,
temp_src=r"w:\PROJECTS\Doren19\a_brondata\covariables\EObs\2_compiled\epsg3035",
temp_basename=r"EObs_v200e_tg_5yrmean",
msg: str,
temp_src=SOURCE_DATA['temperature_directory'],
temp_basename=SOURCE_DATA['temperature_basename'],
drop_na=True,
) -> (gp.GeoDataFrame, str):
"""
......@@ -271,15 +334,16 @@ def add_yearly_temperature(
if drop_na:
gdf.dropna(subset=["five_yearly_temp"], inplace=True)
msg = f"Adding 5-yearly averaged temperature from {temp_src} to {pre:,} headers, with {gdf.shape[0]:,} headers remaining."
print(msg)
return (gdf, msg)
update = f"Adding 5-yearly averaged temperature from {temp_src} to {pre:,} headers, with {gdf.shape[0]:,} headers remaining."
print(update)
return (gdf, f"{msg}\n{update}")
def add_yearly_precipitation(
gdf: gp.GeoDataFrame,
precip_src=r"w:\PROJECTS\Doren19\a_brondata\covariables\EObs\2_compiled\epsg3035",
precip_basename=r"EObs_v200e_rr_5yrmean",
msg: str,
precip_src=SOURCE_DATA['precipitation_directory'],
precip_basename=SOURCE_DATA['precipitation_basename'],
drop_na=True,
) -> (gp.GeoDataFrame, str):
"""
......@@ -304,14 +368,15 @@ def add_yearly_precipitation(
)
if drop_na:
gdf.dropna(subset=["five_yearly_precipitation"], inplace=True)
msg = f"Adding 5-yearly averaged precipitation from {precip_src} to {pre:,} headers, with {gdf.shape[0]:,} headers remaining."
print(msg)
return (gdf, msg)
update = f"Adding 5-yearly averaged precipitation from {precip_src} to {pre:,} headers, with {gdf.shape[0]:,} headers remaining."
print(update)
return (gdf, f"{msg}\n{update}")
def add_nh3_max(
gdf: gp.GeoDataFrame,
ndep_src=r"w:\PROJECTS\Doren19\a_brondata\POSCH_dep\20200401delivery\v2\NH3-20200505.csv",
msg: str,
ndep_src=SOURCE_DATA['nh3_averaged'],
drop_na=True,
) -> (gp.GeoDataFrame, str):
"""
......@@ -325,14 +390,16 @@ def add_nh3_max(
# Read NDep data
nh3 = pd.read_csv(
os.path.join(ndep_src, ),
os.path.join(
ndep_src,
),
sep=",",
comment="!",
index_col=0,
header=0,
skip_blank_lines=True,
usecols=[0] + [i for i in range(3, 71)],
names=["plot_obs_id"] + [f"y{i}" for i in range(1950, 2018)],
names=["plot_obs_id"] + [f"y{i}" for i in range(FILTERS['earliest_year'], FILTERS['latest_year']+1)],
)
# restrict NDep and GDF to common plots
......@@ -348,20 +415,25 @@ def add_nh3_max(
idx, cols = pd.factorize(mapper.year)
gdf["nh3_mg_m2"] = (
nh3.reindex(cols, axis=1)
.reindex(gdf.index, axis=0)
.to_numpy()[np.arange(len(nh3)), idx]
.reindex(gdf.index, axis=0)
.to_numpy()[np.arange(len(nh3)), idx]
)
msg = f'Adding NH3 information from {os.path.join(ndep_src, "NH3-20200505.csv")} to {pre:,} headers, with {gdf.shape[0]:,} remaining.'
print(msg)
return (gdf, msg)
# Drop NAs
if drop_na:
gdf.dropna(axis=0, subset=["nh3_mg_m2"], how="any", inplace=True)
update = f'Adding NH3 information from {ndep_src} to {pre:,} headers, with {gdf.shape[0]:,} remaining.'
print(update)
return (gdf, f"{msg}\n{update}")
def add_nox_max(
gdf: gp.GeoDataFrame,
ndep_src=r"w:\PROJECTS\Doren19\a_brondata\POSCH_dep\20200401delivery\v2\NOx-20200505.csv",
msg: str,
ndep_src=SOURCE_DATA['nox_averaged'],
drop_na=True,
) -> (gp.GeoDataFrame, str):
) -> (gp.GeoDataFrame, str):
"""
Add NH3 values from Max Posch to plots
:param gdf:
......@@ -378,7 +450,7 @@ def add_nox_max(
index_col=0,
header=0,
usecols=[0] + [i for i in range(3, 71)],
names=["plot_obs_id"] + [f"y{i}" for i in range(1950, 2018)],
names=["plot_obs_id"] + [f"y{i}" for i in range(FILTERS['earliest_year'], FILTERS['latest_year']+1)],
)
# restrict NOx and GDF to common plots
......@@ -400,14 +472,14 @@ def add_nox_max(
# Drop NAs
if drop_na:
gdf.dropna(axis=0, subset=["nh3_mg_m2", "nox_mg_m2"], how="any", inplace=True)
gdf.dropna(axis=0, subset=["nox_mg_m2"], how="any", inplace=True)
msg = f'Adding NOx information from {os.path.join(ndep_src, "NOx-20200505.csv")} to {pre:,} headers, with {gdf.shape[0]:,} remaining.'
print(msg)
return (gdf, msg)
update = f'Adding NOx information from {ndep_src} to {pre:,} headers, with {gdf.shape[0]:,} remaining.'
print(update)
return gdf, f"{msg}\n{update}"
def add_n_totals(gdf: gp.GeoDataFrame) -> (gp.GeoDataFrame, str):
def add_n_totals(gdf: gp.GeoDataFrame, msg: str) -> (gp.GeoDataFrame, str):
"""
Calculated total NDep based on NH3 and NOx
:param gdf:
......@@ -419,12 +491,37 @@ def add_n_totals(gdf: gp.GeoDataFrame) -> (gp.GeoDataFrame, str):
gdf["totN_kg_ha"] = gdf.loc[:, "totN_mg_m2"].divide(100)
gdf["totN_kmol_ha"] = gdf.loc[:, "totN_kg_ha"].divide(14)
msg = f'Adding total N in mg_m2, kg_ha and kmol_ha for {gdf.shape[0]:,} headers.'
print(msg)
return gdf, msg
update = f"Adding total N in mg_m2, kg_ha and kmol_ha for {gdf.shape[0]:,} headers."
print(update)
return (gdf, f"{msg}\n{update}")
def to_single_eunis(df: pd.DataFrame) -> (pd.DataFrame, str):
def add_structuurtype(gdf: gp.GeoDataFrame, msg: str) -> (gp.GeoDataFrame, str):
    """
    Add a 'structuurtype' (structure type) column to the dataframe, based on
    the EUNIS code.

    Not implemented yet. Raising here instead of silently returning None (the
    original behaviour) makes the missing implementation fail loudly at the
    call site, rather than as a confusing TypeError when the caller unpacks
    the expected ``(gdf, msg)`` tuple.

    :param gdf: EVA headers as a GeoDataFrame
    :param msg: running report
    :return: (geodataframe with 'structuurtype' column, updated report string)
    :raises NotImplementedError: always, until implemented
    """
    # TODO: implement the EUNIS-code -> structuurtype mapping
    raise NotImplementedError("add_structuurtype is not implemented yet")
def add_roughness(gdf: gp.GeoDataFrame, msg: str) -> (gp.GeoDataFrame, str):
    """
    Add a roughness classification of the plot, based on EUNIS type.
    Roughness is: {high, low}.

    Not implemented yet. Raising here instead of silently returning None (the
    original behaviour) makes the missing implementation fail loudly at the
    call site, rather than as a confusing TypeError when the caller unpacks
    the expected ``(gdf, msg)`` tuple.

    :param gdf: EVA headers as a GeoDataFrame
    :param msg: running report
    :return: (geodataframe with roughness column, updated report string)
    :raises NotImplementedError: always, until implemented
    """
    # TODO: implement the EUNIS-type -> roughness ({high, low}) mapping
    raise NotImplementedError("add_roughness is not implemented yet")
def to_single_eunis(df: pd.DataFrame, msg: str) -> (pd.DataFrame, str):
"""
Create single EUNIS column based on eunis_new where possible, else eunis_old
:param df:
......@@ -439,13 +536,15 @@ def to_single_eunis(df: pd.DataFrame) -> (pd.DataFrame, str):
),
index=df.index,
).str.replace(",", "-")
msg = f"Merging eunis-old and eunis-new codes for {df.shape[0]:,} headers."
print(msg)
return (df, msg)
update = f"Merging eunis-old and eunis-new codes for {df.shape[0]:,} headers."
print(update)
return (df, f"{msg}\n{update}")
def overwrite_eunis(
gdf: gp.GeoDataFrame, src, src_col: str, target_eunis
gdf: gp.GeoDataFrame,
msg: str,
mapper=EUNIS_OVERWRITE,
) -> (gp.GeoDataFrame, str):
"""
Overwrite EUNIS type of selected plots
......@@ -455,11 +554,19 @@ def overwrite_eunis(
:return:
"""
target_ids = pd.read_csv(src, sep="\t", index_col=src_col).index.intersection(
msg += '\n'
for src, target_eunis in mapper.items():
target_ids = pd.read_csv(src, sep="\t", index_col='PlotObservationID').index.intersection(
gdf.index
)
gdf.loc[target_ids, "eunis_code"] = target_eunis
msg = f"Setting EUNIS type to {target_eunis} based on {src} for {len(target_ids):,} headers."
print(msg)
return (gdf, msg)
gdf.loc[target_ids, "eunis_code"] = target_eunis
update = f"Setting EUNIS type to {target_eunis} based on {src} for {len(target_ids):,} headers."
print(update)
msg += f'{update}\n'
return gdf, msg
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment