New function to sample raster for point values and add as columns.

6eab8be5 · roelo008 · 4dc0cfcf · 6eab8be5 · 6eab8be5 · 6eab8be5
Commit 6eab8be5 authored 5 years ago by roelo008
--- a/a_prep/a1_compile_eva_header.py
+++ b/a_prep/a1_compile_eva_header.py
@@ -23,7 +23,7 @@ emep_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_bronda
 wrb_lev1_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\a_brondata\covariables\soil\b_processed\WRBLEV1_laea.tif'

 '''Output data'''
-out_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\1_EVA'
+out_dir = r'c:\apps\z_temp\doren_test'
 datestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M")
 basename = 'eva_header_selection_{0}'.format(datestamp)

@@ -56,13 +56,7 @@ eva_full['plot_coordinates_3035'] = eva_full.loc[:, 'plot_coordinates_wgs84'].ap
 eva_full['plot_pnt_wgs84'] = eva_full['plot_coordinates_wgs84'].apply(shapely.geometry.Point)
 eva_full = gp.GeoDataFrame(eva_full, geometry='plot_pnt_wgs84', crs={"init": "epsg:4326"})

-"""
-Add raster based information as new columns based on coorindates
-"""
-wrb_lev1 = do.get_raster_vals(eva_full.plot_coordinates_3035, wrb_lev1_src)
-# TODO: merge to eva_full
-
-'''Specificy requirements to which the data must comply'''
+'''Optional requirements to which the data must comply'''
 req1_desc = "Req 1: <latitude> is niet leeg"
 req2_desc = "Req 2: <longitude> is niet leeg"
 req3_desc = 'Req 3: plot is in AOI {0}'.format(aoi_src)
@@ -92,6 +86,13 @@ reqs = ["req1", "req2", "req3", "req4", "req8", 'req9']
 set_drop_rows = set().union(*[eval(x) for x in reqs])
 eva_sel = eva_full.drop(labels=list(set_drop_rows))

+"""
+Add raster based information as new columns based on coorindates. Doe dit pas hier omdat 1) er nu pas gecheckt is op
+  volledigheid van de coordinaten en omdat 2) er nu minder plots gesampled hoeven te worden. 
+"""
+wrb_lev1 = do.get_raster_vals(eva_sel.plot_coordinates_3035, wrb_lev1_src, nominal=True)
+eva_sel = eva_sel.join(other=wrb_lev1, how='left')
+
 ''''Prepare log for reporting'''
 header = 'EVA header from {0} with {1} original rows. ' \
         'The following selection requirements were applied\n'.format(eva_full_scr, eva_full.shape[0])
@@ -101,8 +102,7 @@ footer = 'Union between requirements {0} gives {1} rows, leaving {2}-{1} = {3} E
    .format(', '.join(reqs), len(set_drop_rows), eva_full.shape[0], np.subtract(eva_full.shape[0], len(set_drop_rows)))
 print(header, *msgs, footer)

-'''Save to file'''
-
+'''Save to files'''
 # Text metadata
 with open(os.path.join(out_dir, '{0}.txt'.format(basename)), 'w') as f:
    f.write(header)

--- a/utils/doren.py
+++ b/utils/doren.py
@@ -2,6 +2,8 @@
 Helper functions for the DOREN project

 """
+
+import os
 import json
 import rasterstats as rast
 import geopandas as gp
@@ -14,6 +16,9 @@ from shapely.geometry import Polygon


 def eva_colnames_orig():
+    """
+    :return: list of columns names of the original EVA database
+    """
    return ['PlotObservationID', 'PlotID', 'TV2 relevé number', 'Country', 'Cover abundance scale', 'Date of recording',
            'Relevé area (m²)', 'Altitude (m)', 'Aspect (°)', 'Slope (°)', 'Cover total (%)', 'Cover tree layer (%)',
            'Cover shrub layer (%)', 'Cover herb layer (%)', 'Cover moss layer (%)', 'Cover lichen layer (%)',
@@ -26,6 +31,9 @@ def eva_colnames_orig():


 def eva_colnames_new():
+    """
+    :return: list of improved colnames for the EVA database
+    """
    return ['plot_obs_id', 'plot_id', 'tv2_releve_nr', 'country', 'cover_abundance_scale', 'date_of_recording',
              'releve_area_m2', 'altitude_m', 'aspect_deg', 'slope_deg', 'cover_total_perc', 'cover_tree_layer_perc',
              'cover_shrub_layer_perc', 'cover_herb_layer_perc', 'cover_moss_layer_perc', 'cover_lichen_layer_perc',
@@ -38,6 +46,9 @@ def eva_colnames_new():


 def eva_colnames_drop():
+    """
+    :return: list of improved colnames from EVA database which can be dropped
+    """
    return ['slope_deg', 'cover_total_perc', 'cover_tree_layer_perc',
             'cover_shrub_layer_perc', 'cover_herb_layer_perc', 'cover_moss_layer_perc', 'cover_lichen_layer_perc',
             'cover_algae_layer_perc', 'cover_litter_layer_perc', 'cover_open_water_perc', 'cover_bare_rock_perc',
@@ -48,17 +59,31 @@ def eva_colnames_drop():


 def get_world_aoi_gdf():
+    """
+    If no AOI shapefile is provided, use a ply gdf covering the entire earth
+    :return: geodataframe
+    """
    return gp.GeoDataFrame(crs={"init": "epsg:4326"}, data={'aoi': 1}, index=[1],
                           geometry=[Polygon([(-180, 90), (180, 90), (180, -90), (-180, -90)])])


 def read_aoi_from_src(src):
+    """
+    Read an aoi shapefile from source, return as single ply after dissolving
+    :param src: path to the shapefile
+    :return: geodataframe with one polygon
+    """
    aoi_raw = gp.read_file(src)
    aoi_diss = aoi_raw.dissolve(by='featurecla')
    return gp.GeoDataFrame(crs={"init": "epsg:4326"}, data={'aoi': 1}, index=[1], geometry=aoi_diss.geometry.values)


 def first_letter(x):
+    """
+
+    :param x: string
+    :return: first letter of x
+    """
    try:
        out = x[0]
    except TypeError:
@@ -67,6 +92,11 @@ def first_letter(x):


 def int_year(x):
+    """
+    Try to set x to integer, return nan if x is not a value
+    :param x:
+    :return:
+    """
    try:
        out = np.int32(x)
    except ValueError:
@@ -206,18 +236,18 @@ def generate_square_id(east_north, size):
    :param east_north: tuple easting, northing coordinates in EPSG3035
    :param size: length in m of the squares (eg 10.000 or 25.000 m)
    :return: square identification string as eeee_nnnn: E-N of square lower left coordinates
-    """
-    #    |
-    #    |
-    #  N |        X
-    #    |
-    #    @_____________
-    #          E
-    #    <------------>
-    #         size
-    #
-    # Given plot at location X, return Easthing Northing of @ as "eeee_nnnn"

+        |
+        |
+      N |        X
+        |
+        @_____________
+              E
+        <------------>
+             size
+
+     Given plot at location X and square size x size, return Easting Northing of @ as "eeee_nnnn"
+"""
    easting, northing = east_north

    sq_ll_e = easting // size  # easting of square lower left
@@ -285,7 +315,7 @@ def get_raster_nominals(raster_src):
    :param raster_src: name of the raster
    :return: dictionary of the raster values and corresponding nominals
    """
-    with open('legend_mappings.json', 'r') as j:
+    with open(r'../utils/legend_mappings.json', 'r') as j:
        all_mappings = json.load(j)

    try:
@@ -294,32 +324,40 @@ def get_raster_nominals(raster_src):
        for k, v in mapping.items():
            out[int(k)] = v
        return out
+
    except KeyError:
        print('No legend mapping available for {0}'.format(raster_src))
        raise


-def get_raster_vals(coords, rast_src):
+def get_raster_vals(coords, rast_src, nominal=False):
    """
    Query a raster for its values at each coordinate pair. Assumes CRS match between the coordinates series and the r
    raster image.
    :param coords: pandas series with shapely point geometries
    :param rast_src: source of raster image to be queried
-    :return: pandas series of raster values at EACH coordinate pair
+    :param nominal: boolean, raster source is categorical (nomical) or not. Default = False
+    :return: pandas dataframe w raster values and nominal category
    """

-    if isinstance(coords[0], str):
+    interpolation = 'nearest' if nominal else 'bilinear'
+    rast_basename = os.path.splitext(os.path.basename(rast_src))[0]
+
+    if isinstance(coords.iloc[0], str):
        raise TypeError('Trying to parse string to coorindate tuples. Did you read EVA data from *.csv instead of pkl')

-    if not isinstance(coords[0], shapely.geometry.point.Point)\
-            and isinstance(coords[0], tuple):
+    if not isinstance(coords.iloc[0], shapely.geometry.point.Point) and isinstance(coords.iloc[0], tuple):
+        print('converting coordinate tuples to Shapely points')
        coords = coords.apply(shapely.geometry.Point)

    # the numeric raster values
-    rast_vals = rast.point_query(coords, rast_src)
-    #TODO: ValueError: cannot convert float NaN to integer
+    rast_vals = rast.point_query(coords, rast_src, interpolate=interpolation)
+    data_dict = {'{0}_vals'.format(rast_basename): rast_vals}
+
    # try to map raster values to nominal values
-    nominal_dict = get_raster_nominals(rast_src)
-    nominals = [apply_dict(x, nominal_dict) for x in rast_vals]
+    if nominal:
+        nominal_dict = get_raster_nominals(rast_basename)
+        nominals = [apply_dict(x, nominal_dict) for x in rast_vals]
+        data_dict['{0}_cats'.format(rast_basename)] = nominals

-    return pd.DataFrame(data={'rast_vals': rast_vals, 'rast_categories': nominals}, index=coords.index)
+    return pd.DataFrame(data=data_dict, index=coords.index)
--- a/utils/legend_mappings.json
+++ b/utils/legend_mappings.json
 {
-  "wrb_lev1": {
+  "WRBLEV1_laea": {
    "1": "Podzol",
    "2": "Water body",
    "3": "Andosol",