Commit 4e26a518 authored by Wit, Allard de's avatar Wit, Allard de
Browse files

Merge branch 'develop' into 'master'

Upgrade to grompy 1.2

See merge request !1
parents 8353c37f 2536cae3
......@@ -126,6 +126,8 @@ run `grompy check` first.
### loading
*See information below for converting grompy 1.1 to 1.2 databases*
The final step is to load the parcel information and satellite observations into the database tables. This can be
done with the `grompy load <grompy_yaml>` command. Grompy will now show the following output:
......@@ -147,6 +149,11 @@ server with sufficient capacity to handle multiple streams of data. Note that gr
into one SQLite database, but write locks on the database will cause delays in processing so this is not
*Important*: grompy 1.2 offers additional parcel selection options compared to grompy 1.2. To convert a grompy 1.1 database
to a grompy 1.2 database it is only necessary to reload the parcel information. The structure of the Sentinel1/2
parcel observations did not change from 1.1 to 1.2. A special option has been added to only reload the parcel
info in a grompy database: `grompy load --parcel_info_only <grompy_yaml>`
## Accessing data processed by grompy
See the jupyter notebooks in the `notebooks/` directory in the grompy git repository.
\ No newline at end of file
# -*- coding: utf-8 -*-
# Copyright (c) 2021 Wageningen Environmental Research
# Allard de Wit (, April 2021
# Allard de Wit (, October 2021
"""Grompy is a tool to process and access parcel-based satellite observations from
__version__ = "1.2.0"
from .dap import DataAccessProvider
from .cmd import cli
__version__ = "1.1.1"
\ No newline at end of file
......@@ -8,6 +8,7 @@ import click
from .load_data import load_data
from .check_files import check_grompy_inputs
from .init import init
from . import __version__
def cli():
......@@ -35,9 +36,17 @@ def check_grompy(grompy_yaml):
@click.argument("grompy_yaml", type=click.Path(exists=True))
def load_grompy(grompy_yaml):
@click.option("--parcel_info_only", is_flag=True)
def load_grompy(grompy_yaml, parcel_info_only):
grompy_yaml = Path(grompy_yaml)
load_data(grompy_yaml, parcel_info_only)
def cli(ctx):
......@@ -15,8 +15,27 @@ class Container:
class DataAccessProvider:
def __init__(self, grompy_yaml, fieldID=None, area_gt=None, pixcount_gt=None, provincie=None, limit=None, gws_gewasc=None):
"""grompy.DataAccessProvider allow to query grompy databases and iterate through the selected parcels.
:param grompy_yaml: path to the grompy YAML file
:param fieldID: A list of fieldIDs that should be selected. Note that the number of fieldIDs that can be
provided is limited but unknown (depending on the SQL parser). Above 1000 FieldIDs warning
messages will be displayed. When this keyword is used, all other selection criteria will
be ignored.
:param area_gt: Only selected parcels greater than X ha
:param pixcount_gt: Only select parcels with a pixel count greater than this number
:param provincie: Only select parcels within this province
:param limit: Limit the number of parcels selected.
:param gws_gewasc: Select only parcels with given gws_gewasc
:param cat_gewasc: Select only parcels with given cat_gewasc
:param rdx_bounds: Select only parcels within the [min, max] RD X coordinates
:param rdy_bounds: Select only parcels within the [min, max] RD Y coordinates
:param lon_bounds: Select only parcels within the [min, max] longitude
:param lat_bounds: Select only parcels within the [min, max] latitude
def __init__(self, grompy_yaml, fieldID=None, area_gt=None, pixcount_gt=None, provincie=None, limit=None,
gws_gewasc=None, cat_gewasc=None, rdx_bounds=None, rdy_bounds=None, lon_bounds=None, lat_bounds=None):
grompy_yaml = Path(grompy_yaml)
if not grompy_yaml.exists():
......@@ -46,6 +65,10 @@ class DataAccessProvider:
if not isinstance(fieldID, (list, tuple, set)):
msg = "FieldID must be provide as a list, set or tuple"
raise RuntimeError(msg)
if len(fieldID) > 1000:
msg = f"Warning: your are passing {len(fieldID)} fieldIDs, this may be too many. " \
f"Consider splitting your selected fieldIDs in smaller batches or use another " \
f"selection criteria."
if area_gt is not None:
......@@ -56,6 +79,17 @@ class DataAccessProvider:
self.perc_stmt.append_whereclause(self.tbl_perc_info.c.pixcount > pixcount_gt)
if provincie is not None:
self.perc_stmt.append_whereclause(self.tbl_perc_info.c.provincie == provincie)
if cat_gewasc is not None:
cat_gewasc = str(cat_gewasc).strip()
self.perc_stmt.append_whereclause(self.tbl_perc_info.c.cat_gewasc == cat_gewasc)
if rdx_bounds is not None:
self.perc_stmt.append_whereclause(sa.between(self.tbl_perc_info.c.rdx, rdx_bounds[0], rdx_bounds[1]))
if rdy_bounds is not None:
self.perc_stmt.append_whereclause(sa.between(self.tbl_perc_info.c.rdy, rdy_bounds[0], rdy_bounds[1]))
if lon_bounds is not None:
self.perc_stmt.append_whereclause(sa.between(self.tbl_perc_info.c.longitude, lon_bounds[0], lon_bounds[1]))
if lat_bounds is not None:
self.perc_stmt.append_whereclause(sa.between(self.tbl_perc_info.c.latitude, lat_bounds[0], lat_bounds[1]))
s =[sa.func.count()]).select_from(self.perc_stmt)
self.parcel_count = s.execute().fetchone()[0]
......@@ -104,4 +138,3 @@ class DataAccessProvider:
def __len__(self):
return self.parcel_count
......@@ -15,7 +15,7 @@ import geopandas as gpd
import numpy as np
import yaml
from .util import prepare_db, printProgressBar, Process, make_path_absolute, open_DB_connection
from .util import prepare_db, printProgressBar, Process, make_path_absolute, open_DB_connection, get_latlon_from_RD
dummy_date = "19000101"
......@@ -42,21 +42,33 @@ def load_parcel_info(grompy_yaml, dsn, counts_file, shape_file, table_name):
shp_fname = make_path_absolute(grompy_yaml, Path(shape_file))
df = gpd.read_file(shp_fname)
df["area_ha"] = df.geometry.area / 1e4
df = df.set_index("fieldid")
# Compute latitude/longitude of field centroids
r = []
for row in df.itertuples():
x, y = float(row.geometry.centroid.x), float(row.geometry.centroid.y)
lon, lat = get_latlon_from_RD(x, y)
r.append({"fieldID": row.Index, "latitude": lat, "longitude":lon, "rdx": x, "rdy": y})
df_latlon = pd.DataFrame(r).set_index("fieldID")
df = df.join(df_latlon)
# add pixel counts
fname_counts = make_path_absolute(grompy_yaml, Path(counts_file))
df_counts = pd.read_csv(fname_counts)
df_counts.set_index("field_ID", inplace=True)
df["pixcount"] = df_counts["pixcount"]
df = df.join(df_counts)
df_out = pd.DataFrame({"fieldID": df.index,
"year": df.year.astype(np.int32),
"pixcount": df.pixcount.astype(np.int32),
"area_ha": df.area_ha,
"area_ha": df.geometry.area / 1e4,
"rdx": df.rdx,
"rdy": df.rdy,
"longitude": df.longitude,
"latitude": df.latitude,
"cat_gewasc": df.cat_gewasc.apply(str),
"gws_gewasc": df.gws_gewasc.astype(np.int32),
"gws_gewas": df.gws_gewas.apply(str),
"provincie": df.provincie.apply(str),
"gemeente": df.gemeente.apply(str),
"regio": df.regio.apply(str),
......@@ -72,9 +84,12 @@ def load_parcel_info(grompy_yaml, dsn, counts_file, shape_file, table_name):
sa.Column("year", sa.Integer),
sa.Column("pixcount", sa.Integer),
sa.Column("area_ha", sa.Float),
sa.Column("rdx", sa.Float),
sa.Column("rdy", sa.Float),
sa.Column("longitude", sa.Float),
sa.Column("latitude", sa.Float),
sa.Column("cat_gewasc", sa.Text),
sa.Column("gws_gewasc", sa.Integer),
sa.Column("gws_gewas", sa.Text),
sa.Column("provincie", sa.Text),
sa.Column("gemeente", sa.Text),
sa.Column("regio", sa.Text),
......@@ -226,7 +241,7 @@ def monitor_parallel_loading(process_list, parent_conn, lines_per_dataset):
def load_data(grompy_yaml):
def load_data(grompy_yaml, parcel_info_only):
"""Loads data point to by the YAML config file.
:param grompy_yaml:
......@@ -240,5 +255,6 @@ def load_data(grompy_yaml):
parcel_info = grompy_conf.pop("parcel_info")
load_parcel_info(grompy_yaml, **parcel_info)
process_list, parent_conn, lines_per_dataset = start_parallel_loading(grompy_yaml, grompy_conf["datasets"])
monitor_parallel_loading(process_list, parent_conn, lines_per_dataset)
if not parcel_info_only:
process_list, parent_conn, lines_per_dataset = start_parallel_loading(grompy_yaml, grompy_conf["datasets"])
monitor_parallel_loading(process_list, parent_conn, lines_per_dataset)
......@@ -7,6 +7,7 @@ from pathlib import Path
import sqlalchemy.exc
from sqlalchemy import MetaData, Table, Column, Integer, Date, Float, Text, create_engine
import pyproj
def take_first(iter):
......@@ -126,3 +127,11 @@ def open_DB_connection(grompy_yaml, dsn):
return engine
def get_latlon_from_RD(x, y, _cache={}):
rd_stelsel = "epsg:7415"
rd2wgs84 = _cache["rd2wgs84"]
except KeyError:
rd2wgs84 = _cache["rd2wgs84"] = pyproj.Proj(init=rd_stelsel)
return rd2wgs84(x, y, inverse=True)
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -12,10 +12,10 @@ description-file = ""
requires = [
"geopandas>= 0.8",
"pyyaml>= 5.4",
"pyyaml>= 5.3",
"sqlalchemy>= 1.4",
requires-python=">= 3.6"
grompy = "grompy:cli"
\ No newline at end of file
grompy = "grompy:cli"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment