Commit 52485840 authored by Wit, Allard de
Browse files

grompy.DataAccessProvider now works well.

parent d104602d
from pathlib import Path
import sqlalchemy as sa
import pandas as pd
import yaml
class Container:
    """Plain attribute bag.

    ``DataAccessProvider.__iter__`` attaches one attribute per dataset name
    to an instance of this class (via ``setattr``) and yields it together
    with the parcel row.
    """
class DataAccessProvider:
def __init__(self, grompy_conf, fieldID=None, area_gt=None, pixcount_gt=None, provincie=None, limit=None, gws_gewasc=None):
    """Open the databases named in a grompy config file and build the parcel query.

    The YAML config is expected to provide ``parcel_info.dsn``,
    ``parcel_info.table_name`` and a ``datasets`` mapping of
    ``dataset_name -> {"dsn": ...}``; each dataset table is reflected under
    the same name as its dataset.

    :param grompy_conf: path to the grompy YAML configuration file.
    :param fieldID: optional list, set or tuple of field IDs to select. When
        given, the other filters below are ignored.
    :param area_gt: keep parcels whose ``area_ha`` is greater than this value.
    :param pixcount_gt: keep parcels whose ``pixcount`` is greater than this value.
    :param provincie: keep parcels whose ``provincie`` equals this value.
    :param limit: stored on ``self.limit`` (defaults to ``int(1e9)``); how it
        is applied is not visible in this block.
    :param gws_gewasc: keep parcels whose ``gws_gewasc`` equals this value.
    :raises RuntimeError: if the config file does not exist, or if ``fieldID``
        is not a list, set or tuple.

    NOTE(review): bound ``MetaData``, ``autoload=True``,
    ``append_whereclause`` and implicit ``execute()`` are legacy SQLAlchemy
    APIs; confirm the SQLAlchemy version this project pins before upgrading.
    """
    grompy_conf = Path(grompy_conf)
    if not grompy_conf.exists():
        msg = f"Cannot find config file: {grompy_conf}"
        raise RuntimeError(msg)

    # Validate arguments before any database connection is opened, so a bad
    # call does not leave engines/connections behind.
    if fieldID is not None and not isinstance(fieldID, (list, tuple, set)):
        msg = "FieldID must be provide as a list, set or tuple"
        raise RuntimeError(msg)

    # Use a context manager so the config file handle is closed right away.
    with open(grompy_conf) as fp:
        self.grompy_conf = yaml.safe_load(fp)
    self.parcel_dsn = self.grompy_conf["parcel_info"]["dsn"]
    self.parcel_table = self.grompy_conf["parcel_info"]["table_name"]

    # Build connections to dataset tables: one (engine, table) pair per dataset.
    self.dataset_connections = {}
    for dataset_name, details in self.grompy_conf["datasets"].items():
        e = sa.create_engine(details["dsn"])
        meta = sa.MetaData(e)
        tbl = sa.Table(dataset_name, meta, autoload=True)
        self.dataset_connections[dataset_name] = (e, tbl)

    # Build connection to parcel info
    self.engine = sa.create_engine(self.parcel_dsn)
    meta = sa.MetaData(self.engine)
    self.tbl_perc_info = sa.Table(self.parcel_table, meta, autoload=True)

    if limit is None:
        limit = int(1e9)
    self.limit = limit

    self.perc_stmt = sa.select([self.tbl_perc_info]).order_by("fieldID")
    if fieldID is not None:
        # An explicit list of field IDs takes precedence over all other filters.
        self.perc_stmt.append_whereclause(self.tbl_perc_info.c.fieldID.in_(fieldID))
    else:
        if area_gt is not None:
            self.perc_stmt.append_whereclause(self.tbl_perc_info.c.area_ha > area_gt)
        if gws_gewasc is not None:
            self.perc_stmt.append_whereclause(self.tbl_perc_info.c.gws_gewasc == gws_gewasc)
        if pixcount_gt is not None:
            self.perc_stmt.append_whereclause(self.tbl_perc_info.c.pixcount > pixcount_gt)
        if provincie is not None:
            self.perc_stmt.append_whereclause(self.tbl_perc_info.c.provincie == provincie)

    # Count the selected parcels once; this value backs __len__.
    s = sa.select([sa.func.count()]).select_from(self.perc_stmt)
    self.parcel_count = s.execute().fetchone()[0]
    # Former attribute name, kept as an alias so existing readers of
    # ``count`` keep working without running the count query a second time.
    self.count = self.parcel_count
@property
def datasets(self):
return list(self.dataset_connections.keys())
def __iter__(self):
......@@ -33,16 +66,19 @@ class DataAccessProvider:
rows = r.fetchmany(100)
while rows:
for row in rows:
s = sa.select([self.s2_observations],
sa.and_(self.s2_observations.c.fieldID==row.fieldID),
order_by={self.s2_observations.c.day})
df = pd.read_sql(s, self.engine)
df = df.drop(columns="fieldID")
df.index = pd.to_datetime(df.day)
yield row, df
rows = r.fetchmany()
c = Container()
for dataset_name, (engine, tbl) in self.dataset_connections.items():
s = sa.select([tbl],
sa.and_(tbl.c.fieldID==row.fieldID),
order_by={tbl.c.day})
df = pd.read_sql(s, engine)
df = df.drop(columns="fieldID")
df.index = pd.to_datetime(df.day)
setattr(c, dataset_name, df)
yield row, c
rows = r.fetchmany()
def __len__(self):
return self.count
return self.parcel_count
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment