From b98703db8797e9024dfa193cc76ff07b4e4e4784 Mon Sep 17 00:00:00 2001 From: Roelofsen <hans.roelofsen@wur.nl> Date: Mon, 5 Oct 2020 10:48:52 +0200 Subject: [PATCH] testing function to doren main class --- doren_batch.sh | 2 +- run_species.py | 1 + utils/doren_classes.py | 22 +++++++++++++--------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/doren_batch.sh b/doren_batch.sh index 03985e1..3f7e599 100644 --- a/doren_batch.sh +++ b/doren_batch.sh @@ -20,4 +20,4 @@ echo $SLURM_ARRAY_TASK_ID # Run cd /home/WUR/roelo008/projs/doren_2019 -python run_species.py $SLURM_ARRAY_TASK_ID 50 doren_20201001.pkl \ No newline at end of file +python run_species.py $SLURM_ARRAY_TASK_ID 50 doren_20201005.pkl \ No newline at end of file diff --git a/run_species.py b/run_species.py index c8e7bc9..f660213 100644 --- a/run_species.py +++ b/run_species.py @@ -20,6 +20,7 @@ n_batches = int(args.n_batches) # Recover the Pickled doren object pkl_src = os.path.join('./pkl_src/', args.pkl) +pkl_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\DOREN\b_compiled_data\a_pkl\doren_20201001.pkl' with open(pkl_src, 'rb') as handle: doren = pickle.load(handle) diff --git a/utils/doren_classes.py b/utils/doren_classes.py index f4c50d5..6667e5d 100644 --- a/utils/doren_classes.py +++ b/utils/doren_classes.py @@ -310,7 +310,7 @@ class Doren: self.eva['totN_kg_ha'] = self.eva.loc[:, 'totN_mg_m2'].divide(100) self.eva['totN_mol_ha'] = self.eva.loc[:, 'totN_kg_ha'].divide(14) - self.update_status() + self.update_status(covar=['totN_mg_m2', 'totN_kg_ha', 'totN_mol_ha']) self.report += 'Added NDep from POSCH: {0} rows remaining.\n'.format(self.eva.shape[0]) def add_covar(self, covar_dir, covar_src, covar_name, nominal=False, keep_all=False, raster=True, **kwargs): @@ -716,7 +716,11 @@ class Doren: self.species = set(self.spec.species_name_hdr) self.status.update(n_plots=self.eva.shape[0], columns=self.eva.columns.tolist(), n_species=len(self.species)) if covar: - 
self.status['covars'] += covar + if not isinstance(covar, list): + covar_lst = [covar] + else: + covar_lst = covar + self.status['covars'] += covar_lst def reset_species_sel(self): # reset self.positive_plots and self.negative_plots @@ -742,26 +746,26 @@ class Doren: self.report += 'Testing dataset integrity\n' # Check for NAs - for check_col in self.status['covars'] + ['plot_id', 'country', 'year', 'veg_type', 'plot_coordinates_wgs84']: + for check_col in self.status['covars'] + ['plot_id', 'year', 'veg_type', 'plot_coordinates_wgs84']: nas = self.eva.loc[:, check_col].isna() if any(nas): - msg = '{0} NA values found for {1} (index: {2})\n'.format(len(nas), check_col, self.eva.loc[nas].index) + msg = '{0} -- {1} NA values found (index: {2})\n'.format(check_col, sum(nas), self.eva.loc[nas].index) else: msg = '{0} -- no missing data\n'.format(check_col) self.report += msg - # Check for invalid vals earliest_yr, latest_yr = self.eva.year.min(), self.eva.year.max() self.report += 'Plot years are between {0}-{1}\n'.format(earliest_yr, latest_yr) - for check_col in [x for x in self.status['covars'] if not x.endswith('precip')]: + for check_col in [x for x in self.status['covars'] if not (x.endswith('precip') or + self.eva.loc[:, x].dtype.name == 'object')]: sub_zeros = self.eva.loc[:, check_col] < 0 if any(sub_zeros): - msg = '{0} NA values found for {1} (index: {2})\n'.format(len(sub_zeros), check_col, - self.eva.loc[sub_zeros].index) + msg = '{0} values below 0 found for {1} (index: {2})\n'.format(sum(sub_zeros), check_col, + self.eva.loc[sub_zeros].index) else: - msg = '{0} -- all values > 0\n' + msg = '{0} -- all values > 0\n'.format(check_col) self.report += msg if self.eva.index.difference(set(self.spec.plot_obs_id)).empty: -- GitLab