Commit 7052db49 authored by Roelofsen, Hans's avatar Roelofsen, Hans
Browse files

Apply Paul G's suggestions to the output files

parent 7736e586
......@@ -22,7 +22,7 @@ doren = dc.Doren(header_src=param_header_src, sp_src=param_sp_src)
doren.initiate(sample=False)
doren.apply_requirements('req1', 'req2', 'req3', 'req4', 'req8', 'req9', 'req10', aoi_src=param_aoi_src,
dem_src=param_dem_src)
doren.add_covar(covar_dir=cv_cntr_dir, covar_src=cv_cntr_src, covar_name='country', raster=False, column='SOVEREIGNT')
doren.add_covar(covar_dir=cv_cntr_dir, covar_src=cv_cntr_src, covar_name='country', raster=False, column='SOV_A3')
doren.add_posch(posch_src_dir=param_posch)
doren.add_covar(covar_dir=cv_soil_dir, covar_src=cv_soil_src, covar_name='soil_type', nominal=True)
doren.add_yearly_covar(covar_dir=cv_precp_dir, covar_src_basename=cv_precp_src, covar_name='five_yearly_precip')
......
......@@ -45,6 +45,7 @@ class Doren:
self.buffer_gdf = None # holder for geodataframe w. buffers
self.sel_species = None # holder for a species
self.buffer_size = None # holder for buffer size
self.sample = False # Dataset is a random sample of all plots, for testing purposes.
# relevant stats
self.status = {'n_plots': 0, 'columns': [], 'n_species': 0, 'covars': []}
self.basename = 'DOREN' # basename for all output
......@@ -92,6 +93,8 @@ class Doren:
# Read EVA Header data
if sample:
self.sample = sample
self.report += '\n\n==SAMPLE MODE, CONTENT RESTRICTED TO 500 RANDOM PLOTS==\n\n'
# Always use first 4 rows (for header and comments) plus 500 random rows
userows = [0, 1, 2, 3] + sorted(random.sample(range(4, 1357330), 500))
......@@ -128,11 +131,11 @@ class Doren:
self.req_found: subset of above, not found
"""
# Read csv file with requested species ('typische soorten')
# Read Excel sheet with requested species and drop empty rows
reqs = pd.read_excel(xls, sheet_name=sheet)
reqs.dropna(subset=[col], inplace=True)
# Simplify suggested species names
# Optionally simplify species names
if simplify_names:
reqs.loc[:, 'species_name_hdr'] = reqs.loc[:, col].astype(str).apply(do.strip_leading_quote)
reqs.loc[:, 'species_name_hdr'] = reqs.species_name_hdr.astype(str).apply(do.simplify_species)
......@@ -608,17 +611,13 @@ class Doren:
self.report += 'Written FORTRAN input to files: {0}\n\n'.format(os.path.join(pg_dir, pg_out_name))
elif what == 'species_single_file':
"""Single file for union self.positive_plots, self.nearby_plots formatted as:
PlotObservationID,totN_mol_ha,soil_type_vals,soil_type_label,country_label,5_yearly_precip,5_yearly_temp,response
87523,0.5481642857142858,11.0,jong,Spain,4.240024717242157,12.729687707377398,0
87524,0.4981642857142857,11.0,jong,Spain,3.376466375059885,11.764382274150906,0
See e-mail Paul Goedhart, 25-06-2020
"""
Single file for all plots with columns: plotID, dist2nearest, nearestID, respons.
Respons means 1/0 for self.positive_plots and self.negative_plots
"""
# check if all required data is present
if self.sel_species is None or 'totN_mol_ha' not in self.eva.columns:
if self.sel_species is None or 'dist2nearest' not in self.eva.columns:
print('Cannot report for PG')
return None
......@@ -629,19 +628,41 @@ class Doren:
os.mkdir(pg_dir)
# Compile dataframe for writing to file
out_cols = ['plot_id', 'totN_mol_ha', 'soil_type_label', 'country_label', 'five_yearly_precip',
'five_yearly_temp', 'dist2nearest', 'nearest_id']
# TODO: verify if all out_cols are present in self.eva
out_cols = ['plot_id', 'dist2nearest', 'nearest_id']
out_df = self.eva.loc[:, out_cols]
out_df.loc[self.positive_plots, 'respons'] = np.uint8(1)
out_df.loc[self.negative_plots, 'respons'] = np.uint8(0)
# Write to file and update report
out_df.astype({'respons': 'uint8'}).to_csv(os.path.join(pg_dir, '{}.csv'.format(pg_out_name)), sep=',',
index=False, header=True)
out_df.round({'dist2nearest': 2}).astype({'respons': np.byte, 'nearest_id': int})\
.to_csv(os.path.join(pg_dir, '{}.csv'.format(pg_out_name)), sep=',', index=False, header=True)
self.report += 'Written single file output for species {0} to file: ' \
'{1}'.format(self.sel_species, os.path.join(pg_dir, pg_out_name))
elif what == 'plot_covars_file':
"""
Single file with covariable information on all plots.
"""
# check if all required data is present
if self.sel_species is None or 'totN_mol_ha' not in self.eva.columns:
print('Cannot report for PG')
return None
# Dedicated directory for the PG output and output filename
pg_out_name = 'Plot_Covars'.format(self.sel_species.replace(' ', '_'))
pg_dir = os.path.join(out_dir, 'pg_input')
if not os.path.isdir(pg_dir):
os.mkdir(pg_dir)
out_cols = ['plot_id', 'totN_mol_ha', 'soil_type_label', 'country_label', 'five_yearly_precip',
'five_yearly_temp']
out_df = self.eva.loc[:, out_cols]
out_df.round({'totN_mol_ha': 2, 'five_yearly_precip': 2, 'five_yearly_temp': 2})\
.to_csv(os.path.join(pg_dir, '{}.csv'.format(pg_out_name)), sep=',', index=False, header=True)
self.report += 'Written plot covar output file: {0}'.format(os.path.join(pg_dir, pg_out_name))
elif what == 'headers_shp':
shp_out_dir = os.path.join(out_dir, 'shp')
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment