diff --git a/b_analysis/clo_1543_analysis.py b/b_analysis/clo_1543_analysis.py index a6ac4124778a6387482bf8ee20968df6aefd2b75..1165112003d4e8f851eee75c35593c84321ea58d 100644 --- a/b_analysis/clo_1543_analysis.py +++ b/b_analysis/clo_1543_analysis.py @@ -33,9 +33,13 @@ sub_soortlijst = ['SNL'] # SNL, Bijlage 1 of EcoSysLijst? snl_gebieden_wel = ['N1900'] # 250m cellen met welk SNL type? snl_gebieden_niet = [] # 250m cellen met welk SNL type? -# dif_cats = [range(-1000, -1), range(-1, 2), range(2, 1000)] +# Definieer de numerieke range voor de categorieren Afname, Stabiel, Toename: +# [a, b, c, d] +# a <= Afname < b +# b <= Stabiel < c +# c <= Toename < d dif_cats = [-1000, -1, 2, 1000] # see https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html -dif_labs = ['afname', 'stabiel', 'toename'] +dif_labs = ['afname', 'stabiel', 'toename'] # must be in INCREASING order # choose one of ['1994-2001', '2002-2009', '2010-2017'] for periode A and periode B, where stats will be made A-B periodes = ['1994-2001', '2002-2009', '2010-2017'] @@ -72,44 +76,31 @@ vogel_trend, vogel_score = clo.calc_clo_1543(s_periode_A=dat_piv.loc[:, idx['vog vlinder_trend, vlinder_score = clo.calc_clo_1543(s_periode_A=dat_piv.loc[:, idx['vlinder', periode_A]], s_periode_B=dat_piv.loc[:, idx['vlinder', periode_B]], bins=dif_cats, labels=dif_labs) -''' -Create dataframe where all trends and scores are in a single column, plus `soortgroep` column etc -''' -# all trends in a series -trend_s = pd.concat([plant_trend, vogel_trend, vlinder_trend], axis=0, ignore_index=True) -trend_s.name = 'clo1543_trend' -# all scores in a series -score_s = pd.concat([plant_score, vogel_score, vlinder_score], axis=0, ignore_index=True) -score_s.name = 'clo1543_score' +'''Create new df containing scores and trends for all soortgroepen. Then attach to hokken.''' +foo_df = pd.concat([plant_trend, plant_score, vogel_trend, vogel_score, vlinder_trend, vlinder_score], + axis=1) +clo_df = pd.merge(left=hokken, right=foo_df, how='left', right_index=True, left_index=True) -# series with species-group names -species_s = pd.Series(['vaatplant']*dat_piv.shape[0] + ['vogel']*dat_piv.shape[0] + ['vlinder']*dat_piv.shape[0], - name='soortgroep') +'''Niet alle hokken zullen een PGO observatie hebben. Vul score kolommen in met `onbekend`.''' +clo_df.filter(regex='*score*').fillna(value='onbekend', axis=1) -# series hok_ids (note repetition) -hok_id_s = pd.concat([plant_score.index.to_series(), vogel_score.index.to_series(), vlinder_score.index.to_series()], - axis=0, ignore_index=True) -hok_id_s.name = 'hok_id' +''' +Bereken totaal areaal per soortgroep voor CLO scores Toename, Stabiel, Afname, Onbekend. +Bereken ook netto toename (Toename-Afname) en score als netto/totaal areal +''' +plant_out = pd.pivot_table(clo_df, index='vaatplant_score', values='areaal_ha', aggfunc='sum') +plant_out.loc[:, 'netto'] = plant_out.apply(lambda row: row.toename - row.afname, axis=1) +plant_out.loc[:, 'score'] = plant_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1) -# concat the series into a dataframe and merge with hokken. Retain only hokken (how=right) -left_df = pd.concat([trend_s, score_s, species_s, hok_id_s], axis=1) -clo_dat = pd.merge(left=left_df, right=hokken.loc[:, ['hok_id', 'areaal_ha', 'twente']], # hokken only! - left_on='hok_id', right_on='hok_id', how='right') +vogel_out = pd.pivot_table(clo_df, index='vogel_score', values='areaal_ha', aggfunc='sum') +vogel_out.loc[:, 'netto'] = vogel_out.apply(lambda row: row.toename - row.afname, axis=1) +vogel_out.loc[:, 'score'] = vogel_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1) -''' -Fill CLO scores with 'onbekend' for cellen in Twente -''' -clo_dat.fillna('onbekend', inplace=True, axis=1) -clo_dat.loc[clo_dat.twente > 0, ['clo1543_trend', 'clo1543_score']] = 'onbekend' +vlinder_out = pd.pivot_table(clo_df, index='vlinder_score', values='areaal_ha', aggfunc='sum') +vlinder_out.loc[:, 'netto'] = vlinder_out.apply(lambda row: row.toename - row.afname, axis=1) +vlinder_out.loc[:, 'score'] = vlinder_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1) -''' -Pivot on CLO1543 scores and calculate total area per score for each soortgroep. -''' -piv_out = pd.pivot_table(clo_dat, index='soortgroep', columns='clo1543_score', values='areaal_ha', aggfunc='sum') -piv_out.loc[:, 'netto'] = piv_out.apply(lambda row: row.toename - row.afname, axis=1) -piv_out.loc[:, 'score'] = piv_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1) -piv_out.to_clipboard(sep=';') ''' Write report @@ -118,18 +109,30 @@ timestamp = datetime.datetime.now().strftime("%y%m%d-%H%M") out_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\CLO1543' basename = 'clo1543_{0}_{1}'.format(id, timestamp) pgo_dat_summary = pd.pivot_table(pgo_dat, index='soortgroep', columns=['snl', 'periode'], values='n', aggfunc='count') +cat_lims = [(dif_cats[i-1], dif_cats[i]) for i in range(1, len(dif_cats))] +lo_lims = [l for (l,_) in cat_lims] +up_lims = [u for (_,u) in cat_lims] +# TODO: hier nog een mooie string van maken! # category_limits = ['{0} t/m {1}'.format(min(x), max(x)) for x in dif_cats] # categories = ', '.join('{0}: {1}'.format(k, v) for k,v in dict(zip(dif_labs, category_limits)).items()) -header = 'Extract from PGO species distribution data as follows:\n' \ - 'Soortgroepen: {0}\n' \ - 'Soortlijst: {1}\n' \ - 'Sub-soortlijst: {2}\n' \ - 'PGO data restricted to {3} 250m cells where: {4}\n' \ - 'Trends: {5}\n' \ - 'Trends berekend als # soorten in {6} minus {7} \n\n'.format(', '.join([soort for soort in soortgroepen]), - ', '.join(snl for snl in snl_soortlijst), - '-'.join(sub for sub in sub_soortlijst), - dat_piv.shape[0], spat_query, dif_cats, +clo_scores = +header = '#Model run dated: {0} by {1}\n#\n' \ + '#Extract from PGO species distribution data with PGO Query:\n' \ + '# Soortgroepen: {2}\n' \ + '# SNL Soortlijst: {3}\n' \ + '# SNL Sub-soortlijst: {4}\n' \ + '# ==> {5} observations in {6} different 250m hokken (see also PGO DATA SUMMARY)\n' \ + '#Selection from 250m hokken grid with Beheertypen Query:\n' \ + '# {7}\n' \ + '# ==> {8} 250m hokken with total {9} hectare.\n' \ + '#Trend refers to species difference between {10}-{11}}\n' \ + '#Scores are as follows:\n' \ + '' + .format(timestamp, os.environ.get('USERNAME'), ', '.join([soort for soort in soortgroepen]), + ', '.join(snl for snl in snl_soortlijst), '-'.join(sub for sub in sub_soortlijst), + pgo_dat.shape[0], dat_piv.shape[0], spat_query, hokken.shape[0], hokken.areaal_ha.sum(), + periode_A, periode_A, + dif_cats, periode_A, periode_B) footer = '\nMade with Python 3.5 using pandas, geopandas, by Hans Roelofsen, WEnR team B&B, dated {0}'.format(timestamp) @@ -138,7 +141,12 @@ with open(os.path.join(out_dir, basename + '.txt'), 'w') as f: f.write('\n###PGO DATA SUMMARY###\n') f.write(pgo_dat_summary.to_csv(sep='\t', line_terminator='\r')) f.write('\n##### HECTARE-TOENAME-STABIEL-AFNAME #####\n') - f.write(piv_out.to_csv(sep='\t', line_terminator='\r')) + f.write('\n\n') + f.write(plant_out.to_csv(sep='\t', line_terminator='\r')) + f.write('\n\n') + f.write(vogel_out.to_csv(sep='\t', line_terminator='\r')) + f.write('\n\n') + f.write(vlinder_out.to_csv(sep='\t', line_terminator='\r')) f.write(footer) diff --git a/z_utils/clo.py b/z_utils/clo.py index 19f19806703165d81e0a39765f39acad86d48e21..39f7703fa5f1c3d5a7363527be3e9a15474d0d1a 100644 --- a/z_utils/clo.py +++ b/z_utils/clo.py @@ -233,9 +233,9 @@ def calc_clo_1543(s_periode_A, s_periode_B, bins, labels): try: sp_diff = s_periode_A.sub(s_periode_B) - sp_diff.name = 'clo1543_trend_{0}'.format(s_periode_A.name) + sp_diff.name = 'rend_{0}'.format(s_periode_A.name) sp_diff_score =sp_diff.apply(classifier, bins=bins, labels=labels) - sp_diff_score.name = 'clo1543_score_{0}'.format(s_periode_A.name) + sp_diff_score.name = 'score_{0}'.format(s_periode_A.name) return sp_diff, sp_diff_score