Skip to content
Snippets Groups Projects
Commit bca41cf6 authored by Roelofsen, Hans's avatar Roelofsen, Hans
Browse files

Again improved analysis

parent 22325d0a
No related branches found
No related tags found
No related merge requests found
......@@ -33,9 +33,13 @@ sub_soortlijst = ['SNL'] # SNL, Bijlage 1 of EcoSysLijst?
# --- Spatial selection and trend-category configuration ---
snl_gebieden_wel = ['N1900'] # 250m cells with which SNL type?
snl_gebieden_niet = [] # 250m cells with which SNL type?
# dif_cats = [range(-1000, -1), range(-1, 2), range(2, 1000)]
# Define the numeric range for the categories Afname (decrease), Stabiel (stable), Toename (increase):
# [a, b, c, d]
# a <= Afname < b
# b <= Stabiel < c
# c <= Toename < d
# NOTE(review): if the classifier follows numpy.histogram bin semantics, the last bin
# also includes its upper edge d -- confirm against the classifier implementation.
dif_cats = [-1000, -1, 2, 1000] # see https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
# NOTE(review): dif_labs is assigned twice with the same value; the second assignment
# is redundant (this looks like the old and new side of a diff shown together).
dif_labs = ['afname', 'stabiel', 'toename']
dif_labs = ['afname', 'stabiel', 'toename'] # must be in INCREASING order
# choose one of ['1994-2001', '2002-2009', '2010-2017'] for periode A and periode B, where stats will be made A-B
periodes = ['1994-2001', '2002-2009', '2010-2017']
......@@ -72,44 +76,31 @@ vogel_trend, vogel_score = clo.calc_clo_1543(s_periode_A=dat_piv.loc[:, idx['vog
# NOTE(review): this region appears to interleave two variants of the same analysis:
# a long-format one (trend_s / score_s / species_s / hok_id_s -> clo_dat -> piv_out) and a
# wide-format one (foo_df -> clo_df -> plant_out / vogel_out / vlinder_out).
# Confirm which statements belong to the current version before relying on either.

# Trend and score for the 'vlinder' species group, computed from periode_A and periode_B
# with the configured bins (dif_cats) and labels (dif_labs).
vlinder_trend, vlinder_score = clo.calc_clo_1543(s_periode_A=dat_piv.loc[:, idx['vlinder', periode_A]],
s_periode_B=dat_piv.loc[:, idx['vlinder', periode_B]],
bins=dif_cats, labels=dif_labs)
'''
Create dataframe where all trends and scores are in a single column, plus `soortgroep` column etc
'''
# all trends in a series
trend_s = pd.concat([plant_trend, vogel_trend, vlinder_trend], axis=0, ignore_index=True)
trend_s.name = 'clo1543_trend'
# all scores in a series
score_s = pd.concat([plant_score, vogel_score, vlinder_score], axis=0, ignore_index=True)
score_s.name = 'clo1543_score'
'''Create new df containing scores and trends for all soortgroepen. Then attach to hokken.'''
# Wide format: one trend column and one score column per species group, side by side.
foo_df = pd.concat([plant_trend, plant_score, vogel_trend, vogel_score, vlinder_trend, vlinder_score],
axis=1)
# Left join on the index of both frames, so every row of hokken is kept.
clo_df = pd.merge(left=hokken, right=foo_df, how='left', right_index=True, left_index=True)
# series with species-group names
species_s = pd.Series(['vaatplant']*dat_piv.shape[0] + ['vogel']*dat_piv.shape[0] + ['vlinder']*dat_piv.shape[0],
name='soortgroep')
'''Niet alle hokken zullen een PGO observatie hebben. Vul score kolommen in met `onbekend`.'''
# (English) Not all cells will have a PGO observation; fill the score columns with 'onbekend' (unknown).
# NOTE(review): '*score*' is a glob pattern, not a regular expression; a leading '*' is
# rejected by Python's re module ("nothing to repeat"). The result of fillna is also not
# assigned, so clo_df is left unchanged by this line even if the pattern were valid.
clo_df.filter(regex='*score*').fillna(value='onbekend', axis=1)
# series hok_ids (note repetition)
hok_id_s = pd.concat([plant_score.index.to_series(), vogel_score.index.to_series(), vlinder_score.index.to_series()],
axis=0, ignore_index=True)
hok_id_s.name = 'hok_id'
'''
Bereken totaal areaal per soortgroep voor CLO scores Toename, Stabiel, Afname, Onbekend.
Bereken ook netto toename (Toename-Afname) en score als netto/totaal areal
'''
# (English) Compute the total area per species group for the CLO scores Toename, Stabiel,
# Afname, Onbekend; also the net increase (Toename - Afname) and the score as net / total area.
# NOTE(review): pivoting with index='vaatplant_score' puts the score labels on the row
# index, so `row.toename` inside an axis=1 apply looks like it cannot resolve -- confirm
# (a transpose, or a `columns=` pivot as used for piv_out, may be intended). The same
# applies to vogel_out and vlinder_out.
plant_out = pd.pivot_table(clo_df, index='vaatplant_score', values='areaal_ha', aggfunc='sum')
plant_out.loc[:, 'netto'] = plant_out.apply(lambda row: row.toename - row.afname, axis=1)
# score = net area as a percentage of the summed areaal_ha of all hokken
plant_out.loc[:, 'score'] = plant_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1)
# concat the series into a dataframe and merge with hokken. Retain only hokken (how=right)
left_df = pd.concat([trend_s, score_s, species_s, hok_id_s], axis=1)
clo_dat = pd.merge(left=left_df, right=hokken.loc[:, ['hok_id', 'areaal_ha', 'twente']], # hokken only!
left_on='hok_id', right_on='hok_id', how='right')
vogel_out = pd.pivot_table(clo_df, index='vogel_score', values='areaal_ha', aggfunc='sum')
vogel_out.loc[:, 'netto'] = vogel_out.apply(lambda row: row.toename - row.afname, axis=1)
vogel_out.loc[:, 'score'] = vogel_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1)
'''
Fill CLO scores with 'onbekend' for cellen in Twente
'''
# NOTE(review): this fillna replaces missing values in every column of clo_dat, not only
# in the trend/score columns -- confirm that this is intended.
clo_dat.fillna('onbekend', inplace=True, axis=1)
# Rows with twente > 0 get 'onbekend' for both trend and score, overriding computed values.
clo_dat.loc[clo_dat.twente > 0, ['clo1543_trend', 'clo1543_score']] = 'onbekend'
vlinder_out = pd.pivot_table(clo_df, index='vlinder_score', values='areaal_ha', aggfunc='sum')
vlinder_out.loc[:, 'netto'] = vlinder_out.apply(lambda row: row.toename - row.afname, axis=1)
vlinder_out.loc[:, 'score'] = vlinder_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1)
'''
Pivot on CLO1543 scores and calculate total area per score for each soortgroep.
'''
# One row per soortgroep, one column per score label, values are summed areaal_ha.
piv_out = pd.pivot_table(clo_dat, index='soortgroep', columns='clo1543_score', values='areaal_ha', aggfunc='sum')
piv_out.loc[:, 'netto'] = piv_out.apply(lambda row: row.toename - row.afname, axis=1)
piv_out.loc[:, 'score'] = piv_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1)
# Copy the table to the system clipboard, semicolon-separated.
piv_out.to_clipboard(sep=';')
'''
Write report
......@@ -118,18 +109,30 @@ timestamp = datetime.datetime.now().strftime("%y%m%d-%H%M")
# Output location and base file name for the report.
out_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\CLO1543'
# NOTE(review): `id` is not assigned in the visible code; unless it is defined elsewhere
# in the file this formats the builtin `id` function into the file name -- confirm.
basename = 'clo1543_{0}_{1}'.format(id, timestamp)
# Number of 'n' entries per soortgroep (rows) for each (snl, periode) combination (columns).
pgo_dat_summary = pd.pivot_table(pgo_dat, index='soortgroep', columns=['snl', 'periode'], values='n', aggfunc='count')
# Pair consecutive bin edges of dif_cats into (lower, upper) limits, one pair per category.
cat_lims = [(dif_cats[i-1], dif_cats[i]) for i in range(1, len(dif_cats))]
# Lower and upper limits as separate lists, in category order.
lo_lims = [l for (l,_) in cat_lims]
up_lims = [u for (_,u) in cat_lims]
# TODO: turn this into a nicely formatted string!
# category_limits = ['{0} t/m {1}'.format(min(x), max(x)) for x in dif_cats]
# categories = ', '.join('{0}: {1}'.format(k, v) for k,v in dict(zip(dif_labs, category_limits)).items())
header = 'Extract from PGO species distribution data as follows:\n' \
'Soortgroepen: {0}\n' \
'Soortlijst: {1}\n' \
'Sub-soortlijst: {2}\n' \
'PGO data restricted to {3} 250m cells where: {4}\n' \
'Trends: {5}\n' \
'Trends berekend als # soorten in {6} minus {7} \n\n'.format(', '.join([soort for soort in soortgroepen]),
', '.join(snl for snl in snl_soortlijst),
'-'.join(sub for sub in sub_soortlijst),
dat_piv.shape[0], spat_query, dif_cats,
clo_scores =
header = '#Model run dated: {0} by {1}\n#\n' \
'#Extract from PGO species distribution data with PGO Query:\n' \
'# Soortgroepen: {2}\n' \
'# SNL Soortlijst: {3}\n' \
'# SNL Sub-soortlijst: {4}\n' \
'# ==> {5} observations in {6} different 250m hokken (see also PGO DATA SUMMARY)\n' \
'#Selection from 250m hokken grid with Beheertypen Query:\n' \
'# {7}\n' \
'# ==> {8} 250m hokken with total {9} hectare.\n' \
'#Trend refers to species difference between {10}-{11}}\n' \
'#Scores are as follows:\n' \
''
.format(timestamp, os.environ.get('USERNAME'), ', '.join([soort for soort in soortgroepen]),
', '.join(snl for snl in snl_soortlijst), '-'.join(sub for sub in sub_soortlijst),
pgo_dat.shape[0], dat_piv.shape[0], spat_query, hokken.shape[0], hokken.areaal_ha.sum(),
periode_A, periode_A,
dif_cats,
periode_A, periode_B)
footer = '\nMade with Python 3.5 using pandas, geopandas, by Hans Roelofsen, WEnR team B&B, dated {0}'.format(timestamp)
......@@ -138,7 +141,12 @@ with open(os.path.join(out_dir, basename + '.txt'), 'w') as f:
f.write('\n###PGO DATA SUMMARY###\n')
f.write(pgo_dat_summary.to_csv(sep='\t', line_terminator='\r'))
f.write('\n##### HECTARE-TOENAME-STABIEL-AFNAME #####\n')
f.write(piv_out.to_csv(sep='\t', line_terminator='\r'))
f.write('\n\n')
f.write(plant_out.to_csv(sep='\t', line_terminator='\r'))
f.write('\n\n')
f.write(vogel_out.to_csv(sep='\t', line_terminator='\r'))
f.write('\n\n')
f.write(vlinder_out.to_csv(sep='\t', line_terminator='\r'))
f.write(footer)
......
......@@ -233,9 +233,9 @@ def calc_clo_1543(s_periode_A, s_periode_B, bins, labels):
try:
sp_diff = s_periode_A.sub(s_periode_B)
sp_diff.name = 'clo1543_trend_{0}'.format(s_periode_A.name)
sp_diff.name = 'rend_{0}'.format(s_periode_A.name)
sp_diff_score =sp_diff.apply(classifier, bins=bins, labels=labels)
sp_diff_score.name = 'clo1543_score_{0}'.format(s_periode_A.name)
sp_diff_score.name = 'score_{0}'.format(s_periode_A.name)
return sp_diff, sp_diff_score
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment