Skip to content
Snippets Groups Projects
Commit 75f0ac18 authored by haro-nl's avatar haro-nl
Browse files

added function to negate SNL types

parent 5abaaa33
No related branches found
No related tags found
No related merge requests found
...@@ -39,7 +39,7 @@ sources = ['N1900.pkl', 'N1800.pkl', 'N1706.pkl', 'N1705.pkl', 'N1704.pkl', 'N17 ...@@ -39,7 +39,7 @@ sources = ['N1900.pkl', 'N1800.pkl', 'N1706.pkl', 'N1705.pkl', 'N1704.pkl', 'N17
'N1401.pkl', 'N1302.pkl', 'N1301.pkl', 'N1206.pkl', 'N1205.pkl', 'N1204.pkl', 'N1203.pkl', 'N1202.pkl', 'N1401.pkl', 'N1302.pkl', 'N1301.pkl', 'N1206.pkl', 'N1205.pkl', 'N1204.pkl', 'N1203.pkl', 'N1202.pkl',
'N1201.pkl', 'N1101.pkl', 'N1002.pkl', 'N1001.pkl', 'N0901.pkl', 'N0804.pkl', 'N0803.pkl', 'N0802.pkl', 'N1201.pkl', 'N1101.pkl', 'N1002.pkl', 'N1001.pkl', 'N0901.pkl', 'N0804.pkl', 'N0803.pkl', 'N0802.pkl',
'N0801.pkl', 'N0702.pkl', 'N0701.pkl', 'N0606.pkl', 'N0605.pkl', 'N0604.pkl', 'N0603.pkl', 'N0602.pkl', 'N0801.pkl', 'N0702.pkl', 'N0701.pkl', 'N0606.pkl', 'N0605.pkl', 'N0604.pkl', 'N0603.pkl', 'N0602.pkl',
'N0601.pkl', 'N0502.pkl', 'N0501.pkl', 'N0404.pkl', 'N0403.pkl', 'N0402.pkl', 'N0401.pkl', 'N0301.pk', 'N0601.pkl', 'N0502.pkl', 'N0501.pkl', 'N0404.pkl', 'N0403.pkl', 'N0402.pkl', 'N0401.pkl', 'N0301.pkl',
'N0201.pkl', 'OpenDuin.pkl', 'Moeras.pkl', 'Heide.pkl', 'N0201.pkl', 'OpenDuin.pkl', 'Moeras.pkl', 'Heide.pkl',
'HalfnatuurlijkGrasland.pkl', 'Bos.pkl'] 'HalfnatuurlijkGrasland.pkl', 'Bos.pkl']
pkl_dir = r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken' pkl_dir = r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken'
...@@ -48,16 +48,16 @@ for source in sources: ...@@ -48,16 +48,16 @@ for source in sources:
src_name = os.path.splitext(source)[0] src_name = os.path.splitext(source)[0]
try: try:
with open(os.path.join(pkl_dir, source), 'rb') as handle: source_df = pd.read_pickle(os.path.join(pkl_dir, source))
source_df = pickle.load(handle) source_df.set_index('hok_id', inplace=True, verify_integrity=True)
source_df.set_index('hok_id', inplace=True, verify_integrity=True) source_df.rename(columns={'area_m2': src_name}, inplace=True)
source_df.rename(columns={'area_m2': src_name}, inplace=True)
db = pd.merge(db, source_df, left_on='hok_id', right_index=True, how='left') db = pd.merge(db, source_df, left_on='hok_id', right_index=True, how='left')
print('DB shape now: {0} rows, {1} cols'.format(db.shape[0], db.shape[1])) print('DB shape now: {0} rows, {1} cols'.format(db.shape[0], db.shape[1]))
except FileNotFoundError: except FileNotFoundError:
print('Warning, file not found: {0}'.format(source))
continue continue
......
...@@ -20,30 +20,46 @@ from z_utils import clo ...@@ -20,30 +20,46 @@ from z_utils import clo
''' '''
Model run identifier Model run identifier
''' '''
id = 'BdK01' id = 'MS01b'
''' '''
Query PGO data for a specific SNL soortenlijst and specific spatial filter Query PGO data for a specific SNL soortenlijst and specific spatial filter
''' '''
soortgroepen = ['vogel', 'vlinder', 'vaatplant'] soortgroepen = ['vogel', 'vlinder', 'vaatplant']
snl_soortlijst = ['HalfnatuurlijkGrasland'] # soortenlijst behorende bij een SNL type snl_soortlijst = ['N1202', 'N1301'] # soortenlijst behorende bij een SNL type
sub_soortlijst = ['EcoSysLijst'] # SNL, Bijlage 1 of EcoSysLijst? sub_soortlijst = ['SNL'] # SNL, Bijlage 1 of EcoSysLijst?
snl_gebieden = ['HalfnatuurlijkGrasland'] # 250m cellen met welk SNL type?
snl_gebieden_wel = ['N1900'] # 250m cellen met welk SNL type?
snl_gebieden_niet = ['N0201', 'N0301', 'N0401', 'N0402', 'N0403', 'N0404', 'N0501', 'N0502', 'N0601', 'N0602', 'N0603',
'N0604', 'N0605', 'N0606', 'N0701', 'N0702', 'N0801', 'N0802', 'N0803', 'N0804', 'N0901', 'N1001',
'N1002', 'N1101', 'N1201', 'N1202', 'N1203', 'N1204', 'N1205', 'N1206', 'N1301', 'N1302', 'N1401',
'N1402', 'N1403', 'N1501', 'N1502', 'N1601', 'N1602', 'N1603', 'N1604', 'N1701', 'N1702', 'N1703',
'N1704', 'N1705', 'N1706'] # 250m cellen met welk SNL type?
dif_cats = [range(-1000, -1), range(-1, 2), range(2, 1000)] dif_cats = [range(-1000, -1), range(-1, 2), range(2, 1000)]
dif_labs = ['afname', 'stabiel', 'toename'] dif_labs = ['afname', 'stabiel', 'toename']
# choose one of ['1994-2001', '2002-2009', '2010-2017'] for periode A and periode B, where stats will be made A-B
periodes = ['1994-2001', '2002-2009', '2010-2017']
periode_A = '2010-2017'
periode_B = '1994-2001'
''' '''
SPATIAL SELECTION SPATIAL SELECTION
''' '''
# read full 250 m grid of the Netherlands spatial_query_p1 = ' | '.join(["{0} > 0".format(x) for x in snl_gebieden_wel]) # cells must have > 0 m2 of each snl
spatial_query_p2 = ' & '.join(["{0} == 0".format(x) for x in snl_gebieden_niet]) # cells must have 0 m2 of each snl
spatial_query = ' & '.join(['({0})'.format(x) for x in [spatial_query_p1, spatial_query_p2] if len(x) > 0])
nl250 = pd.read_pickle(os.path.join(r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken', 'nl250mgrid.pkl')) nl250 = pd.read_pickle(os.path.join(r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken', 'nl250mgrid.pkl'))
spatial_query = ' | '.join(["{0} > 0".format(x) for x in snl_gebieden]) # cells must have > 0 m2 of each snl type hokken = nl250.query(spatial_query, engine='python') # reduce to hokken complying to the spatial query
hokken = nl250.query(spatial_query) # reduce to hokken complying to the spatial query hokken['areaal'] = hokken.loc[:, snl_gebieden_wel].sum(axis=1) # calculate areaal per hok of selected SNL types
hokken['areaal'] = hokken.loc[:, snl_gebieden].sum(axis=1) # calculate areaal per hok of selecte SNL types
hokken.set_index('hok_id') hokken.set_index('hok_id')
print('Querying {0} from nl250 hokken geeft {1} 250m hokken.'.format(spatial_query, hokken.shape[0])) if hokken.empty:
raise Exception('No hokken found complying to query.')
else:
print('Querying {0} from nl250 hokken geeft {1} 250m hokken.'.format(spatial_query, hokken.shape[0]))
''' '''
SELECTION FROM PGO DATA SELECTION FROM PGO DATA
...@@ -66,24 +82,27 @@ Pivot to show plant, vogel, vlinder count per hok ...@@ -66,24 +82,27 @@ Pivot to show plant, vogel, vlinder count per hok
pgo_piv = pd.pivot_table(pgo_dat, index='hok_id', columns=['soortgroep', 'periode'], values='n', aggfunc='sum') pgo_piv = pd.pivot_table(pgo_dat, index='hok_id', columns=['soortgroep', 'periode'], values='n', aggfunc='sum')
''' '''
Calc progress between p3-p2 for plant, vogel, vlinder Calc progress between periode A - periode B for plant, vogel, vlinder.
''' '''
for soort in soortgroepen: for soort in soortgroepen:
trend_p3_p2 = pgo_piv[(soort, '2010-2017')].sub(pgo_piv[(soort, '2002-2009')], axis=0) trend_pA_pB = pgo_piv[(soort, periode_A)].sub(pgo_piv[(soort, periode_B)], axis=0)
score_p3_p2 = trend_p3_p2.apply(clo.classifier, args=(dif_cats, dif_labs)) score_pA_pB = trend_pA_pB.apply(clo.classifier, args=(dif_cats, dif_labs))
pgo_piv[(soort, 'trend')] = trend_p3_p2 pgo_piv[(soort, 'trend')] = trend_pA_pB # difference in species count
pgo_piv[(soort, 'score')] = score_p3_p2 pgo_piv[(soort, 'score')] = score_pA_pB # score: afname, stabiel, toename
print(soort)
print(pgo_piv[(soort, )].head(10)) print(pgo_piv[(soort, )].head(10))
print(pgo_piv[(soort, )].tail(10))
print('Found observations for species list {0}-{1} in {2} 250m hokken. \nAlso found {3} 250m hokken complying ' print('Found observations for species list {0}-{1} in {2} 250m hokken. \nAlso found {3} 250m hokken complying '
'to spatial filter {4}. \nNarrowing down PGO observations to spatial filter XX 250 m. hokken'.format( 'to spatial filter {4}. \nNarrowing down PGO observations to spatial filter XX 250 m. hokken'.format(
snl_soortlijst, sub_soortlijst, pgo_piv.shape[0], hokken.shape[0], spatial_query, )) snl_soortlijst, sub_soortlijst, pgo_piv.shape[0], hokken.shape[0], spatial_query, ))
''' '''
Summarize areaal by trends Calculate areaal and stats on species per cell
''' '''
output = pd.DataFrame(index=dif_labs) idx = pd.IndexSlice
output_stats = pd.DataFrame(index=['mean', 'std'], columns=pd.MultiIndex.from_product([soortgroepen, periodes]))
output_areaal = pd.DataFrame(index=dif_labs) # TODO: also use nested columns here?
for soort in soortgroepen: for soort in soortgroepen:
# Narrow down pgo_piv to just `soort` and any rows with any NA for trend or score. # Narrow down pgo_piv to just `soort` and any rows with any NA for trend or score.
# Merge with data from 250m hokken to couple `areaal` to hok ID # Merge with data from 250m hokken to couple `areaal` to hok ID
...@@ -94,11 +113,18 @@ for soort in soortgroepen: ...@@ -94,11 +113,18 @@ for soort in soortgroepen:
print('Empty result for {0}'.format(soort)) print('Empty result for {0}'.format(soort))
soort_summ = pd.DataFrame(0, index=dif_labs, columns=['areaal', 'areaal_{0}_ha'.format(soort)]) soort_summ = pd.DataFrame(0, index=dif_labs, columns=['areaal', 'areaal_{0}_ha'.format(soort)])
else: else:
# Pivot to calculate sum areaal for afname, stabiel, toename
# Pivot to calculate areaal
soort_summ = pd.pivot_table(soort_sel, index='score', values='areaal', aggfunc='sum') soort_summ = pd.pivot_table(soort_sel, index='score', values='areaal', aggfunc='sum')
soort_summ['areaal_ha_{0}'.format(soort)] = soort_summ.apply(lambda row: np.divide(row.areaal, 10000), axis=1) soort_summ['areaal_ha_{0}'.format(soort)] = soort_summ.apply(lambda row: np.divide(row.areaal, 10000), axis=1)
output = output.join(soort_summ.drop('areaal', axis=1), how='right')
# calculate mean and st dev number of species per cell #TODO: calculate nr of cells with obs
mean = soort_sel.loc[:, periodes].mean(axis=0)
std = soort_sel.loc[:, periodes].std(axis=0)
count = soort_sel.shape[0]
output_stats.loc[idx['mean'], idx[soort, :]] = mean.to_list()
output_stats.loc[idx['std'], idx[soort, :]] = std.to_list()
output_areaal = output_areaal.join(soort_summ.drop('areaal', axis=1), how='right') # TODO: use .loc method
''' '''
Write report Write report
...@@ -112,16 +138,21 @@ header = 'Extract from PGO species distribution data as follows:\n' \ ...@@ -112,16 +138,21 @@ header = 'Extract from PGO species distribution data as follows:\n' \
'Soortgroepen: {0}\n' \ 'Soortgroepen: {0}\n' \
'Soortlijst: {1}\n' \ 'Soortlijst: {1}\n' \
'Sub-soortlijst: {2}\n' \ 'Sub-soortlijst: {2}\n' \
'PGO data restricted to 250m cells with: {3} m2\n' \ 'PGO data restricted to {3} 250m cells where: {4}\n' \
'Trends: {4}\n\n' \ 'Trends: {5}\n' \
'Periode: '.format(', '.join([soort for soort in soortgroepen]), 'Trends berekend als # soorten in {6} minus {7} \n\n'.format(', '.join([soort for soort in soortgroepen]),
', '.join(snl for snl in snl_soortlijst), ', '.join(snl for snl in snl_soortlijst),
'-'.join(sub for sub in sub_soortlijst), spatial_query, categories) '-'.join(sub for sub in sub_soortlijst),
pgo_piv.shape[0], spatial_query, categories,
periode_A, periode_B)
footer = '\nMade with Python 3.5 using pandas, geopandas, by Hans Roelofsen, WEnR team B&B, dated {0}'.format(timestamp) footer = '\nMade with Python 3.5 using pandas, geopandas, by Hans Roelofsen, WEnR team B&B, dated {0}'.format(timestamp)
with open(os.path.join(out_dir, basename + '.txt'), 'w') as f: with open(os.path.join(out_dir, basename + '.txt'), 'w') as f:
f.write(header) f.write(header)
f.write(output.to_csv(sep='\t', line_terminator='\r')) f.write('\n##### HECTARE-TOENAME-STABIEL-AFNAME #####\n')
f.write(output_areaal.to_csv(sep='\t', line_terminator='\r'))
f.write('\n##### GEMIDDELD + ST.DEV AANTAL SOORTEN IN HOK #####\n')
f.write(output_stats.to_csv(sep='\t', line_terminator='\r'))
f.write(footer) f.write(footer)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment