added function to negate SNL types

75f0ac18 · haro-nl · 5abaaa33 · 75f0ac18 · 75f0ac18
Commit 75f0ac18 authored 5 years ago by haro-nl
--- a/a_prep_dat/compile_250m_hok_db.py
+++ b/a_prep_dat/compile_250m_hok_db.py
@@ -39,7 +39,7 @@ sources = ['N1900.pkl', 'N1800.pkl', 'N1706.pkl', 'N1705.pkl', 'N1704.pkl', 'N17
           'N1401.pkl', 'N1302.pkl', 'N1301.pkl', 'N1206.pkl', 'N1205.pkl', 'N1204.pkl', 'N1203.pkl', 'N1202.pkl',
           'N1201.pkl', 'N1101.pkl', 'N1002.pkl', 'N1001.pkl', 'N0901.pkl', 'N0804.pkl', 'N0803.pkl', 'N0802.pkl',
           'N0801.pkl', 'N0702.pkl', 'N0701.pkl', 'N0606.pkl', 'N0605.pkl', 'N0604.pkl', 'N0603.pkl', 'N0602.pkl',
-           'N0601.pkl', 'N0502.pkl', 'N0501.pkl', 'N0404.pkl', 'N0403.pkl', 'N0402.pkl', 'N0401.pkl', 'N0301.pk',
+           'N0601.pkl', 'N0502.pkl', 'N0501.pkl', 'N0404.pkl', 'N0403.pkl', 'N0402.pkl', 'N0401.pkl', 'N0301.pkl',
           'N0201.pkl', 'OpenDuin.pkl', 'Moeras.pkl', 'Heide.pkl',
           'HalfnatuurlijkGrasland.pkl', 'Bos.pkl']
 pkl_dir = r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken'
@@ -48,16 +48,16 @@ for source in sources:
    src_name = os.path.splitext(source)[0]
    try:
-        with open(os.path.join(pkl_dir, source), 'rb') as handle:
+        source_df = pd.read_pickle(os.path.join(pkl_dir, source))
-            source_df = pickle.load(handle)
+        source_df.set_index('hok_id', inplace=True, verify_integrity=True)
-            source_df.set_index('hok_id', inplace=True, verify_integrity=True)
+        source_df.rename(columns={'area_m2': src_name}, inplace=True)
-            source_df.rename(columns={'area_m2': src_name}, inplace=True)
        db = pd.merge(db, source_df, left_on='hok_id', right_index=True, how='left')
        print('DB shape now: {0} rows, {1} cols'.format(db.shape[0], db.shape[1]))
    except FileNotFoundError:
+        print('Warning, file not found: {0}'.format(source))
        continue

--- a/clo_analysis.py
+++ b/clo_analysis.py
@@ -20,30 +20,46 @@ from z_utils import clo
 '''
 Model run identifier
 '''
-id = 'BdK01'
+id = 'MS01b'
 '''
 Query PGO data for a specific SNL soortenlijst and specific spatial filter  
 '''
 soortgroepen = ['vogel', 'vlinder', 'vaatplant']
-snl_soortlijst = ['HalfnatuurlijkGrasland']  # soortenlijst behorende bij een SNL type
+snl_soortlijst = ['N1202', 'N1301']  # soortenlijst behorende bij een SNL type
-sub_soortlijst = ['EcoSysLijst']  # SNL, Bijlage 1 of EcoSysLijst?
+sub_soortlijst = ['SNL']  # SNL, Bijlage 1 of EcoSysLijst?
-snl_gebieden = ['HalfnatuurlijkGrasland']  # 250m cellen met welk SNL type?
+snl_gebieden_wel = ['N1900']  # 250m cellen met welk SNL type?
+snl_gebieden_niet = ['N0201', 'N0301', 'N0401', 'N0402', 'N0403', 'N0404', 'N0501', 'N0502', 'N0601', 'N0602', 'N0603',
+                     'N0604', 'N0605', 'N0606', 'N0701', 'N0702', 'N0801', 'N0802', 'N0803', 'N0804', 'N0901', 'N1001',
+                     'N1002', 'N1101', 'N1201', 'N1202', 'N1203', 'N1204', 'N1205', 'N1206', 'N1301', 'N1302', 'N1401',
+                     'N1402', 'N1403', 'N1501', 'N1502', 'N1601', 'N1602', 'N1603', 'N1604', 'N1701', 'N1702', 'N1703',
+                     'N1704', 'N1705', 'N1706']  # 250m cellen met welk SNL type?
 dif_cats = [range(-1000, -1), range(-1, 2), range(2, 1000)]
 dif_labs = ['afname', 'stabiel', 'toename']
+# choose one of ['1994-2001', '2002-2009', '2010-2017'] for periode A and periode B, where stats will be made A-B
+periodes = ['1994-2001', '2002-2009', '2010-2017']
+periode_A = '2010-2017'
+periode_B = '1994-2001'
 '''
 SPATIAL SELECTION 
 '''
-# read full 250 m grid of the Netherlands
+spatial_query_p1 = ' | '.join(["{0} > 0".format(x) for x in snl_gebieden_wel])  # cells must have > 0 m2 of each snl
+spatial_query_p2 = ' & '.join(["{0} == 0".format(x) for x in snl_gebieden_niet])  # cells must have 0 m2 of each snl
+spatial_query = ' & '.join(['({0})'.format(x) for x in [spatial_query_p1, spatial_query_p2] if len(x) > 0])
 nl250 = pd.read_pickle(os.path.join(r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken', 'nl250mgrid.pkl'))
-spatial_query = ' | '.join(["{0} > 0".format(x) for x in snl_gebieden])  # cells must have > 0 m2 of each snl type
+hokken = nl250.query(spatial_query, engine='python')  # reduce to hokken complying to the spatial query
-hokken = nl250.query(spatial_query)  # reduce to hokken complying to the spatial query
+hokken['areaal'] = hokken.loc[:, snl_gebieden_wel].sum(axis=1)  # calculate areaal per hok of selected SNL types
-hokken['areaal'] = hokken.loc[:, snl_gebieden].sum(axis=1)  # calculate areaal per hok of selecte        SNL types
 hokken.set_index('hok_id')
-print('Querying {0} from nl250 hokken geeft {1} 250m hokken.'.format(spatial_query, hokken.shape[0]))
+if hokken.empty:
+    raise Exception('No hokken found complying to query.')
+else:
+    print('Querying {0} from nl250 hokken geeft {1} 250m hokken.'.format(spatial_query, hokken.shape[0]))
 '''
 SELECTION FROM PGO DATA
@@ -66,24 +82,27 @@ Pivot to show plant, vogel, vlinder count per hok
 pgo_piv = pd.pivot_table(pgo_dat, index='hok_id', columns=['soortgroep', 'periode'], values='n', aggfunc='sum')
 '''
-Calc progress between p3-p2 for plant, vogel, vlinder
+Calc progress between periode A - periode B for plant, vogel, vlinder. 
 '''
 for soort in soortgroepen:
-    trend_p3_p2 = pgo_piv[(soort, '2010-2017')].sub(pgo_piv[(soort, '2002-2009')], axis=0)
+    trend_pA_pB = pgo_piv[(soort, periode_A)].sub(pgo_piv[(soort, periode_B)], axis=0)
-    score_p3_p2 = trend_p3_p2.apply(clo.classifier, args=(dif_cats, dif_labs))
+    score_pA_pB = trend_pA_pB.apply(clo.classifier, args=(dif_cats, dif_labs))
-    pgo_piv[(soort, 'trend')] = trend_p3_p2
+    pgo_piv[(soort, 'trend')] = trend_pA_pB  # difference in species count
-    pgo_piv[(soort, 'score')] = score_p3_p2
+    pgo_piv[(soort, 'score')] = score_pA_pB  # score: afname, stabiel, toename
+    print(soort)
    print(pgo_piv[(soort, )].head(10))
-    print(pgo_piv[(soort, )].tail(10))
 print('Found observations for species list {0}-{1} in {2} 250m hokken. \nAlso found {3} 250m hokken complying '
      'to spatial filter {4}. \nNarrowing down PGO observations to spatial filter XX 250 m. hokken'.format(
      snl_soortlijst, sub_soortlijst, pgo_piv.shape[0], hokken.shape[0], spatial_query, ))
 '''
-Summarize areaal by trends
+Calculate areaal and stats on species per cell
 '''
-output = pd.DataFrame(index=dif_labs)
+idx = pd.IndexSlice
+output_stats = pd.DataFrame(index=['mean', 'std'], columns=pd.MultiIndex.from_product([soortgroepen, periodes]))
+output_areaal = pd.DataFrame(index=dif_labs)  # TODO: also use nested columns here?
 for soort in soortgroepen:
    # Narrow down pgo_piv to just `soort` and any rows with any NA for trend or score.
    # Merge with data from 250m hokken to couple `areaal` to hok ID
@@ -94,11 +113,18 @@ for soort in soortgroepen:
        print('Empty result for {0}'.format(soort))
        soort_summ = pd.DataFrame(0, index=dif_labs, columns=['areaal', 'areaal_{0}_ha'.format(soort)])
    else:
+        # Pivot to calculate sum areaal for afname, stabiel, toename
-        # Pivot to calculate areaal
        soort_summ = pd.pivot_table(soort_sel, index='score', values='areaal', aggfunc='sum')
        soort_summ['areaal_ha_{0}'.format(soort)] = soort_summ.apply(lambda row: np.divide(row.areaal, 10000), axis=1)
-    output = output.join(soort_summ.drop('areaal', axis=1), how='right')
+        # calculate mean and st dev number of species per cell #TODO: calculate nr of cells with obs
+        mean = soort_sel.loc[:, periodes].mean(axis=0)
+        std = soort_sel.loc[:, periodes].std(axis=0)
+        count = soort_sel.shape[0]
+        output_stats.loc[idx['mean'], idx[soort, :]] = mean.to_list()
+        output_stats.loc[idx['std'], idx[soort, :]] = std.to_list()
+    output_areaal = output_areaal.join(soort_summ.drop('areaal', axis=1), how='right')  # TODO: use .loc method
 '''
 Write report
@@ -112,16 +138,21 @@ header = 'Extract from PGO species distribution data as follows:\n' \
         'Soortgroepen: {0}\n' \
         'Soortlijst: {1}\n' \
         'Sub-soortlijst: {2}\n' \
-         'PGO data restricted to 250m cells with: {3} m2\n' \
+         'PGO data restricted to {3} 250m cells where: {4}\n' \
-         'Trends: {4}\n\n' \
+         'Trends: {5}\n' \
-         'Periode: '.format(', '.join([soort for soort in soortgroepen]),
+         'Trends berekend als # soorten in {6} minus {7} \n\n'.format(', '.join([soort for soort in soortgroepen]),
-                                      ', '.join(snl for snl in snl_soortlijst),
+                                                                      ', '.join(snl for snl in snl_soortlijst),
-                                      '-'.join(sub for sub in sub_soortlijst), spatial_query, categories)
+                                                                      '-'.join(sub for sub in sub_soortlijst),
+                                                                      pgo_piv.shape[0], spatial_query, categories,
+                                                                      periode_A, periode_B)
 footer = '\nMade with Python 3.5 using pandas, geopandas, by Hans Roelofsen, WEnR team B&B, dated {0}'.format(timestamp)
 with open(os.path.join(out_dir, basename + '.txt'), 'w') as f:
    f.write(header)
-    f.write(output.to_csv(sep='\t', line_terminator='\r'))
+    f.write('\n##### HECTARE-TOENAME-STABIEL-AFNAME #####\n')
+    f.write(output_areaal.to_csv(sep='\t', line_terminator='\r'))
+    f.write('\n##### GEMIDDELD + ST.DEV AANTAL SOORTEN IN HOK #####\n')
+    f.write(output_stats.to_csv(sep='\t', line_terminator='\r'))
    f.write(footer)