first ideas, messy

a74a4089 · haro-nl · 5f931fca · a74a4089 · a74a4089 · a74a4089
Commit a74a4089 authored 5 years ago by haro-nl
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
--- a/README.md
+++ b/README.md
 # wenr_clo1543

-code for update CLO indicator 1543
\ No newline at end of file
+code for update CLO indicator 1543: https://www.clo.nl/indicatoren/nl1543-ontwikkeling-soorten-natuurgebied-en-agrarisch-gebied
+. Gebaseerd op de PGO data: \\wur\dfs-root\PROJECTS\hotspots_pgos\a_Source_Data\ 
+
+##### Periodes
+* p1: 1994-2001
+* p2: 2002-2009
+* p3: 2010-2017
+
+#### Trends
+* afname: -2 of nog minder 
+* stabiel: -1, 0 of 1
+* toename: 2 of nog meer
+
+##### Voor natuurgebieden:
+* selecteer 250m gridcellen met areaal beheertype N12.02 en N13.02 > 0.
+* bepaal areaal per cel voor N12.02 en N13.02
+* N13.01 aantal vogels (SNL + Bijlage 1) en aantal vlinders
+* N12.02 aantal planten (SNL + Bijlage 1) 
+* sommeer aantal soorten en bereken trend (afname, stabiel, toename) voor p2-p3
+* update totaal areaal afname-stabiel-toename
+
+##### Voor agrarisch gebied
+* idem, maar selecteer gridcellen zonder N-typen en met Top10NL agrarisch grasland
+* gebaseerd op Top10NL 2.5 m raster, BNL code 100 
+
+ 
\ No newline at end of file
--- a/a_prep_dat/compile_250m_hok_db.py
+++ b/a_prep_dat/compile_250m_hok_db.py
+"""
+Script to generate a pandas dataframe where:
+index = 250m hok ID XXXXXX_YYYYYY
+columns = fractional cover of Beheertypen (Nxx.yy) and other land cover categories
+
+n features = 1.456.000 (1.120 cols, 1.300 rows)
+
+"""
+
+import os
+import pickle
+import affine
+import numpy as np
+import pandas as pd
+
+'''
+Create dataframe and basic columns identifying 250m hokken.  
+'''
+ncols = 1120
+nrows = 1300
+shape = (nrows, ncols)
+rows = np.array([[i] * ncols for i in range(0, nrows)]).reshape(np.product(shape))
+cols = np.array([i for i in range(0, ncols)] * nrows).reshape(np.product(shape))
+
+# affine transformation b'teen row-col and RD-New coords for grid
+affine_trans = affine.Affine.from_gdal(0, 250, 0, 625000, 0, -250)  # (x topleft, width, 0, y topleft, 0, height)
+
+db = pd.DataFrame({'row': rows, 'col':cols})
+db['x_topleft'] = db.apply(lambda x: ((x.col, x.row) * affine_trans)[0], axis=1).astype(np.int32)
+db['y_topleft'] = db.apply(lambda x: ((x.col, x.row) * affine_trans)[1], axis=1).astype(np.int32)
+db['hok_id'] = db.apply(lambda x: '{0}_{1}'.format(x.x_topleft, x.y_topleft), axis=1)
+
+'''
+Add pickled ASC rasters showing fractual cover of LU class X as columns
+Never change nrow(db), padd missing cells with 0
+'''
+sources = ['N1900.pkl', 'N1800.pkl', 'N1706.pkl', 'N1705.pkl', 'N1704.pkl', 'N1703.pkl', 'N1702.pkl', 'N1701.pkl', 'N1604.pkl', 'N1603.pkl', 'N1602.pkl', 'N1601.pkl', 'N1502.pkl', 'N1501.pkl', 'N1403.pkl', 'N1402.pkl', 'N1401.pkl', 'N1302.pkl', 'N1301.pkl', 'N1206.pkl', 'N1205.pkl', 'N1204.pkl', 'N1203.pkl', 'N1202.pkl', 'N1201.pkl', 'N1101.pkl', 'N1002.pkl', 'N1001.pkl', 'N0901.pkl', 'N0804.pkl', 'N0803.pkl', 'N0802.pkl', 'N0801.pkl', 'N0702.pkl', 'N0701.pkl', 'N0606.pkl', 'N0605.pkl', 'N0604.pkl', 'N0603.pkl', 'N0602.pkl', 'N0601.pkl', 'N0502.pkl', 'N0501.pkl', 'N0404.pkl', 'N0403.pkl', 'N0402.pkl', 'N0401.pkl', 'N0301.pk', 'N0201.pkl', 'Top10NL_agr_grasland.pkl', 'OpenDuin.pkl', 'Moeras.pkl', 'Heide.pkl', 'HalfnatuurlijkGrasland.pkl', 'Bos.pkl']
+pkl_dir = r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken'
+
+for source in sources:
+    src_name = os.path.splitext(source)[0]
+
+    try:
+        with open(os.path.join(pkl_dir, source), 'rb') as handle:
+            source_df = pickle.load(handle)
+            source_df.set_index('hok_id', inplace=True, verify_integrity=True)
+            source_df.rename(columns={'area_m2': src_name}, inplace=True)
+
+        db = pd.merge(db, source_df, left_on='hok_id', right_index=True, how='left')
+
+        print('DB shape now: {0} rows, {1} cols'.format(db.shape[0], db.shape[1]))
+
+    except FileNotFoundError:
+        continue
+
+
+db.fillna(0, inplace=True)
+intcols = [col for col in list(db) if col not in['row', 'col', 'x_topleft', 'y_topleft', 'hok_id']]
+db = db.astype(dict(zip(intcols, ['int32']*len(intcols))))
+print(list(db))
+print(db.dtypes)
+print(db.head())
+
+with open(os.path.join(pkl_dir, 'nl250mgrid.pkl'), 'wb') as handle:
+    pickle.dump(db, handle)
+
+with open(os.path.join(pkl_dir, 'nl250mgrid.pkl'), 'rb') as handle:
+    foo = pickle.load(handle)
--- a/a_prep_dat/extent_maps.py
+++ b/a_prep_dat/extent_maps.py
+import geopandas as gp
+import os
+import pickle
+import pandas as pd
+
+# read the 250m grid for NL
+with open(os.path.join(r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken', 'nl250mgrid.pkl'), 'rb') as handle:
+    nl250 = pickle.load(handle)
+
+# Queries voor 250m hokken
+natuur_query = 'N1202 > 0 | N1302 > 0'
+agr_query = 'Top10NL_agr_grasland > 0 & N1900 == 0 & N1800 == 0 & N1706 == 0 & N1705 == 0 & N1704 == 0 & N1703 == 0 &' \
+            'N1702 == 0 & N1701 == 0 & N1604 == 0 & N1603 == 0 & N1602 == 0 & N1601 == 0 & N1502 == 0 & N1501 == 0 &' \
+            'N1403 == 0 & N1402 == 0 & N1401 == 0 & N1302 == 0 & N1301 == 0 & N1206 == 0 & N1205 == 0 & N1204 == 0 &' \
+            'N1203 == 0 & N1202 == 0 & N1201 == 0 & N1101 == 0 & N1002 == 0 & N1001 == 0 & N0901 == 0 & N0804 == 0 &' \
+            'N0803 == 0 & N0802 == 0 & N0801 == 0 & N0702 == 0 & N0701 == 0 & N0606 == 0 & N0605 == 0 & N0604 == 0 &' \
+            'N0603 == 0 & N0602 == 0 & N0601 == 0 & N0502 == 0 & N0501 == 0 & N0404 == 0 & N0403 == 0 & N0402 == 0 &' \
+            'N0401 == 0 & N0201 == 0'
+
+natuur_hok_ids = nl250.query(natuur_query).hok_id
+agr_hok_ids = nl250.query(agr_query, engine='python').hok_id
+
+# full extent shp
+nlgrid = gp.read_file(r'd:\hotspot_working\shp_250mgrid\hok250m_fullextent.shp')
+natuur_grid = nlgrid.loc[nlgrid['ID'].isin(natuur_hok_ids)]
+agr_grid = nlgrid.loc[nlgrid['ID'].isin(agr_hok_ids)]
+
+natuur_grid.to_file(r'd:\clo1543\natuurgrid.shp')
+agr_grid.to_file(r'd:\clo1543\agrgrid.shp')
\ No newline at end of file
--- a/a_prep_dat/gen_agra_grasland.py
+++ b/a_prep_dat/gen_agra_grasland.py
+"""
+Script to generate pkl file containing list of 250m hok IDs where agrarisch grasland areaal > 0
+
+Hans Roelofsen, september 2019
+
+"""
+
+import os
+import numpy as np
+import pandas as pd
+import pickle
+
+agr_in = r'D:\projects_code\sastdes_wenr_2\sastdes_HANS_20190903-1109.csv'
+pkl_dir_out = r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken'
+pkl_out = 'Top10NL_agr_grasland.pkl'
+
+# read the CSV file containing cell IDs
+agr = pd.read_csv(agr_in, comment='#', sep=';')
+
+# area is now in sq km, translate to m2
+agr['area_m2'] = agr.apply(lambda x: np.multiply(x.HANS, 1000000), axis=1)
+
+# rename columns for consistenty with other pickled dataframes
+agr.rename(columns={'ID': 'hok_id'}, inplace=True)
+
+with open(os.path.join(pkl_dir_out, pkl_out), 'wb') as f:
+    pickle.dump(agr.drop([col for col in list(agr) if col not in ['hok_id', 'area_m2']], axis=1), f)
+
+
--- a/clo_analysis.py
+++ b/clo_analysis.py
+'''
+Analysis for CLO1543
+
+Hans Roelofsen, september 2019
+
+'''
+
+import os
+import pandas as pd
+import pickle
+
+
+# General specs
+soort_lijst = ['SNL', 'Bijl1']
+periodes = ['1994-2001', '2002-2009', '2010-2017']
+trends = {'afname': range(-1000, -1), 'stabiel': range(-1, 2), 'toename': range(2, 1000)}
+
+# read the 250m grid for NL
+with open(os.path.join(r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken', 'nl250mgrid.pkl'), 'rb') as handle:
+    nl250 = pickle.load(handle)
+
+# Queries voor 250m hokken
+natuur_query = 'N1202 > 0 | N1302 > 0'
+agr_query = 'Top10NL_agr_grasland > 0 & N1900 == 0 & N1800 == 0 & N1706 == 0 & N1705 == 0 & N1704 == 0 & N1703 == 0 &' \
+            'N1702 == 0 & N1701 == 0 & N1604 == 0 & N1603 == 0 & N1602 == 0 & N1601 == 0 & N1502 == 0 & N1501 == 0 &' \
+            'N1403 == 0 & N1402 == 0 & N1401 == 0 & N1302 == 0 & N1301 == 0 & N1206 == 0 & N1205 == 0 & N1204 == 0 &' \
+            'N1203 == 0 & N1202 == 0 & N1201 == 0 & N1101 == 0 & N1002 == 0 & N1001 == 0 & N0901 == 0 & N0804 == 0 &' \
+            'N0803 == 0 & N0802 == 0 & N0801 == 0 & N0702 == 0 & N0701 == 0 & N0606 == 0 & N0605 == 0 & N0604 == 0 &' \
+            'N0603 == 0 & N0602 == 0 & N0601 == 0 & N0502 == 0 & N0501 == 0 & N0404 == 0 & N0403 == 0 & N0402 == 0 &' \
+            'N0401 == 0 & N0201 == 0'
+
+natuur_hok_ids = nl250.query(natuur_query).hok_id
+agr_hok_ids = nl250.query(agr_query, engine='python').hok_id
+
+# query the PGO data for vogels, vlinder, planten
+pgo_query = "soortlijst in ['SNL', 'Bijl1'] & snl in ['N1202', 'N1301']"
+
+with open(r'd:\hotspot_working\a_broedvogels\Soortenrijkdom\Species_richness\vogel_all4.pkl', 'rb') as handle:
+    vogel = pickle.load(handle)
+    vogel = vogel.query(pgo_query)
+
+with open(r'd:\hotspot_working\b_vaatplanten\Soortenrijkdom\vaatplant_all_Bijl1.pkl', 'rb') as handle:
+    plant_bij1 = pickle.load(handle)
+    plant_bij1 = plant_bij1.query(pgo_query)
+
+with open(r'd:\hotspot_working\b_vaatplanten\Soortenrijkdom\vaatplant_all_snl.pkl', 'rb') as handle:
+    plant_snl = pickle.load(handle)
+    plant_snl = plant_snl.query(pgo_query)
+
+with open(r'd:\hotspot_working\c_vlinders\vlinder_all_v2.pkl', 'rb') as handle:
+    vlinder = pickle.load(handle)
+    vlinder = vlinder.query(pgo_query)
+
+pgo_dat = pd.concat([vogel, plant_bij1, plant_snl, vlinder])
+
+# sanity check
+check_tab1 = pd.pivot_table(pgo_dat, index='soortlijst', columns=['soortgroep', 'snl'], values='hok_id', aggfunc='count')
+check_tab2 = pd.pivot_table(pgo_dat, index='hok_id', columns=['periode'], values='n', aggfunc='count')