diff --git a/a_prep_dat/compile_250m_hok_db.py b/a_prep_dat/compile_250m_hok_db.py index 7aa60ec19e7fc43f50bfd86caf0ecbc9086f58ca..5345970cfdd78b5a7ad5925d8b6fd2fde58d85de 100644 --- a/a_prep_dat/compile_250m_hok_db.py +++ b/a_prep_dat/compile_250m_hok_db.py @@ -43,7 +43,7 @@ sources = ['N1900.pkl', 'N1800.pkl', 'N1706.pkl', 'N1705.pkl', 'N1704.pkl', 'N17 'N0201.pkl', 'OpenDuin.pkl', 'Moeras.pkl', 'Heide.pkl', 'HalfnatuurlijkGrasland.pkl', 'Bos.pkl', 'prov-Zeeland.pkl', 'prov-Noord-Holland.pkl', 'prov-geen.pkl', 'prov-Overijssel.pkl', 'prov-Groningen.pkl', 'prov-Friesland.pkl', 'prov-Limburg.pkl', 'prov-Utrecht.pkl', 'prov-Gelderland.pkl', 'prov-Flevoland.pkl', - 'prov-Noord-Brabant.pkl', 'prov-Zuid-Holland.pkl', 'prov-Drenthe.pkl', 'twente.pkl'] + 'prov-Noord-Brabant.pkl', 'prov-Zuid-Holland.pkl', 'prov-Drenthe.pkl', 'twente_hdr.pkl'] pkl_dir = r'd:\hotspot_working\a_broedvogels\SNL_grids\augurken' for source in sources: diff --git a/b_analysis/clo_1518_analysis.py b/b_analysis/clo_1518_analysis.py index 0ba84f3a09b0a7583747cf503b355193357c1545..0cf85e175f2056b42380ffaa0066037f0b226877 100644 --- a/b_analysis/clo_1518_analysis.py +++ b/b_analysis/clo_1518_analysis.py @@ -112,6 +112,7 @@ hokken = nl250.query(spatial_query, engine='python') # reduce to hokken complyi hokken['areaal_m2'] = hokken.loc[:, snl_gebieden_wel].sum(axis=1) # calculate areaal per hok of selected SNL types hokken['areaal_ha'] = hokken.apply(lambda row: np.divide(row.areaal_m2, 10000), axis=1) # sq m to hectare hokken.set_index('hok_id') +hokken['provincie'] = hokken.filter(regex='^prov', axis='columns').idxmax(axis=1) # name of provincie for each hok if hokken.empty: raise Exception('No hokken found complying to query.') else: @@ -187,17 +188,19 @@ dat_piv[('2002-2009', 'CLO1518', 'score')] = clo1518_2002_2009_score dat_piv[('2010_2017', 'CLO1518', 'score')] = clo1518_2010_2017_score ''' -Label each hok in dat_piv with: 1) SNL BeheerType (NXXYY), 2) EcosysteemType relating to the BT 3) area 
in m2 of (1), +merge PGO observation data in dat_piv with info on the hokken from hokken. Note multiindex will be removed +''' + +dat_snl = pd.merge(left=dat_piv, right=hokken.loc[:, ['hok_id', 'areaal_ha', 'provincie', 'twente_hdr']], + left_index=True, right_on='hok_id', how='right') # how='right': keep all selected hokken, NaN where PGO data is missing. + +''' +Label each hok in dat_snl with: 1) SNL BeheerType (NXXYY), 2) EcosysteemType relating to the BT 3) area in m2 of (1), 4) Provincie to which it is assigned ''' -dat_piv['snl_bt'] = '_'.join(snl for snl in snl_soortlijst) # beheertypen, possibly > 1 +dat_snl['snl_bt'] = '_'.join(snl for snl in snl_soortlijst) # beheertypen, possibly > 1 ecotype = clo.bt_to_eco(snl_soortlijst[0]) # dit is verschrikkelijk om alleen [0] te gebruiken, maar goed -dat_piv['ecosystype'] = ecotype -hokken['provincie'] = hokken.filter(regex='^prov', axis='columns').idxmax(axis=1) # name of provincie for each hok - -# merge PGO observation data in dat_piv with info on the hokken from hokken. Note multiindex will be removed -dat_snl = pd.merge(left=dat_piv, right=hokken.loc[:, ['hok_id', 'areaal_ha', 'provincie', 'twente']], left_index=True, - right_on='hok_id', how='right') # how = inner, dus bewaar enkel de overlap. +dat_snl['ecosystype'] = ecotype ''' Wrap up @@ -208,10 +211,24 @@ if len(set(dat_snl.hok_id)) != dat_snl.shape[0]: print('Intersection between PGO observation locations {0} and SNL hokken selection {1} reduces to {2} ' 'hokken'.format(dat_piv.shape[0], hokken.shape[0], dat_snl.shape[0])) +# pretty colnames colnames = dat_snl.columns.tolist() new_colnames = ['_'.join([x for x in foo if len(x) > 0]) for foo in colnames if isinstance(foo, tuple)] dat_snl.rename(columns=dict(zip(colnames, new_colnames)), inplace=True) # remove commas from colnames +# sort columns neatly (dit moet vast makkelijker kunnen, maar daar ben ik nu te moe voor.) 
+col_order = ['hok_id', 'snl_bt', 'ecosystype', 'areaal_ha', '1994-2001_vaatplant_SNL', '1994-2001_vaatplant_Bijl1', + '1994-2001_vogel_SNL', '1994-2001_vogel_Bijl1', '1994-2001_vlinder_SNL', '1994-2001_vlinder_Bijl1', + '1994-2001_Bijl1_tot', '1994-2001_Bijl1_cap', '1994-2001_SNL_tot', '1994-2001_SNL-Bijl1_tot', + '1994-2001_CLO1518_score', '2002-2009_vaatplant_SNL', '2002-2009_vaatplant_Bijl1', + '2002-2009_vogel_SNL', '2002-2009_vogel_Bijl1', '2002-2009_vlinder_SNL', '2002-2009_vlinder_Bijl1', + '2002-2009_Bijl1_tot', '2002-2009_Bijl1_cap', '2002-2009_SNL_tot', '2002-2009_SNL-Bijl1_tot', + '2002-2009_CLO1518_score', '2010-2017_vaatplant_SNL', '2010-2017_vogel_SNL', '2010-2017_vogel_Bijl1', + '2010-2017_vaatplant_Bijl1', '2010-2017_vlinder_SNL', '2010-2017_vlinder_Bijl1', '2010-2017_Bijl1_tot', + '2010-2017_Bijl1_cap', '2010-2017_SNL_tot', '2010-2017_SNL-Bijl1_tot', '2010_2017_CLO1518_score', + 'provincie', 'twente_hdr'] +dat_snl = dat_snl.reindex(columns=col_order) + # Optional TODO: pivot on CLO scores to calculate area for each category ''' diff --git a/z_utils/merge_tabs.py b/z_utils/merge_tabs.py index 3603061783ca4c242057004a87080585966feab8..1096c7542ba73fe02bf99ffdcc867dc2d0acc690 100644 --- a/z_utils/merge_tabs.py +++ b/z_utils/merge_tabs.py @@ -7,6 +7,7 @@ Hans Roelofsen, 7 nov 2019 ''' import os +import pandas as pd import datetime in_dir = r'd:\clo1543\CLO1518\bt_tabel' @@ -17,16 +18,25 @@ out_base_name = 'combined_table_bts' timestamp_brief = datetime.datetime.now().strftime("%y%m%d-%H%M") i = 0 -with open(os.path.join(out_dir, '{0}_{1}.txt'.format(out_base_name, timestamp_brief)), 'w') as f: - for csv_file in in_csv: +holder = [] +for csv_file in in_csv: + df = pd.read_csv(os.path.join(in_dir, csv_file), comment='#', sep=';') + df['source_file'] = csv_file + holder.append(df) + +df_out = pd.concat(holder) +df_out.to_csv(os.path.join(out_dir, '{0}_{1}.txt'.format(out_base_name, timestamp_brief)), index=False, sep=';') +''' + with 
open(os.path.join(in_dir, csv_file)) as csv: if i == 0: for line in csv.readlines(): - if not (line.startswith('#') or line.endswith('wente')): + if not (line.startswith('#')): f.write('{0};{1}\n'.format(line.rstrip('\n'), csv_file)) else: for line in csv.readlines(): - if not (line.startswith('#')): + if not (line.startswith('#') or line.endswith('wente_hdr')): f.write('{0};{1}\n'.format(line.rstrip('\n'), csv_file)) i += 1 +''' \ No newline at end of file