Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
wenr_clo1543
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Roelofsen, Hans
wenr_clo1543
Commits
75f0ac18
Commit
75f0ac18
authored
5 years ago
by
haro-nl
Browse files
Options
Downloads
Patches
Plain Diff
added function to negate SNL types
parent
5abaaa33
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
a_prep_dat/compile_250m_hok_db.py
+5
-5
5 additions, 5 deletions
a_prep_dat/compile_250m_hok_db.py
clo_analysis.py
+57
-26
57 additions, 26 deletions
clo_analysis.py
with
62 additions
and
31 deletions
a_prep_dat/compile_250m_hok_db.py
+
5
−
5
View file @
75f0ac18
...
@@ -39,7 +39,7 @@ sources = ['N1900.pkl', 'N1800.pkl', 'N1706.pkl', 'N1705.pkl', 'N1704.pkl', 'N17
...
@@ -39,7 +39,7 @@ sources = ['N1900.pkl', 'N1800.pkl', 'N1706.pkl', 'N1705.pkl', 'N1704.pkl', 'N17
'
N1401.pkl
'
,
'
N1302.pkl
'
,
'
N1301.pkl
'
,
'
N1206.pkl
'
,
'
N1205.pkl
'
,
'
N1204.pkl
'
,
'
N1203.pkl
'
,
'
N1202.pkl
'
,
'
N1401.pkl
'
,
'
N1302.pkl
'
,
'
N1301.pkl
'
,
'
N1206.pkl
'
,
'
N1205.pkl
'
,
'
N1204.pkl
'
,
'
N1203.pkl
'
,
'
N1202.pkl
'
,
'
N1201.pkl
'
,
'
N1101.pkl
'
,
'
N1002.pkl
'
,
'
N1001.pkl
'
,
'
N0901.pkl
'
,
'
N0804.pkl
'
,
'
N0803.pkl
'
,
'
N0802.pkl
'
,
'
N1201.pkl
'
,
'
N1101.pkl
'
,
'
N1002.pkl
'
,
'
N1001.pkl
'
,
'
N0901.pkl
'
,
'
N0804.pkl
'
,
'
N0803.pkl
'
,
'
N0802.pkl
'
,
'
N0801.pkl
'
,
'
N0702.pkl
'
,
'
N0701.pkl
'
,
'
N0606.pkl
'
,
'
N0605.pkl
'
,
'
N0604.pkl
'
,
'
N0603.pkl
'
,
'
N0602.pkl
'
,
'
N0801.pkl
'
,
'
N0702.pkl
'
,
'
N0701.pkl
'
,
'
N0606.pkl
'
,
'
N0605.pkl
'
,
'
N0604.pkl
'
,
'
N0603.pkl
'
,
'
N0602.pkl
'
,
'
N0601.pkl
'
,
'
N0502.pkl
'
,
'
N0501.pkl
'
,
'
N0404.pkl
'
,
'
N0403.pkl
'
,
'
N0402.pkl
'
,
'
N0401.pkl
'
,
'
N0301.pk
'
,
'
N0601.pkl
'
,
'
N0502.pkl
'
,
'
N0501.pkl
'
,
'
N0404.pkl
'
,
'
N0403.pkl
'
,
'
N0402.pkl
'
,
'
N0401.pkl
'
,
'
N0301.pk
l
'
,
'
N0201.pkl
'
,
'
OpenDuin.pkl
'
,
'
Moeras.pkl
'
,
'
Heide.pkl
'
,
'
N0201.pkl
'
,
'
OpenDuin.pkl
'
,
'
Moeras.pkl
'
,
'
Heide.pkl
'
,
'
HalfnatuurlijkGrasland.pkl
'
,
'
Bos.pkl
'
]
'
HalfnatuurlijkGrasland.pkl
'
,
'
Bos.pkl
'
]
pkl_dir
=
r
'
d:\hotspot_working\a_broedvogels\SNL_grids\augurken
'
pkl_dir
=
r
'
d:\hotspot_working\a_broedvogels\SNL_grids\augurken
'
...
@@ -48,16 +48,16 @@ for source in sources:
...
@@ -48,16 +48,16 @@ for source in sources:
src_name
=
os
.
path
.
splitext
(
source
)[
0
]
src_name
=
os
.
path
.
splitext
(
source
)[
0
]
try
:
try
:
with
open
(
os
.
path
.
join
(
pkl_dir
,
source
),
'
rb
'
)
as
handle
:
source_df
=
pd
.
read_pickle
(
os
.
path
.
join
(
pkl_dir
,
source
))
source_df
=
pickle
.
load
(
handle
)
source_df
.
set_index
(
'
hok_id
'
,
inplace
=
True
,
verify_integrity
=
True
)
source_df
.
set_index
(
'
hok_id
'
,
inplace
=
True
,
verify_integrity
=
True
)
source_df
.
rename
(
columns
=
{
'
area_m2
'
:
src_name
},
inplace
=
True
)
source_df
.
rename
(
columns
=
{
'
area_m2
'
:
src_name
},
inplace
=
True
)
db
=
pd
.
merge
(
db
,
source_df
,
left_on
=
'
hok_id
'
,
right_index
=
True
,
how
=
'
left
'
)
db
=
pd
.
merge
(
db
,
source_df
,
left_on
=
'
hok_id
'
,
right_index
=
True
,
how
=
'
left
'
)
print
(
'
DB shape now: {0} rows, {1} cols
'
.
format
(
db
.
shape
[
0
],
db
.
shape
[
1
]))
print
(
'
DB shape now: {0} rows, {1} cols
'
.
format
(
db
.
shape
[
0
],
db
.
shape
[
1
]))
except
FileNotFoundError
:
except
FileNotFoundError
:
print
(
'
Warning, file not found: {0}
'
.
format
(
source
))
continue
continue
...
...
This diff is collapsed.
Click to expand it.
clo_analysis.py
+
57
−
26
View file @
75f0ac18
...
@@ -20,30 +20,46 @@ from z_utils import clo
...
@@ -20,30 +20,46 @@ from z_utils import clo
'''
'''
Model run identifier
Model run identifier
'''
'''
id
=
'
BdK
01
'
id
=
'
MS
01
b
'
'''
'''
Query PGO data for a specific SNL soortenlijst and specific spatial filter
Query PGO data for a specific SNL soortenlijst and specific spatial filter
'''
'''
soortgroepen
=
[
'
vogel
'
,
'
vlinder
'
,
'
vaatplant
'
]
soortgroepen
=
[
'
vogel
'
,
'
vlinder
'
,
'
vaatplant
'
]
snl_soortlijst
=
[
'
HalfnatuurlijkGrasland
'
]
# soortenlijst behorende bij een SNL type
snl_soortlijst
=
[
'
N1202
'
,
'
N1301
'
]
# soortenlijst behorende bij een SNL type
sub_soortlijst
=
[
'
EcoSysLijst
'
]
# SNL, Bijlage 1 of EcoSysLijst?
sub_soortlijst
=
[
'
SNL
'
]
# SNL, Bijlage 1 of EcoSysLijst?
snl_gebieden
=
[
'
HalfnatuurlijkGrasland
'
]
# 250m cellen met welk SNL type?
snl_gebieden_wel
=
[
'
N1900
'
]
# 250m cellen met welk SNL type?
snl_gebieden_niet
=
[
'
N0201
'
,
'
N0301
'
,
'
N0401
'
,
'
N0402
'
,
'
N0403
'
,
'
N0404
'
,
'
N0501
'
,
'
N0502
'
,
'
N0601
'
,
'
N0602
'
,
'
N0603
'
,
'
N0604
'
,
'
N0605
'
,
'
N0606
'
,
'
N0701
'
,
'
N0702
'
,
'
N0801
'
,
'
N0802
'
,
'
N0803
'
,
'
N0804
'
,
'
N0901
'
,
'
N1001
'
,
'
N1002
'
,
'
N1101
'
,
'
N1201
'
,
'
N1202
'
,
'
N1203
'
,
'
N1204
'
,
'
N1205
'
,
'
N1206
'
,
'
N1301
'
,
'
N1302
'
,
'
N1401
'
,
'
N1402
'
,
'
N1403
'
,
'
N1501
'
,
'
N1502
'
,
'
N1601
'
,
'
N1602
'
,
'
N1603
'
,
'
N1604
'
,
'
N1701
'
,
'
N1702
'
,
'
N1703
'
,
'
N1704
'
,
'
N1705
'
,
'
N1706
'
]
# 250m cellen met welk SNL type?
dif_cats
=
[
range
(
-
1000
,
-
1
),
range
(
-
1
,
2
),
range
(
2
,
1000
)]
dif_cats
=
[
range
(
-
1000
,
-
1
),
range
(
-
1
,
2
),
range
(
2
,
1000
)]
dif_labs
=
[
'
afname
'
,
'
stabiel
'
,
'
toename
'
]
dif_labs
=
[
'
afname
'
,
'
stabiel
'
,
'
toename
'
]
# choose one of ['1994-2001', '2002-2009', '2010-2017'] for periode A and periode B, where stats will be made A-B
periodes
=
[
'
1994-2001
'
,
'
2002-2009
'
,
'
2010-2017
'
]
periode_A
=
'
2010-2017
'
periode_B
=
'
1994-2001
'
'''
'''
SPATIAL SELECTION
SPATIAL SELECTION
'''
'''
# read full 250 m grid of the Netherlands
spatial_query_p1
=
'
|
'
.
join
([
"
{0} > 0
"
.
format
(
x
)
for
x
in
snl_gebieden_wel
])
# cells must have > 0 m2 of each snl
spatial_query_p2
=
'
&
'
.
join
([
"
{0} == 0
"
.
format
(
x
)
for
x
in
snl_gebieden_niet
])
# cells must have 0 m2 of each snl
spatial_query
=
'
&
'
.
join
([
'
({0})
'
.
format
(
x
)
for
x
in
[
spatial_query_p1
,
spatial_query_p2
]
if
len
(
x
)
>
0
])
nl250
=
pd
.
read_pickle
(
os
.
path
.
join
(
r
'
d:\hotspot_working\a_broedvogels\SNL_grids\augurken
'
,
'
nl250mgrid.pkl
'
))
nl250
=
pd
.
read_pickle
(
os
.
path
.
join
(
r
'
d:\hotspot_working\a_broedvogels\SNL_grids\augurken
'
,
'
nl250mgrid.pkl
'
))
spatial_query
=
'
|
'
.
join
([
"
{0} > 0
"
.
format
(
x
)
for
x
in
snl_gebieden
])
# cells must have > 0 m2 of each snl type
hokken
=
nl250
.
query
(
spatial_query
,
engine
=
'
python
'
)
# reduce to hokken complying to the spatial query
hokken
=
nl250
.
query
(
spatial_query
)
# reduce to hokken complying to the spatial query
hokken
[
'
areaal
'
]
=
hokken
.
loc
[:,
snl_gebieden_wel
].
sum
(
axis
=
1
)
# calculate areaal per hok of selected SNL types
hokken
[
'
areaal
'
]
=
hokken
.
loc
[:,
snl_gebieden
].
sum
(
axis
=
1
)
# calculate areaal per hok of selecte SNL types
hokken
.
set_index
(
'
hok_id
'
)
hokken
.
set_index
(
'
hok_id
'
)
print
(
'
Querying {0} from nl250 hokken geeft {1} 250m hokken.
'
.
format
(
spatial_query
,
hokken
.
shape
[
0
]))
if
hokken
.
empty
:
raise
Exception
(
'
No hokken found complying to query.
'
)
else
:
print
(
'
Querying {0} from nl250 hokken geeft {1} 250m hokken.
'
.
format
(
spatial_query
,
hokken
.
shape
[
0
]))
'''
'''
SELECTION FROM PGO DATA
SELECTION FROM PGO DATA
...
@@ -66,24 +82,27 @@ Pivot to show plant, vogel, vlinder count per hok
...
@@ -66,24 +82,27 @@ Pivot to show plant, vogel, vlinder count per hok
pgo_piv
=
pd
.
pivot_table
(
pgo_dat
,
index
=
'
hok_id
'
,
columns
=
[
'
soortgroep
'
,
'
periode
'
],
values
=
'
n
'
,
aggfunc
=
'
sum
'
)
pgo_piv
=
pd
.
pivot_table
(
pgo_dat
,
index
=
'
hok_id
'
,
columns
=
[
'
soortgroep
'
,
'
periode
'
],
values
=
'
n
'
,
aggfunc
=
'
sum
'
)
'''
'''
Calc progress between p
3-p2
for plant, vogel, vlinder
Calc progress between p
eriode A - periode B
for plant, vogel, vlinder
.
'''
'''
for
soort
in
soortgroepen
:
for
soort
in
soortgroepen
:
trend_p3_p2
=
pgo_piv
[(
soort
,
'
2010-2017
'
)].
sub
(
pgo_piv
[(
soort
,
'
2002-2009
'
)],
axis
=
0
)
trend_pA_pB
=
pgo_piv
[(
soort
,
periode_A
)].
sub
(
pgo_piv
[(
soort
,
periode_B
)],
axis
=
0
)
score_p3_p2
=
trend_p3_p2
.
apply
(
clo
.
classifier
,
args
=
(
dif_cats
,
dif_labs
))
score_pA_pB
=
trend_pA_pB
.
apply
(
clo
.
classifier
,
args
=
(
dif_cats
,
dif_labs
))
pgo_piv
[(
soort
,
'
trend
'
)]
=
trend_p3_p2
pgo_piv
[(
soort
,
'
trend
'
)]
=
trend_pA_pB
# difference in species count
pgo_piv
[(
soort
,
'
score
'
)]
=
score_p3_p2
pgo_piv
[(
soort
,
'
score
'
)]
=
score_pA_pB
# score: afname, stabiel, toename
print
(
soort
)
print
(
pgo_piv
[(
soort
,
)].
head
(
10
))
print
(
pgo_piv
[(
soort
,
)].
head
(
10
))
print
(
pgo_piv
[(
soort
,
)].
tail
(
10
))
print
(
'
Found observations for species list {0}-{1} in {2} 250m hokken.
\n
Also found {3} 250m hokken complying
'
print
(
'
Found observations for species list {0}-{1} in {2} 250m hokken.
\n
Also found {3} 250m hokken complying
'
'
to spatial filter {4}.
\n
Narrowing down PGO observations to spatial filter XX 250 m. hokken
'
.
format
(
'
to spatial filter {4}.
\n
Narrowing down PGO observations to spatial filter XX 250 m. hokken
'
.
format
(
snl_soortlijst
,
sub_soortlijst
,
pgo_piv
.
shape
[
0
],
hokken
.
shape
[
0
],
spatial_query
,
))
snl_soortlijst
,
sub_soortlijst
,
pgo_piv
.
shape
[
0
],
hokken
.
shape
[
0
],
spatial_query
,
))
'''
'''
Summariz
e areaal
by trends
Calculat
e areaal
and stats on species per cell
'''
'''
output
=
pd
.
DataFrame
(
index
=
dif_labs
)
idx
=
pd
.
IndexSlice
output_stats
=
pd
.
DataFrame
(
index
=
[
'
mean
'
,
'
std
'
],
columns
=
pd
.
MultiIndex
.
from_product
([
soortgroepen
,
periodes
]))
output_areaal
=
pd
.
DataFrame
(
index
=
dif_labs
)
# TODO: also use nested columns here?
for
soort
in
soortgroepen
:
for
soort
in
soortgroepen
:
# Narrow down pgo_piv to just `soort` and any rows with any NA for trend or score.
# Narrow down pgo_piv to just `soort` and any rows with any NA for trend or score.
# Merge with data from 250m hokken to couple `areaal` to hok ID
# Merge with data from 250m hokken to couple `areaal` to hok ID
...
@@ -94,11 +113,18 @@ for soort in soortgroepen:
...
@@ -94,11 +113,18 @@ for soort in soortgroepen:
print
(
'
Empty result for {0}
'
.
format
(
soort
))
print
(
'
Empty result for {0}
'
.
format
(
soort
))
soort_summ
=
pd
.
DataFrame
(
0
,
index
=
dif_labs
,
columns
=
[
'
areaal
'
,
'
areaal_{0}_ha
'
.
format
(
soort
)])
soort_summ
=
pd
.
DataFrame
(
0
,
index
=
dif_labs
,
columns
=
[
'
areaal
'
,
'
areaal_{0}_ha
'
.
format
(
soort
)])
else
:
else
:
# Pivot to calculate sum areaal for afname, stabiel, toename
# Pivot to calculate areaal
soort_summ
=
pd
.
pivot_table
(
soort_sel
,
index
=
'
score
'
,
values
=
'
areaal
'
,
aggfunc
=
'
sum
'
)
soort_summ
=
pd
.
pivot_table
(
soort_sel
,
index
=
'
score
'
,
values
=
'
areaal
'
,
aggfunc
=
'
sum
'
)
soort_summ
[
'
areaal_ha_{0}
'
.
format
(
soort
)]
=
soort_summ
.
apply
(
lambda
row
:
np
.
divide
(
row
.
areaal
,
10000
),
axis
=
1
)
soort_summ
[
'
areaal_ha_{0}
'
.
format
(
soort
)]
=
soort_summ
.
apply
(
lambda
row
:
np
.
divide
(
row
.
areaal
,
10000
),
axis
=
1
)
output
=
output
.
join
(
soort_summ
.
drop
(
'
areaal
'
,
axis
=
1
),
how
=
'
right
'
)
# calculate mean and st dev number of species per cell #TODO: calculate nr of cells with obs
mean
=
soort_sel
.
loc
[:,
periodes
].
mean
(
axis
=
0
)
std
=
soort_sel
.
loc
[:,
periodes
].
std
(
axis
=
0
)
count
=
soort_sel
.
shape
[
0
]
output_stats
.
loc
[
idx
[
'
mean
'
],
idx
[
soort
,
:]]
=
mean
.
to_list
()
output_stats
.
loc
[
idx
[
'
std
'
],
idx
[
soort
,
:]]
=
std
.
to_list
()
output_areaal
=
output_areaal
.
join
(
soort_summ
.
drop
(
'
areaal
'
,
axis
=
1
),
how
=
'
right
'
)
# TODO: use .loc method
'''
'''
Write report
Write report
...
@@ -112,16 +138,21 @@ header = 'Extract from PGO species distribution data as follows:\n' \
...
@@ -112,16 +138,21 @@ header = 'Extract from PGO species distribution data as follows:\n' \
'
Soortgroepen: {0}
\n
'
\
'
Soortgroepen: {0}
\n
'
\
'
Soortlijst: {1}
\n
'
\
'
Soortlijst: {1}
\n
'
\
'
Sub-soortlijst: {2}
\n
'
\
'
Sub-soortlijst: {2}
\n
'
\
'
PGO data restricted to 250m cells with: {3} m2
\n
'
\
'
PGO data restricted to {3} 250m cells where: {4}
\n
'
\
'
Trends: {4}
\n\n
'
\
'
Trends: {5}
\n
'
\
'
Periode:
'
.
format
(
'
,
'
.
join
([
soort
for
soort
in
soortgroepen
]),
'
Trends berekend als # soorten in {6} minus {7}
\n\n
'
.
format
(
'
,
'
.
join
([
soort
for
soort
in
soortgroepen
]),
'
,
'
.
join
(
snl
for
snl
in
snl_soortlijst
),
'
,
'
.
join
(
snl
for
snl
in
snl_soortlijst
),
'
-
'
.
join
(
sub
for
sub
in
sub_soortlijst
),
spatial_query
,
categories
)
'
-
'
.
join
(
sub
for
sub
in
sub_soortlijst
),
pgo_piv
.
shape
[
0
],
spatial_query
,
categories
,
periode_A
,
periode_B
)
footer
=
'
\n
Made with Python 3.5 using pandas, geopandas, by Hans Roelofsen, WEnR team B&B, dated {0}
'
.
format
(
timestamp
)
footer
=
'
\n
Made with Python 3.5 using pandas, geopandas, by Hans Roelofsen, WEnR team B&B, dated {0}
'
.
format
(
timestamp
)
with
open
(
os
.
path
.
join
(
out_dir
,
basename
+
'
.txt
'
),
'
w
'
)
as
f
:
with
open
(
os
.
path
.
join
(
out_dir
,
basename
+
'
.txt
'
),
'
w
'
)
as
f
:
f
.
write
(
header
)
f
.
write
(
header
)
f
.
write
(
output
.
to_csv
(
sep
=
'
\t
'
,
line_terminator
=
'
\r
'
))
f
.
write
(
'
\n
##### HECTARE-TOENAME-STABIEL-AFNAME #####
\n
'
)
f
.
write
(
output_areaal
.
to_csv
(
sep
=
'
\t
'
,
line_terminator
=
'
\r
'
))
f
.
write
(
'
\n
##### GEMIDDELD + ST.DEV AANTAL SOORTEN IN HOK #####
\n
'
)
f
.
write
(
output_stats
.
to_csv
(
sep
=
'
\t
'
,
line_terminator
=
'
\r
'
))
f
.
write
(
footer
)
f
.
write
(
footer
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment