Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
wenr_clo1543
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Roelofsen, Hans
wenr_clo1543
Commits
bca41cf6
Commit
bca41cf6
authored
5 years ago
by
Roelofsen, Hans
Browse files
Options
Downloads
Patches
Plain Diff
Again improved analysis
parent
22325d0a
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
b_analysis/clo_1543_analysis.py
+53
-45
53 additions, 45 deletions
b_analysis/clo_1543_analysis.py
z_utils/clo.py
+2
-2
2 additions, 2 deletions
z_utils/clo.py
with
55 additions
and
47 deletions
b_analysis/clo_1543_analysis.py
+
53
−
45
View file @
bca41cf6
...
...
@@ -33,9 +33,13 @@ sub_soortlijst = ['SNL'] # SNL, Bijlage 1 of EcoSysLijst?
snl_gebieden_wel
=
[
'
N1900
'
]
# 250m cellen met welk SNL type?
snl_gebieden_niet
=
[]
# 250m cellen met welk SNL type?
# dif_cats = [range(-1000, -1), range(-1, 2), range(2, 1000)]
# Definieer de numerieke range voor de categorieën Afname, Stabiel, Toename:
# [a, b, c, d]
# a <= Afname < b
# b <= Stabiel < c
# c <= Toename < d
dif_cats
=
[
-
1000
,
-
1
,
2
,
1000
]
# see https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
dif_labs
=
[
'
afname
'
,
'
stabiel
'
,
'
toename
'
]
dif_labs
=
[
'
afname
'
,
'
stabiel
'
,
'
toename
'
]
# must be in INCREASING order
# choose one of ['1994-2001', '2002-2009', '2010-2017'] for periode A and periode B, where stats will be made A-B
periodes
=
[
'
1994-2001
'
,
'
2002-2009
'
,
'
2010-2017
'
]
...
...
@@ -72,44 +76,31 @@ vogel_trend, vogel_score = clo.calc_clo_1543(s_periode_A=dat_piv.loc[:, idx['vog
vlinder_trend
,
vlinder_score
=
clo
.
calc_clo_1543
(
s_periode_A
=
dat_piv
.
loc
[:,
idx
[
'
vlinder
'
,
periode_A
]],
s_periode_B
=
dat_piv
.
loc
[:,
idx
[
'
vlinder
'
,
periode_B
]],
bins
=
dif_cats
,
labels
=
dif_labs
)
'''
Create dataframe where all trends and scores are in a single column, plus `soortgroep` column etc
'''
# all trends in a series
trend_s
=
pd
.
concat
([
plant_trend
,
vogel_trend
,
vlinder_trend
],
axis
=
0
,
ignore_index
=
True
)
trend_s
.
name
=
'
clo1543_trend
'
# all scores in a series
score_s
=
pd
.
concat
([
plant_score
,
vogel_score
,
vlinder_score
],
axis
=
0
,
ignore_index
=
True
)
score_s
.
name
=
'
clo1543_score
'
'''
Create new df containing scores and trends for all soortgroepen. Then attach to hokken.
'''
foo_df
=
pd
.
concat
([
plant_trend
,
plant_score
,
vogel_trend
,
vogel_score
,
vlinder_trend
,
vlinder_score
],
axis
=
1
)
clo_df
=
pd
.
merge
(
left
=
hokken
,
right
=
foo_df
,
how
=
'
left
'
,
right_index
=
True
,
left_index
=
True
)
# series with species-group names
species_s
=
pd
.
Series
([
'
vaatplant
'
]
*
dat_piv
.
shape
[
0
]
+
[
'
vogel
'
]
*
dat_piv
.
shape
[
0
]
+
[
'
vlinder
'
]
*
dat_piv
.
shape
[
0
],
name
=
'
soortgroep
'
)
'''
Niet alle hokken zullen een PGO observatie hebben. Vul score kolommen in met `onbekend`.
'''
clo_df
.
filter
(
regex
=
'
*score*
'
).
fillna
(
value
=
'
onbekend
'
,
axis
=
1
)
# series hok_ids (note repetition)
hok_id_s
=
pd
.
concat
([
plant_score
.
index
.
to_series
(),
vogel_score
.
index
.
to_series
(),
vlinder_score
.
index
.
to_series
()],
axis
=
0
,
ignore_index
=
True
)
hok_id_s
.
name
=
'
hok_id
'
'''
Bereken totaal areaal per soortgroep voor CLO scores Toename, Stabiel, Afname, Onbekend.
Bereken ook netto toename (Toename-Afname) en score als netto/totaal areaal
'''
plant_out
=
pd
.
pivot_table
(
clo_df
,
index
=
'
vaatplant_score
'
,
values
=
'
areaal_ha
'
,
aggfunc
=
'
sum
'
)
plant_out
.
loc
[:,
'
netto
'
]
=
plant_out
.
apply
(
lambda
row
:
row
.
toename
-
row
.
afname
,
axis
=
1
)
plant_out
.
loc
[:,
'
score
'
]
=
plant_out
.
apply
(
lambda
row
:
row
.
netto
/
hokken
.
areaal_ha
.
sum
()
*
100
,
axis
=
1
)
# concat the series into a dataframe and merge with hokken. Retain only hokken (how=right)
left_df
=
pd
.
concat
([
trend_s
,
score_s
,
species_s
,
hok_id_s
],
axis
=
1
)
clo_dat
=
pd
.
merge
(
left
=
left_df
,
right
=
hokken
.
loc
[:,
[
'
hok_id
'
,
'
areaal_ha
'
,
'
twente
'
]],
# hokken only!
left_on
=
'
hok_id
'
,
right_on
=
'
hok_id
'
,
how
=
'
right
'
)
vogel_out
=
pd
.
pivot_table
(
clo_df
,
index
=
'
vogel_score
'
,
values
=
'
areaal_ha
'
,
aggfunc
=
'
sum
'
)
vogel_out
.
loc
[:,
'
netto
'
]
=
vogel_out
.
apply
(
lambda
row
:
row
.
toename
-
row
.
afname
,
axis
=
1
)
vogel_out
.
loc
[:,
'
score
'
]
=
vogel_out
.
apply
(
lambda
row
:
row
.
netto
/
hokken
.
areaal_ha
.
sum
()
*
100
,
axis
=
1
)
'''
Fill CLO scores with
'
onbekend
'
for cellen in Twente
'''
clo_dat
.
fillna
(
'
onbekend
'
,
inplace
=
True
,
axis
=
1
)
clo_dat
.
loc
[
clo_dat
.
twente
>
0
,
[
'
clo1543_trend
'
,
'
clo1543_score
'
]]
=
'
onbekend
'
vlinder_out
=
pd
.
pivot_table
(
clo_df
,
index
=
'
vlinder_score
'
,
values
=
'
areaal_ha
'
,
aggfunc
=
'
sum
'
)
vlinder_out
.
loc
[:,
'
netto
'
]
=
vlinder_out
.
apply
(
lambda
row
:
row
.
toename
-
row
.
afname
,
axis
=
1
)
vlinder_out
.
loc
[:,
'
score
'
]
=
vlinder_out
.
apply
(
lambda
row
:
row
.
netto
/
hokken
.
areaal_ha
.
sum
()
*
100
,
axis
=
1
)
'''
Pivot on CLO1543 scores and calculate total area per score for each soortgroep.
'''
piv_out
=
pd
.
pivot_table
(
clo_dat
,
index
=
'
soortgroep
'
,
columns
=
'
clo1543_score
'
,
values
=
'
areaal_ha
'
,
aggfunc
=
'
sum
'
)
piv_out
.
loc
[:,
'
netto
'
]
=
piv_out
.
apply
(
lambda
row
:
row
.
toename
-
row
.
afname
,
axis
=
1
)
piv_out
.
loc
[:,
'
score
'
]
=
piv_out
.
apply
(
lambda
row
:
row
.
netto
/
hokken
.
areaal_ha
.
sum
()
*
100
,
axis
=
1
)
piv_out
.
to_clipboard
(
sep
=
'
;
'
)
'''
Write report
...
...
@@ -118,18 +109,30 @@ timestamp = datetime.datetime.now().strftime("%y%m%d-%H%M")
out_dir
=
r
'
c:\Users\roelo008\OneDrive - WageningenUR\a_projects\CLO1543
'
basename
=
'
clo1543_{0}_{1}
'
.
format
(
id
,
timestamp
)
pgo_dat_summary
=
pd
.
pivot_table
(
pgo_dat
,
index
=
'
soortgroep
'
,
columns
=
[
'
snl
'
,
'
periode
'
],
values
=
'
n
'
,
aggfunc
=
'
count
'
)
cat_lims
=
[(
dif_cats
[
i
-
1
],
dif_cats
[
i
])
for
i
in
range
(
1
,
len
(
dif_cats
))]
lo_lims
=
[
l
for
(
l
,
_
)
in
cat_lims
]
up_lims
=
[
u
for
(
_
,
u
)
in
cat_lims
]
# TODO: hier nog een mooie string van maken!
# category_limits = ['{0} t/m {1}'.format(min(x), max(x)) for x in dif_cats]
# categories = ', '.join('{0}: {1}'.format(k, v) for k,v in dict(zip(dif_labs, category_limits)).items())
header
=
'
Extract from PGO species distribution data as follows:
\n
'
\
'
Soortgroepen: {0}
\n
'
\
'
Soortlijst: {1}
\n
'
\
'
Sub-soortlijst: {2}
\n
'
\
'
PGO data restricted to {3} 250m cells where: {4}
\n
'
\
'
Trends: {5}
\n
'
\
'
Trends berekend als # soorten in {6} minus {7}
\n\n
'
.
format
(
'
,
'
.
join
([
soort
for
soort
in
soortgroepen
]),
'
,
'
.
join
(
snl
for
snl
in
snl_soortlijst
),
'
-
'
.
join
(
sub
for
sub
in
sub_soortlijst
),
dat_piv
.
shape
[
0
],
spat_query
,
dif_cats
,
clo_scores
=
header
=
'
#Model run dated: {0} by {1}
\n
#
\n
'
\
'
#Extract from PGO species distribution data with PGO Query:
\n
'
\
'
# Soortgroepen: {2}
\n
'
\
'
# SNL Soortlijst: {3}
\n
'
\
'
# SNL Sub-soortlijst: {4}
\n
'
\
'
# ==> {5} observations in {6} different 250m hokken (see also PGO DATA SUMMARY)
\n
'
\
'
#Selection from 250m hokken grid with Beheertypen Query:
\n
'
\
'
# {7}
\n
'
\
'
# ==> {8} 250m hokken with total {9} hectare.
\n
'
\
'
#Trend refers to species difference between {10}-{11}}
\n
'
\
'
#Scores are as follows:
\n
'
\
''
.
format
(
timestamp
,
os
.
environ
.
get
(
'
USERNAME
'
),
'
,
'
.
join
([
soort
for
soort
in
soortgroepen
]),
'
,
'
.
join
(
snl
for
snl
in
snl_soortlijst
),
'
-
'
.
join
(
sub
for
sub
in
sub_soortlijst
),
pgo_dat
.
shape
[
0
],
dat_piv
.
shape
[
0
],
spat_query
,
hokken
.
shape
[
0
],
hokken
.
areaal_ha
.
sum
(),
periode_A
,
periode_A
,
dif_cats
,
periode_A
,
periode_B
)
footer
=
'
\n
Made with Python 3.5 using pandas, geopandas, by Hans Roelofsen, WEnR team B&B, dated {0}
'
.
format
(
timestamp
)
...
...
@@ -138,7 +141,12 @@ with open(os.path.join(out_dir, basename + '.txt'), 'w') as f:
f
.
write
(
'
\n
###PGO DATA SUMMARY###
\n
'
)
f
.
write
(
pgo_dat_summary
.
to_csv
(
sep
=
'
\t
'
,
line_terminator
=
'
\r
'
))
f
.
write
(
'
\n
##### HECTARE-TOENAME-STABIEL-AFNAME #####
\n
'
)
f
.
write
(
piv_out
.
to_csv
(
sep
=
'
\t
'
,
line_terminator
=
'
\r
'
))
f
.
write
(
'
\n\n
'
)
f
.
write
(
plant_out
.
to_csv
(
sep
=
'
\t
'
,
line_terminator
=
'
\r
'
))
f
.
write
(
'
\n\n
'
)
f
.
write
(
vogel_out
.
to_csv
(
sep
=
'
\t
'
,
line_terminator
=
'
\r
'
))
f
.
write
(
'
\n\n
'
)
f
.
write
(
vlinder_out
.
to_csv
(
sep
=
'
\t
'
,
line_terminator
=
'
\r
'
))
f
.
write
(
footer
)
...
...
This diff is collapsed.
Click to expand it.
z_utils/clo.py
+
2
−
2
View file @
bca41cf6
...
...
@@ -233,9 +233,9 @@ def calc_clo_1543(s_periode_A, s_periode_B, bins, labels):
try
:
sp_diff
=
s_periode_A
.
sub
(
s_periode_B
)
sp_diff
.
name
=
'
clo1543_t
rend_{0}
'
.
format
(
s_periode_A
.
name
)
sp_diff
.
name
=
'
rend_{0}
'
.
format
(
s_periode_A
.
name
)
sp_diff_score
=
sp_diff
.
apply
(
classifier
,
bins
=
bins
,
labels
=
labels
)
sp_diff_score
.
name
=
'
clo1543_
score_{0}
'
.
format
(
s_periode_A
.
name
)
sp_diff_score
.
name
=
'
score_{0}
'
.
format
(
s_periode_A
.
name
)
return
sp_diff
,
sp_diff_score
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment