Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
ACT - Time Series Analysis in Flood-Prone Areas
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Kies, Jacqueline
ACT - Time Series Analysis in Flood-Prone Areas
Commits
761db737
Commit
761db737
authored
2 years ago
by
Verouden, Niels
Browse files
Options
Downloads
Patches
Plain Diff
Update s23OutlierDetection.py
parent
89ba731b
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
urban_areas/py/s23OutlierDetection.py
+38
-20
38 additions, 20 deletions
urban_areas/py/s23OutlierDetection.py
with
38 additions
and
20 deletions
urban_areas/py/s23OutlierDetection.py
+
38
−
20
View file @
761db737
...
...
@@ -13,7 +13,7 @@ import os
# CHECK OUTLIERS
# =============================================================================
# METHOD 1: VISUALLY CHECK OUTLIERS
def
visualOutlierDetection
(
SAR_path
,
df_total
,
days
,
mean_VV
,
sum_xdays
):
def
visualOutlierDetection
(
title_name
,
df_total
,
days
,
mean_VV
,
sum_xdays
):
## Create copy of dataframe and keep only relevant columns for plotting
df_plot
=
df_total
.
copy
()
df_plot
=
df_plot
[[
mean_VV
,
sum_xdays
]]
...
...
@@ -26,24 +26,41 @@ def visualOutlierDetection(SAR_path, df_total, days, mean_VV, sum_xdays):
ax2
=
sns
.
histplot
(
data
=
df_plot
,
x
=
sum_xdays
,
kde
=
True
,
color
=
"
skyblue
"
,
ax
=
ax2
)
ax2
.
set_title
(
f
'
Histogram of sum
{
days
}
-days precipitation (in mm)
'
,
fontdict
=
{
'
fontsize
'
:
15
})
plt
.
tight_layout
()
plt
.
savefig
(
f
"
./urban_areas/output/Histogram_
{
title_name
}
.png
"
,
dpi
=
300
)
plt
.
show
()
## METHOD 2: BOXPLOT
sns
.
set
(
rc
=
{
'
figure.figsize
'
:(
15
,
8
)})
plt
.
title
(
f
'
Boxplot of mean VV
and sum
{
days
}
-days precipitation (in mm) (
{
SAR_path
}
)
'
,
sns
.
set
(
rc
=
{
'
figure.figsize
'
:(
15
,
8
)})
plt
.
title
(
f
'
Boxplot of mean VV
(
{
title_name
}
)
'
,
fontdict
=
{
'
fontsize
'
:
15
})
sns
.
boxplot
(
data
=
df_plot
,
orient
=
"
h
"
,
palette
=
"
Set1
"
)
sns
.
swarmplot
(
data
=
df_plot
,
orient
=
"
h
"
,
color
=
"
.25
"
,
size
=
5
)
# sns.reset_orig()
sns
.
boxplot
(
data
=
df_plot
[
mean_VV
],
orient
=
"
h
"
,
color
=
"
Red
"
)
sns
.
swarmplot
(
data
=
df_plot
[
mean_VV
],
orient
=
"
h
"
,
color
=
"
.25
"
,
size
=
5
)
plt
.
tight_layout
()
plt
.
savefig
(
f
"
./urban_areas/output/Boxplot
{
mean_VV
}
_
{
title_name
}
.png
"
,
dpi
=
300
)
plt
.
show
()
sns
.
set
(
rc
=
{
'
figure.figsize
'
:(
15
,
8
)})
plt
.
title
(
f
'
Boxplot of sum
{
days
}
-days precipitation (in mm) (
{
title_name
}
)
'
,
fontdict
=
{
'
fontsize
'
:
15
})
sns
.
boxplot
(
data
=
df_plot
[
sum_xdays
],
orient
=
"
h
"
,
color
=
'
Blue
'
)
sns
.
swarmplot
(
data
=
df_plot
[
sum_xdays
],
orient
=
"
h
"
,
color
=
"
.25
"
,
size
=
5
)
plt
.
tight_layout
()
plt
.
savefig
(
f
"
./urban_areas/output/Boxplot
{
sum_xdays
}
_
{
title_name
}
.png
"
,
dpi
=
300
)
plt
.
show
()
## METHOD 3: SCATTERPLOT
sns
.
set
(
rc
=
{
'
figure.figsize
'
:(
15
,
8
)})
plt
.
title
(
f
'
Scatterplot with sum
{
days
}
-days precipitation (in mm) (y-ax) and mean VV (x-ax) (
{
SAR_path
}
)
'
,
plt
.
title
(
f
'
Scatterplot with sum
{
days
}
-days precipitation (in mm) (y-ax) and mean VV (x-ax) (
{
title_name
}
)
'
,
fontdict
=
{
'
fontsize
'
:
15
})
sns
.
scatterplot
(
data
=
df_plot
,
x
=
sum_xdays
,
y
=
mean_VV
)
plt
.
show
()
plt
.
tight_layout
()
plt
.
savefig
(
f
"
./urban_areas/output/Scatterplot_
{
title_name
}
.png
"
,
dpi
=
300
)
plt
.
show
()
return
# METHOD 2: STATISTICALLY CHECK OUTLIERS
...
...
@@ -265,23 +282,22 @@ def mad_method(df, variable_name, threshold=3):
# =============================================================================
# VISUALISE AND EXPORT OUTLIERS
# =============================================================================
def
visualiseStatisticalOutliers
(
SAR_path
,
df_outlier
,
LR_columns
,
mean_VV
,
sum_xdays
,
days
):
def
visualiseStatisticalOutliers
(
title_name
,
df_outlier
,
LR_columns
,
mean_VV
,
sum_xdays
,
days
):
# Get list of indices of both variables (which will be used to marker the outliers)
indexLR_vv
=
df_outlier
.
loc
[
df_outlier
[
LR_columns
[
0
]]
!=
0
].
index
.
to_list
()
indexLR_precip
=
df_outlier
.
loc
[
df_outlier
[
LR_columns
[
1
]]
!=
0
].
index
.
to_list
()
##### VISUALISE RESULTS - SCATTERPLOT
variables
=
[
mean_VV
,
sum_xdays
]
for
idx
in
range
(
len
(
variables
)):
variable
=
variables
[
idx
]
sns
.
set
(
rc
=
{
'
figure.figsize
'
:(
15
,
8
)})
plt
.
title
(
f
'
Scatterplot with outlier Likelihood Ratio (LR) for
{
variable
}
(
{
SAR_path
}
)
'
,
fontdict
=
{
'
fontsize
'
:
15
})
for
idx
in
range
(
len
(
variables
)):
sns
.
set
(
rc
=
{
'
figure.figsize
'
:(
6
,
6
)})
sns
.
scatterplot
(
data
=
df_outlier
,
x
=
sum_xdays
,
y
=
mean_VV
,
hue
=
LR_columns
[
idx
],
style
=
LR_columns
[
idx
],
palette
=
"
deep
"
)
plt
.
legend
(
loc
=
'
upper left
'
)
plt
.
legend
(
loc
=
'
upper right
'
)
plt
.
tight_layout
()
plt
.
savefig
(
f
"
./urban_areas/output/outliers
{
variables
[
idx
]
}
_Scatterplot_
{
title_name
}
.png
"
,
dpi
=
300
)
plt
.
show
()
##### VISUALISE RESULTS - LINEPLOT
...
...
@@ -289,7 +305,7 @@ def visualiseStatisticalOutliers(SAR_path, df_outlier, LR_columns, mean_VV, sum_
fig
,
ax
=
plt
.
subplots
(
figsize
=
(
25
,
5
))
plt
.
xticks
(
np
.
arange
(
0
,
len
(
df_outlier
),
(
len
(
df_outlier
)
*
0.015
)))
[
lab
.
set_rotation
(
90
)
for
lab
in
ax
.
get_xticklabels
()]
plt
.
title
(
f
'
Mean VV backscatter and
{
days
}
-day sum of precipitation in urban area (
{
SAR_path
}
)
'
,
plt
.
title
(
f
'
Mean VV backscatter and
{
days
}
-day sum of precipitation in urban area (
{
title_name
}
)
'
,
fontdict
=
{
'
fontsize
'
:
20
})
ax
.
set_xlabel
(
'
date
'
,
fontdict
=
{
'
fontsize
'
:
15
})
...
...
@@ -323,19 +339,21 @@ def visualiseStatisticalOutliers(SAR_path, df_outlier, LR_columns, mean_VV, sum_
ax2
.
set_ylabel
(
f
"
{
days
}
-day sum precipitation
"
,
fontdict
=
{
'
fontsize
'
:
15
})
## Plot final result
plt
.
show
()
plt
.
tight_layout
()
plt
.
savefig
(
f
"
./urban_areas/output/outliers_Lineplot_
{
title_name
}
.png
"
,
dpi
=
300
)
plt
.
show
()
def
exportOutlierDetection
(
df_outlier
,
SAR_path
):
def
exportOutlierDetection
(
df_outlier
,
title_name
):
# Create directory
dest_path
=
os
.
path
.
join
(
'
urban_areas
'
,
'
output
'
,
'
outlierDetection
'
)
if
not
os
.
path
.
exists
(
dest_path
):
os
.
makedirs
(
dest_path
)
## Write DataFrame with outliers to output folder
csv_path
=
os
.
path
.
join
(
dest_path
,
f
'
outliers_
{
SAR_path
}
.csv
'
)
csv_path
=
os
.
path
.
join
(
dest_path
,
f
'
outliers_
{
title_name
}
.csv
'
)
df_outlier
.
to_csv
(
csv_path
,
encoding
=
'
utf-8
'
,
index
=
False
)
print
(
f
'
\n
The
data
can be found in:
\n
"
{
csv_path
}
"'
)
print
(
f
'
\n
The
outlier table
can be found in:
\n
"
{
csv_path
}
"'
)
return
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment