Commit 5ed3b13d authored by Helfenstein, Anatol

predict SOM for 2 years and 2 depth layers and visualize/plot maps using levelplot

parent 91509a41
@@ -23,7 +23,7 @@ gc()
rm(list=ls())
pkgs <- c("tidyverse", "foreach", "raster", "terra", "sf", "mapview", "ranger",
"caret", "viridis", "hexbin", "ggrepel")
"caret", "viridis", "hexbin", "ggrepel", "rasterVis")
# make sure 'mapview' pkg installed from github to avoid pandoc error:
# remotes::install_github("r-spatial/mapview")
lapply(pkgs, library, character.only = TRUE)
@@ -44,7 +44,7 @@ TIME = "_dyn" # calibration 3D+T model using 2D+T & 3D+T dynamic covariates
TIME_DIR = "dynamic" # either "static" or "dynamic"; directory to save plots
if (TIME == "_dyn") {YEAR = c(1990, 2022)} # specify prediction year if 3D+T model
# 3) Specify target prediction depth [cm]
# (see out/data/covariates/target_depths_[5cm/GSM]):
# (see out/data/covariates/target_depths_GSM):
D_UPPER = c("d_5", "d_60")
D_UPPER_NUM = c(5, 60)
D_LOWER = c("d_15", "d_100")
@@ -108,8 +108,8 @@ r_stack_cov_dyn <- stack(ls_r_cov_dyn)
v_dir_d <- dir("out/data/covariates/target_depths_GSM",
pattern = "d_", recursive = TRUE)
ls_r_d <- foreach(i = 1:length(v_dir_d)) %do%
raster(paste0("out/data/covariates/target_depths_GSM/", v_dir_d[i]))
ls_r_d <- foreach(d = 1:length(v_dir_d)) %do%
raster(paste0("out/data/covariates/target_depths_GSM/", v_dir_d[d]))
r_stack_d <- stack(ls_r_d)
@@ -406,7 +406,7 @@ sr_LU_1910_present <- rast(r_stack_LU_1910_present)
sr_LU_xyt_1km <- imap(YEAR, ~ {
i <- .x
output <- sr_LU_1910_present[[paste0("LU_", i)]]
setNames(output, paste0("LU_", i))
setNames(output, paste0("LU_xyt_", i, "_1km"))
}) %>% rast(.)
# Sidenote: since the output of imap() is a list, rast() combines it into a multilayer SpatRaster
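# (a quick illustrative sketch of that behaviour, using two hypothetical toy rasters:
#   r_toy1 <- rast(nrows = 2, ncols = 2, vals = 1)
#   r_toy2 <- rast(nrows = 2, ncols = 2, vals = 2)
#   nlyr(rast(list(r_toy1, r_toy2)))  # 2, i.e. one SpatRaster with 2 layers)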
@@ -416,43 +416,43 @@ sr_LU_xyt_1km <- imap(YEAR, ~ {
sr_LU_xyt_delta5_1km <- imap(YEAR, ~ {
i <- .x
output <- modal(sr_LU_1910_present[[paste0("LU_", seq(i - 4, i))]], na.rm = TRUE)
setNames(output, paste0("LU_", i, "_delta5"))
setNames(output, paste0("LU_xyt_", i, "_delta5_1km"))
}) %>% rast(.)
# Compute LU_xyt_delta10_1km, here only for 2 of the 70 years, e.g. 1990 & 2022
sr_LU_xyt_delta10_1km <- imap(YEAR, ~ {
i <- .x
output <- modal(sr_LU_1910_present[[paste0("LU_", seq(i - 9, i))]], na.rm = TRUE)
setNames(output, paste0("LU_", i, "_delta10"))
setNames(output, paste0("LU_xyt_", i, "_delta10_1km"))
}) %>% rast(.)
# Compute LU_xyt_delta20_1km, here only for 2 of the 70 years, e.g. 1990 & 2022
sr_LU_xyt_delta20_1km <- imap(YEAR, ~ {
i <- .x
output <- modal(sr_LU_1910_present[[paste0("LU_", seq(i - 19, i))]], na.rm = TRUE)
setNames(output, paste0("LU_", i, "_delta20"))
setNames(output, paste0("LU_xyt_", i, "_delta20_1km"))
}) %>% rast(.)
# Compute LU_xyt_delta40_1km, here only for 2 of the 70 years, e.g. 1990 & 2022
sr_LU_xyt_delta40_1km <- imap(YEAR, ~ {
i <- .x
output <- modal(sr_LU_1910_present[[paste0("LU_", seq(i - 39, i))]], na.rm = TRUE)
setNames(output, paste0("LU_", i, "_delta40"))
setNames(output, paste0("LU_xyt_", i, "_delta40_1km"))
}) %>% rast(.)
# combine dynamic LU covariates for 1990 & 2022 and designate as categorical covariates
sr_LU_xyt_1990_2022_1km <- as.factor(c(sr_LU_xyt_1km,
sr_LU_xyt_delta5_1km,
sr_LU_xyt_delta10_1km,
sr_LU_xyt_delta20_1km,
sr_LU_xyt_delta40_1km))
sr_LU_xyt_all_1km <- as.factor(c(sr_LU_xyt_1km,
sr_LU_xyt_delta5_1km,
sr_LU_xyt_delta10_1km,
sr_LU_xyt_delta20_1km,
sr_LU_xyt_delta40_1km))
# In the following lines of code, we assign proper names to the LU categories/classes...
# Don't worry too much about the code!
# read in tables with reclassified values for dynamic LU covariates
ls_tbl_recl <- foreach(tbl = 1:length(v_cov_dyn_names)) %do%
ls_tbl_recl <- foreach(i = 1:length(v_cov_dyn_names)) %do%
readr::read_csv(paste0("data/covariates/organism/",
gsub(".grd", "", v_cov_dyn_names)[tbl], "_reclassify_dyn.csv"))
gsub(".grd", "", v_cov_dyn_names)[i], "_reclassify_dyn.csv"))
# tibble of all LU classes used in dynamic LU covariates
tbl_LU_xyt_classes <- distinct(ls_tbl_recl[[length(ls_tbl_recl)]][,3:4]) %>%
@@ -462,30 +462,30 @@ tbl_LU_xyt_classes <- distinct(ls_tbl_recl[[length(ls_tbl_recl)]][,3:4]) %>%
# list of tibbles of LU classes specific to each dynamic LU covariate
# (not all contain all classes)
ls_tbl_LU_xyt_classes <- map2(
rep(list(tbl_LU_xyt_classes), nlyr(sr_LU_xyt_1990_2022_1km)),
map(as.list(sr_LU_xyt_1990_2022_1km), ~unique(.x)[,1]),
rep(list(tbl_LU_xyt_classes), nlyr(sr_LU_xyt_all_1km)),
map(as.list(sr_LU_xyt_all_1km), ~unique(.x)[,1]),
~filter(.x, id %in% .y)
)
# save names in an object because, for some reason, changing levels also changes the names (below)
v_LU_xyt_1990_2022_names <- names(sr_LU_xyt_1990_2022_1km)
v_LU_xyt_all_names <- names(sr_LU_xyt_all_1km)
# designate as categorical variables and assign proper classes
foreach(i = 1:length(ls_tbl_LU_xyt_classes)) %do% {
levels(sr_LU_xyt_1990_2022_1km[[i]]) <- as.data.frame(ls_tbl_LU_xyt_classes[[i]])
names(sr_LU_xyt_1990_2022_1km[[i]]) <- v_LU_xyt_1990_2022_names[i] # rename to old names
levels(sr_LU_xyt_all_1km[[i]]) <- as.data.frame(ls_tbl_LU_xyt_classes[[i]])
names(sr_LU_xyt_all_1km[[i]]) <- v_LU_xyt_all_names[i] # rename to old names
}
# list of colors for plotting (don't worry too much about the code)
cols <- c("#57c4ad", "#e6e1bc", "#eda247", "white", "#006164", "#f6d3e8",
"#ffff99", "#386cb0", "#b3589a", "#db4325", "white")
ls_cols <- map2(rep(list(cols), nlyr(sr_LU_xyt_1990_2022_1km)),
ls_cols <- map2(rep(list(cols), nlyr(sr_LU_xyt_all_1km)),
ls_tbl_LU_xyt_classes, ~.x[pull(arrange(.y, id), id)])
# plot and compare different dynamic LU covariates
foreach(i = c(1, 3, 5, 7, 9, 2, 4, 6, 8, 10)) %do% # easier to compare i in this order
plot(sr_LU_xyt_1990_2022_1km[[i]],
col = ls_cols[[i]], main = names(sr_LU_xyt_1990_2022_1km[[i]]))
plot(sr_LU_xyt_all_1km[[i]],
col = ls_cols[[i]], main = names(sr_LU_xyt_all_1km[[i]]))
# any way to get rid of all the unnecessary output in the console?
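# One way to reduce it: foreach() %do% returns a list, and that list is
# auto-printed once the loop finishes. Wrapping the whole call in invisible()
# suppresses the printed list, e.g.:
# invisible(
#   foreach(i = c(1, 3, 5, 7, 9, 2, 4, 6, 8, 10)) %do%
#     plot(sr_LU_xyt_all_1km[[i]],
#          col = ls_cols[[i]], main = names(sr_LU_xyt_all_1km[[i]]))
# )
# A plain for loop (or purrr::walk()) would also avoid the printed list.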
@@ -512,8 +512,8 @@ levels(r_stack_cov$bodem50_2006_peatcode_1km)
n_classes = nrow(r_stack_cov$bodem50_2006_peatcode_1km@data@attributes[[1]]) - 1
# compute peat_xyt (2D+T dynamic covariates) for 1990 and 2022
ls_r_peat_xyt_1km <- foreach(k = 1:n_classes) %do% {
foreach(i = YEAR) %do%
ls_r_peat_xyt_1km <- foreach(i = 1:n_classes) %do% {
foreach(y = YEAR) %do%
# save file to disk
writeRaster(
# here we use the actual peat_xyt function
@@ -521,10 +521,10 @@ ls_r_peat_xyt_1km <- foreach(k = 1:n_classes) %do% {
version2 = r_stack_cov$bodem50_2021_peatcode_1km,
version1_year = r_bodem50_2006_date, # time info
version2_year = r_bodem50_2021_update, # time info
class = k, year = i),
class = i, year = y),
# filename and overwrite if already on disk
paste0("out/data/covariates/final_stack_dyn/year/", i,
"/peat", k, "_xyt_", i, "_1km.tif"),
paste0("out/data/covariates/final_stack_dyn/year/", y,
"/peat", i, "_xyt_", y, "_1km.tif"),
overwrite = TRUE
)
}
@@ -785,129 +785,192 @@ BREAKS_Y = seq(ceiling(MIN_Y/10)*10, floor(MAX_Y/10)*10, by = 10)
for(y in 1:length(YEAR)) {
for(d in 1:length(D_LOWER)) {
# if 3D+T model, locate, read in and stack dynamic covariates to predict over
# LU_xyt covariates
# Locate, read in and stack dynamic LU covariates to predict over
v_dir_cov_dyn_LU <- dir(
paste0("out/data/covariates/final_stack_dyn/year/", YEAR[y]), pattern = "LU_xyt")
# peat[]_xyt covariates
v_dir_cov_dyn_peat_xyt <- dir(
paste0("out/data/covariates/final_stack_dyn/year/", YEAR[y]), pattern = "peat\\d_xyt")
# peat_xydt covariate at specified target depth layer
dir_cov_dyn_peat_xydt <- dir(
paste0("out/data/covariates/final_stack_dyn/year/", YEAR[y]),
pattern = paste0("peat_xydt_", YEAR[y], "_", D_MID[d], "_1km.tif"))
# combine into vector of all necessary dynamic covariates
v_dir_cov_dyn <- c(v_dir_cov_dyn_LU, v_dir_cov_dyn_peat_xyt, dir_cov_dyn_peat_xydt)
ls_r_cov_dyn <- foreach(cov = 1:length(v_dir_cov_dyn)) %do%
ls_r_cov_dyn_LU <- foreach(cov = 1:length(v_dir_cov_dyn_LU)) %do%
raster(paste0("out/data/covariates/final_stack_dyn/year/", YEAR[y], "/",
v_dir_cov_dyn[[cov]]))
v_dir_cov_dyn_LU[[cov]]))
# stack dynamic covariates to predict over
r_stack_cov_dyn <- stack(stack(ls_r_cov_dyn_LU),
r_stack_peat_xyt, r_stack_peat_xydt)
r_stack_cov_dyn <- stack(ls_r_cov_dyn)
# remove dynamic covariates from other prediction years besides YEAR y
r_stack_cov_dyn <- dropLayer(r_stack_cov_dyn,
grep(YEAR[y], names(r_stack_cov_dyn), invert = TRUE))
# rename to same names as used for QRF model calibration
names(r_stack_cov_dyn) <- names(r_stack_cov_dyn) %>%
stringr::str_replace(., paste0("_", YEAR[y]), "") %>%
stringr::str_replace(., paste0("_", D_MID[d]), "")
# at the moment, raster stack still contains all possible depth increments
v_d <- names(r_stack_d)
# remove the upper, midpoint and lower depth boundary from layers that will be removed
v_d_drop <- v_d[!v_d %in% c(D_UPPER[d], D_MID[d], D_LOWER[d])]
# remove other depth layers
r_stack_d_1layer <- dropLayer(r_stack_d, v_d_drop)
# rename to same names as used for QRF model calibration
names(r_stack_d_1layer) <- names(r_stack_d_1layer) %>%
stringr::str_replace(., paste0(D_UPPER[d], "$"), "d_upper") %>%
stringr::str_replace(., paste0(D_MID[d], "$"), "d_mid") %>%
stringr::str_replace(., paste0(D_LOWER[d], "$"), "d_lower")
# combine covariates with GSM depth layers
r_stack_fit <- raster::stack(r_stack_d_1layer, r_stack_cov, r_stack_cov_dyn)
# remove covariates that were not used to fit model (e.g. after RFE)
r_stack_fit <- r_stack_fit[[QRF_FIT$finalModel$forest$independent.variable.names]]
# Predict target soil property using new data (entire NL) -------------
# predict mean prediction for entire NL
r_qrf_pred_mean <- terra::predict(
object = r_stack_fit,
model = QRF_FIT$finalModel,
type = "response",
seed = 2022, # to control randomness
#num.threads = ,
fun = function(model, ...) predict(model, ...)$predictions,
na.rm = TRUE,
cores = 1, # CL,
progress = "text")
# predict 5th, 50th & 95th quantile for entire NL
r_qrf_pred_quant <- terra::predict(
object = r_stack_fit,
model = QRF_FIT$finalModel,
type = "quantiles",
quantiles = c(0.05, 0.5, 0.95),
seed = 2022, # to control randomness
#num.threads = ,
fun = function(model, ...) predict(model, ...)$predictions,
index = c(1, 2, 3),
na.rm = TRUE,
cores = THREADS,
progress = "text")
# name rasters
names(r_qrf_pred_mean) <- names(r_qrf_pred_mean) %>%
stringr::str_replace(., "layer", "pred_mean")
names(r_qrf_pred_quant) <- names(r_qrf_pred_quant) %>%
stringr::str_replace(., "layer.1", "pred5") %>%
stringr::str_replace(., "layer.2", "pred50") %>%
stringr::str_replace(., "layer.3", "pred95")
# calculate 90% prediction interval (PI90)
r_PI90 <- (r_qrf_pred_quant$pred95 - r_qrf_pred_quant$pred5)
# stack all maps of results and rename
r_stack_predictions <- stack(r_qrf_pred_mean, r_qrf_pred_quant, r_PI90)
names(r_stack_predictions) <- names(r_stack_predictions) %>%
stringr::str_replace(., "layer", "PI90")
# save prediction maps to disk as GeoTIFFs
foreach(i = 1:nlayers(r_stack_predictions)) %do%
writeRaster(
r_stack_predictions[[i]],
paste0(
"out/maps/target/", TARGET, "/", TIME_DIR, "/GeoTIFFs/", YEAR[y], "/",
TARGET, "_", YEAR[y], "_", D_MID[d], "_QRF_",
names(r_stack_predictions)[i], ".tif"
),
overwrite = TRUE)
}
# locate, read in & stack rasters containing target prediction depths
v_dir_d <- dir("out/data/covariates/target_depths_GSM",
recursive = TRUE)
ls_r_d <- foreach(i = 1:length(v_dir_d)) %do%
raster(paste0("out/data/covariates/target_depths_GSM/",
v_dir_d[i]))
r_stack_d <- stack(ls_r_d)
# at the moment, raster stack still contains all possible depth increments
v_d <- names(r_stack_d)
# remove the upper, midpoint and lower depth boundary from layers that will be dropped
v_d_drop <- v_d[!v_d %in% c(D_UPPER[d], D_MID[d], D_LOWER[d])]
# drop other depth layers
r_stack_d <- dropLayer(r_stack_d, v_d_drop)
# rename to same names as used for QRF model calibration
names(r_stack_d) <- names(r_stack_d) %>%
stringr::str_replace(., paste0(D_UPPER[d], "$"), "d_upper") %>%
stringr::str_replace(., paste0(D_MID[d], "$"), "d_mid") %>%
stringr::str_replace(., paste0(D_LOWER[d], "$"), "d_lower")
# combine covariates with GSM depth layers
r_stack_fit <- raster::stack(r_stack_d, r_stack_cov, r_stack_cov_dyn)
# remove covariates that were not used to fit model (e.g. after RFE)
r_stack_fit <- r_stack_fit[[QRF_FIT$finalModel$forest$independent.variable.names]]
## Predict target soil property using new data (entire NL) -----------------
# always predict mean prediction for entire NL
r_qrf_pred_mean <- terra::predict(
object = r_stack_fit,
model = QRF_FIT$finalModel,
type = "response",
seed = 2022, # to control randomness
#num.threads = 10L, # to not overload RAM
fun = function(model, ...) predict(model, ...)$predictions,
na.rm = TRUE,
cores = 1, # CL,
progress = "text")
# for GSM depth layers, predict 5th, 50th & 95th quantile for entire NL
r_qrf_pred_quant <- terra::predict(
object = r_stack_fit,
model = QRF_FIT$finalModel,
type = "quantiles",
quantiles = c(0.05, 0.5, 0.95),
seed = 2022, # to control randomness
#num.threads = 10L, # to not overload RAM
fun = function(model, ...) predict(model, ...)$predictions,
index = c(1, 2, 3),
na.rm = TRUE,
cores = THREADS,
progress = "text")
# name rasters
names(r_qrf_pred_mean) <- names(r_qrf_pred_mean) %>%
stringr::str_replace(., "layer", "pred_mean")
names(r_qrf_pred_quant) <- names(r_qrf_pred_quant) %>%
stringr::str_replace(., "layer.1", "pred5") %>%
stringr::str_replace(., "layer.2", "pred50") %>%
stringr::str_replace(., "layer.3", "pred95")
# calculate 90% prediction interval (PI90)
r_PI90 <- (r_qrf_pred_quant$pred95 - r_qrf_pred_quant$pred5)
# stack all maps of results and rename
r_stack_predictions <- stack(r_qrf_pred_mean, r_qrf_pred_quant, r_PI90)
# r_stack_predictions <- stack(r_qrf_pred_mean, r_qrf_pred_quant,
# r_PI90, r_PI90_thresholds)
names(r_stack_predictions) <- names(r_stack_predictions) %>%
stringr::str_replace(., "layer", "PI90")
## Write maps of predictions to disk as GeoTIFFs ---------------------------
# save prediction maps
foreach(n = 1:nlayers(r_stack_predictions)) %do%
writeRaster(
r_stack_predictions[[n]],
paste0(
"out/maps/target/", TARGET, "/", TIME_DIR, "/GeoTIFFs/", YEAR[y], "/",
TARGET, "_", YEAR[y], "_", D_MID[d], "_QRF_",
names(r_stack_predictions)[n], ".tif"
),
overwrite = TRUE)
}
# prepare color scheme for visualizing TARGET prediction maps
# extract min and max values so we can use same color legend for all maps
response_min = round(min(minValue(r_stack_predictions)))
response_max = round(max(maxValue(r_stack_predictions)))
# extract min and max values of PI90 so we can use the same color legend for all maps
PI90_min = round(min(minValue(r_stack_predictions$PI90)))
PI90_max = round(max(maxValue(r_stack_predictions$PI90)))
# define interval (smallest step in SOM [%] to visualize on map and in color scheme)
interval = 0.1
# vectors that will define the global color scheme for prediction quantiles and PI90
v_col_pred <- seq(response_min, response_max, interval)
v_col_PI90 <- seq(PI90_min, PI90_max, interval)
# colors for TARGET soil property
COLOR = hcl.colors(n = length(v_col_pred), palette = "YlOrBr", rev = TRUE)
# plot TARGET soil property for year y and depth layer d using rasterVis::levelplot()
levelplot(
r_stack_predictions$pred_mean, # mean predictions
margin = FALSE,
scales = list(draw = FALSE),
at = v_col_pred,
col.regions = COLOR,
par.settings = list(axis.line = list(col = 0),
strip.background = list(col = "white")),
main = paste0("Predicted SOM [%] (mean) for ", YEAR[y], " from ", D_UPPER_NUM[d], "-", D_LOWER_NUM[d], "cm")
)
# NOTE that the color scale is not ideal, because it is very hard to differentiate the
# majority of soils with <10% SOM (especially when plotting the lower quantiles, see below);
# for this we will later use a non-continuous/discrete color scale...
levelplot(
r_stack_predictions$pred50, # median (50th quantile) predictions
margin = FALSE,
scales = list(draw = FALSE),
at = v_col_pred,
col.regions = COLOR,
par.settings = list(axis.line = list(col = 0),
strip.background = list(col = "white")),
main = paste0("Predicted SOM [%] (median) for ", YEAR[y], " from ", D_UPPER_NUM[d], "-", D_LOWER_NUM[d], "cm")
)
levelplot(
r_stack_predictions$pred5, # 5th quantile predictions
margin = FALSE,
scales = list(draw = FALSE),
at = v_col_pred,
col.regions = COLOR,
par.settings = list(axis.line = list(col = 0),
strip.background = list(col = "white")),
main = paste0("Predicted SOM [%] (5th quantile) for ", YEAR[y], " from ", D_UPPER_NUM[d], "-", D_LOWER_NUM[d], "cm")
)
levelplot(
r_stack_predictions$pred95, # 95th quantile predictions
margin = FALSE,
scales = list(draw = FALSE),
at = v_col_pred,
col.regions = COLOR,
par.settings = list(axis.line = list(col = 0),
strip.background = list(col = "white")),
main = paste0("Predicted SOM [%] (95th quantile) for ", YEAR[y], " from ", D_UPPER_NUM[d], "-", D_LOWER_NUM[d], "cm")
)
# the 5th and 95th quantiles give us an idea of the uncertainty of our predictions;
# to express that uncertainty in one single metric, the 90% prediction interval (PI90) is easier to use
levelplot(
r_stack_predictions$PI90, # 90% prediction interval (PI90)
margin = FALSE,
scales = list(draw = FALSE),
at = v_col_PI90,
col.regions = viridis(n = length(v_col_PI90), option = "viridis"),
par.settings = list(axis.line = list(col = 0),
strip.background = list(col = "white")),
main = paste0("90th PI of SOM [%] predictions for ", YEAR[y], " from ", D_UPPER_NUM[d], "-", D_LOWER_NUM[d], "cm")
)
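# As noted above, the continuous scale makes soils with <10% SOM hard to tell apart;
# below is a minimal sketch of a discrete alternative (the class breaks are
# illustrative only and assume most pixels fall below ~10% SOM)
at_discrete <- sort(unique(c(response_min, 1, 2, 3, 5, 7.5, 10, 15, 25, 50, response_max)))
levelplot(
r_stack_predictions$pred50, # median (50th quantile) predictions with discrete breaks
margin = FALSE,
scales = list(draw = FALSE),
at = at_discrete,
col.regions = hcl.colors(n = length(at_discrete) - 1, palette = "YlOrBr", rev = TRUE),
par.settings = list(axis.line = list(col = 0),
strip.background = list(col = "white")),
main = paste0("Predicted SOM [%] (median, discrete scale) for ", YEAR[y], " from ", D_UPPER_NUM[d], "-", D_LOWER_NUM[d], "cm")
)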
# FOR QUARTO TUTORIAL, TIME LAPSE VIDEOS COME HERE!