Commit 0f4e3a26 authored by Helfenstein, Anatol
predictions every 5 cm between 0-2m depth along transect through NL to see SOM and deltaSOM variation over depth
parent 13a088a3
#-------------------------------------------------------------------------------
# Name: 61_soil_maps_predict_QRF_transect.R
# (QRF = quantile regression forest)
#
# Content: - define target variable (response) and target prediction depth
# - read in fitted quantile regression forest (QRF) model
# - read in stack of covariates (predictors)
# - predict response mean and quantiles (e.g. 50th/median, 5th & 95th
# quantiles) across transect
#
# Inputs: - Regression matrix:
# "out/data/model/[TARGET]/[static/dynamic]/tbl_regmat_[].Rds"
# - Fitted QRF: "out/data/model/QRF_fit_[response]_obs[]_p[].Rds"
# - Stack of rasters used for model fitting:
# "out/data/covariates/final_stack/"
# - Transect shapefile: "data/other/transect_depth.shp"
#
# Output: - plots of TARGET variable, delta TARGET variable over time and PI90
# along depth transect
#
# Project: BIS-4D Masterclass
# Author: Anatol Helfenstein
# Updated: 2023-07-14
#-------------------------------------------------------------------------------
### empty memory and workspace; load required packages -------------------------
rm(list = ls())
gc()
pkgs <- c("tidyverse", "raster", "terra", "sf", "ranger", "foreach", "doParallel",
"rasterVis", "RColorBrewer", "viridis", "cowplot")
# "data.table" (possibly required for QRF predict?)
lapply(pkgs, library, character.only = TRUE)
# correct use of CRS according to PROJ 7 and higher, see e.g.:
# https://inbo.github.io/tutorials/tutorials/spatial_crs_coding/
# "raster" pkg still uses old strings even though we designate it correctly, so
# we can suppress this warning as follows:
options(rgdal_show_exportToProj4_warnings = "none")
### Designate script parameters & read in data ---------------------------------
# 1) Specify DSM target soil property:
TARGET = "SOM_per"
# 2) Specify whether to (log) transform, observation quality & dynamic or static model
TRANSFORM = "" # if no transformation of response is needed: empty quotes
OBS_QUAL = "_lab" # i.e. only lab measurements or also field estimates
TIME = "_dyn" # calibration 3D+T model using 2D+T & 3D+T dynamic covariates
TIME_DIR = "dynamic" # either "static" or "dynamic"; directory to save plots
if (TIME == "_dyn") {YEAR = c(1953, 2022)} # specify prediction year if 3D+T model
# 3) Specify target prediction depths [cm]: 5 cm thick layers between 0 and 200 cm
v_d = seq(0, 200, 2.5)
D_UPPER = paste0("d_", seq(v_d[1], min(tail(v_d, n = 3)), 5))
D_UPPER_NUM = v_d[seq(1, length(v_d) - 1, 2)]
D_LOWER = paste0("d_", seq(v_d[3], tail(v_d, n = 1), 5))
D_LOWER_NUM = v_d[seq(3, length(v_d), 2)]
D_MID = paste0("d_", seq(v_d[1], min(tail(v_d, n = 3)), 5),
"_", seq(v_d[3], tail(v_d, n = 1), 5))
D_MID_NUM = v_d[seq(2, length(v_d) - 1, 2)]
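# optional sanity check (illustrative addition, not part of the original workflow):
# the vectors above should describe 40 depth layers of 5 cm thickness with
# midpoints at 2.5, 7.5, ..., 197.5 cm
stopifnot(
  length(D_UPPER) == length(D_LOWER),
  all(D_LOWER_NUM - D_UPPER_NUM == 5),
  all(D_MID_NUM == D_UPPER_NUM + 2.5)
)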
# 4) Regression matrix data containing calibration and validation data
tbl_regmat_target <- read_rds(paste0("out/data/model/", TARGET, "/", TIME_DIR,
"/tbl_regmat_", TARGET, TIME, ".Rds"))
# if model should only include lab measurements, remove field estimates
if (OBS_QUAL == "_lab") {
tbl_regmat_target <- tbl_regmat_target %>%
filter(!BIS_type == "field")
}
# Separate tables for calibration and validation data
tbl_regmat_target_cal <- tbl_regmat_target %>%
filter(split %in% "train")
tbl_regmat_target_val <- tbl_regmat_target %>%
filter(split %in% "test")
# read in transect line shapefile
sf_transect <- st_read("data/other/transect_depth.shp")
# 5) Number of covariates used in model
COV = ncol(dplyr::select(tbl_regmat_target,
-c(split:hor, year, all_of(TARGET))))
# 6) Value of K (in k-fold CV); i.e. number of folds in CV
K = 10
# 7) Specify which (previously fitted) model to use to predict new data:
QRF_FIT <- read_rds(paste0(
"out/data/model/", TARGET, "/", TIME_DIR, "/QRF_fit_", TRANSFORM, TARGET, OBS_QUAL, TIME,
"_obs", nrow(tbl_regmat_target_cal), "_p", COV, "_LLO_", K, "FCV_optimal.Rds"))
# summary of calibrated model (model fit)
QRF_FIT
# 8) Specify which quantiles we want to predict:
# 50th (median), 5th & 95th quantile to calculate 90th prediction interval (PI90)
QUANTILES = c(0.05, 0.50, 0.95)
# 9) Specify number of cores to use for parallel computation
THREADS = parallel::detectCores()
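# note: all foreach loops below use %do% and therefore run sequentially; if they were
# switched to %dopar%, a parallel backend would first need to be registered, e.g.
# (illustrative sketch):
# cl <- parallel::makeCluster(THREADS)
# doParallel::registerDoParallel(cl)
# parallel::stopCluster(cl) # once the parallel work is done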
# locate, read in and stack static covariates
v_dir_cov <- dir("out/data/covariates/final_stack",
pattern = "\\.grd$", recursive = TRUE)
ls_r_cov <- foreach(cov = 1:length(v_dir_cov)) %do%
raster(paste0("out/data/covariates/final_stack/", v_dir_cov[[cov]]))
r_stack_cov <- stack(ls_r_cov)
# select only the static covariates used for this particular TARGET variable
r_stack_cov <- r_stack_cov[[grep("_1km", colnames(tbl_regmat_target), value = TRUE)]]
# locate covariates from which we will derive dynamic (2D+T and 3D+T) covariates
v_cov_dyn_names <- dir("out/data/covariates/final_stack_dyn",
pattern = "\\.grd$", recursive = TRUE) %>%
.[c(1:7, 12:16, 8:11)] # change order of HGNs and LGNs by year of creation
# read in covariates and make stack
ls_r_cov_dyn <- foreach(cov = 1:length(v_cov_dyn_names)) %do%
raster(paste0("out/data/covariates/final_stack_dyn/",
v_cov_dyn_names[[cov]]))
r_stack_cov_dyn <- stack(ls_r_cov_dyn)
# Read in other covariates useful for 3D+T modelling
# r_luc_freq <- raster("out/data/covariates/final_stack_dyn/luc_freq_1km.tif")
r_bodem50_2021_peatcode <- raster("out/data/covariates/final_stack/bodem50_2021_peatcode_1km.grd")
r_bodem50_2006_peatcode <- raster("out/data/covariates/final_stack/bodem50_2006_peatcode_1km.grd")
# Read in metadata of soil map years: original mapping year and year of last update
r_bodem50_2021_update <- raster("data/other/bodem50_2021_update_1km.tif")
r_bodem50_2006_date <- raster("data/other/bodem50_2006_date_1km.tif")
# add to raster stack
r_stack_cov_dyn <- stack(r_stack_cov_dyn,
r_bodem50_2021_peatcode, r_bodem50_2006_peatcode,
r_bodem50_2021_update, r_bodem50_2006_date)
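# optional check (illustrative addition): static and dynamic stacks must share the
# same grid (extent, resolution, CRS) before they are stacked and extracted together
raster::compareRaster(r_stack_cov, r_stack_cov_dyn)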
### Extract covariate values & derive dynamic covariates across transect -------
# extract covariate values (static stack plus dynamic base maps) along the transect
tbl_cov_transect <- raster::extract(stack(r_stack_cov, r_stack_cov_dyn),
sf_transect,
cellnumbers = TRUE)
# cellnumbers = TRUE returns cell numbers, which are used below to derive XY coordinates
# raster::extract() returns a list with one matrix per feature; the transect is a
# single line feature, so keep the first (and only) element and convert it to a tibble
tbl_cov_transect <- as_tibble(tbl_cov_transect[[1]])
# add coordinates cols, remove cell col & remove NA's (water & buildings)
tbl_cov_transect <- tbl_cov_transect %>%
add_column(X = xyFromCell(r_stack_cov, tbl_cov_transect$cell)[,1],
Y = xyFromCell(r_stack_cov, tbl_cov_transect$cell)[,2],
.before = names(r_stack_cov)[1]) %>%
dplyr::select(-cell) %>%
filter(if_all(all_of(names(r_stack_cov)), ~ !is.na(.x)))
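# optional check (illustrative addition): number of 1 km transect cells remaining
# after dropping NA cells (water and built-up areas)
# nrow(tbl_cov_transect)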
# list of regression matrices for target prediction years (e.g. 1953 & 2022)
ls_tbl_cov_transect <- map(YEAR, ~mutate(tbl_cov_transect, year = .x,
.before = names(r_stack_cov)[1]))
# read in dynamic covariate functions
# see also script "R/other/fun_cov_dyn_xyt_xydt.R" to see how functions work
source("R/other/fun_cov_dyn_xyt_xydt.R")
# compute dynamic covariate values
# land use (2D+T): LU_xyt[_delta]
ls_tbl_cov_dyn_transect <- foreach(y = 1:length(ls_tbl_cov_transect)) %do% {
LU_xyt(
data = ls_tbl_cov_transect[[y]],
year = "year",
LU = c("hgn_1900_filled_1km", "hgn_1960_1km", "hgn_1970_1km", "hgn_1980_1km",
"lgn1_1km", "hgn_1990_1km", "lgn2_1km", "lgn3_1km", "lgn4_1km",
"lgn5_1km", "lgn6_1km", "lgn7_1km", "lgn2018_1km", "lgn2019_1km",
"lgn2020_1km", "lgn2021_1km"),
LU_years = list(1913:1930, 1931:1965, 1966:1975, 1976:1982,
1983:1988, 1989:1990, 1991:1994, 1995:1998, 1999:2001,
2002:2005, 2006:2010, 2011:2015, 2016:2018, 2019,
2020, 2021:2022),
LU_year_min = 1953,
LU_year_max = 2022
) %>%
bind_cols(ls_tbl_cov_transect[[y]], .)
}
# peat categories (2D+T): peat[1:8]_xyt
# vector of colnames and add empty cols
v_colnames_peat_xyt <- paste0("peat", 1:8, "_xyt_1km")
ls_tbl_cov_dyn_transect <- map(
ls_tbl_cov_dyn_transect,
~add_column(.x, !!!set_names(as.list(rep(NA, 8)), nm = v_colnames_peat_xyt))
)
# fill in values of new cols using "peat_xyt" function
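# note: the in-place assignment below relies on %do% evaluating the loop body in the
# calling environment, so side effects persist (this would not work with %dopar%)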
foreach(y = 1:length(ls_tbl_cov_dyn_transect)) %do% {
for (i in 1:length(v_colnames_peat_xyt)) {
ls_tbl_cov_dyn_transect[[y]] <- ls_tbl_cov_dyn_transect[[y]] %>%
mutate(across(starts_with(paste0("peat", i)),
~peat_xyt(version1 = bodem50_2006_peatcode_1km,
version2 = bodem50_2021_peatcode_1km,
version1_year = bodem50_2006_date_1km,
version2_year = bodem50_2021_update_1km,
class = i, year = year),
.names = v_colnames_peat_xyt[i]))
}
}
# expand each transect cell into 5 cm depth layers (d_upper, d_lower, d_mid in cm)
ls_tbl_cov_dyn_transect <- foreach(y = 1:length(ls_tbl_cov_dyn_transect)) %do% {
map2(
D_UPPER_NUM, D_LOWER_NUM,
~add_column(ls_tbl_cov_dyn_transect[[y]], d_upper = .x, d_lower = .y, .after = "Y")
) %>%
bind_rows() %>%
arrange(X, Y, d_upper) %>%
mutate(d_mid = d_upper + (d_lower - d_upper)/2, .after = "d_lower")
}
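# optional check (illustrative addition): each transect cell should now have one row
# per 5 cm depth layer
stopifnot(nrow(ls_tbl_cov_dyn_transect[[1]]) ==
            nrow(tbl_cov_transect) * length(D_MID))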
# peat binary variable (3D+T): peat_xydt
ls_tbl_cov_dyn_transect <- foreach(y = 1:length(ls_tbl_cov_dyn_transect)) %do% {
mutate(ls_tbl_cov_dyn_transect[[y]],
peat_xydt_1km = peat_xydt(version1 = bodem50_2006_peatcode_1km,
version2 = bodem50_2021_peatcode_1km,
version1_year = bodem50_2006_date_1km,
version2_year = bodem50_2021_update_1km,
year = year,
d_upper = d_upper,
d_lower = d_lower))
}
# remove LU and peat maps used to derive dynamic covariates and name list
ls_tbl_cov_dyn_transect <- ls_tbl_cov_dyn_transect %>%
map(., ~dplyr::select(.x, -c(str_replace(v_cov_dyn_names, ".grd", ""),
contains("bodem")))) %>%
set_names(paste0("tbl_transect_", c(1953, 2022)))
### Predict TARGET variable along transect for multiple years ------------------
# Use modified ranger function from ISRIC to calculate mean in addition to the
# usual quantiles from QRF
source("R/other/predict_qrf_fun.R")
# predict along the transect for each target year; type = "treepred" returns the
# prediction of every individual tree (rows = prediction locations, columns = trees)
ls_qrf_tree_transect <- foreach(y = 1:length(ls_tbl_cov_dyn_transect)) %do% {
predict.ranger.tree(
QRF_FIT$finalModel,
data = ls_tbl_cov_dyn_transect[[y]],
type = "treepred"
)
}
# compute the specified quantiles and the mean across trees for each prediction location
ls_tbl_pred_transect <- foreach(y = 1:length(ls_qrf_tree_transect)) %do% {
data.frame(t(apply(ls_qrf_tree_transect[[y]]$predictions,
1, quantile, QUANTILES, na.rm = TRUE))) %>%
as_tibble() %>%
rename_all(~ paste0("quant", QUANTILES * 100, "_", YEAR[y])) %>%
# mean prediction across all trees
add_column(pred_mean = apply(ls_qrf_tree_transect[[y]]$predictions,
1, mean, na.rm = TRUE),
.before = paste0("quant5_", YEAR[y]))
}
# add coordinates, depths & name list
ls_tbl_pred_transect <- ls_tbl_pred_transect %>%
map2(., ls_tbl_cov_dyn_transect, ~bind_cols(dplyr::select(.y, X:year), .x)) %>%
set_names(paste0("tbl_pred_", c(1953, 2022)))
# combine into one tibble including delta TARGET values
# note: the rename_with() approach below did not work, hence the manual renaming used instead
# map2(ls_tbl_pred_transect, YEAR,
#      ~rename_with(.x, .fn = ~paste0("pred_mean_", .y), .cols = pred_mean))
# an unnamed tibble passed to mutate() is spliced into separate columns, so this adds
# the 2022 predictions to the 1953 tibble (same-named columns, e.g. pred_mean, are overwritten)
tbl_pred_transect <- mutate(ls_tbl_pred_transect[[1]], ls_tbl_pred_transect[[2]]) %>%
dplyr::select(-year) %>%
rename(pred_mean_2022 = pred_mean) %>%
add_column(pred_mean_1953 = ls_tbl_pred_transect$tbl_pred_1953$pred_mean,
.after = "d_mid") %>%
mutate(pred_mean_delta = pred_mean_2022 - pred_mean_1953,
quant50_delta = quant50_2022 - quant50_1953,
PI90_1953 = quant95_1953 - quant5_1953,
PI90_2022 = quant95_2022 - quant5_2022) %>%
dplyr::select(-c(contains("quant5_"), contains("quant95_")))
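# resulting column order (for reference): X, Y, d_upper, d_lower, d_mid,
# pred_mean_1953, pred_mean_2022, quant50_1953, quant50_2022,
# pred_mean_delta, quant50_delta, PI90_1953, PI90_2022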
# split into list of depth layers for mean, median and PI90 predictions
ls_tbl_pred_transect_d <- map(
colnames(tbl_pred_transect[-(1:5)]),
~dplyr::select(tbl_pred_transect, X, Y, d_mid, all_of(.x)) %>%
group_split(d_mid, .keep = FALSE) %>%
set_names(D_MID)
) %>%
set_names(colnames(tbl_pred_transect[-(1:5)]))
# convert to raster stack for spatial plotting
ls_r_stack_pred <- map(
ls_tbl_pred_transect_d,
~map(.x,
~rasterFromXYZ(.x, crs = r_stack_cov)) %>%
stack(.)
)
# designate depth as "z coordinate" to make hovmoller plot (see below)
ls_r_stack_pred <- map(
ls_r_stack_pred,
~setZ(.x, D_MID_NUM, name = "Depth [cm]")
)
### Plot depth transect (direction vs. depth) ----------------------------------
# use rasterVis::hovmoller func to plot depth transect
# (instead of time on y-axis, as is typical for hovmoller, plot over depth)
# plot depth transect: mean & median predictions for 1953 & 2022
ls_m_transect_pred <- foreach(i = 1:4) %do% {
hovmoller(
ls_r_stack_pred[[i]],
dirXY = x, # x = easting; y = northing
at = c(0, 1, 2.5, 5, 10, 25, 100),
# panel = panel.levelplot.raster,
# interpolate = TRUE,
xscale.components = xscale.raster.subticks,
yscale.components = yscale.raster.subticks,
col.regions = brewer.pal(6, "YlOrBr"),
xlab = "Easting [EPSG:28992]",
ylim = c(200, 0), # reverse depth order from surface to 2m
ylab = "Depth [cm]",
main = c(expression("Mean SOM [%] in 1953"),
expression("Mean SOM [%] in 2022"),
expression("Median SOM [%] in 1953"),
expression("Median SOM [%] in 2022"))[i]
)
}
# save to disk
pdf(paste0("out/maps/target/", TARGET, "/dynamic/pdf_png_gif/m_", TARGET,
OBS_QUAL, "_depth_transect_mean_median_1953_2022.pdf"),
height = 3, width = 10)
print(ls_m_transect_pred) # explicit print() so lattice plots are drawn when sourced
dev.off()
# plot depth transect: delta mean & delta median predictions for 1953-2022
ls_m_transect_pred_delta <- foreach(i = 1:2) %do% {
hovmoller(
ls_r_stack_pred[[i+4]],
dirXY = x,
at = c(-100, -25, -10, -5, -2.5, -1, 1, 2.5, 5, 10, 25, 100),
xscale.components = xscale.raster.subticks,
yscale.components = yscale.raster.subticks,
col.regions = brewer.pal(11, "RdBu"),
xlab = "Easting [EPSG:28992]",
ylim = c(200, 0),
ylab = "Depth [cm]",
main = c(expression("Mean "*Delta*"SOM [%] 1953-2022"),
expression("Median "*Delta*"SOM [%] 1953-2022"))[i]
)
}
# save to disk
pdf(paste0("out/maps/target/", TARGET, "/dynamic/pdf_png_gif/delta_2022_1953/m_", TARGET,
OBS_QUAL, "_depth_transect_delta_mean_median.pdf"),
height = 3, width = 10)
print(ls_m_transect_pred_delta)
dev.off()
# plot depth transect: PI90 predictions for 1953 & 2022
ls_m_transect_PI90 <- foreach(i = 1:2) %do% {
hovmoller(
ls_r_stack_pred[[i+6]],
dirXY = x,
at = 0:100,
xscale.components = xscale.raster.subticks,
yscale.components = yscale.raster.subticks,
col.regions = viridis(n = length(0:100), option = "viridis"),
xlab = "Easting [EPSG:28992]",
ylim = c(200, 0),
ylab = "Depth [cm]",
main = c(expression("PI90 of SOM [%] in 1953"),
expression("PI90 of SOM [%] in 2022"))[i]
)
}
# save to disk
pdf(paste0("out/maps/target/", TARGET, "/dynamic/pdf_png_gif/m_", TARGET,
OBS_QUAL, "_depth_transect_PI90_1953_2022.pdf"),
height = 3, width = 10)
print(ls_m_transect_PI90)
dev.off()
# for paper, plot median 2022 prediction & delta median
# without legend because we use legend from QGIS plots a-d
m_transect_pred50_2022 <- hovmoller(
ls_r_stack_pred$quant50_2022,
dirXY = x,
at = c(0, 1, 2.5, 5, 10, 25, 100),
xscale.components = xscale.raster.subticks,
yscale.components = yscale.raster.subticks,
col.regions = brewer.pal(6, "YlOrBr"),
xlab = NULL, # "Easting [EPSG:28992]",
ylim = c(200, 0),
ylab = "Depth [cm]",
colorkey = FALSE
)
m_transect_pred50_delta <- hovmoller(
ls_r_stack_pred$quant50_delta,
dirXY = x,
at = c(-100, -25, -10, -5, -2.5, -1, 1, 2.5, 5, 10, 25, 100),
xscale.components = xscale.raster.subticks,
yscale.components = yscale.raster.subticks,
col.regions = brewer.pal(11, "RdBu"),
xlab = "Easting [EPSG:28992] along transect",
ylim = c(200, 0),
ylab = "Depth [cm]",
colorkey = FALSE
)
# combine using cowplot
# allow space to left of plots for QGIS legend; format same as QGIS maps
m_transect_pred <- plot_grid(
m_transect_pred50_2022, m_transect_pred50_delta,
nrow = 2, ncol = 1, align = "hv",
axis = "tblr", labels = c("e)", "f)"),
label_fontface = "bold", label_size = 18, label_fontfamily = "arial black"
)
# save plot to disk
ggsave(paste0("out/maps/target/", TARGET, "/dynamic/pdf_png_gif/m_", TARGET,
OBS_QUAL, "_depth_transect_median_2022_delta.png"),
m_transect_pred,
width = 10, height = 5)
File added
PROJCS["RD_New",GEOGCS["GCS_Amersfoort",DATUM["D_Amersfoort",SPHEROID["Bessel_1841",6377397.155,299.1528128]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Double_Stereographic"],PARAMETER["False_Easting",155000.0],PARAMETER["False_Northing",463000.0],PARAMETER["Central_Meridian",5.38763888888889],PARAMETER["Scale_Factor",0.9999079],PARAMETER["Latitude_Of_Origin",52.1561605555556],UNIT["Meter",1.0]]