# Title: select relevant DW tiles
# Name: Zina Janssen
# Created: 20/09
# Description: this script reads the tab delimited data and uses it to keep the useful tiles
### 1 Initializing ####
# load the required functions
# make output dirs
relevantFiles <- "1_int_fileSelect"
if (!dir.exists(relevantFiles)) {dir.create(relevantFiles)}
# load functions
source('../funcs/0_checkLoadPackages.R')
source("../funcs/1_in_writeFiles.R")
# load and install all required packages
listOfPackages<-c("sf","terra","dplyr","parallel","geodata")
lapply(listOfPackages, checkInstall) -> output
# you can download the DW training data from https://download.pangaea.de/dataset/933475/allfiles.zip
# this requires a PANGAEA account
# subsequently, all folders with the "WH" tag were manually removed, as the AOI lies in the Eastern Hemisphere
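# a hedged sketch of doing that removal in code instead (the tile root path and "WH" folder naming are assumptions):
# whDirs <- list.dirs("../../DW/1_in_DW", recursive = FALSE)
# unlink(whDirs[grepl("WH", basename(whDirs))], recursive = TRUE)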
# load the required data
researchArea <- geodata::gadm("Spain", level = 0, path = ".")
rasterNameList <- list.files(path="1_int_Reprojected",pattern = glob2rx("*.tif"),full.names=T)
# sanity check on a single tile: "disjoint" is FALSE when the tile overlaps the research area
raster<- rast(rasterNameList[100])
if(!relate(raster,researchArea,relation="disjoint")[1]){print("tile overlaps the research area")}
### 2 Processing ####
# loop over all the folder numbers and keep only the tiles that are
# relevant for our locations
LCnums <- seq(from=1,to=14)
# rasters are reprojected and then it is checked if they are within the research area
# if so they are stored
# do the processing on the expert tiles
foldname <- "Experts_tiles/"
mclapply(LCnums,fileClean,researchArea=researchArea,outputdir=relevantFiles,foldname=foldname) -> outputSamples
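# note: mclapply defaults to getOption("mc.cores", 2L) workers; on Windows only
# mc.cores = 1 (sequential execution) is supported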
# do the same for the non expert tiles
foldname <- "Non_expert_tiles/"
mclapply(LCnums,fileClean,researchArea=researchArea,outputdir=relevantFiles,foldname=foldname) -> outputSamples
# Title: Reduce the CGLS data set based on ESA
# Name: Zina Janssen
# Created: 14/03
# Description: this script reads the CGLS points with the added ESA classes and removes the
# CGLS points where the classes do not overlap
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
source('../funcs/1_out_classFrequencyTable.R')
# load all packages
listOfPackages <- c("sf","dplyr","purrr","terra")
lapply(listOfPackages, checkInstall) -> output
# create the output dir and load the input dir
CGLSext <- "1_in_ESACGLS"
CGLSoutputs <- "../../CGL-100/1_out_CGLS"
if (!dir.exists(CGLSoutputs)) {dir.create(CGLSoutputs)}
# load the complete data set and the shapefile used for subsetting
filename<-file.path(CGLSoutputs,'CGLSwithESA.shp')
CGLS <- st_read(filename)
# now change all the land cover labels
# snow and ice and mangroves were not present so these changes were not made
CGLS[CGLS$Map == 10,]$Map <- 3
CGLS[CGLS$Map == 20,]$Map <- 4
CGLS[CGLS$Map == 30,]$Map <- 5
CGLS[CGLS$Map == 40,]$Map <- 2
CGLS[CGLS$Map == 50,]$Map <- 1
CGLS[CGLS$Map == 60,]$Map <- 6
#CGLS[CGLS$Map == 70,]$Map <- 6
CGLS[CGLS$Map == 80,]$Map <- 7
CGLS[CGLS$Map == 90,]$Map <- 8
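# the sequential recoding above is safe only because the new codes (1-8) never collide
# with the remaining old codes (10-90); an equivalent lookup-table version makes that explicit:
# esaToLucas <- c(`10`=3,`20`=4,`30`=5,`40`=2,`50`=1,`60`=6,`80`=7,`90`=8)
# CGLS$Map <- unname(esaToLucas[as.character(CGLS$Map)])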
# flag the rows of CGLS where the label equals the ESA map class
CGLS$equal <- CGLS$LC1_lbl == CGLS$Map
equal<-CGLS[which(CGLS$equal),]
# save the first reduced version
st_write(equal,file.path(CGLSoutputs,"1_out_CGLSESAcorrected.shp"),append=F)
classFreqTable(equal$LC1_lbl,file.path(CGLSoutputs,"1_out_CGLSClassFreqExt.csv"))
# Title: load and mosaic tiff files
# Name: Zina Janssen
# Created: 29/09
### this is the script for loading the DW tiles and sampling them s.t. they become points
### 1 Initializing ####
# load functions
source('../funcs/0_checkLoadPackages.R')
source('../funcs/1_out_DWsampleTiles.R')
source('../funcs/1_out_classFrequencyTable.R')
# load and install all required packages
listOfPackages<-c("stars","sf","terra","purrr", "dplyr","readr","future","geodata")
lapply(listOfPackages, checkInstall) -> output
# make output dirs
fileSel <- "1_int_fileSelect"
if (!dir.exists(fileSel)) {dir.create(fileSel)}
DWtrainingdata <- "../../DW/1_out_DW"
if (!dir.exists(DWtrainingdata)) {dir.create(DWtrainingdata)}
# load the required data
researchArea <- geodata::gadm("Spain", level = 0, path = ".")
sf_AOI <- st_as_sf(researchArea)
#### 2 Processing ####
# make a buffer to ensure inclusion of sufficient TD points
AOIbuf<- st_buffer(sf_AOI,dist =5000)
#### 2.1 Sampling ####
# now we will sample from the tiles while conserving the class distribution that is present
rasterNameList <- list.files(path=fileSel,pattern = glob2rx("*.tif"),full.names=T)
# run the sampling and write csv files
plan(multisession, workers = 11)
furrr::future_map(rasterNameList,.f=propStratSample,sampleSize=10,outputfold=DWtrainingdata) -> outputSamples
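# note: workers = 11 assumes at least that many local cores;
# future::availableCores() - 1 adapts the worker count to the machine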
# this code reads the csvs with the sampled points and creates a large csv file
# code adapted from https://www.statology.org/r-merge-csv-files/
pointsWithValues <- list.files(path=DWtrainingdata,full.names = T) %>%
lapply(read_csv,show_col_types = FALSE) %>%
bind_rows
# now remove all the intermediate files
list.files(path=DWtrainingdata,full.names = T) %>% lapply(file.remove)
# now harmonize the land cover classes to the LUCAS legend
# remove the no data and cloud classes
pointsWithValues<- pointsWithValues[which(pointsWithValues$class != 0 & pointsWithValues$class != 10),]
# transform the classes to the lucas classes
pointsWithValues$LC1_lbl <- NA
pointsWithValues[pointsWithValues$class == 1,]$LC1_lbl <- 7
pointsWithValues[pointsWithValues$class == 2,]$LC1_lbl <- 3
pointsWithValues[pointsWithValues$class == 3,]$LC1_lbl <- 5
pointsWithValues[pointsWithValues$class == 4,]$LC1_lbl <- 8
pointsWithValues[pointsWithValues$class == 5,]$LC1_lbl <- 2
pointsWithValues[pointsWithValues$class == 6,]$LC1_lbl <- 4
pointsWithValues[pointsWithValues$class == 7,]$LC1_lbl <- 1
pointsWithValues[pointsWithValues$class == 8,]$LC1_lbl <- 6
pointsWithValues[pointsWithValues$class == 9,]$LC1_lbl <- 6
# make an sf object and subset it to the AOI
spatPoints <- st_as_sf(pointsWithValues,coords=c("x","y"),crs=st_crs(4326))
subsetDW <- st_filter(spatPoints,AOIbuf)
# save the frequency table
filename <- file.path(DWtrainingdata,"1_out_DWClassFreqSpain.csv") # name must match what the plotting script reads
classFreqTable(subsetDW$LC1_lbl,filename)
# save the shapefile
sf::st_write(subsetDW,file.path(DWtrainingdata,"1_in_DWTrainingPoints.shp"),driver="ESRI Shapefile",append=F,delete_layer=T)
# Title: Prepare CGLS training data
# Name: Zina Janssen
# Created: 09/11
# Description: this script loads and subsets the CGLS data. The data are not freely available
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
source('../funcs/1_out_classFrequencyTable.R')
source('../funcs/1_in_extractCGLSpoints.R')
# load all packages
listOfPackages <- c("sf","dplyr","stars","purrr","terra", "future")
lapply(listOfPackages, checkInstall) -> output
# create output dir
CGLSoutputs <- "../../CGL-100/1_out_CGLS"
if (!dir.exists(CGLSoutputs)) {dir.create(CGLSoutputs)}
# set the country that will be used for subsetting
Country <- "Spain"
# load the complete data set and the shp with which the data set can be subset
fileLoc <- "../../CGL-100/training_data_2015_100m_20190402_V4.csv"
# load the file
CGLS <- read.csv(fileLoc)
# load the aoi
researchArea <- geodata::gadm(Country, level = 0, path = tempdir())
sf_AOI <- st_as_sf(researchArea)
# buffer so all points are included
countryBuf <- st_buffer(sf_AOI,dist =5000)
# subset the points using the outline of Spain
CGLSspat <- st_as_sf(CGLS,coords=c("x","y"))
st_crs(CGLSspat) <- 4326
subsetCGLS <- CGLSspat[countryBuf,]
# now do some harmonization
# remove the unsure class
subsetCGLSsubs<- subsetCGLS[which(subsetCGLS$dominant_lc != "not_sure"),]
# transform the classes to the lucas classes
subsetCGLSsubs$LC1_lbl <- NA
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "grassland",]$LC1_lbl <- 5
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "tree",]$LC1_lbl <- 3
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "wetland_herbaceous",]$LC1_lbl <- 8
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "crops" ,]$LC1_lbl <- 2
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "shrub",]$LC1_lbl <- 4
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "bare",]$LC1_lbl <- 6
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "water",]$LC1_lbl <- 7
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "urban_built_up",]$LC1_lbl <- 1
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "urban/built-up" ,]$LC1_lbl <- 1
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "fallow_shifting_cultivation" ,]$LC1_lbl <- 2
# remove the redundant columns
subsetCGLSsubs <- subset(subsetCGLSsubs,select=-c(bare,burnt,crops,fallow_shifting_cultivation,grassland,lichen_and_moss,shrub,
snow_and_ice,tree,urban_built_up,water,wetland_herbaceous,not_sure,
forest_type))
# save the subset as a shp
sf::st_write(subsetCGLSsubs,file.path(CGLSoutputs,"1_in_CGLSTD.shp"),driver="ESRI Shapefile",append=F)
# save the frequency table
filename <- file.path(CGLSoutputs,paste0("1_out_CGLSClassFreq",Country,".csv"))
classFreqTable(subsetCGLSsubs$LC1_lbl,filename)
# now we will extend the number of points
## first only select the relevant columns
CGLSwithLCLabel <- subset(subsetCGLSsubs,select=c(geometry,LC1_lbl))
#now buffer so we have a surface to extract from
CGLSbuffer <- st_buffer(CGLSwithLCLabel,dist = 100)
# now sample from all the buffered points
# ensure all but one cores are used for processing
workers <- parallel::detectCores() - 1
plan(multisession, workers = workers)
CGLSpoints <- CGLSbuffer %>%
group_by(geometry) %>%
group_split() %>%
furrr::future_map(.f=extractPoint,sampSize=10) # sampSize of 10 points per buffer is an assumed value
CGLSpoints <- bind_rows(CGLSpoints)
# now combine this with the existing points
completeData <- rbind(CGLSwithLCLabel,st_as_sf(CGLSpoints,coords=c("x","y"),crs=st_crs(CGLSwithLCLabel)))
CGLSextendedPoints<-st_as_sf(CGLSpoints,coords=c("x","y"),crs=st_crs(CGLSwithLCLabel))
# also save the new CGLS TD
sf::st_write(completeData,file.path(CGLSoutputs,"1_in_CGLSTDextended.shp"),driver="ESRI Shapefile",append=F)
#### NOTE THAT FILTERING BASED ON OVERLAP WITH ESA CLASSES IS STILL REQUIRED FOR QUALITY ASSURANCE
# save the frequency table
filename <- file.path(CGLSoutputs,paste0("1_out_CGLSExtendedClassFreq",Country,".csv"))
classFreqTable(completeData$LC1_lbl,filename)
# Title: Create barplots
# Name: Zina Janssen
# Created: 16/03
# Description: this script takes frequency and area tables and creates barplots and treemaps
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
listOfPackages <- c("ggplot2","scales","treemapify","extrafont","readr","tidyr")
lapply(listOfPackages, checkInstall) -> output
source('../funcs/1_out_treeMaps.R')
# link to output dirs
## change this to where you have saved the data
CGLSdir<- "../../CGL-100/1_out_CGLS"
DWdir <- "../../DW/1_out_DW"
LUCASdir <- "../../LUCAS/1_out_LUCAS"
outAreaTables <- '../2_difference_evaluation/2_out_AreaTables'
outAreaTables3 <- '../2_difference_evaluation/3_out_AreaTables'
# create output directories
outputfigures <- "../../../figures/1_out_plots"
if (!dir.exists(outputfigures)) {dir.create(outputfigures)}
outputfigures3 <- "../../../figures/3_out_plots"
if (!dir.exists(outputfigures3)) {dir.create(outputfigures3)}
# load input data
CGLS <- read.table(file.path(CGLSdir,"1_out_CGLSExtendedClassFreqSpain.csv"),sep=",")
DW <- read.table(file.path(DWdir,"1_out_DWClassFreqSpain.csv"),sep=",")
LUCAS <- read.table(file.path(LUCASdir,"1_out_LUCASClassFreqSpain.csv"),sep=",")
classArea <- read_csv(file.path(outAreaTables,"2_out_classAreaForPlot.csv"))
areaPercentages <- read_csv(file.path(outAreaTables,"2_out_areaPercentages.csv"),col_select = -c(...1))
classAreaTable <- read_csv(file.path(outAreaTables3,"3_out_classAreaTable.csv"))
classArea3 <- read_csv(file.path(outAreaTables3,"3_out_classAreaForPlot.csv"))
areaPercentages3 <- read_csv(file.path(outAreaTables3,"3_out_areaPercentages.csv"),col_select = -c(...1))
# transform the tables to make them suitable for plotting
CGLSplot <- pivot(CGLS,"CGLS")
DWplot <- pivot(DW,"DW")
LUCASplot <- pivot(LUCAS,"LUCAS")
plotDF <- rbind(CGLSplot,DWplot,LUCASplot)
# transform the area data to make them suitable for plotting as well
CGLSarea <- pivot(classArea[1,],"CGLS")
DWarea<- pivot(classArea[2,],"DW")
LUCASarea<- pivot(classArea[3,],"LUCAS")
areaDF <- rbind(CGLSarea,DWarea,LUCASarea)
# transform the area data to make them suitable for treemaps as well
CGLSareaTr <- pivot2(classAreaTable[1:2,],"CGLS")
DWareaTr<- pivot2(classAreaTable[3:4,],"DW")
LUCASareaTr<- pivot2(classAreaTable[5:6,],"LUCAS")
# transform the satellite area data to make them suitable for plotting as well
S1CGarea <- pivot(classArea3[1,],"S1")
L8CGarea <- pivot(classArea3[2,],"L8")
S2CGarea <- CGLSarea
S2CGarea$name <- "S2"
areaCGLSsat <- rbind(S1CGarea,L8CGarea,S2CGarea)
S1DWarea <- pivot(classArea3[3,],"S1")
L8DWarea <- pivot(classArea3[4,],"L8")
S2DWarea <- DWarea
S2DWarea$name <- "S2"
areaDWsat <- rbind(S1DWarea,L8DWarea,S2DWarea)
S1LUarea <- pivot(classArea3[5,],"S1")
L8LUarea <- pivot(classArea3[6,],"L8")
S2LUarea <- LUCASarea
S2LUarea$name <- "S2"
areaLUCASsat <- rbind(S1LUarea,L8LUarea,S2LUarea)
# make the spatial consistency data usable for plotting as well
percentageDf <- areaPercentages[grepl(glob2rx("%*"),areaPercentages$class),]
percentageDf <- percentageDf %>% replace_na(list(`3` =0))
# do the same for the satellite spatial consistency data
percentageDf3 <- areaPercentages3[grepl(glob2rx("%*"),areaPercentages3$class),]
percentageDf3 <- percentageDf3 %>% replace_na(list(`3` =0))
# create the treemaps
MakeTreeMap(CGLSplot,"CGLS")
MakeTreeMap(DWplot,"DW")
MakeTreeMap(LUCASplot,"LUCAS")
# create the treemaps for class areas
MakeTreeMap(CGLSareaTr,"CGLS based Land Cover Map")
MakeTreeMap(DWareaTr,"DW based Land Cover Map")
MakeTreeMap(LUCASareaTr,"LUCAS based Land Cover Map")
# make the barplot with all the classfrequencies
pl<-ggplot(data=plotDF, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
#theme_grey(base_family = "Times") +
#theme_minimal(base_family = "Times") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,36)+
ggtitle("Comparison of class frequency distributions")
pl <- pl + theme(axis.text = element_text(size = 15),axis.title = element_text(size = 15))
ggsave(pl,file=file.path(outputfigures,"frequencyBarPlot.png"),dpi=200)
# make the barplot with all the class frequencies for the classified image
pl<-ggplot(data=areaDF, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,48)+
ggtitle("Class distributions in land cover maps based on different training data")
pl <- pl + theme(axis.text = element_text(size = 15),axis.title = element_text(size = 15))
ggsave(pl,file=file.path(outputfigures,"classAreaBarPlot.png"),dpi=200)
# make a combined plot for the satellite images
# make the barplot with all the class frequencies for the classified image
pla<-ggplot(data=areaDF, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,48)+
ggtitle("Class area based on different training data")
pla <- pla + theme(axis.text = element_text(size = 10),axis.title = element_text(size = 10))
pl<-ggplot(data=areaCGLSsat, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,48)+
ggtitle("CGLS based class area with different RS input")
pl <- pl + theme(axis.text = element_text(size = 10),axis.title = element_text(size = 10))
pli<-ggplot(data=areaDWsat, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,48)+
ggtitle("DW based class area with different RS input")
pli <- pli + theme(axis.text = element_text(size = 10),axis.title = element_text(size = 10))
plo<-ggplot(data=areaLUCASsat, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,48)+
ggtitle("LUCAS based class area with different RS input")
plo <- plo + theme(axis.text = element_text(size = 5),axis.title = element_text(size = 5))
# arrange the four panels in a 2x2 grid
pz<-ggarrange(pla,pl,pli, plo, ncol=2, nrow = 2, common.legend = F, legend="right")
ggsave(plot=pz,file=file.path(outputfigures,"classAreas.png"),dpi=200,width = 9.1)
# Title: add copernicus attributes to the polygons, harmonize the data and save the TD points
# Name: Zina Janssen
# Created: 26/09
### This script loads the LUCAS polygons and adds the respective attributes based on point id
# the points are then saved and subset to the location of interest
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
source('../funcs/1_out_classFrequencyTable.R')
# load packages
listOfPackages <- c("sf","dplyr","readr","purrr","terra")
lapply(listOfPackages, checkInstall) -> output
# create directories for storing the outputs
LUCASint <- "../../LUCAS/1_out_LUCAS/data"
if (!dir.exists(LUCASint)) {dir.create(LUCASint)}
LUCASoutputs <- "../../LUCAS/1_out_LUCAS"
if (!dir.exists(LUCASoutputs)) {dir.create(LUCASoutputs)}
# load the AOI
researchArea <- geodata::gadm("Spain", level = 0, path = ".")
sf_AOI <- st_as_sf(researchArea)
# the buffer is 5km
countryBuf <- st_buffer(sf_AOI,dist =5000)
## Download the input data and load them
url_lucasharmo2006_survey<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/1_table/lucas_harmo_uf_2006.zip'
url_lucasharmo2009_survey<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/1_table/lucas_harmo_uf_2009.zip'
url_lucasharmo2012_survey<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/1_table/lucas_harmo_uf_2012.zip'
url_lucasharmo2015_survey<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/1_table/lucas_harmo_uf_2015.zip'
url_lucasharmo2018_survey<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/1_table/lucas_harmo_uf_2018.zip'
url_lucas_geometry<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/2_geometry/LUCAS_th_geom.zip'
# Download LUCAS harmo tables
urlList <- list(url_lucasharmo2006_survey,url_lucasharmo2009_survey,url_lucasharmo2012_survey,url_lucasharmo2015_survey,url_lucasharmo2018_survey,url_lucas_geometry)
# note: only the 2018 file contains the Copernicus module; the geometries and all survey years are downloaded below
downloadFromUrl <- function(file,outputDir){
destfile<-file.path(outputDir,'data',basename(file))
if(!file.exists(destfile)){
download.file(file,destfile=destfile)}
}
# download all survey information
lapply(urlList, downloadFromUrl,outputDir=LUCASoutputs) -> output
#### 2 Processing ####
# Read the shapefile with the theoretical point locations
unzip(file.path(LUCASint,basename(url_lucas_geometry)),exdir=file.path(LUCASoutputs,'data'))
lucas_geom<-st_read(file.path(LUCASoutputs,'data','LUCAS_th_geom.shp')) # read the grid
# only select the points from 2018
lucas_geom <- lucas_geom[lucas_geom$YEAR=="2018",]
# load the survey results
unzipFiles <- function(name,outdir){
unzip(file.path(outdir,'data',basename(name)),exdir=file.path(outdir,'data'))
}
# unzip all the files and store the names in a list
fileNameList <- unlist(lapply(urlList, unzipFiles,outdir=LUCASoutputs))
# load all the csvs
Luc2006<- read_csv(fileNameList[[1]])
Luc2009<- read_csv(fileNameList[[2]])
Luc2012<-read_csv(fileNameList[[3]])
Luc2015<-read_csv(fileNameList[[4]])
Luc2018<-read_csv(fileNameList[[5]])
# remove redundant columns, some of which are corrupted
cleanFile <- function(file){
# following the approach of Pflugmacher et al. (2019) we remove these classes
listOfExcl <- c("A22",'A39',"B55","E30","F40")
new_df <- subset(file, !(lc1 %in% listOfExcl))
subset <- subset(new_df, select = c(point_id,letter_group))
names(subset) <- c("point_id","LC1_label")
return(subset)
}
files <- list(Luc2006,Luc2009,Luc2012,Luc2015,Luc2018)
cleanFiles <- lapply(files, cleanFile)
# Merge LUCAS grid (point locations) with the cleaned LUCAS survey data
CopPoly<-merge(x = lucas_geom, y = cleanFiles[[5]], by.x = "POINT_ID",by.y = "point_id") # cleanFiles[[5]] is the cleaned 2018 table, which carries the LC1_label column used below
# subset the lucas points to the AOI
CopPointsSub <- CopPoly[countryBuf,]
# change the letters to numbers which is required for GEE
CopPointsSub[which(CopPointsSub$LC1_label == "A"),]$LC1_label <- "1"
CopPointsSub[which(CopPointsSub$LC1_label == "B"),]$LC1_label <- "2"
CopPointsSub[which(CopPointsSub$LC1_label == "C"),]$LC1_label <- "3"
CopPointsSub[which(CopPointsSub$LC1_label == "D"),]$LC1_label <- "4"
CopPointsSub[which(CopPointsSub$LC1_label == "E"),]$LC1_label <- "5"
CopPointsSub[which(CopPointsSub$LC1_label == "F"),]$LC1_label <- "6"
CopPointsSub[which(CopPointsSub$LC1_label == "G"),]$LC1_label <- "7"
CopPointsSub[which(CopPointsSub$LC1_label == "H"),]$LC1_label <- "8"
# change the column to numeric values, otherwise Earth Engine does not work
CopPointsSub$LC1_label <- as.numeric(CopPointsSub$LC1_label)
# ensure there are no duplicates
CopPointsSorted <- CopPointsSub[order(CopPointsSub$YEAR),]
CopPointsSubUn <- CopPointsSorted[!duplicated(CopPointsSorted,fromLast=T),]
# remove redundant columns
CopPointsSubUn <- subset(CopPointsSubUn, select = c(LC1_label,geometry))
# save shapefile
sf::st_write(CopPointsSubUn,file.path(LUCASoutputs,"1_out_LUCASTDPoints.shp"),append=F)
# create frequency table
CopPointsSubUn <- st_read(file.path(LUCASoutputs,"1_out_LUCASTDPoints.shp"))
filename <- file.path(LUCASoutputs,"1_out_LUCASClassFreqSpain.csv") # name must match what the plotting script reads
classFreqTable(CopPointsSubUn$LC1_label,filename)
# Title: Create maps for displaying the training data
# Name: Zina Janssen
# Created: 03/03
# Description: this script loads training data points and creates maps that visualize the point locations together with the total number of points
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
# load all packages
listOfPackages <- c("sf","readr","terra")
lapply(listOfPackages, checkInstall) -> output
source('../funcs/1_out_TDlocMaps.R')
# get storage dirs
CGLSdir<- "../../CGL-100/1_out_CGLS"
DWdir <- "../../DW/1_out_DW"
LUCASdir <- "../../LUCAS/1_out_LUCAS"
# create outputdirs
outputmaps <- "../../../figures/1_out_TDmaps"
if (!dir.exists(outputmaps)) {dir.create(outputmaps)}
# load training points
CGLS <- sf::st_read(file.path(CGLSdir,"1_out_CGLSESAcorrected.shp"))
DW <- sf::st_read(file.path(DWdir,"1_in_DWTrainingPoints.shp"))
LUCAS <- st_read(file.path(LUCASdir,"1_out_LUCASTDPoints.shp"))
#### 2 Processing ####
# now create the maps with the td locations
makeTDLocMap(CGLS,outfold=outputmaps,outname="1_out_CGLSTD.pdf","Copernicus Global Land Services (CGLS) Training Data")
# DW normal and the changed class distribution
makeTDLocMap(DW,outfold=outputmaps,outname="1_out_DWTD.pdf","Dynamic World (DW) Training Data")
# LUCAS normal and the changed class distribution
makeTDLocMap(LUCAS,outfold=outputmaps,outname="1_out_LUCASTD.pdf","Land Use/Cover Area frame Survey (LUCAS) Training Data",pointcex=0.1)
# Title: create mosaic of the FROM GLC tiles
# Name: Zina Janssen
# Created: 06/12
### this script loads the FROM-GLC map and finds the most heterogeneous areas
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
listOfPackages <- c("sf","dplyr","purrr","terra")
lapply(listOfPackages, checkInstall) -> output
## create dirs to store the output map
# you can download the FROM GLC tiles through http://data.ess.tsinghua.edu.cn/
# it is best to select the tiles that correspond with the coordinates of your AOI
LCMapsin <- "../../FROM-GCLS/2_in_GLCS"
if (!dir.exists(LCMapsin)) {dir.create(LCMapsin)}
LCMapsOut <- "2_out_GLCS"
if (!dir.exists(LCMapsOut)) {dir.create(LCMapsOut)}
# load outline of spain to subset the raster
researchArea <- geodata::gadm("Spain", level = 0, path = ".")
sf_AOI <- st_as_sf(researchArea)
# load all the tiles
listofFiles <- list.files("../../FROM-GCLS/2_in_FROM_GLCTiles",full.names = T)
listofRasters <- lapply(listofFiles, rast)
rasCollection <- sprc(listofRasters)
# now merge the rasters. The file is written to disk to prevent crashing the R session
LCMap <- terra::merge(rasCollection,filename=file.path(LCMapsin,"2_in_GLCSrast.tif"))
# replace the mosaic with the entropy map (assumed to be produced outside this script)
LCMap <- rast(file.path(LCMapsOut,"GLC_entropy_map.tif"))
# we use the bbox that is also used for the grid construction to crop the raster
# if we used the Spain shp, the overseas territories would also be included, which is annoying
spainExt <-ext(-11, 37, 4, 44)
mainlandSpain <- crop(researchArea,spainExt)
# use the mainland to reduce the size of the raster
FROMGCLSSpainmask <- mask(LCMap,mainlandSpain)
# save the raster
writeRaster(FROMGCLSSpainmask,filename=file.path(LCMapsOut,"2_out_GLCSmask.tif"))
# Title: download and load GEE outputs
# Name: Zina Janssen
# Created: 14/11
# Description: this script downloads the GEE outputs from Google Drive and combines them into tables
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
source('../funcs/2_in_downloadFromDrive.R')
# download packages
listOfPackages <- c("ggpubr","dplyr","googledrive","purrr","readr")
lapply(listOfPackages, checkInstall) -> output
# make dirs to store the tables downloaded from the Drive and the combined output tables
inAreaTables <- "2_in_AreaTables"
if (!dir.exists(inAreaTables)) {dir.create(inAreaTables)}
outAreaTables <- "2_out_AreaTables"
if (!dir.exists(outAreaTables)) {dir.create(outAreaTables)}
outAreaTables3 <- "3_out_AreaTables"
if (!dir.exists(outAreaTables3)) {dir.create(outAreaTables3)}
# find the folder in which the file is stored
# prior to downloading, you must authenticate the Google account on which the
# files are stored
# https://search.r-project.org/CRAN/refmans/googledrive/html/drive_auth.html
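# a minimal sketch, assuming a hypothetical account email:
# googledrive::drive_auth(email = "you@example.com")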
url <- "<url of the drive folder>" # placeholder: fill in the Drive folder url
downloadFromDrive(url, pattern=".csv",inAreaTables)
# now load all the files in the area table input folder into R
classAreas <- list.files(path=inAreaTables,pattern=glob2rx("*classArea*"),full.names = T) %>%
lapply(read_csv,show_col_types = FALSE,id="filename") %>%
bind_rows
# remove the .geo column that GEE adds to exported tables
classAreas <- subset(classAreas,select = -c(.geo))
# get the values per data set
# the row indices below are based on manually inspecting the combined table
CGLS <- unlist(classAreas[1,][3:10])
DW <- unlist(classAreas[2,][3:10])
LUCAS <- unlist(classAreas[3,][3:10])
l8CGLS <- unlist(classAreas[4,][3:10])
l8DW <- unlist(classAreas[5,][3:10])
l8LUCAS <- unlist(classAreas[6,][3:10])
s1CGLS <- unlist(classAreas[7,][3:10])
s1DW <- unlist(classAreas[8,][3:10])
s1LUCAS <- unlist(classAreas[9,][3:10])
# calculate percentages
addPercentages <- function(classArea){
# calculate total and add to the table
total <- sum(classArea)
# calculate the percentages
percentage<-round(classArea/total*100,2)
classWithPerc <- rbind(classArea,percentage)
return(classWithPerc)
}
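# e.g. addPercentages(c(10, 30)) returns a two-row table: the areas c(10, 30) and the percentages c(25, 75)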
# add percentages to all class area tables
listOfTables <- list(CGLS,DW,LUCAS,s1CGLS,l8CGLS,s1DW,l8DW,s1LUCAS,l8LUCAS)
listOfPercentageTables<- map(listOfTables, addPercentages)
# create data frame
classAreaDf <- as.data.frame(do.call(rbind, listOfPercentageTables))
listOfNames <- c('CGLS','CGLS%','DW',"DW%",'LUCAS',"LUCAS%",'s1CGLS',"s1CGLS%",'l8CGLS',"l8CGLS%",'s1DW',"s1DW%",'l8DW',"l8DW%",'s1LUCAS',"s1LUCAS%",'l8LUCAS',"l8LUCAS%")
classAreaDf$names <- listOfNames
write.csv(classAreaDf,file.path(outAreaTables3,"3_out_classAreaTable.csv"))
# write a df with only the percentages of the main training data sets for comparison
comparisonDf <- classAreaDf[classAreaDf$names %in% c('CGLS%','DW%','LUCAS%'),]
write_csv(comparisonDf,file.path(outAreaTables,"2_out_classAreaForPlot.csv"))
# write a df with only the percentages of the satellite training data sets for comparison
comparisonDf <- classAreaDf[classAreaDf$names %in% c("s1CGLS%","l8CGLS%","s1DW%","l8DW%","s1LUCAS%","l8LUCAS%"),]
write_csv(comparisonDf,file.path(outAreaTables3,"3_out_classAreaForPlot.csv"))
#### load the table with the class areas of the confused classes
confClass <- read_csv(file.path(inAreaTables,"2_out_confmapClass_CGLS,DW,LUCAS.csv"),col_names =T)
# remove all columns where the class area is 0
noZeros <- confClass[, colSums(confClass != 0) > 0]
# remove columns with names that prevent the sorting
noZeros <- subset(noZeros,select = -c(.geo))
# order the columns by the area in the first row, largest first (sort() does not accept data frames)
sorted <- noZeros[,order(unlist(noZeros[1,]),decreasing = T)]
# save the dataframe
write.csv(sorted,file.path(outAreaTables,"2_out_sortedClassConsistency.csv"))
# now combine the spatial consistency area tables
spatialConFileList <- list.files(path=inAreaTables,pattern=glob2rx("*Dif.csv"),full.names = T)
spatialConAreas<-lapply(spatialConFileList,read_csv,show_col_types = FALSE,id="filename") %>%
bind_rows
CompleteAreasTabs <- spatialConAreas[grepl(pattern=glob2rx("*3_*"),spatialConAreas$filename),]
# obtain the file names
classList <- list()
for (i in 1:length(spatialConFileList)){
classname<-unlist(strsplit(unlist(strsplit(spatialConFileList[i],split="_"))[5],split="Dif"))[1]
classList[i]<-classname}
# remove redundant columns
spatialConAreas <- subset(CompleteAreasTabs,select = -c(`system:index`,.geo,`0`,filename))
# calculate the total area
totalPixels <- sum(spatialConAreas,na.rm = T)
percentage<-spatialConAreas/totalPixels*100
# add class names to the dfs
spatialConAreas$class <- unlist(classList[1:8])
percentage$class <- paste("%",unlist(classList)[1:8])
# combine the two files
percentageAreaTable <- rbind(spatialConAreas,percentage)
# save the file
write.csv(percentageAreaTable,file = file.path(outAreaTables,"2_out_areaPercentages.csv"))
write.csv(percentageAreaTable,file = file.path(outAreaTables3,"3_out_areaPercentages.csv"))
# Title: package check and install
# Name: Zina Janssen
# Created: 23/02
# Description: this function takes a package name and checks if it is installed;
# if not, it installs it. The package is subsequently loaded
checkInstall <- function(package){
# check if package is installed, otherwise install
if( !(package %in% installed.packages()[,1]) ) install.packages(package)
# load package
require(package,character.only=T,quietly=T)
}
# Title: reproject all rasters to common resolution
# Name: Zina Janssen
# Created: 20/09
## Description: this function takes a raster and its filename and reprojects the raster
# to a common projection
transformRaster <- function(rasterL,filename,proj="epsg:4326"){
# load the inner part as the raster
ras <- rasterL[[1]]
# reproject the raster to the required proj
ras<- terra::project(ras,proj)
# create the output filename from the input path
outfilename<-basename(filename)
outfilepath<-file.path("1_int_Reprojected",outfilename)
# save the projected raster
terra::writeRaster(ras,filename=outfilepath,overwrite=T)
}
rasterTransform <- function(num,foldNameStart,expert=F){
# load folder name depending on whether it is needed to look at expert or non expert files
if (expert){foldname <- paste0(foldNameStart,"Experts_tiles/")}
else {foldname <- paste0(foldNameStart,"Non_expert_tiles/")}
# make a list of all the rasters
listname <- paste0(foldname,as.character(num))
filelist <- list.files(listname,pattern = glob2rx("*.tif"),full.names = T)
# remove incorrect filenames from namelist
fileListNames <- filelist[!grepl(pattern = "/$",filelist)]
# read them as rast
allrasters <- lapply(fileListNames, rast)
# now transform and write all rasters; the list names match transformRaster's argument names
list(rasterL=allrasters,filename=fileListNames) %>%
purrr::pmap(transformRaster)}
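# minimal usage sketch (the tile root path is an assumption):
# lapply(1:14, rasterTransform, foldNameStart = "../../DW/1_in_DW/", expert = TRUE)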
# Title: extract CGLS points
# Name: Zina Janssen
# Created: 23/02
# Description: this function takes the buffered point data set of CGLS and samples new points
# from each buffer
extractPoint <- function(buffer,sampSize){
# sample points
bufferSamp <-spatSample(vect(buffer),sampSize,method="regular")
bufferDf <- as.data.frame(bufferSamp,geom="XY")
return(bufferDf)
}
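# minimal usage sketch (hypothetical single buffer; sampSize is an assumption):
# b <- st_sf(geometry = st_buffer(st_sfc(st_point(c(0, 0)), crs = 4326), dist = 100))
# extractPoint(b, sampSize = 10) # returns a data.frame with the sampled x/y coordinates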
# Title: select tiles based on lat and lon inputs
# Name: Zina Janssen
# Created: 20/09
## Description: this function takes a folder number, lists the tiles in that folder
# and keeps only those that fall within the aoi
fileClean <- function(num,researchArea,outputdir,foldname){
# get the name of the rasters
listname <- paste0(foldname,as.character(num))
# list all tile filenames in the folder so each can be checked
filelist <- list.files(file.path("../../DW/1_in_DW",listname),full.names = T)
# write the raster if it falls within aoi
lapply(filelist, writeFile,researchArea,outputdir=outputdir)
}
## Description: this function loads a raster from a file and writes it out if it lies in the right location
writeFile <- function(fil,researchArea,outputdir){
# load raster
raster <- rast(fil)
# get the filename
fileName <- colnames(raster[1])
if (crs(raster)!=crs(researchArea)){
raster <- terra::project(raster,crs(researchArea))
}
# check if raster is within, intersecting or touching the AOI. If neither, it is not needed
if (!relate(raster,researchArea,relation="disjoint")[1]){
writeRaster(raster,file.path(outputdir,paste0(fileName,".tif")),overwrite=T)}
}
# Title: sample from the rasters to create training data points
# Name: Zina Janssen
# Created: 20/09
## Description: this function takes a raster filename and samples points from the raster
# to create td points
propStratSample <- function(rname,sampleSize,outputfold){
# create a raster from the filename given
raster <-rast(rname)
# create output file name
outputFileName <- file.path(outputfold,paste0(colnames(raster[1]),".csv"))
# we need to determine the class distribution, so make a histogram and calculate
# the weights per class
histogram <- terra::hist(raster,plot=F)
# get the totals for all classes
totalClass<-sum(histogram$counts)
# determine the class weights by dividing by the total
weightedClass <-histogram$counts/totalClass
# use the breaks to change the values of a raster to NA
# a raster is transformed to na if it does not correspond with the current class
breaks <- histogram$breaks
weightList <- seq(from=1,to=length(weightedClass))
# run the strata selection and sampling per raster break
propStratS <- weightList %>%
purrr::map(.f=selectStrata,raster=raster,breaks=breaks,weightedClass=weightedClass,sampleSize=sampleSize)
# now combine the obtained points with class values
propStratS <- bind_rows(propStratS)
# round the values
propStratS$class <- round(propStratS$class)
# save a csv with the sample points
write_csv(propStratS,file=outputFileName)
}
selectStrata <- function(n,raster,breaks,weightedClass,sampleSize){
# select the parts of the raster that fall within the current class break
#raster[raster > breaks[n] & raster < breaks[n+1]] <- NA
# values=FALSE sets out-of-range cells to NA instead of clamping them to the bounds
reclassifiedRaster <- clamp(raster,lower = breaks[n],upper=breaks[n+1],values=FALSE)
# sample a number of points from this raster that corresponds to its class weights
# + 1 ensures that at least 1 sample is taken per class
size <- round(weightedClass[n]*sampleSize) + 1
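# e.g. weights c(0.7, 0.2, 0.1) with sampleSize = 10 give per-class sizes of 8, 3 and 2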
# this large sample size is used to ensure enough points are sampled
samp <- spatSample(reclassifiedRaster, size=3*size,method="regular",na.rm=T,as.points=T)
# make a dataframe with the coordinates per sample st it can be saved easily
values <- as.data.frame(samp,geom="XY")
# randomly subsample down to the target size; min() guards against having fewer points than requested
valuesSamp <- values[sample(1:nrow(values), min(size, nrow(values))), ]
names(valuesSamp) <- c("class","x","y")
return(valuesSamp)
}
# Title: make td loc maps
# Name: Zina Janssen
# Created: 10/03
# Description: this function takes td location points, plot title, output folder and output name
# and creates a map with the training point locations with the corresponding class label colors
# load the countries that will be the background for the maps
researchArea <- geodata::gadm("Spain", level = 0, path = tempdir())
portugal <- geodata::gadm("Portugal", level = 0, path = tempdir())
france <- geodata::gadm("France", level = 0, path = tempdir())
morocco <- geodata::gadm("Morocco", level = 0, path = tempdir())
algeria <- geodata::gadm("Algeria", level = 0, path = tempdir())
# clip Spain s.t. the overseas territories are not included
spainExt <-ext(-11,37,4,44)
spain <- crop(researchArea,spainExt)
# plot with grid
# set the colours and class names
makeTDLocMap <- function(input,outfold,outname,plottitle,bg=T,zoom=F,pointcex=0.1){
# there is a difference in the column names which might cause some trouble so make them uniform
if (!"LC1_label" %in% names(input)){
names(input)[which(names(input) == "LC1_lbl")] <- "LC1_label"
}
# set the color of the scalebar
scalebCol <- "black"
# set the colors for the lc classes as in the LC maps
pal <- c("#FF0000", "#dd1fff","#1d5f16","#eeaa18","#78ff39","#939393","#5bdde1","#259b6b")
# add the corresponding labels
classNames <- c("Artificial","Crops","Woodland","Shrubs","Grassland","Bare","Water","Wetlands")
# add a column to the input dataset for visualisation
# class labels 1-8 index directly into the palette
input$colour <- pal[input$LC1_label]
# make the plots
pdf(file=file.path(outfold,outname),family = "Times",width = 5.7,height = 4.5)
# add a background colour
# convert the main country into sf to have nice grid labels
plot(st_as_sf(spain)["COUNTRY"],main=plottitle,axes=T,bgc ="#232057",reset=F)
# add the countries for context
plot(portugal,col="#dfdfe7",add=T)
plot(france,col="#dfdfe7",add=T)
plot(algeria, col = "#dfdfe7", add = T)
plot(morocco,col="#dfdfe7",add=T)
plot(spain, col = "#fef5e4", add = T)
# add the td location points and corresponding class label colour
plot(input["LC1_label"],pch=19,cex=pointcex,col=input$colour,add=T)
grid(lwd=0.8)
# define the legend
legend(x="bottomright", # Coordinates (x also accepts keywords)
classNames, # Vector with the name of each group
col = pal, # Color of lines or symbols
fill = pal, # use the colors to create legend symbols
bg = rgb(1,1,1,alpha=0.75), # background color, bit transparent
border = "black", # Fill box border color
#pch=15, # Add pch symbols to legend lines or boxes
box.lwd = par("lwd"), # Legend box line width
box.lty = par("lty"), # Legend box line type
box.col = par("fg"), # Legend box line color
cex = 0.85, # Legend size
title = NULL # Legend title
)
sbar(d=100,xy=c(-3.5,35.5),col="white",lonlat = T,cex=1,label=c("","100 km",""),lwd=2,type="line")
# add the number of points
numOfPoints <- prettyNum(dim(input)[1], big.mark = ",",decimal.mark = ".")
text(-8,35.7,paste(numOfPoints,"training points"),col="white",cex=0.8)
dev.off()
if (zoom){
# this option allows a closer look at the local variation of the points
# make the plots
pdf(file=file.path(outfold,paste0("zoomed",outname)),family = "Times")
# add a background colour
# add an extent to zoom into
zoomExt <-ext(-3,-2,38,39)
zoom(spain,zoomExt,col = "bisque2")
# add the td location points and corresponding class label colour
plot(input["LC1_label"],pch=19,cex=3,col=input$colour,add=T)
sbar(d=0.2,xy="top",col="black",lonlat = T,cex=1,label=c("","200 m",""),lwd=4,type="line")
box()
dev.off()
}
}
# Title: create frequency table
# Name: Zina Janssen
# Created: 28/02
## Description: this function takes a vector of training data class labels, tabulates
# the class frequencies and their shares, and writes the table to a csv
classFreqTable <- function(TD,outputName){
# make a table of the input classes
classFrequencies <- table(TD)
# get the percentual distribution per class
percentages <- classFrequencies/sum(classFrequencies)
# add the percentages to the classes
classwithpercentages <- rbind(classFrequencies,percentages)
# save the table
write.table(classwithpercentages,outputName,sep=",")
}
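# e.g. classFreqTable(c(1, 1, 2), "freq.csv") writes counts 2 and 1 with shares of about 0.67 and 0.33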
# Title: make tree maps for frequency distribution
# Name: Zina Janssen
# Created: 17/03
# Description: these functions reshape class frequency tables and create treemaps
# this function makes the class frequencies suitable for plotting by changing the structure and adding labels
pivot <- function(df,name){
# get the class numbers to add as xlabels
class <- c("Built","Crop","Wood","Shrub","Grass","Bare","Water","Wetl.")
# get the percentages to fill in the bars
if (dim(df)[1] == 2){
percentages <- unlist(df[2,])
percentages <- percentages * 100
}
if (dim(df)[1] == 1){
percentages <- unlist(df[1:8])
}
# create data frame from the classes and add the percentages
dp <- data.frame(class)
dp$percentage<- round(percentages,2)
if (dim(df)[1] == 2){
dp$freq <-round(unlist(df[1,]))
}
# add the name of the dataset so it can be combined
dp$name <- name
return(dp)
}
# this function does the same for the area tables
pivot2 <- function(df,name){
# get the class numbers to add as xlabels
class <- c("Built","Crop","Wood","Shrub","Grass","Bare","Water","Wetl.")
# get the percentages to fill in the bars
if (dim(df)[1] == 2){
percentages <- unlist(df[2,][2:9])
}
# create data frame from the classes and add the percentages
dp <- data.frame(class)
dp$percentage<- round(percentages,2)
dp$freq <-round(unlist(df[1,][2:9]))
# add the name of the dataset so it can be combined
dp$name <- name
return(dp)
}
# this function creates the treemaps
MakeTreeMap <-function(df,name){
# create the treemap
plot<-ggplot(df, aes(area = freq, fill = class,label = paste(class, freq, sep = "\n"))) +
geom_treemap() +
geom_treemap_text(colour="white",
place="centre") +
ggtitle(paste(name, "class frequencies")) +
theme_grey(base_family = "Times") +
scale_fill_manual("Classes",values=c("#939393","#FF0000", "#dd1fff","#78ff39","#eeaa18","#5bdde1","#259b6b","#1d5f16"))
plot
ggsave(plot,file=file.path(outputfigures,paste0("FrequencyTreemap",name,".png")),dpi=200)
}