# Title: select relevant DW tiles
# Name: Zina Janssen
# Created: 20/09
# Description: this script reads the tab delimited data and uses it to keep the useful tiles
### 1 Initializing ####
# load the required functions
# make output dirs
relevantFiles <- "1_int_fileSelect"
if (!dir.exists(relevantFiles)) {dir.create(relevantFiles)}
# load functions
source('../funcs/0_checkLoadPackages.R')
source("../funcs/1_in_writeFiles.R")
# load and install all required packages
listOfPackages<-c("sf","terra","dplyr","parallel","geodata")
lapply(listOfPackages, checkInstall) -> output
# you can download the DW training data from https://download.pangaea.de/dataset/933475/allfiles.zip
# this requires a PANGAEA account
# subsequently, all folders with the "WH" tag were manually removed, as the AOI lies in the Eastern Hemisphere
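# a hedged sketch of doing that removal in code instead (the tile root path and "WH" folder naming are assumptions):
# whDirs <- list.dirs("../../DW/1_in_DW", recursive = FALSE)
# unlink(whDirs[grepl("WH", basename(whDirs))], recursive = TRUE)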
# load the required data
researchArea <- geodata::gadm("Spain", level = 0, path = ".")
rasterNameList <- list.files(path="1_int_Reprojected",pattern = glob2rx("*.tif"),full.names=T)
# sanity check on a single tile: "disjoint" is FALSE when the tile overlaps the research area
raster<- rast(rasterNameList[100])
if(!relate(raster,researchArea,relation="disjoint")[1]){print("tile overlaps the research area")}
### 2 Processing ####
# loop over all the folder numbers and keep only the tiles that are
# relevant for our locations
LCnums <- seq(from=1,to=14)
# rasters are reprojected and then it is checked if they are within the research area
# if so they are stored
# do the processing on the expert tiles
foldname <- "Experts_tiles/"
mclapply(LCnums,fileClean,researchArea=researchArea,outputdir=relevantFiles,foldname=foldname) -> outputSamples
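# note: mclapply defaults to getOption("mc.cores", 2L) workers; on Windows only
# mc.cores = 1 (sequential execution) is supported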
# do the same for the non expert tiles
foldname <- "Non_expert_tiles/"
mclapply(LCnums,fileClean,researchArea=researchArea,outputdir=relevantFiles,foldname=foldname) -> outputSamples
# Title: Reduce the CGLS data set based on ESA
# Name: Zina Janssen
# Created: 14/03
# Description: this script reads the CGLS points with the added ESA classes and removes the
# CGLS points where the classes do not overlap
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
source('../funcs/1_out_classFrequencyTable.R')
# load all packages
listOfPackages <- c("sf","dplyr","purrr","terra")
lapply(listOfPackages, checkInstall) -> output
# create the output dir and load the input dir
CGLSext <- "1_in_ESACGLS"
CGLSoutputs <- "../../CGL-100/1_out_CGLS"
if (!dir.exists(CGLSoutputs)) {dir.create(CGLSoutputs)}
# load the complete data set and the shapefile used for subsetting
filename<-file.path(CGLSoutputs,'CGLSwithESA.shp')
CGLS <- st_read(filename)
# now change all the land cover labels
# snow and ice and mangroves were not present so these changes were not made
CGLS[CGLS$Map == 10,]$Map <- 3
CGLS[CGLS$Map == 20,]$Map <- 4
CGLS[CGLS$Map == 30,]$Map <- 5
CGLS[CGLS$Map == 40,]$Map <- 2
CGLS[CGLS$Map == 50,]$Map <- 1
CGLS[CGLS$Map == 60,]$Map <- 6
#CGLS[CGLS$Map == 70,]$Map <- 6
CGLS[CGLS$Map == 80,]$Map <- 7
CGLS[CGLS$Map == 90,]$Map <- 8
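# the sequential recoding above is safe only because the new codes (1-8) never collide
# with the remaining old codes (10-90); an equivalent lookup-table version makes that explicit:
# esaToLucas <- c(`10`=3,`20`=4,`30`=5,`40`=2,`50`=1,`60`=6,`80`=7,`90`=8)
# CGLS$Map <- unname(esaToLucas[as.character(CGLS$Map)])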
# flag the rows of CGLS where the label equals the ESA map class
CGLS$equal <- CGLS$LC1_lbl == CGLS$Map
equal<-CGLS[which(CGLS$equal),]
# save the first reduced version
st_write(equal,file.path(CGLSoutputs,"1_out_CGLSESAcorrected.shp"),append=F)
classFreqTable(equal$LC1_lbl,file.path(CGLSoutputs,"1_out_CGLSClassFreqExt.csv"))
# Title: load and mosaic tiff files
# Name: Zina Janssen
# Created: 29/09
### this is the script for loading the DW tiles and sampling them s.t. they become points
### 1 Initializing ####
# load functions
source('../funcs/0_checkLoadPackages.R')
source('../funcs/1_out_DWsampleTiles.R')
source('../funcs/1_out_classFrequencyTable.R')
# load and install all required packages
listOfPackages<-c("stars","sf","terra","purrr", "dplyr","readr","future","geodata")
lapply(listOfPackages, checkInstall) -> output
# make output dirs
fileSel <- "1_int_fileSelect"
if (!dir.exists(fileSel)) {dir.create(fileSel)}
DWtrainingdata <- "../../DW/1_out_DW"
if (!dir.exists(DWtrainingdata)) {dir.create(DWtrainingdata)}
# load the required data
researchArea <- geodata::gadm("Spain", level = 0, path = ".")
sf_AOI <- st_as_sf(researchArea)
#### 2 Processing ####
# make a buffer to ensure inclusion of sufficient TD points
AOIbuf<- st_buffer(sf_AOI,dist =5000)
#### 2.1 Sampling ####
# now we will sample from the tiles while conserving the class distribution that is present
rasterNameList <- list.files(path=fileSel,pattern = glob2rx("*.tif"),full.names=T)
# run the sampling and write csv files
plan(multisession, workers = 11)
furrr::future_map(rasterNameList,.f=propStratSample,sampleSize=10,outputfold=DWtrainingdata) -> outputSamples
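# note: workers = 11 assumes at least that many local cores;
# future::availableCores() - 1 adapts the worker count to the machine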
# this code reads the csvs with the sampled points and creates a large csv file
# code adapted from https://www.statology.org/r-merge-csv-files/
pointsWithValues <- list.files(path=DWtrainingdata,full.names = T) %>%
lapply(read_csv,show_col_types = FALSE) %>%
bind_rows
# now remove all the intermediate files
list.files(path=DWtrainingdata,full.names = T) %>% lapply(file.remove)
# now harmonize the land cover classes to the LUCAS legend
# remove the no data and cloud classes
pointsWithValues<- pointsWithValues[which(pointsWithValues$class != 0 & pointsWithValues$class != 10),]
# transform the classes to the lucas classes
pointsWithValues$LC1_lbl <- NA
pointsWithValues[pointsWithValues$class == 1,]$LC1_lbl <- 7
pointsWithValues[pointsWithValues$class == 2,]$LC1_lbl <- 3
pointsWithValues[pointsWithValues$class == 3,]$LC1_lbl <- 5
pointsWithValues[pointsWithValues$class == 4,]$LC1_lbl <- 8
pointsWithValues[pointsWithValues$class == 5,]$LC1_lbl <- 2
pointsWithValues[pointsWithValues$class == 6,]$LC1_lbl <- 4
pointsWithValues[pointsWithValues$class == 7,]$LC1_lbl <- 1
pointsWithValues[pointsWithValues$class == 8,]$LC1_lbl <- 6
pointsWithValues[pointsWithValues$class == 9,]$LC1_lbl <- 6
# make an sf object and subset it to the AOI
spatPoints <- st_as_sf(pointsWithValues,coords=c("x","y"),crs=st_crs(4326))
subsetDW <- st_filter(spatPoints,AOIbuf)
# save the frequency table
filename <- file.path(DWtrainingdata,"1_out_DWClassFreqSpain.csv") # name must match what the plotting script reads
classFreqTable(subsetDW$LC1_lbl,filename)
# save the shapefile
sf::st_write(subsetDW,file.path(DWtrainingdata,"1_in_DWTrainingPoints.shp"),driver="ESRI Shapefile",append=F,delete_layer=T)
# Title: Prepare CGLS training data
# Name: Zina Janssen
# Created: 09/11
# Description: this script loads and subsets the CGLS data. The data are not freely available
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
source('../funcs/1_out_classFrequencyTable.R')
source('../funcs/1_in_extractCGLSpoints.R')
# load all packages
listOfPackages <- c("sf","dplyr","stars","purrr","terra", "future")
lapply(listOfPackages, checkInstall) -> output
# create output dir
CGLSoutputs <- "../../CGL-100/1_out_CGLS"
if (!dir.exists(CGLSoutputs)) {dir.create(CGLSoutputs)}
# set the country that will be used for subsetting
Country <- "Spain"
# load the complete data set and the shp with which the data set can be subset
fileLoc <- "../../CGL-100/training_data_2015_100m_20190402_V4.csv"
# load the file
CGLS <- read.csv(fileLoc)
# load the aoi
researchArea <- geodata::gadm(Country, level = 0, path = tempdir())
sf_AOI <- st_as_sf(researchArea)
# buffer so all points are included
countryBuf <- st_buffer(sf_AOI,dist =5000)
# subset the points using the outline of Spain
CGLSspat <- st_as_sf(CGLS,coords=c("x","y"))
st_crs(CGLSspat) <- 4326
subsetCGLS <- CGLSspat[countryBuf,]
# now do some harmonization
# remove the unsure class
subsetCGLSsubs<- subsetCGLS[which(subsetCGLS$dominant_lc != "not_sure"),]
# transform the classes to the lucas classes
subsetCGLSsubs$LC1_lbl <- NA
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "grassland",]$LC1_lbl <- 5
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "tree",]$LC1_lbl <- 3
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "wetland_herbaceous",]$LC1_lbl <- 8
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "crops" ,]$LC1_lbl <- 2
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "shrub",]$LC1_lbl <- 4
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "bare",]$LC1_lbl <- 6
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "water",]$LC1_lbl <- 7
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "urban_built_up",]$LC1_lbl <- 1
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "urban/built-up" ,]$LC1_lbl <- 1
subsetCGLSsubs[subsetCGLSsubs$dominant_lc == "fallow_shifting_cultivation" ,]$LC1_lbl <- 2
# remove the redundant columns
subsetCGLSsubs <- subset(subsetCGLSsubs,select=-c(bare,burnt,crops,fallow_shifting_cultivation,grassland,lichen_and_moss,shrub,
snow_and_ice,tree,urban_built_up,water,wetland_herbaceous,not_sure,
forest_type))
# save the subset as a shp
sf::st_write(subsetCGLSsubs,file.path(CGLSoutputs,"1_in_CGLSTD.shp"),driver="ESRI Shapefile",append=F)
# save the frequency table
filename <- file.path(CGLSoutputs,paste0("1_out_CGLSClassFreq",Country,".csv"))
classFreqTable(subsetCGLSsubs$LC1_lbl,filename)
# now we will extend the number of points
## first only select the relevant columns
CGLSwithLCLabel <- subset(subsetCGLSsubs,select=c(geometry,LC1_lbl))
#now buffer so we have a surface to extract from
CGLSbuffer <- st_buffer(CGLSwithLCLabel,dist = 100)
# now sample from all the buffered points
# ensure all but one cores are used for processing
workers <- parallel::detectCores() - 1
plan(multisession, workers = workers)
CGLSpoints <- CGLSbuffer %>%
group_by(geometry) %>%
group_split() %>%
furrr::future_map(.f=extractPoint,sampSize=10) # sampSize of 10 points per buffer is an assumed value
CGLSpoints <- bind_rows(CGLSpoints)
# now combine this with the existing points
completeData <- rbind(CGLSwithLCLabel,st_as_sf(CGLSpoints,coords=c("x","y"),crs=st_crs(CGLSwithLCLabel)))
CGLSextendedPoints<-st_as_sf(CGLSpoints,coords=c("x","y"),crs=st_crs(CGLSwithLCLabel))
# also save the new CGLS TD
sf::st_write(completeData,file.path(CGLSoutputs,"1_in_CGLSTDextended.shp"),driver="ESRI Shapefile",append=F)
#### NOTE THAT FILTERING BASED ON OVERLAP WITH ESA CLASSES IS STILL REQUIRED FOR QUALITY ASSURANCE
# save the frequency table
filename <- file.path(CGLSoutputs,paste0("1_out_CGLSExtendedClassFreq",Country,".csv"))
classFreqTable(completeData$LC1_lbl,filename)
# Title: Create barplots
# Name: Zina Janssen
# Created: 16/03
# Description: this script takes frequency and area tables and creates barplots and treemaps
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
listOfPackages <- c("ggplot2","scales","treemapify","extrafont","readr","tidyr")
lapply(listOfPackages, checkInstall) -> output
source('../funcs/1_out_treeMaps.R')
# link to output dirs
## change this to where you have saved the data
CGLSdir<- "../../CGL-100/1_out_CGLS"
DWdir <- "../../DW/1_out_DW"
LUCASdir <- "../../LUCAS/1_out_LUCAS"
outAreaTables <- '../2_difference_evaluation/2_out_AreaTables'
outAreaTables3 <- '../2_difference_evaluation/3_out_AreaTables'
# create output directories
outputfigures <- "../../../figures/1_out_plots"
if (!dir.exists(outputfigures)) {dir.create(outputfigures)}
outputfigures3 <- "../../../figures/3_out_plots"
if (!dir.exists(outputfigures3)) {dir.create(outputfigures3)}
# load input data
CGLS <- read.table(file.path(CGLSdir,"1_out_CGLSExtendedClassFreqSpain.csv"),sep=",")
DW <- read.table(file.path(DWdir,"1_out_DWClassFreqSpain.csv"),sep=",")
LUCAS <- read.table(file.path(LUCASdir,"1_out_LUCASClassFreqSpain.csv"),sep=",")
classArea <- read_csv(file.path(outAreaTables,"2_out_classAreaForPlot.csv"))
areaPercentages <- read_csv(file.path(outAreaTables,"2_out_areaPercentages.csv"),col_select = -c(...1))
classAreaTable <- read_csv(file.path(outAreaTables3,"3_out_classAreaTable.csv"))
classArea3 <- read_csv(file.path(outAreaTables3,"3_out_classAreaForPlot.csv"))
areaPercentages3 <- read_csv(file.path(outAreaTables3,"3_out_areaPercentages.csv"),col_select = -c(...1))
# transform the tables to make them suitable for plotting
CGLSplot <- pivot(CGLS,"CGLS")
DWplot <- pivot(DW,"DW")
LUCASplot <- pivot(LUCAS,"LUCAS")
plotDF <- rbind(CGLSplot,DWplot,LUCASplot)
# transform the area data to make them suitable for plotting as well
CGLSarea <- pivot(classArea[1,],"CGLS")
DWarea<- pivot(classArea[2,],"DW")
LUCASarea<- pivot(classArea[3,],"LUCAS")
areaDF <- rbind(CGLSarea,DWarea,LUCASarea)
# transform the area data to make them suitable for treemaps as well
CGLSareaTr <- pivot2(classAreaTable[1:2,],"CGLS")
DWareaTr<- pivot2(classAreaTable[3:4,],"DW")
LUCASareaTr<- pivot2(classAreaTable[5:6,],"LUCAS")
# transform the satellite area data to make them suitable for plotting as well
S1CGarea <- pivot(classArea3[1,],"S1")
L8CGarea <- pivot(classArea3[2,],"L8")
S2CGarea <- CGLSarea
S2CGarea$name <- "S2"
areaCGLSsat <- rbind(S1CGarea,L8CGarea,S2CGarea)
S1DWarea <- pivot(classArea3[3,],"S1")
L8DWarea <- pivot(classArea3[4,],"L8")
S2DWarea <- DWarea
S2DWarea$name <- "S2"
areaDWsat <- rbind(S1DWarea,L8DWarea,S2DWarea)
S1LUarea <- pivot(classArea3[5,],"S1")
L8LUarea <- pivot(classArea3[6,],"L8")
S2LUarea <- LUCASarea
S2LUarea$name <- "S2"
areaLUCASsat <- rbind(S1LUarea,L8LUarea,S2LUarea)
# make the spatial consistency data usable for plotting as well
percentageDf <- areaPercentages[grepl(glob2rx("%*"),areaPercentages$class),]
percentageDf <- percentageDf %>% replace_na(list(`3` =0))
# do the same for the satellite spatial consistency data
percentageDf3 <- areaPercentages3[grepl(glob2rx("%*"),areaPercentages3$class),]
percentageDf3 <- percentageDf3 %>% replace_na(list(`3` =0))
# create the treemaps
MakeTreeMap(CGLSplot,"CGLS")
MakeTreeMap(DWplot,"DW")
MakeTreeMap(LUCASplot,"LUCAS")
# create the treemaps for class areas
MakeTreeMap(CGLSareaTr,"CGLS based Land Cover Map")
MakeTreeMap(DWareaTr,"DW based Land Cover Map")
MakeTreeMap(LUCASareaTr,"LUCAS based Land Cover Map")
# make the barplot with all the classfrequencies
pl<-ggplot(data=plotDF, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
#theme_grey(base_family = "Times") +
#theme_minimal(base_family = "Times") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,36)+
ggtitle("Comparison of class frequency distributions")
pl <- pl + theme(axis.text = element_text(size = 15),axis.title = element_text(size = 15))
ggsave(pl,file=file.path(outputfigures,"frequencyBarPlot.png"),dpi=200)
# make the barplot with all the class frequencies for the classified image
pl<-ggplot(data=areaDF, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,48)+
ggtitle("Class distributions in land cover maps based on different training data")
pl <- pl + theme(axis.text = element_text(size = 15),axis.title = element_text(size = 15))
ggsave(pl,file=file.path(outputfigures,"classAreaBarPlot.png"),dpi=200)
# make a combined plot for the satellite images
# make the barplot with all the class frequencies for the classified image
pla<-ggplot(data=areaDF, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,48)+
ggtitle("Class area based on different training data")
pla <- pla + theme(axis.text = element_text(size = 10),axis.title = element_text(size = 10))
pl<-ggplot(data=areaCGLSsat, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,48)+
ggtitle("CGLS based class area with different RS input")
pl <- pl + theme(axis.text = element_text(size = 10),axis.title = element_text(size = 10))
pli<-ggplot(data=areaDWsat, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,48)+
ggtitle("DW based class area with different RS input")
pli <- pli + theme(axis.text = element_text(size = 10),axis.title = element_text(size = 10))
plo<-ggplot(data=areaLUCASsat, aes(x=class, y=percentage,fill=name)) +
geom_bar(position=position_dodge(),stat="identity",color="black") +
scale_fill_manual(values=c("#9bdfe8","#f6b3e2","#f2e774"))+
xlab("Class")+
ylab("Percentage [%]") +
labs(fill='') +
ylim(0,48)+
ggtitle("LUCAS based class area with different RS input")
plo <- plo + theme(axis.text = element_text(size = 5),axis.title = element_text(size = 5))
# arrange the four panels in a 2x2 grid
pz<-ggarrange(pla,pl,pli, plo, ncol=2, nrow = 2, common.legend = F, legend="right")
ggsave(plot=pz,file=file.path(outputfigures,"classAreas.png"),dpi=200,width = 9.1)
# Title: add copernicus attributes to the polygons, harmonize the data and save the TD points
# Name: Zina Janssen
# Created: 26/09
### This script loads the LUCAS polygons and adds the respective attributes based on point id
# the points are then saved and subset to the location of interest
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
source('../funcs/1_out_classFrequencyTable.R')
# load packages
listOfPackages <- c("sf","dplyr","readr","purrr","terra")
lapply(listOfPackages, checkInstall) -> output
# create directories for storing the outputs
LUCASint <- "../../LUCAS/1_out_LUCAS/data"
if (!dir.exists(LUCASint)) {dir.create(LUCASint)}
LUCASoutputs <- "../../LUCAS/1_out_LUCAS"
if (!dir.exists(LUCASoutputs)) {dir.create(LUCASoutputs)}
# load the AOI
researchArea <- geodata::gadm("Spain", level = 0, path = ".")
sf_AOI <- st_as_sf(researchArea)
# the buffer is 5km
countryBuf <- st_buffer(sf_AOI,dist =5000)
## Download the input data and load them
url_lucasharmo2006_survey<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/1_table/lucas_harmo_uf_2006.zip'
url_lucasharmo2009_survey<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/1_table/lucas_harmo_uf_2009.zip'
url_lucasharmo2012_survey<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/1_table/lucas_harmo_uf_2012.zip'
url_lucasharmo2015_survey<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/1_table/lucas_harmo_uf_2015.zip'
url_lucasharmo2018_survey<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/1_table/lucas_harmo_uf_2018.zip'
url_lucas_geometry<-'https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUCAS/LUCAS_harmonised/2_geometry/LUCAS_th_geom.zip'
# Download LUCAS harmo tables
urlList <- list(url_lucasharmo2006_survey,url_lucasharmo2009_survey,url_lucasharmo2012_survey,url_lucasharmo2015_survey,url_lucasharmo2018_survey,url_lucas_geometry)
# note: only the 2018 file contains the Copernicus module; the geometries and all survey years are downloaded below
downloadFromUrl <- function(file,outputDir){
destfile<-file.path(outputDir,'data',basename(file))
if(!file.exists(destfile)){
download.file(file,destfile=destfile)}
}
# download all survey information
lapply(urlList, downloadFromUrl,outputDir=LUCASoutputs) -> output
#### 2 Processing ####
# Read the shapefile with the theoretical point locations
unzip(file.path(LUCASint,basename(url_lucas_geometry)),exdir=file.path(LUCASoutputs,'data'))
lucas_geom<-st_read(file.path(LUCASoutputs,'data','LUCAS_th_geom.shp')) # read the grid
# only select the points from 2018
lucas_geom <- lucas_geom[lucas_geom$YEAR=="2018",]
# load the survey results
unzipFiles <- function(name,outdir){
unzip(file.path(outdir,'data',basename(name)),exdir=file.path(outdir,'data'))
}
# unzip all the files and store the names in a list
fileNameList <- unlist(lapply(urlList, unzipFiles,outdir=LUCASoutputs))
# load all the csvs
Luc2006<- read_csv(fileNameList[[1]])
Luc2009<- read_csv(fileNameList[[2]])
Luc2012<-read_csv(fileNameList[[3]])
Luc2015<-read_csv(fileNameList[[4]])
Luc2018<-read_csv(fileNameList[[5]])
# remove redundant columns, some of which are corrupted
cleanFile <- function(file){
# following the approach of Pflugmacher et al. (2019) we remove these classes
listOfExcl <- c("A22",'A39',"B55","E30","F40")
new_df <- subset(file, !(lc1 %in% listOfExcl))
subset <- subset(new_df, select = c(point_id,letter_group))
names(subset) <- c("point_id","LC1_label")
return(subset)
}
files <- list(Luc2006,Luc2009,Luc2012,Luc2015,Luc2018)
cleanFiles <- lapply(files, cleanFile)
# Merge LUCAS grid (point locations) with the cleaned LUCAS survey data
CopPoly<-merge(x = lucas_geom, y = cleanFiles[[5]], by.x = "POINT_ID",by.y = "point_id") # cleanFiles[[5]] is the cleaned 2018 table, which carries the LC1_label column used below
# subset the lucas points to the AOI
CopPointsSub <- CopPoly[countryBuf,]
# change the letters to numbers which is required for GEE
CopPointsSub[which(CopPointsSub$LC1_label == "A"),]$LC1_label <- "1"
CopPointsSub[which(CopPointsSub$LC1_label == "B"),]$LC1_label <- "2"
CopPointsSub[which(CopPointsSub$LC1_label == "C"),]$LC1_label <- "3"
CopPointsSub[which(CopPointsSub$LC1_label == "D"),]$LC1_label <- "4"
CopPointsSub[which(CopPointsSub$LC1_label == "E"),]$LC1_label <- "5"
CopPointsSub[which(CopPointsSub$LC1_label == "F"),]$LC1_label <- "6"
CopPointsSub[which(CopPointsSub$LC1_label == "G"),]$LC1_label <- "7"
CopPointsSub[which(CopPointsSub$LC1_label == "H"),]$LC1_label <- "8"
# change the column to numeric values, otherwise Earth Engine does not work
CopPointsSub$LC1_label <- as.numeric(CopPointsSub$LC1_label)
# ensure there are no duplicates
CopPointsSorted <- CopPointsSub[order(CopPointsSub$YEAR),]
CopPointsSubUn <- CopPointsSorted[!duplicated(CopPointsSorted,fromLast=T),]
# remove redundant columns
CopPointsSubUn <- subset(CopPointsSubUn, select = c(LC1_label,geometry))
# save shapefile
sf::st_write(CopPointsSubUn,file.path(LUCASoutputs,"1_out_LUCASTDPoints.shp"),append=F)
# create frequency table
CopPointsSubUn <- st_read(file.path(LUCASoutputs,"1_out_LUCASTDPoints.shp"))
filename <- file.path(LUCASoutputs,"1_out_LUCASClassFreqSpain.csv") # name must match what the plotting script reads
classFreqTable(CopPointsSubUn$LC1_label,filename)
# Title: Create maps for displaying the training data
# Name: Zina Janssen
# Created: 03/03
# Description: this script loads training data points and creates maps that visualize the point locations together with the total number of points
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
# load all packages
listOfPackages <- c("sf","readr","terra")
lapply(listOfPackages, checkInstall) -> output
source('../funcs/1_out_TDlocMaps.R')
# get storage dirs
CGLSdir<- "../../CGL-100/1_out_CGLS"
DWdir <- "../../DW/1_out_DW"
LUCASdir <- "../../LUCAS/1_out_LUCAS"
# create outputdirs
outputmaps <- "../../../figures/1_out_TDmaps"
if (!dir.exists(outputmaps)) {dir.create(outputmaps)}
# load training points
CGLS <- sf::st_read(file.path(CGLSdir,"1_out_CGLSESAcorrected.shp"))
DW <- sf::st_read(file.path(DWdir,"1_in_DWTrainingPoints.shp"))
LUCAS <- st_read(file.path(LUCASdir,"1_out_LUCASTDPoints.shp"))
#### 2 Processing ####
# now create the maps with the td locations
makeTDLocMap(CGLS,outfold=outputmaps,outname="1_out_CGLSTD.pdf","Copernicus Global Land Services (CGLS) Training Data")
# DW normal and the changed class distribution
makeTDLocMap(DW,outfold=outputmaps,outname="1_out_DWTD.pdf","Dynamic World (DW) Training Data")
# LUCAS normal and the changed class distribution
makeTDLocMap(LUCAS,outfold=outputmaps,outname="1_out_LUCASTD.pdf","Land Use/Cover Area frame Survey (LUCAS) Training Data",pointcex=0.1)
# Title: create mosaic of the FROM GLC tiles
# Name: Zina Janssen
# Created: 06/12
### this script loads the FROM-GLC map and finds the most heterogeneous areas
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
listOfPackages <- c("sf","dplyr","purrr","terra")
lapply(listOfPackages, checkInstall) -> output
## create dirs to store the output map
# you can download the FROM GLC tiles through http://data.ess.tsinghua.edu.cn/
# it is best to select the tiles that correspond with the coordinates of your AOI
LCMapsin <- "../../FROM-GCLS/2_in_GLCS"
if (!dir.exists(LCMapsin)) {dir.create(LCMapsin)}
LCMapsOut <- "2_out_GLCS"
if (!dir.exists(LCMapsOut)) {dir.create(LCMapsOut)}
# load outline of spain to subset the raster
researchArea <- geodata::gadm("Spain", level = 0, path = ".")
sf_AOI <- st_as_sf(researchArea)
# load all the tiles
listofFiles <- list.files("../../FROM-GCLS/2_in_FROM_GLCTiles",full.names = T)
listofRasters <- lapply(listofFiles, rast)
rasCollection <- sprc(listofRasters)
# now merge the rasters. The file is written to disk to prevent crashing the R session
LCMap <- terra::merge(rasCollection,filename=file.path(LCMapsin,"2_in_GLCSrast.tif"))
# replace the mosaic with the entropy map (assumed to be produced outside this script)
LCMap <- rast(file.path(LCMapsOut,"GLC_entropy_map.tif"))
# we use the bbox that is also used for the grid construction to crop the raster
# if we used the Spain shp, the overseas territories would also be included, which is annoying
spainExt <-ext(-11, 37, 4, 44)
mainlandSpain <- crop(researchArea,spainExt)
# use the mainland to reduce the size of the raster
FROMGCLSSpainmask <- mask(LCMap,mainlandSpain)
# save the raster
writeRaster(FROMGCLSSpainmask,filename=file.path(LCMapsOut,"2_out_GLCSmask.tif"))
# Title: download and load GEE outputs
# Name: Zina Janssen
# Created: 14/11
# Description: this script downloads the GEE outputs from Google Drive and combines them into tables
#### 1 Initializing ####
source('../funcs/0_checkLoadPackages.R')
source('../funcs/2_in_downloadFromDrive.R')
# download packages
listOfPackages <- c("ggpubr","dplyr","googledrive","purrr","readr")
lapply(listOfPackages, checkInstall) -> output
# make dirs to store the tables downloaded from the Drive and the combined output tables
inAreaTables <- "2_in_AreaTables"
if (!dir.exists(inAreaTables)) {dir.create(inAreaTables)}
outAreaTables <- "2_out_AreaTables"
if (!dir.exists(outAreaTables)) {dir.create(outAreaTables)}
outAreaTables3 <- "3_out_AreaTables"
if (!dir.exists(outAreaTables3)) {dir.create(outAreaTables3)}
# find the folder in which the file is stored
# prior to downloading, you must authenticate the Google account on which the
# files are stored
# https://search.r-project.org/CRAN/refmans/googledrive/html/drive_auth.html
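# a minimal sketch, assuming a hypothetical account email:
# googledrive::drive_auth(email = "you@example.com")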
url <- "<url of the drive folder>" # placeholder: fill in the Drive folder url
downloadFromDrive(url, pattern=".csv",inAreaTables)
# now load all the files in the area table input folder into R
classAreas <- list.files(path=inAreaTables,pattern=glob2rx("*classArea*"),full.names = T) %>%
lapply(read_csv,show_col_types = FALSE,id="filename") %>%
bind_rows
# remove the .geo column that GEE adds to exported tables
classAreas <- subset(classAreas,select = -c(.geo))
# get the values per data set
# the row indices below are based on manually inspecting the combined table
CGLS <- unlist(classAreas[1,][3:10])
DW <- unlist(classAreas[2,][3:10])
LUCAS <- unlist(classAreas[3,][3:10])
l8CGLS <- unlist(classAreas[4,][3:10])
l8DW <- unlist(classAreas[5,][3:10])
l8LUCAS <- unlist(classAreas[6,][3:10])
s1CGLS <- unlist(classAreas[7,][3:10])
s1DW <- unlist(classAreas[8,][3:10])
s1LUCAS <- unlist(classAreas[9,][3:10])
# calculate percentages
addPercentages <- function(classArea){
# calculate total and add to the table
total <- sum(classArea)
# calculate the percentages
percentage<-round(classArea/total*100,2)
classWithPerc <- rbind(classArea,percentage)
return(classWithPerc)
}
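# e.g. addPercentages(c(10, 30)) returns a two-row table: the areas c(10, 30) and the percentages c(25, 75)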
# add percentages to all class area tables
listOfTables <- list(CGLS,DW,LUCAS,s1CGLS,l8CGLS,s1DW,l8DW,s1LUCAS,l8LUCAS)
listOfPercentageTables<- map(listOfTables, addPercentages)
# create data frame
classAreaDf <- as.data.frame(do.call(rbind, listOfPercentageTables))
listOfNames <- c('CGLS','CGLS%','DW',"DW%",'LUCAS',"LUCAS%",'s1CGLS',"s1CGLS%",'l8CGLS',"l8CGLS%",'s1DW',"s1DW%",'l8DW',"l8DW%",'s1LUCAS',"s1LUCAS%",'l8LUCAS',"l8LUCAS%")
classAreaDf$names <- listOfNames
write.csv(classAreaDf,file.path(outAreaTables3,"3_out_classAreaTable.csv"))
# write a df with only the percentages of the main training data sets for comparison
comparisonDf <- classAreaDf[classAreaDf$names %in% c('CGLS%','DW%','LUCAS%'),]
write_csv(comparisonDf,file.path(outAreaTables,"2_out_classAreaForPlot.csv"))
# write a df with only the percentages of the satellite training data sets for comparison
comparisonDf <- classAreaDf[classAreaDf$names %in% c("s1CGLS%","l8CGLS%","s1DW%","l8DW%","s1LUCAS%","l8LUCAS%"),]
write_csv(comparisonDf,file.path(outAreaTables3,"3_out_classAreaForPlot.csv"))
#### load the table with the class areas of the confused classes
confClass <- read_csv(file.path(inAreaTables,"2_out_confmapClass_CGLS,DW,LUCAS.csv"),col_names =T)
# remove all columns where the class area is 0
noZeros <- confClass[, colSums(confClass != 0) > 0]
# remove columns with names that prevent the sorting
noZeros <- subset(noZeros,select = -c(.geo))
# order the columns by the area in the first row, largest first (sort() does not accept data frames)
sorted <- noZeros[,order(unlist(noZeros[1,]),decreasing = T)]
# save the dataframe
write.csv(sorted,file.path(outAreaTables,"2_out_sortedClassConsistency.csv"))
# now combine the spatial consistency area tables
spatialConFileList <- list.files(path=inAreaTables,pattern=glob2rx("*Dif.csv"),full.names = T)
spatialConAreas<-lapply(spatialConFileList,read_csv,show_col_types = FALSE,id="filename") %>%
bind_rows
CompleteAreasTabs <- spatialConAreas[grepl(pattern=glob2rx("*3_*"),spatialConAreas$filename),]
# obtain the file names
classList <- list()
for (i in 1:length(spatialConFileList)){
classname<-unlist(strsplit(unlist(strsplit(spatialConFileList[i],split="_"))[5],split="Dif"))[1]
classList[i]<-classname}
# remove redundant columns
spatialConAreas <- subset(CompleteAreasTabs,select = -c(`system:index`,.geo,`0`,filename))
# calculate the total area
totalPixels <- sum(spatialConAreas,na.rm = T)
percentage<-spatialConAreas/totalPixels*100
# add class names to the dfs
spatialConAreas$class <- unlist(classList[1:8])
percentage$class <- paste("%",unlist(classList)[1:8])
# combine the two files
percentageAreaTable <- rbind(spatialConAreas,percentage)
# save the file
write.csv(percentageAreaTable,file = file.path(outAreaTables,"2_out_areaPercentages.csv"))
write.csv(percentageAreaTable,file = file.path(outAreaTables3,"3_out_areaPercentages.csv"))
# Title: package check and install
# Name: Zina Janssen
# Created: 23/02
# Description: this function takes a package name and checks if it is installed;
# if not, it installs it. The package is subsequently loaded
checkInstall <- function(package){
# check if package is installed, otherwise install
if( !(package %in% installed.packages()[,1]) ) install.packages(package)
# load package
require(package,character.only=T,quietly=T)
}
# Title: reproject all rasters to common resolution
# Name: Zina Janssen
# Created: 20/09
## Description: this function takes a raster and its filename and reprojects the raster
# to a common projection
transformRaster <- function(rasterL,filename,proj="epsg:4326"){
# load the inner part as the raster
ras <- rasterL[[1]]
# reproject the raster to the required proj
ras<- terra::project(ras,proj)
# create the output filename from the input path
outfilename<-basename(filename)
outfilepath<-file.path("1_int_Reprojected",outfilename)
# save the projected raster
terra::writeRaster(ras,filename=outfilepath,overwrite=T)
}
rasterTransform <- function(num,foldNameStart,expert=F){
# load folder name depending on whether it is needed to look at expert or non expert files
if (expert){foldname <- paste0(foldNameStart,"Experts_tiles/")}
else {foldname <- paste0(foldNameStart,"Non_expert_tiles/")}
# make a list of all the rasters
listname <- paste0(foldname,as.character(num))
filelist <- list.files(listname,pattern = glob2rx("*.tif"),full.names = T)
# remove incorrect filenames from namelist
fileListNames <- filelist[!grepl(pattern = "/$",filelist)]
# read them as rast
allrasters <- lapply(fileListNames, rast)
# now transform and write all rasters; the list names match transformRaster's argument names
list(rasterL=allrasters,filename=fileListNames) %>%
purrr::pmap(transformRaster)}
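# minimal usage sketch (the tile root path is an assumption):
# lapply(1:14, rasterTransform, foldNameStart = "../../DW/1_in_DW/", expert = TRUE)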
# Title: extract CGLS points
# Name: Zina Janssen
# Created: 23/02
# Description: this function takes the buffered point data set of CGLS and samples new points
# from each buffer
extractPoint <- function(buffer,sampSize){
# sample points
bufferSamp <-spatSample(vect(buffer),sampSize,method="regular")
bufferDf <- as.data.frame(bufferSamp,geom="XY")
return(bufferDf)
}
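# minimal usage sketch (hypothetical single buffer; sampSize is an assumption):
# b <- st_sf(geometry = st_buffer(st_sfc(st_point(c(0, 0)), crs = 4326), dist = 100))
# extractPoint(b, sampSize = 10) # returns a data.frame with the sampled x/y coordinates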
# Title: select tiles based on lat and lon inputs
# Name: Zina Janssen
# Created: 20/09
## Description: this function takes a folder number, lists the tiles in that folder
# and keeps only those that fall within the aoi
fileClean <- function(num,researchArea,outputdir,foldname){
# get the name of the rasters
listname <- paste0(foldname,as.character(num))
# list all tile filenames in the folder so each can be checked
filelist <- list.files(file.path("../../DW/1_in_DW",listname),full.names = T)
# write the raster if it falls within aoi
lapply(filelist, writeFile,researchArea,outputdir=outputdir)
}
## Description: this function loads a raster from a file and writes it out if it lies in the right location
writeFile <- function(fil,researchArea,outputdir){
# load raster
raster <- rast(fil)
# get the filename
fileName <- colnames(raster[1])
if (crs(raster)!=crs(researchArea)){
raster <- terra::project(raster,crs(researchArea))
}
# check if raster is within, intersecting or touching the AOI. If neither, it is not needed
if (!relate(raster,researchArea,relation="disjoint")[1]){
writeRaster(raster,file.path(outputdir,paste0(fileName,".tif")),overwrite=T)}
}
# Title: sample from the rasters to create training data points
# Name: Zina Janssen
# Created: 20/09
## Description: this function takes a raster filename and samples points from the raster
# to create td points
propStratSample <- function(rname,sampleSize,outputfold){
# create a raster from the filename given
raster <-rast(rname)
# create output file name
outputFileName <- file.path(outputfold,paste0(colnames(raster[1]),".csv"))
# we need to determine the class distribution, so make a histogram and calculate
# the weights per class
histogram <- terra::hist(raster,plot=F)
# get the totals for all classes
totalClass<-sum(histogram$counts)
# determine the class weights by dividing by the total
weightedClass <-histogram$counts/totalClass
# use the breaks to change the values of a raster to NA
# a raster is transformed to na if it does not correspond with the current class
breaks <- histogram$breaks
weightList <- seq(from=1,to=length(weightedClass))
# run the strata selection and sampling per raster break
propStratS <- weightList %>%
purrr::map(.f=selectStrata,raster=raster,breaks=breaks,weightedClass=weightedClass,sampleSize=sampleSize)
# now combine the obtained points with class values
propStratS <- bind_rows(propStratS)
# round the values
propStratS$class <- round(propStratS$class)
# save a csv with the sample points
write_csv(propStratS,file=outputFileName)
}
selectStrata <- function(n,raster,breaks,weightedClass,sampleSize){
# select the parts of the raster that fall within the current class break
#raster[raster > breaks[n] & raster < breaks[n+1]] <- NA
# values=FALSE sets out-of-range cells to NA instead of clamping them to the bounds
reclassifiedRaster <- clamp(raster,lower = breaks[n],upper=breaks[n+1],values=FALSE)
# sample a number of points from this raster that corresponds to its class weights
# + 1 ensures that at least 1 sample is taken per class
size <- round(weightedClass[n]*sampleSize) + 1
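# e.g. weights c(0.7, 0.2, 0.1) with sampleSize = 10 give per-class sizes of 8, 3 and 2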
# this large sample size is used to ensure enough points are sampled
samp <- spatSample(reclassifiedRaster, size=3*size,method="regular",na.rm=T,as.points=T)
# make a dataframe with the coordinates per sample st it can be saved easily
values <- as.data.frame(samp,geom="XY")
# randomly subsample down to the target size; min() guards against having fewer points than requested
valuesSamp <- values[sample(1:nrow(values), min(size, nrow(values))), ]
names(valuesSamp) <- c("class","x","y")
return(valuesSamp)
}
# Title: make td loc maps
# Name: Zina Janssen
# Created: 10/03
# Description: this function takes td location points, plot title, output folder and output name
# and creates a map with the training point locations with the corresponding class label colors
# load the countries that will be the background for the maps
researchArea <- geodata::gadm("Spain", level = 0, path = tempdir())
portugal <- geodata::gadm("Portugal", level = 0, path = tempdir())
france <- geodata::gadm("France", level = 0, path = tempdir())
morocco <- geodata::gadm("Morocco", level = 0, path = tempdir())
algeria <- geodata::gadm("Algeria", level = 0, path = tempdir())
# clip Spain s.t. the overseas territories are not included
spainExt <-ext(-11,37,4,44)
spain <- crop(researchArea,spainExt)
# plot with grid
# set the colours and class names
makeTDLocMap <- function(input,outfold,outname,plottitle,bg=T,zoom=F,pointcex=0.1){
# there is a difference in the column names which might cause some trouble so make them uniform
if (!"LC1_label" %in% names(input)){
names(input)[which(names(input) == "LC1_lbl")] <- "LC1_label"
}
# set the color of the scalebar
scalebCol <- "black"
# set the colors for the lc classes as in the LC maps
pal <- c("#FF0000", "#dd1fff","#1d5f16","#eeaa18","#78ff39","#939393","#5bdde1","#259b6b")
# add the corresponding labels
classNames <- c("Artificial","Crops","Woodland","Shrubs","Grassland","Bare","Water","Wetlands")
# add a column to the input dataset for visualisation
# class labels 1-8 index directly into the palette
input$colour <- pal[input$LC1_label]
# make the plots
pdf(file=file.path(outfold,outname),family = "Times",width = 5.7,height = 4.5)
# add a background colour
# convert the main country into sf to have nice grid labels
plot(st_as_sf(spain)["COUNTRY"],main=plottitle,axes=T,bgc ="#232057",reset=F)
# add the countries for context
plot(portugal,col="#dfdfe7",add=T)
plot(france,col="#dfdfe7",add=T)
plot(algeria, col = "#dfdfe7", add = T)
plot(morocco,col="#dfdfe7",add=T)
plot(spain, col = "#fef5e4", add = T)
# add the td location points and corresponding class label colour
plot(input["LC1_label"],pch=19,cex=pointcex,col=input$colour,add=T)
grid(lwd=0.8)
# define the legend
legend(x="bottomright", # Coordinates (x also accepts keywords)
classNames, # Vector with the name of each group
col = pal, # Color of lines or symbols
fill = pal, # use the colors to create legend symbols
bg = rgb(1,1,1,alpha=0.75), # background color, bit transparent
border = "black", # Fill box border color
#pch=15, # Add pch symbols to legend lines or boxes
box.lwd = par("lwd"), # Legend box line width
box.lty = par("lty"), # Legend box line type
box.col = par("fg"), # Legend box line color
cex = 0.85, # Legend size
title = NULL # Legend title
)
sbar(d=100,xy=c(-3.5,35.5),col="white",lonlat = T,cex=1,label=c("","100 km",""),lwd=2,type="line")
# add the number of points
numOfPoints <- prettyNum(dim(input)[1], big.mark = ",",decimal.mark = ".")
text(-8,35.7,paste(numOfPoints,"training points"),col="white",cex=0.8)
dev.off()
if (zoom){
# this option allows a closer look at the local variation of the points
# make the plots
pdf(file=file.path(outfold,paste0("zoomed",outname)),family = "Times")
# add a background colour
# add an extent to zoom into
zoomExt <-ext(-3,-2,38,39)
zoom(spain,zoomExt,col = "bisque2")
# add the td location points and corresponding class label colour
plot(input["LC1_label"],pch=19,cex=3,col=input$colour,add=T)
sbar(d=0.2,xy="top",col="black",lonlat = T,cex=1,label=c("","200 m",""),lwd=4,type="line")
box()
dev.off()
}
}
# Title: create frequency table
# Name: Zina Janssen
# Created: 28/02
## Description: this function takes a vector of training data class labels, tabulates
# the class frequencies and their shares, and writes the table to a csv
classFreqTable <- function(TD,outputName){
# make a table of the input classes
classFrequencies <- table(TD)
# get the percentual distribution per class
percentages <- classFrequencies/sum(classFrequencies)
# add the percentages to the classes
classwithpercentages <- rbind(classFrequencies,percentages)
# save the table
write.table(classwithpercentages,outputName,sep=",")
}
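# e.g. classFreqTable(c(1, 1, 2), "freq.csv") writes counts 2 and 1 with shares of about 0.67 and 0.33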
# Title: make tree maps for frequency distribution
# Name: Zina Janssen
# Created: 17/03
# Description: these functions reshape class frequency tables and create treemaps
# this function makes the class frequencies suitable for plotting by changing the structure and adding labels
pivot <- function(df,name){
# get the class numbers to add as xlabels
class <- c("Built","Crop","Wood","Shrub","Grass","Bare","Water","Wetl.")
# get the percentages to fill in the bars
if (dim(df)[1] == 2){
percentages <- unlist(df[2,])
percentages <- percentages * 100
}
if (dim(df)[1] == 1){
percentages <- unlist(df[1:8])
}
# create data frame from the classes and add the percentages
dp <- data.frame(class)
dp$percentage<- round(percentages,2)
if (dim(df)[1] == 2){
dp$freq <-round(unlist(df[1,]))
}
# add the name of the dataset so it can be combined
dp$name <- name
return(dp)
}
# this function does the same for the area tables
pivot2 <- function(df,name){
# get the class numbers to add as xlabels
class <- c("Built","Crop","Wood","Shrub","Grass","Bare","Water","Wetl.")
# get the percentages to fill in the bars
if (dim(df)[1] == 2){
percentages <- unlist(df[2,][2:9])
}
# create data frame from the classes and add the percentages
dp <- data.frame(class)
dp$percentage<- round(percentages,2)
dp$freq <-round(unlist(df[1,][2:9]))
# add the name of the dataset so it can be combined
dp$name <- name
return(dp)
}
# this function creates the treemaps
MakeTreeMap <-function(df,name){
# create the treemap
plot<-ggplot(df, aes(area = freq, fill = class,label = paste(class, freq, sep = "\n"))) +
geom_treemap() +
geom_treemap_text(colour="white",
place="centre") +
ggtitle(paste(name, "class frequencies")) +
theme_grey(base_family = "Times") +
scale_fill_manual("Classes",values=c("#939393","#FF0000", "#dd1fff","#78ff39","#eeaa18","#5bdde1","#259b6b","#1d5f16"))
plot
ggsave(plot,file=file.path(outputfigures,paste0("FrequencyTreemap",name,".png")),dpi=200)
}