# Clear workspace: remove every object, including hidden (dot-prefixed) ones,
# so the script starts from an empty environment.
# NOTE(review): this also deletes anything created outside this script, so
# every object used further down (e.g. `dat_tbl`) has to be created by the
# script itself -- confirm that is the case before running top to bottom.
rm(list = ls(all.names = TRUE))
# gc()

library(foreign)
library(tibble)
library(dplyr)
library(plyr)
library(data.table)

# Load data ----------------------------------------------------------------

# SPAM 2010 yield tables: one CSV per technology code in the file name
# (TA, TI, TH, TL, TS, TR). All six files sit in the same folder, so the
# folder is named once here instead of being repeated before every read.
# Despite its ".csv" suffix this path is used as a directory.
yld_dir <- file.path(
  "C:/Users/molch/OneDrive - Wageningen University & Research",
  "PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010",
  "spam2010v2r0_global_yield.csv"
)
# Kept from the original script so the working directory is unchanged for any
# later code that relies on relative paths; the reads below no longer need it.
setwd(yld_dir)

# Yields - all technologies together, i.e. complete crop
dat_TA_yld <- read.csv(file.path(yld_dir, "spam2010V2r0_global_Y_TA.csv"))
dat_tbl_TA_yld <- as_tibble(dat_TA_yld)
# head(dat_tbl_TA_yld)

# Yields - irrigated portion of crop
dat_TI_yld <- read.csv(file.path(yld_dir, "spam2010V2r0_global_Y_TI.csv"))
dat_tbl_TI_yld <- as_tibble(dat_TI_yld)

# Yields - rainfed high inputs portion of crop
dat_TH_yld <- read.csv(file.path(yld_dir, "spam2010V2r0_global_Y_TH.csv"))
dat_tbl_TH_yld <- as_tibble(dat_TH_yld)

# Yields - rainfed low inputs portion of crop
dat_TL_yld <- read.csv(file.path(yld_dir, "spam2010V2r0_global_Y_TL.csv"))
dat_tbl_TL_yld <- as_tibble(dat_TL_yld)

# Yields - rainfed subsistence portion of crop
dat_TS_yld <- read.csv(file.path(yld_dir, "spam2010V2r0_global_Y_TS.csv"))
dat_tbl_TS_yld <- as_tibble(dat_TS_yld)

# Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS)
dat_TR_yld <- read.csv(file.path(yld_dir, "spam2010V2r0_global_Y_TR.csv"))
dat_tbl_TR_yld <- as_tibble(dat_TR_yld)



# --------------------------------
# SPAM 2010 physical-area tables: one CSV per technology code in the file
# name (TA, TI, TH, TL, TS, TR). All six files sit in the same folder, so the
# folder is named once here instead of being repeated before every read.
# Despite its ".csv" suffix this path is used as a directory.
pa_dir <- file.path(
  "C:/Users/molch/OneDrive - Wageningen University & Research",
  "PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010",
  "spam2010v2r0_global_phys_area.csv"
)
# Kept from the original script so the working directory is unchanged for any
# later code that relies on relative paths; the reads below no longer need it.
setwd(pa_dir)

# Physical area - all technologies together, i.e. complete crop
dat_TA_pa <- read.csv(file.path(pa_dir, "spam2010V2r0_global_A_TA.csv"))
dat_tbl_TA_pa <- as_tibble(dat_TA_pa)
# head(dat_tbl_TA_pa)

# Physical area - irrigated portion of crop
dat_TI_pa <- read.csv(file.path(pa_dir, "spam2010V2r0_global_A_TI.csv"))
dat_tbl_TI_pa <- as_tibble(dat_TI_pa)

# Physical area - rainfed high inputs portion of crop
dat_TH_pa <- read.csv(file.path(pa_dir, "spam2010V2r0_global_A_TH.csv"))
dat_tbl_TH_pa <- as_tibble(dat_TH_pa)

# Physical area - rainfed low inputs portion of crop
dat_TL_pa <- read.csv(file.path(pa_dir, "spam2010V2r0_global_A_TL.csv"))
dat_tbl_TL_pa <- as_tibble(dat_TL_pa)

# Physical area - rainfed subsistence portion of crop
dat_TS_pa <- read.csv(file.path(pa_dir, "spam2010V2r0_global_A_TS.csv"))
dat_tbl_TS_pa <- as_tibble(dat_TS_pa)

# Physical area - rainfed portion of crop (= TA - TI, or TH + TL + TS)
dat_TR_pa <- read.csv(file.path(pa_dir, "spam2010V2r0_global_A_TR.csv"))
dat_tbl_TR_pa <- as_tibble(dat_TR_pa)



# ----------------------------------
# SPAM 2010 harvested-area tables: one CSV per technology code in the file
# name (TA, TI, TH, TL, TS). All files sit in the same folder, so the folder
# is named once here instead of being repeated before every read.
# Despite its ".csv" suffix this path is used as a directory.
ha_dir <- file.path(
  "C:/Users/molch/OneDrive - Wageningen University & Research",
  "PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010",
  "spam2010v2r0_global_harv_area2.csv"
)
# Kept from the original script: later relative paths (for example a file
# written without a directory) resolve against this working directory.
setwd(ha_dir)

# Harvested area - all technologies together, i.e. complete crop
dat_TA_ha <- read.csv(file.path(ha_dir, "spam2010V2r0_global_H_TA.csv"))
dat_tbl_TA_ha <- as_tibble(dat_TA_ha)
# head(dat_tbl_TA_ha)

# Harvested area - irrigated portion of crop
dat_TI_ha <- read.csv(file.path(ha_dir, "spam2010V2r0_global_H_TI.csv"))
dat_tbl_TI_ha <- as_tibble(dat_TI_ha)

# Harvested area - rainfed high inputs portion of crop
dat_TH_ha <- read.csv(file.path(ha_dir, "spam2010V2r0_global_H_TH.csv"))
dat_tbl_TH_ha <- as_tibble(dat_TH_ha)

# Harvested area - rainfed low inputs portion of crop
dat_TL_ha <- read.csv(file.path(ha_dir, "spam2010V2r0_global_H_TL.csv"))
dat_tbl_TL_ha <- as_tibble(dat_TL_ha)

# Harvested area - rainfed subsistence portion of crop
dat_TS_ha <- read.csv(file.path(ha_dir, "spam2010V2r0_global_H_TS.csv"))
dat_tbl_TS_ha <- as_tibble(dat_TS_ha)

# Harvested area - rainfed portion of crop (= TA - TI, or TH + TL + TS)
# Disabled in the original script; note it points at a different folder
# name ("harv_area.csv", without the "2").
# setwd('C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010/spam2010v2r0_global_harv_area.csv')
# dat_TR_ha <- read.csv("spam2010V2r0_global_H_TR.csv")
# dat_tbl_TR_ha <- as_tibble(dat_TR_ha)

# --------------------------------

# Checking data
# `dat_tbl` is not created anywhere above and the workspace is wiped at the
# top of the script, so a fresh run used to stop here with
# "object 'dat_tbl' not found". Only fill it in when it is missing, so an
# interactively prepared `dat_tbl` is left untouched.
# NOTE(review): assumes the intended table is the all-technology (TA) yield
# table, with column names upper-cased because the code below addresses
# NAME_CNTR / NAME_ADM1 / NAME_ADM2 / ALLOC_KEY while the CSV-derived tables
# use lower-case names -- confirm this is the right source.
if (!exists("dat_tbl", inherits = FALSE)) {
  dat_tbl <- dat_tbl_TA_yld
  names(dat_tbl) <- toupper(names(dat_tbl))
}

# Inspect the type and first values of the three admin-name columns.
str(dat_tbl$NAME_CNTR)
str(dat_tbl$NAME_ADM1)
str(dat_tbl$NAME_ADM2)



# Compute means for yields for admin borders -----------------------------------------
# For every crop column, append one column per administrative level holding
# the group mean of that crop, repeated on every row of the group (ave()
# returns a vector as long as its input).
#
# Fix: the previous version built the new column name from `c[r]`, where `c`
# was a length-one vector indexed out of range, so the suffix evaluated to NA.
# Every iteration wrote to the same "<prefix>NA" column and only the last
# crop's means survived. Names are now taken directly from the crop columns,
# and the three copy-pasted loops are driven by one lookup table.

# Column positions of the crops: 10 = Wheat to 51 = Rest
# (to check: colnames(dat_tbl[50]))
x <- 10:51
# Captured once, before any mean column is appended to the table.
crop_names <- names(dat_tbl)[x]

# New-column prefix -> grouping column, in the order the means are added.
admin_levels <- c(
  CNTRYmean = "NAME_CNTR", # countries
  ADM2mean = "NAME_ADM2", # subnational level 2
  ADM1mean = "NAME_ADM1" # subnational level 1
)
# NOTE(review): each level is grouped on its name column alone, so units that
# share a name (e.g. in different countries) are pooled -- confirm intended.

for (prefix in names(admin_levels)) {
  groups <- dat_tbl[[admin_levels[[prefix]]]]
  for (crop in crop_names) {
    # ave() defaults to FUN = mean, without NA removal.
    dat_tbl[[paste0(prefix, crop)]] <- ave(dat_tbl[[crop]], groups)
  }
}

# Removing duplicates based on admin borders ------------------------------
# One row per distinct value of the given name column (the first occurrence),
# with every other column retained via .keep_all = TRUE.
# NOTE(review): de-duplication uses the name column alone, so equal names in
# different parent units collapse to a single row -- confirm intended.
dat_uc <- distinct(dat_tbl, NAME_CNTR, .keep_all = TRUE)
dat_ad1 <- distinct(dat_tbl, NAME_ADM1, .keep_all = TRUE)
dat_ad2 <- distinct(dat_tbl, NAME_ADM2, .keep_all = TRUE)

# Export the complete table as a plain data.frame to a DBF file; the relative
# file name resolves against the current working directory.
# NOTE(review): DBF field names are short (commonly capped at about 10
# characters) -- verify that the long mean-column names survive the export.
df_all <- as.data.frame(dat_tbl)
write.dbf(df_all, "df_all.dbf")


# Adding the arable land (al)  -----------------------------------------------
# Loads the all-technology physical-area table (A_TA) and joins it to
# `dat_tbl` on the allocation key.
al <- file.path(
  "C:/Users/molch/OneDrive - Wageningen University & Research",
  "PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010",
  "spam2010v2r0_global_phys_area.csv"
)
setwd(al)

# This is the same file that the loading section reads into `dat_TA_pa`;
# reuse that copy when it is available instead of parsing the CSV a second
# time, and fall back to reading it when this section is run on its own.
dat_dbval <- if (exists("dat_TA_pa", inherits = FALSE)) {
  dat_TA_pa
} else {
  read.csv(file.path(al, "spam2010V2r0_global_A_TA.csv"))
}
# dat_al <- rio::import('spam2010V2r0_global_A_TA.DBF')
dat_tblal <- as_tibble(dat_dbval)
head(dat_tblal)

# Checking data
str(dat_tblal$name_cntr)
str(dat_tblal$name_adm1)
str(dat_tblal$name_adm2)

# Change unique identifier: rename the key to the upper-case spelling used
# as the join column below. dplyr:: is spelled out because plyr is attached
# after dplyr and also exports rename().
dat_aloc <- dplyr::rename(dat_tblal, ALLOC_KEY = alloc_key)
head(dat_aloc)

# Joining dfs: full join keeps rows from both tables, matched on ALLOC_KEY.
Join_ha_yld <- full_join(dat_aloc, dat_tbl, by = "ALLOC_KEY")

# TODO: Check data for Fertilizer, Rotat, NP modelling. How to link it with this data


# ugb --------------------------------------------------------------------
# Full join of dat_FAOsoil_tbl and dat_isr_tibSUM, matched on SNUM; rows from
# both tables are kept.
# NOTE(review): neither input is created in the visible part of this script
# -- confirm where they are loaded.
Join_FAO_ISRIC <- dat_FAOsoil_tbl %>%
  full_join(dat_isr_tibSUM, by = "SNUM")
