diff --git a/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R b/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R
index 884fce7276b92a27ceb51d3e30f6948cc6d18d7d..9a8095a69eae9a91cf13c8bb4796cdc7070eea2a 100644
--- a/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R
+++ b/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R
@@ -39,6 +39,9 @@ dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv",locale = locale(encoding =
 dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv",locale = locale(encoding = "LATIN2"))
 dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv",locale = locale(encoding = "LATIN2"))
 
+test <- read_csv("Test_distinct.txt")
+
+
 # Create a spatial SPAM file 
 geo_spam=dplyr::select(dat_TA_pr, x,y, cell5m, name_adm1, iso3) 
 # %>% 
@@ -62,23 +65,23 @@ Admin2 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_2_2020_
 Admin1Adj = st_read(here::here("Input_data","g2008_1_now", "g2008_1.shp"), options = "ENCODING=LATIN2")#WINDOWS-1252
 
 # Now the values from GGS that are still not matching the SPAM2010 adm1 values are recoded
-Admin1Ad2 = Admin1Adj %>% as_tibble() %>% 
+Admin1Adj2 = Admin1Adj %>% as_tibble() %>% 
   mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
                             NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi",
                          TRUE ~ NAME1)) %>%
   mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
                            NAME1 =="Region IX (Zamboanga Peninsula)" ~ "Region IX (Zamboanga Peninsula",
                          TRUE ~ NAME1)) %>% 
-  mutate(NAME1 = case_when(ADM0_CODE == 19 & #Azerbaijan
+  mutate(NAME1 = case_when(#ADM0_CODE == 19 & #Azerbaijan
                              NAME1 =="Administrative unit not available" ~ "Administrative unit not availa",
                            TRUE ~ NAME1)) %>% 
-  mutate(NAME1 = case_when(ADM0_CODE == 152 & #Malawi
+  mutate(NAME1 = case_when(#ADM0_CODE == 152 & #Malawi
                              NAME1 =="Area under National Administration" ~ "Area under National Administra",
                            TRUE ~ NAME1)) %>% 
   mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
                              NAME1 =="Cordillera Administrative region (CAR)" ~ "Cordillera Administrative regi",
                            TRUE ~ NAME1)) %>% 
-  mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
+  mutate(NAME1 = case_when(#ADM0_CODE == 196 & #Philipines
                              NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi",
                            TRUE ~ NAME1)) %>%
   mutate(NAME1 = case_when(ADM0_CODE == 108 & #Haiti
@@ -88,14 +91,10 @@ Admin1Ad2 = Admin1Adj %>% as_tibble() %>%
                              NAME1 =="Nor Oriental" ~ "Gracias A Dios",
                            TRUE ~ NAME1))
 
-# issue1: Some country like cote d ivoire are still wrongly written. Check out windows decoding. 
-# Issue2: Check out also where to combine the arable land. 
-
-# ENCODING=WINDOWS-1252
 # Define projection for GAULGADMSPAM
 proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
 proj_meter = "+datum=WGS84 +units=m +no_defs"
-Admin1 <- st_as_sf(x = Admin1Adj,                         
+AdminAdj2.2 <- st_as_sf(x = Admin1Adj2,                         
                       # coords = c("x", "y"),
                       crs = proj)
 
@@ -106,27 +105,35 @@ Admin1 <- st_as_sf(x = Admin1Adj,
 # The matching names are in columns Adm0_name, name1 and name2.# 
 # Columns which have names O_fips1, O_fips2, O_name1 and O_name2 refer to an “old” classification system at level 1 and level 2. Its entries differ from the columns without “O_” in some EU countries and a few others where the statistic units did not match the admin units. Please note that the shape files themselves were not modified, only the entries in the dbf tables. 
 
-# adm1 unit match check
-spam_admin_link = 
-  dat_TA_pr%>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
-  distinct_at(vars(name_adm1), .keep_all = T) #%>% 
-  full_join(., Admin1Adj, by=c("name_adm1"="NAME1"))
-
-adm1_distinct=Admin1Adj %>% as_tibble() %>% 
-  distinct_at(vars(NAME1), .keep_all = T)
-str(Admin1)
-
-adm1_distinct=as_tibble(Admin1) %>% 
-  distinct_at(vars(FIPS1))#, .keep_all = T)
-
-# adm2 units 
-adm2o_distinct=as_tibble(Admin1) %>% 
-  distinct_at(vars(O_NAME))#, .keep_all = T)
-
-adm2_distinct=as_tibble(Admin1) %>% 
-  distinct_at(vars(FIPS1))#, .keep_all = T)
-
-# Validation - Making sure SPAM2010 matches with admin Shapefiles 
+# # adm1 unit match check
+# spam_admin_link = dat_TA_pr%>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
+#   group_by(name_adm1, iso3) %>% 
+#   distinct_at(vars(name_adm1), .keep_all = T) %>% 
+#   full_join(., AdminAdj2.2, by=c("name_adm1"="NAME1"))
+# 
+# # I need to get the adm1 units unique per country otherwise generic terms will be merged (like no "administrative units") 
+# adm1_distinctAdj=AdminAdj2.2 %>% as_tibble() %>% 
+#   group_by(NAME1, ADM0_NAME) %>% 
+#   distinct_at(vars(NAME1), .keep_all = T)
+# # str(Admin1)
+# 
+# # test
+# test1=test %>% as_tibble() %>%
+#   group_by(alpha, beta) %>% 
+#   distinct_at(vars(beta), .keep_all = T)
+# str(Admin1)
+# 
+# adm1_distinct=as_tibble(Admin1) %>% 
+#   distinct_at(vars(FIPS1))#, .keep_all = T)
+# 
+# # adm2 units 
+# adm2o_distinct=as_tibble(Admin1) %>% 
+#   distinct_at(vars(O_NAME))#, .keep_all = T)
+# 
+# adm2_distinct=as_tibble(Admin1) %>% 
+#   distinct_at(vars(FIPS1))#, .keep_all = T)
+# 
+# # Validation - Making sure SPAM2010 matches with admin Shapefiles 
 
 # Adm0 - unique countries 
 # SPAM2010
@@ -169,31 +176,83 @@ join_SPAM_adm1 = st_join(Admin1, geo_spam1, join = st_intersects, largest = T)
 # 25 adm1 units needs to be added to the representative units. Otherwise, the total arable land will be off. TODO! 
 
 # Disinct GGS adjusted
-adm1_distinctAdj=Admin1Adj2 %>% as_tibble() %>% 
-  distinct_at(vars(NAME1))#, .keep_all = T)
+adm1_distinctAdj=AdminAdj2.2 %>% as_tibble() %>% 
+  group_by(NAME1, ADM0_NAME) %>% 
+  distinct_at(vars(NAME1), .keep_all = T)
 #2,741 rows
 
 # Distinct SPAM2010
 spam_admin_link = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
-  distinct_at(vars(name_adm1))#, .keep_all = T) %>% 
+  group_by(name_adm1, iso3) %>%
+  distinct_at(vars(name_adm1), .keep_all = T)  
 #2,493 (diff=248)
 
-
-# Left joining the SPAM2010 and GGS adjusted. THe result is not yet matching in terms of rows 
-FINAL_adm1_spam2010_GGS =  dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
-  distinct_at(vars(name_adm1), .keep_all = T) #%>% 
-  left_join(., adm1_distinctAdj, by=c("name_adm1" = "NAME1")) %>% 
-  dplyr::select(iso3, cell5m, name_adm1, ADM1_CODE, FIPS1, FIPS0) #%>% 
-   # drop_na()
-
-write_csv(FINAL_adm1_spam2010_GGS, here::here("Input_data", "test67.csv"))
-
-
-# Difference between "FINAL_adm1_spam2010_GGS"  (rows 2493 ) AND  "spam_admin_link" (rows 2468).
-# The difference of 25 rows is due to 8 adm1 district available in the SPAM2010 but not available in the GGS dataset.
-# Further, spelling errors could be a reason as well. 
-
-
+# Left joining the SPAM2010 and GGS adjusted.
+# This function's result is the final Shapefile that matches SPAM2010 adm1 
+# (-17 that are not in the GGS but will be merged into other adm1 units)
+FINAL_adm1_spam2010_GGS_distinct = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
+  group_by(name_adm1, iso3) %>%
+  distinct_at(vars(name_adm1), .keep_all = T) %>%
+  left_join(., AdminAdj2.2, by=c("name_adm1" = "NAME1")) %>% 
+  # dplyr::select(iso3, cell5m, name_adm1, ADM1_CODE, FIPS1, FIPS0) %>% 
+  drop_na(ADM1_CODE) %>% 
+  distinct_at(vars(name_adm1), .keep_all = T) 
+
+# Converting to shapefile
+ADM1_SPAM_GGS = st_as_sf(FINAL_adm1_spam2010_GGS_distinct)
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+ADM1_SPAM_GGS <- st_as_sf(x = ADM1_SPAM_GGS,                         
+                        # coords = c("x", "y"),
+                        crs = proj)
+
+# Writing Adm1 zone files tabular and spatial 
+write_csv(FINAL_adm1_spam2010_GGS, here::here("Input_data", "Core_data",  "ADM1_SPAM2010_GGS"))
+st_write(ADM1_SPAM_GGS, here::here("Input_data", "Core_data",  "ADM1_SPAM2010_GGS_geo.shp"), 
+         driver = "ESRI Shapefile")
+
+# Mind the LATIN2 encoding when reading in the ADM1_SPAM_GGS 
+ADM1_SPAM_GGS = st_read(here::here("Input_data", "Core_data",  "ADM1_SPAM2010_GGS_geo.shp"), options = "ENCODING=LATIN2")
+
+
+# Validation questions:  --------------------------------------------------
+# 1. What is the amount of adm1 units in the initial SPAM2010 data?
+# SPAM2010/Adm1: Without grouping per country  = 2,493 
+# SPAM2010/ADm1: With grouping per country     = 2,586  (-> Diff=93)
+
+# 2. How many adm1 units are not represented in SPAm2010 but are in GGS? 
+# with drop na: 2569 >> The amount of NAs in a left-join indicates which values in SPAM2010 are not available in GGS.
+# without drop na: 5869 # Difference between so 2869-2569=300 units are not in SPAM that would actually be in GGS. 
+# The number seems very high but I saw also that these reperesent many tiny islands around the world. So we just
+# work with the assumption that SPAM2010 considered which adm1 units were important for crop growth and which not. 
+
+# 3. How many units are there in the SPAM2010 but not in GGS? 
+# 2586 (without drop_na) >> this indicates the df after left_join includin the na's indicating the adm1 units in SPAM but not in GGS
+# 2569(with drop na) -> Difference between na and no-na is 17 -> These are the values that are in SPAM but 
+# 17 adm1 units are that are in SPAM2010 are not in GGS! 
+
+# 4. What are the adm1 units that need merging? 
+# From the set of adm1 nits that are in SPAM but not in GGS we need to filter those who are combined together.
+# Only then we can sum the correct area (both physical and harvested). 
+
+# iso3	name_adm1	                  Merged with
+# GBR	  North East (UK)	            East Midlands (UK)
+# GBR	  North West (UK)	            East Midlands (UK)
+# GBR	  Yorkshire and The Humber	  East Midlands (UK)
+# GBR	  West Midlands (UK)	        East Midlands (UK)
+# GBR	  East of England	            East Midlands (UK)
+# GBR	  South East (UK)	            East Midlands (UK)
+# GBR	  South West (UK)	            East Midlands (UK)
+# GBR	  London	                    East Midlands (UK)
+# CZE	  StrednÃ¬ Morava	            Strední Cechy
+# MKD	  Kriva Palanka	              Kumanova
+# HND	  Nor Oriental	              Gracias A Dios
+# ECU	  Nororiente	                Sucumbios
+# MWI	  Mzuzu	                      Karonga
+# MWI	  Salima	                    Kasungu
+# MWI	  Lilongwe	                  Kasungu
+# MWI	  Blantyre	                  Machinga
+# MWI	  Shire valley	              Machinga
 
 # With aggregation long -----------------------------------------------------
 ChangeNames <- function(x) { ## Renaming function
diff --git a/SPAM_Data_Transformation/SPAM_GSS_ADM1_ZONES.R b/SPAM_Data_Transformation/SPAM_GSS_ADM1_ZONES.R
new file mode 100644
index 0000000000000000000000000000000000000000..92ad8b49c25cba280591a1a1ed9b10ef82cd1676
--- /dev/null
+++ b/SPAM_Data_Transformation/SPAM_GSS_ADM1_ZONES.R
@@ -0,0 +1,255 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr",
+                         "readxl", "foreign", "data.table", "rbenchmark", "benchmarkme",
+                         "validate", "plyr"))
+
+# Set working directory
+setwd(here::here("Input_data"))
+
+# Load data ----------------------------------------------------------------
+# Yields ------------------------------------------------------------------
+# dat_TA_yld <- read.csv("spam2010V2r0_global_Y_TA.csv", locale = locale(encoding = "LATIN2"))# Yields - all technologies together, ie complete crop - Data downloaded on 1. of September 2020
+# dat_TI_yld <- read_csv("spam2010V2r0_global_Y_TI.csv", locale = locale(encoding = "LATIN2"))# Yields - Iirrigated portion of crop
+# dat_TH_yld <- read_csv("spam2010V2r0_global_Y_TH.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed high inputs portion of crop
+# dat_TL_yld <- read_csv("spam2010V2r0_global_Y_TL.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed low inputs portion of crop
+# dat_TS_yld <- read_csv("spam2010V2r0_global_Y_TS.csv",locale = locale(encoding = "LATIN2"))# Yields - rainfed subsistence portion of crop
+# dat_TR_yld <- read_csv("spam2010V2r0_global_Y_TR.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS)
+
+# Production
+dat_TA_pr <- read_csv("spam2010V2r0_global_P_TA.csv",locale = locale(encoding = "LATIN2")) # LATIN2 encoding is used for proper display of nordic and slavic names
+# dat_TI_pr <- read_csv("spam2010V2r0_global_P_TI.csv",locale = locale(encoding = "LATIN2"))
+# dat_TH_pr <- read_csv("spam2010V2r0_global_P_TH.csv",locale = locale(encoding = "LATIN2"))
+# dat_TL_pr <- read_csv("spam2010V2r0_global_P_TL.csv",locale = locale(encoding = "LATIN2"))
+# dat_TS_pr <- read_csv("spam2010V2r0_global_P_TS.csv",locale = locale(encoding = "LATIN2"))
+# dat_TR_pr <- read_csv("spam2010V2r0_global_P_TR.csv",locale = locale(encoding = "LATIN2"))
+# 
+# # Physical area--------------------------------
+# dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv",locale = locale(encoding = "LATIN2"))
+# dat_TI_pa <- read_csv("spam2010V2r0_global_A_TI.csv",locale = locale(encoding = "LATIN2"))
+# dat_TH_pa <- read_csv("spam2010V2r0_global_A_TH.csv",locale = locale(encoding = "LATIN2"))
+# dat_TL_pa <- read_csv("spam2010V2r0_global_A_TL.csv",locale = locale(encoding = "LATIN2"))
+# dat_TS_pa <- read_csv("spam2010V2r0_global_A_TS.csv",locale = locale(encoding = "LATIN2"))
+# dat_TR_pa <- read_csv("spam2010V2r0_global_A_TR.csv",locale = locale(encoding = "LATIN2"))
+# 
+# # Harvesting area----------------------------------
+# dat_TA_ha <- read_csv("spam2010V2r0_global_H_TA.csv",locale = locale(encoding = "LATIN2"))
+# dat_TI_ha <- read_csv("spam2010V2r0_global_H_TI.csv",locale = locale(encoding = "LATIN2"))
+# dat_TH_ha <- read_csv("spam2010V2r0_global_H_TH.csv",locale = locale(encoding = "LATIN2"))
+# dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv",locale = locale(encoding = "LATIN2"))
+# dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv",locale = locale(encoding = "LATIN2"))
+# dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv",locale = locale(encoding = "LATIN2"))
+
+test <- read_csv("Test_distinct.txt")
+
+
+# Create a spatial SPAM file 
+# geo_spam=dplyr::select(dat_TA_pr, x,y, cell5m, name_adm1, iso3) 
+# %>% 
+# distinct_at(vars(name_adm1))
+
+# Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+geo_spam1 <- st_as_sf(x = geo_spam,                         
+                      coords = c("x", "y"),
+                      crs = proj)
+
+# Load administrative unit shapefile which matches the SPAM adm1 zones
+# Admin0 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_0_now","g2008_0.shp"))
+# Admin1 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_1_now","g2008_1.shp")) #this is what we need
+# Admin2 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_2_2020_11_02","g2008_2.shp")) #this is what we need
+
+# Here I manually adjusted the NAME1 values to the once used in SPAM2010. see Documentation for more. 
+Admin1Adj = st_read(here::here("Input_data","g2008_1_now", "g2008_1.shp"), options = "ENCODING=LATIN2")#WINDOWS-1252
+
+# Now the values from GGS that are still not matching the SPAM2010 adm1 values are recoded
+Admin1Adj2 = Admin1Adj %>% as_tibble() %>% 
+  mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
+                             NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi",
+                           TRUE ~ NAME1)) %>%
+  mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
+                             NAME1 =="Region IX (Zamboanga Peninsula)" ~ "Region IX (Zamboanga Peninsula",
+                           TRUE ~ NAME1)) %>% 
+  mutate(NAME1 = case_when(#ADM0_CODE == 19 & #Azerbaijan
+    NAME1 =="Administrative unit not available" ~ "Administrative unit not availa",
+    TRUE ~ NAME1)) %>% 
+  mutate(NAME1 = case_when(#ADM0_CODE == 152 & #Malawi
+    NAME1 =="Area under National Administration" ~ "Area under National Administra",
+    TRUE ~ NAME1)) %>% 
+  mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
+                             NAME1 =="Cordillera Administrative region (CAR)" ~ "Cordillera Administrative regi",
+                           TRUE ~ NAME1)) %>% 
+  mutate(NAME1 = case_when(#ADM0_CODE == 196 & #Philipines
+    NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi",
+    TRUE ~ NAME1)) %>%
+  mutate(NAME1 = case_when(ADM0_CODE == 108 & #Haiti
+                             NAME1 =="NOROESTE" ~ "Nord-Est",
+                           TRUE ~ NAME1)) %>% 
+  mutate(NAME1 = case_when(ADM0_CODE == 111 & # Honduras
+                             NAME1 =="Nor Oriental" ~ "Gracias A Dios",
+                           TRUE ~ NAME1))
+
+# Define projection for GAULGADMSPAM
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+AdminAdj2.2 <- st_as_sf(x = Admin1Adj2,                         
+                        # coords = c("x", "y"),
+                        crs = proj)
+
+# Joining SPAM df and GAUL-GADM-SPAM --------------------------------------
+
+# Ulrike's comment: Shape files for GAUL-GADM-SPAM
+# Please note that the columns fips0, fips1 and fips2 are the ones which have the codes for admin level 0 (country), level 1 and level 2 which can be linked to SPAM results.
+# The matching names are in columns Adm0_name, name1 and name2.# 
+# Columns which have names O_fips1, O_fips2, O_name1 and O_name2 refer to an “old” classification system at level 1 and level 2. Its entries differ from the columns without “O_” in some EU countries and a few others where the statistic units did not match the admin units. Please note that the shape files themselves were not modified, only the entries in the dbf tables. 
+
+# # adm1 unit match check
+# spam_admin_link = dat_TA_pr%>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
+#   group_by(name_adm1, iso3) %>% 
+#   distinct_at(vars(name_adm1), .keep_all = T) %>% 
+#   full_join(., AdminAdj2.2, by=c("name_adm1"="NAME1"))
+# 
+# # I need to get the adm1 units unique per country otherwise generic terms will be merged (like no "administrative units") 
+# adm1_distinctAdj=AdminAdj2.2 %>% as_tibble() %>% 
+#   group_by(NAME1, ADM0_NAME) %>% 
+#   distinct_at(vars(NAME1), .keep_all = T)
+# # str(Admin1)
+# 
+# # test
+# test1=test %>% as_tibble() %>%
+#   group_by(alpha, beta) %>% 
+#   distinct_at(vars(beta), .keep_all = T)
+# str(Admin1)
+# 
+# adm1_distinct=as_tibble(Admin1) %>% 
+#   distinct_at(vars(FIPS1))#, .keep_all = T)
+# 
+# # adm2 units 
+# adm2o_distinct=as_tibble(Admin1) %>% 
+#   distinct_at(vars(O_NAME))#, .keep_all = T)
+# 
+# adm2_distinct=as_tibble(Admin1) %>% 
+#   distinct_at(vars(FIPS1))#, .keep_all = T)
+# 
+# # Validation - Making sure SPAM2010 matches with admin Shapefiles 
+
+# Adm0 - unique countries 
+# SPAM2010
+# 201 (name_cntr)
+# 201 (iso3)
+
+# ADM1 GAUL_SPAM_GADM
+# > 275 (ADM0_NAME)
+# > 250 (FIPS0)
+# 
+
+# ADM1
+# 1. Same amount of rows when distinct adm1 names? 
+
+# SPAM2010 Admin1 units
+# 2493 (name_adm1)
+
+# ADM1 GAUL_SPAM_GADM
+# 3230 (NAME1)
+# 3079 (O_NAME1
+# 2898 (FIPS1)
+
+# ADM2 GAUL_SPAM_GADM
+#  3080 (O_NAME1)
+#  2758 (NAME1)
+#  3222 (O_FIPS1)
+#  2898 (FIPS1)
+
+
+# How to deal with inconsistency? -----------------------------------------
+# 1. Spatial join 
+# 2. Fulljoin > detect NA's then check what is different
+
+# Spatial join 
+join_SPAM_adm1 = st_join(Admin1, geo_spam1, join = st_intersects, largest = T)
+
+# the final adm1 units are calculated by first full join and then by removing the NAs (=all variables that are not
+# in the SPAM2010 and some that are not represented in the GGS (no shapes available for England subunits for example)).
+# Finally, we have now 2468 adm1 units globally (of initially  2493 adm1 units; Diff: 25 adm1 units). The area of these
+# 25 adm1 units needs to be added to the representative units. Otherwise, the total arable land will be off. TODO! 
+
+# Disinct GGS adjusted
+adm1_distinctAdj=AdminAdj2.2 %>% as_tibble() %>% 
+  group_by(NAME1, ADM0_NAME) %>% 
+  distinct_at(vars(NAME1), .keep_all = T)
+#2,741 rows
+
+# Distinct SPAM2010
+spam_admin_link = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
+  group_by(name_adm1, iso3) %>%
+  distinct_at(vars(name_adm1), .keep_all = T)  
+#2,493 (diff=248)
+
+# Left joining the SPAM2010 and GGS adjusted.
+# This function's result is the final Shapefile that matches SPAM2010 adm1 
+# (-17 that are not in the GGS but will be merged into other adm1 units)
+FINAL_adm1_spam2010_GGS_distinct = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
+  group_by(name_adm1, iso3) %>%
+  distinct_at(vars(name_adm1), .keep_all = T) %>%
+  left_join(., AdminAdj2.2, by=c("name_adm1" = "NAME1")) %>% 
+  # dplyr::select(iso3, cell5m, name_adm1, ADM1_CODE, FIPS1, FIPS0) %>% 
+  drop_na(ADM1_CODE) %>% 
+  distinct_at(vars(name_adm1), .keep_all = T) 
+
+# Converting to shapefile
+ADM1_SPAM_GGS = st_as_sf(FINAL_adm1_spam2010_GGS_distinct)
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+ADM1_SPAM_GGS <- st_as_sf(x = ADM1_SPAM_GGS,                         
+                          # coords = c("x", "y"),
+                          crs = proj)
+
+# Writing Adm1 zone files tabular and spatial 
+write_csv(FINAL_adm1_spam2010_GGS, here::here("Input_data", "Core_data",  "ADM1_SPAM2010_GGS"))
+st_write(ADM1_SPAM_GGS, here::here("Input_data", "Core_data",  "ADM1_SPAM2010_GGS_geo.shp"), 
+         driver = "ESRI Shapefile")
+
+# Mind the LATIN2 encoding when reading in the ADM1_SPAM_GGS 
+ADM1_SPAM_GGS = st_read(here::here("Input_data", "Core_data",  "ADM1_SPAM2010_GGS_geo.shp"), options = "ENCODING=LATIN2")
+
+
+# Validation questions:  --------------------------------------------------
+# 1. What is the amount of adm1 units in the initial SPAM2010 data?
+# SPAM2010/Adm1: Without grouping per country  = 2,493 
+# SPAM2010/ADm1: With grouping per country     = 2,586  (-> Diff=93)
+
+# 2. How many adm1 units are not represented in SPAm2010 but are in GGS? 
+# with drop na: 2569 >> The amount of NAs in a left-join indicates which values in SPAM2010 are not available in GGS.
+# without drop na: 5869 # Difference between so 2869-2569=300 units are not in SPAM that would actually be in GGS. 
+# The number seems very high but I saw also that these reperesent many tiny islands around the world. So we just
+# work with the assumption that SPAM2010 considered which adm1 units were important for crop growth and which not. 
+
+# 3. How many units are there in the SPAM2010 but not in GGS? 
+# 2586 (without drop_na) >> this indicates the df after left_join includin the na's indicating the adm1 units in SPAM but not in GGS
+# 2569(with drop na) -> Difference between na and no-na is 17 -> These are the values that are in SPAM but 
+# 17 adm1 units are that are in SPAM2010 are not in GGS! 
+
+# 4. What are the adm1 units that need merging? 
+# From the set of adm1 nits that are in SPAM but not in GGS we need to filter those who are combined together.
+# Only then we can sum the correct area (both physical and harvested). 
+
+# iso3	name_adm1	                  Merged with
+# GBR	  North East (UK)	            East Midlands (UK)
+# GBR	  North West (UK)	            East Midlands (UK)
+# GBR	  Yorkshire and The Humber	  East Midlands (UK)
+# GBR	  West Midlands (UK)	        East Midlands (UK)
+# GBR	  East of England	            East Midlands (UK)
+# GBR	  South East (UK)	            East Midlands (UK)
+# GBR	  South West (UK)	            East Midlands (UK)
+# GBR	  London	                    East Midlands (UK)
+# CZE	  StrednÃ¬ Morava	            Strední Cechy
+# MKD	  Kriva Palanka	              Kumanova
+# HND	  Nor Oriental	              Gracias A Dios
+# ECU	  Nororiente	                Sucumbios
+# MWI	  Mzuzu	                      Karonga
+# MWI	  Salima	                    Kasungu
+# MWI	  Lilongwe	                  Kasungu
+# MWI	  Blantyre	                  Machinga
+# MWI	  Shire valley	              Machinga
\ No newline at end of file
diff --git a/Zones/Zones.R b/Zones/Zones.R
index 6c04a52c0986c7886f06d90d7547837c6a76f52c..153a84c07acd5668234186ad99b4b65f799a14d5 100644
--- a/Zones/Zones.R
+++ b/Zones/Zones.R
@@ -27,3 +27,21 @@ SPAM_GADM_JOIN_GIS = st_join(dat_cent, shape, join = st_within) #https://postgis
 head(SPAM_GADM_JOIN_GIS)
 class(SPAM_GADM_Join)
 
+# Importing Zones with GAEZ from Spam -------------------------------------
+
+zones=read_sf(here("Input_data","ARC_Pro_Input", "Zones_gaul2008_SpamID_AEZcode.shp"))
+projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+zone1 <- st_as_sf(x = zones,                         
+                     coords = c("x", "y"),
+                     crs = projcrs)
+
+zone2=zone1 %>% dplyr::select(AREA, G2008_1_ID, ADM1_CODE, ADM0_NAME, 
+                              ADM1_NAME, CONTINENT, REGION, AEZ, iso3, cell5m, 
+                              alloc_key, name_cntr, name_adm1, name_adm1, 
+                              name_adm2, Shape_Area, geometry) %>%
+  group_by(ADM0_NAME, AEZ) %>% 
+  summarize()
+
+plot(zone2)
+
+