From 8b2c29ff69034bde8b392e48028af9017aa2397f Mon Sep 17 00:00:00 2001 From: "Languillaume, Antoine" <antoine.languillaume@wur.nl> Date: Wed, 1 Jul 2020 18:20:01 +0200 Subject: [PATCH] Refactor metadata extraction script --- data/raw/Meststof proef WUR.csv | 182 ++++++------ data/raw/foo.xlsx | Bin 0 -> 9326 bytes data/raw/muxu.xls | Bin 0 -> 27648 bytes scripts/0_build_project.R | 2 +- scripts/1_extract_metadata.R | 120 ++++++++ .../{1_data_cleaning.R => 2_data_cleaning.R} | 3 +- scripts/{2_analysis.R => 3_analysis.R} | 6 +- scripts/Master_Script.R | 271 ------------------ scripts/RawDataConvertScript.R | 222 -------------- scripts/extract_metadata_prev.R | 183 ++++++++++++ scripts/{MiRAE_funcs.R => funcs.R} | 0 11 files changed, 400 insertions(+), 589 deletions(-) create mode 100644 data/raw/foo.xlsx create mode 100644 data/raw/muxu.xls create mode 100644 scripts/1_extract_metadata.R rename scripts/{1_data_cleaning.R => 2_data_cleaning.R} (95%) rename scripts/{2_analysis.R => 3_analysis.R} (96%) delete mode 100644 scripts/Master_Script.R delete mode 100644 scripts/RawDataConvertScript.R create mode 100644 scripts/extract_metadata_prev.R rename scripts/{MiRAE_funcs.R => funcs.R} (100%) diff --git a/data/raw/Meststof proef WUR.csv b/data/raw/Meststof proef WUR.csv index f694036..76dba7a 100644 --- a/data/raw/Meststof proef WUR.csv +++ b/data/raw/Meststof proef WUR.csv @@ -1,91 +1,91 @@ -ID,Boederij,Type Meststof,Opbrengst Mais$kg_ha -1,de Jong,SuperDash,83 -2,de Jong,SuperDash,96 -3,de Jong,SuperDash,-99 -4,de Jong,SuperDash,73 -5,de Jong,SuperDash,84 -6,de Jong,SuperDash,82 -7,de Jong,SuperDash,88 -8,de Jong,SuperDash,80 -9,de Jong,SuperDash,74 -10,de Jong,SuperDash,83 -11,de Jong,Miracle,101 -12,de jong,Miracle,103 -13,de Jong,Miracle,108 -14,de Jong,Miracle,98 -15,de Jong,Miracle,98 -16,de Jong,Miracle,109 -17,de Jong,Miracle,121 -18,de Jong,Miracle,112 -19,de Jong,Miracle,109 -20,de Jong,Miracle,101 -21,de Jong,Efficiëntie,132 -22,de Jong,Efficiëntie,120 -23,de Jong,Efficiëntie,136 -24,de Jong,Efficiëntie,135 -25,de Jong,Efficiëntie,126 -26,de Jong,Efficiëntie,134 -27,de Jong,Efficiëntie,137 -28,de Jong,Efficiëntie,150 -29,de Jong,Efficiëntie,128 -30,de jong,Efficiëntie,125 -31,de Kerk,SuperDash,86 -32,de Kerk,SuperDash,96 -33,de Kerk,SuperDash,80 -34,de Kerk,SuperDash,92 -35,de Kerk,SuperDash,99 -36,de Kerk,SuperDash,92 -37,de Kerk,SuperDash,90 -38,de Kerk,SuperDash,87 -39,de Kerk,SuperDash,84 -40,de Kerk,SuperDash,95 -41,de Kerk,Miracle,127 -42,de Kerk,Miracle,102 -43,de Kerk,Miracle,107 -44,de Kerk,Miracle,99 -45,de Kerk,Miracle,/ -46,de Kerk,Miracle,107 -47,de Kerk,Miracle,104 -48,de Kerk,Miracle,100 -49,de Kerk,Miracle,100 -50,de Kerk,Miracle,110 -51,de Kerk,Efficiëntie,125 -52,de Kerk,Efficiëntie,126 -53,de Kerk,Efficiëntie,120 -54,de Kerk,Efficiëntie,129 -55,de Kerk,Efficiëntie,124 -56,de Kerk,Efficiëntie,127 -57,de Kerk,Efficiëntie,140 -58,de Kerk,Efficiëntie,128 -59,de Kerk,Efficiëntie,128 -60,de Kerk,Efficiëntie,137 -61,van de Boer,SuperDash,93 -62,van de Boer,SuperDash,82 -63,van de Boer,SuperDash,91 -64,van de Boer,SuperDash,94 -65,van de Boer,SuperDash,90 -66,van de Boer,SuperDash,83 -67,van de Boer,SuperDash,99 -68,van de Boer,SuperDash,87 -69,van de Boer,SuperDash,100 -70,van de Boer,SuperDash,* -71,van de Boer,Miracle,107 -72,van de Boer,Miracle,105 -73,van de Boer,Miracle,103 -74,van de Boer,Miracle,117 -75,van de Boer,Miracle,103 -76,van de Boer,Miracle,108 -77,van de Boer,Miracle,115 -78,van de Boer,Miracle,93 -79,van de Boer,Miracle,102 -80,van de Boer,Miracle,110 -81,van de Boer,Efficiëntie,131 -82,van de Boer,Efficiëntie,127 -83,van de Boer,Efficiëntie,136 -84,van de Boer,Efficiëntie,133 -85,van de Boer,Efficiëntie,117 -86,van de Boer,Efficiëntie,142 -87,van de Boer,Efficiëntie,. -88,van de Boer,Efficiëntie,128 -89,van de Boer,Efficiëntie,125 -90,van de Boer,Efficiëntie,136 \ No newline at end of file +"ID","Boederij","Type Meststof","Opbrengst Mais" +1,"de Jong","SuperDash","83" +2,"de Jong","SuperDash","96" +3,"de Jong","SuperDash","-99" +4,"de Jong","SuperDash","73" +5,"de Jong","SuperDash","84" +6,"de Jong","SuperDash","82" +7,"de Jong","SuperDash","88" +8,"de Jong","SuperDash","80" +9,"de Jong","SuperDash","74" +10,"de Jong","SuperDash","83" +11,"de Jong","Miracle","101" +12,"de jong","Miracle","103" +13,"de Jong","Miracle","108" +14,"de Jong","Miracle","98" +15,"de Jong","Miracle","98" +16,"de Jong","Miracle","109" +17,"de Jong","Miracle","121" +18,"de Jong","Miracle","112" +19,"de Jong","Miracle","109" +20,"de Jong","Miracle","101" +21,"de Jong","Efficiëntie","132" +22,"de Jong","Efficiëntie","120" +23,"de Jong","Efficiëntie","136" +24,"de Jong","Efficiëntie","135" +25,"de Jong","Efficiëntie","126" +26,"de Jong","Efficiëntie","134" +27,"de Jong","Efficiëntie","137" +28,"de Jong","Efficiëntie","150" +29,"de Jong","Efficiëntie","128" +30,"de jong","Efficiëntie","125" +31,"de Kerk","SuperDash","86" +32,"de Kerk","SuperDash","96" +33,"de Kerk","SuperDash","80" +34,"de Kerk","SuperDash","92" +35,"de Kerk","SuperDash","99" +36,"de Kerk","SuperDash","92" +37,"de Kerk","SuperDash","90" +38,"de Kerk","SuperDash","87" +39,"de Kerk","SuperDash","84" +40,"de Kerk","SuperDash","95" +41,"de Kerk","Miracle","127" +42,"de Kerk","Miracle","102" +43,"de Kerk","Miracle","107" +44,"de Kerk","Miracle","99" +45,"de Kerk","Miracle","/" +46,"de Kerk","Miracle","107" +47,"de Kerk","Miracle","104" +48,"de Kerk","Miracle","100" +49,"de Kerk","Miracle","100" +50,"de Kerk","Miracle","110" +51,"de Kerk","Efficiëntie","125" +52,"de Kerk","Efficiëntie","126" +53,"de Kerk","Efficiëntie","120" +54,"de Kerk","Efficiëntie","129" +55,"de Kerk","Efficiëntie","124" +56,"de Kerk","Efficiëntie","127" +57,"de Kerk","Efficiëntie","140" +58,"de Kerk","Efficiëntie","128" +59,"de Kerk","Efficiëntie","128" +60,"de Kerk","Efficiëntie","137" +61,"van de Boer","SuperDash","93" +62,"van de Boer","SuperDash","82" +63,"van de Boer","SuperDash","91" +64,"van de Boer","SuperDash","94" +65,"van de Boer","SuperDash","90" +66,"van de Boer","SuperDash","83" +67,"van de Boer","SuperDash","99" +68,"van de Boer","SuperDash","87" +69,"van de Boer","SuperDash","100" +70,"van de Boer","SuperDash","*" +71,"van de Boer","Miracle","107" +72,"van de Boer","Miracle","105" +73,"van de Boer","Miracle","103" +74,"van de Boer","Miracle","117" +75,"van de Boer","Miracle","103" +76,"van de Boer","Miracle","108" +77,"van de Boer","Miracle","115" +78,"van de Boer","Miracle","93" +79,"van de Boer","Miracle","102" +80,"van de Boer","Miracle","110" +81,"van de Boer","Efficiëntie","131" +82,"van de Boer","Efficiëntie","127" +83,"van de Boer","Efficiëntie","136" +84,"van de Boer","Efficiëntie","133" +85,"van de Boer","Efficiëntie","117" +86,"van de Boer","Efficiëntie","142" +87,"van de Boer","Efficiëntie","." +88,"van de Boer","Efficiëntie","128" +89,"van de Boer","Efficiëntie","125" +90,"van de Boer","Efficiëntie","136" diff --git a/data/raw/foo.xlsx b/data/raw/foo.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..81615bdd14db58961a61f70e38241f1383fd5596 GIT binary patch literal 9326 zcmWIWW@Zs#U}NB5U|>*WNP0InBaV@QVFC*SgD?XJQ?zq_UP)?RNqk6UL27ZVUPW$> z!Xg$XjRg!$45MH~hrpUCr~M8a2)KUt_~ZOU*XzzE2M*J>O~-|_0__eQnS5+ja`e2+ z%>VN`e+xJ*6wNZ<D0Kc#wb9438JqHQw>5Pf+a1Ky-FV_aXq>dU{?X%aRQHQZPAZCu ze7`_M+QCpVKE5|?Hp7g<?T3R*Tpl`lt&z$Q6|-x2Jonb|Fv0uBcJi!>T48zO_Q3~! zVfO?rPOZEw@c5$em8?w7>bTF3bv;?Sm8$QEmBlR$P>cxHObXgr^lQ5JzG96X##a-P zneG|1P1wtK_uPH9-TTbtd`+Y_t?c@;D(cvTZ0-FuF*Q~DHTt^01pL}%CYlki8$HjV zQL*XL4Tga1?k!?#jq=Xd9<di)>~s3VA%(lYRUZn=9Jw!@vTI)dgcLssrCgl{OOMZU z@wYVBYN}g);^YpC1O9K5Woq)4U1Wd4;`-yi$Tz{4yFMRq5;%E4QKh#1?Sm69t*hT$ zcpBaO?&GwV^+x~L7#RNlXJ$anmRmy&l)o@AF!(SrFmN+)FvS<8<`nDefymM9IHGeQ z-1BQD`T8F=;AwmB@u&U7_p5g&2sn9{J9_L^45@4Iyu-h>cd?4}-Tmq^0wGe(%<lDj z>;9a&eb=V<)d|IKk4)Bd3yD}OZ!2HhHvLy&DZ7?h(KNThH4l#nmR<dN_1jE$uS$)w zz5QCNcdS_Gee%At+vSfg8wJkmg-dY?FO_K%vz^%(b<8*NUeb?sj8~n4->z-ju-$yY ztI6kQ*I%Eu>?N;#ho#S(OEX1J6ep$MTvu}<NAc@VsnFmI%ghZ)^NOsLD&tdf4t#c* zy<hlmjjrFE6G2KQLO+boyB}9w@U=c_i^<2+b1RRSa~@uO;={?NZuu4W7_nuNM!PA~ zKQk~e>|$bIkYeCqtjN)a<dgFJqU@yn{A^I|1<Nyx8VeX0;ej_o8Eb-ky^k0O?B)K~ zZcskEOjPh@iN4aC(AWGNC%0(cR#P;t`*L4eYgcT<HE(_9{I<&XXOEYCH|2Z8Ed2GL z_KGecmMY<#_tvcWXFs21=16+GR<6mNRaofFtJkaFa`Vk97T#$$>4eTa5sPO(G*l(q z7cWzC`Lgp;n@QF=gHofc;tR5;d?W9j-twg3S4;4lwM|>LZ&#S5nfCPmPUqX-*ycA? z%3u5}$?0jsTgh<o#T7;E9|v8(aeUhIC(XC=T=v1mzj&U?toD{$Wolc~R-({;!d<>z zb5ilsMKgDYzJFNa^!t?N{@b5)PJely_T=R8tM?hOB^jahhxRftGcep?V_@KCU|^`o z(ML`!YeHlFZySiz&foH1;HT}8vfCz|94maIc{_bY4JuZ)_&hR`^PF>&PuJ<s_t@o2 zYD)Nd#8m(7I`?tsce&f`$7gRUc(>z{Q<$L=yM*N)uT-^Fp*?kRXYMdq-rAs*+`6Ws z=iN@X<L>eMW!5d-wRhsDYm5zE$2H8C_1X9@x743$H^FrMflX6ZrLgVFdipY|aN5f` z#ld27M{j)hQq$Vv+Hs}Y;7O5Ykln=0;7_M=97XhJF#fU<-}>&tsV}w#s=?cR-!9v< zhVhY`$e}yi7o5<%{30dP^JpGdLA5j6Ik$ta54#(PY>DRdsrH_#6dH7TsU<^3zIWC6 z`X*baxYmTt^3C5BzuF#s-*Wb0-_%d`Cr;f@{CPfsrCzGHVN29n7lA$ei+IcKXjoar zbT!52Eq%RV*S~{avsW@%vR+P`#d&zM1n)xa>wm5`WW6d>=Gri6LV$dwyS3%{sgpmd zW^)xYI=y?4xafU*NkV#)fm`dtRY`5?n~WA5R^M;8dZ(E6NmY$ImXG&KFSN+fxgh?7 zOVwMsZ1M9mPy7~lNvK|26!uBnzc=p8^XJ#ob7vYJUgsUQQS`$4Gl#y3cdZrI|8nY^ z`Eh%VX-my`<c=hsF1=kRExcgOBEPA{i)AwA`}ipuKYzpbi6!gAylo#jRg`wr&1jpV zvd_Noc!}7Kzc+Ip^rd&c7n=X=qzR9~ymN8~xu3ePOzAy2zbM}@`HFJAVQu>Cb4~?U zI^I<NW&iVK$F{oQl5Os<SK2<+V~u~EKJ~`B%{zrc)vmAA{;q%Pb=eQE6t#Xq-Ujoc zZOQj{L_9q(HGnH>Q+vl#k;Uw{ywhxD)bvyrv&6_JnoIbry~{A;a_V_mU~)`X{5|W; zZz`-oxo<<({yP8mCD+7^<2f-4*DCF9vpVT9d;7cXQ$#ga+zoqsB(dY<g>w(2i&yzZ zH(y}6pj>Ms^=QNUN16E#zBNzS<+$Jd?h^Zjmk$<L&Fo`0GybO5@$Y!$jaRm>)=8W; zN<S5&G40-=qb0|tCogk!j&5(z_t=p9wzOXMFV+%vY50yCiHr;klbIP9L?GFvI3uwr zH6^&DC^Ij;7*xRaOt$tvY#?yFTxDN#YU$E<%(u8yS1wr5!KcG&=%%($O{|XP_R33j zRWViyKdlVzz5n<9i)Hk;sEM`~`fE?Vtgu+b;*;8HQtEgA{xx%MubKyL1*yDGzA((w z>wdlZ%x{OJH6JZ=oig4_ehy`x^?PCNL6`IAtj;={#Gb8r-N1dHfo1KD6q8woXO}Kq zvS}`-<=pd{FMIyHODJW#tf_0H->oB}?=Q*idd%<I%r`qOFBReN4tT`$f9*rQm3vCd z^nF=1*QYq#I(5_RYUP7_v!-0rI~OYFx0Yw$tHi_lX78U0vfW7E(e}wic87^h-u`I| zg$)D(JkN$|E+}J=<E;EKeVzRBiTs)o$|tp3*yeOSZ+K;Qyz~4zle6sW4<4yz+OnpC z*?r6Z-dvuWS(39%0_`1BW=%RSlCv|2eN*T0?x=V9f%|q(Srfqc+jCoP4eyl-d*_$W zGL}}f9G%~G)IanO))MTXxWGO?b_RxH3ZM=fxCAT7NX<>v2a|@N7<m(Uwp-d<WWV?? zd!FL*Kc<iSRqs7GpELQcO3K_RGF!L32@EqfnzTn+m`T<0Ymmm0rPH1AZKqsvVCLQX zrJqw}%S-u->Su2#uYIaAJI5=kjJMnU-`?fRzt5|y`dsqy(%u<W?lZU#O_~<JFW{5y zoSqvqR4z}e&$iGosqy!i(j%D4nfH@h=a2X8`E^BtxhFY}-(ddflCyA{yr<=DEwQ5< zZ4-AKNV90Xuhw(i<B;-vvEwqb&G%-AX)g_0E1^8;M_f}<Ky|H=qVgNwsb2NfEBE+6 zG@R(f+oN$meAPzPg&Q9q-sIHL`XcC^tm3UtiSrDt<sMaE`mP})RG0Bn_3v}>?fPOD z!_-`!vPC-QE%<M=(<h-O;$rcqmj|;hDR4ALRWFTaUJ-Nr>+8Q!`?p<M&X{4+G<Dgw z8&<EU=1oZC4fu6+;qLJFp{K>}U5Jm}yi;~nf9v%HA@v*D<au^IaOGa_sA(5-xP5Ng z^7Pl&7l_11AHMGWHZEZMm+Sx+&!Qb|daYZt%6HxpO*!<UPxu~Bv*X@Ln&$&2@UDIn z6zmhBWd1a&=be!qLjzNgisz$~M|u1&aBSMqtzr4#$Zgfj976eRLTU*+LNs~3LVv8c zYy7uNxoew&(UPgL37rwrLEkEpj-2$__D6fcRdWUXdA8O6KD_+gzTdX~%Zs0f<2UWx zs^y)>t{NQt?rMMh|G$65@5S%`_u<t?^OslDKOQ==|Ig3U)A#@XX!Ui&qdTJd`)WUZ zdO2DAe(l%i{{6DIZp+Bd_@LOi@x!`@$?w&=6C3${e`K_ud&I!$$Rr2B7iBBnoj$=7 z7p?H>Q0a!Zhi=P69An*Mvp@UhF8Pl;Rr+SGciYH%C%LCgqTK4is#&K$d?=Xbb9eIR zot?WoFHFpSr{}eX;rnyr1>ui^ZuV|Sea71*)Va*!;HBXG8=ZS(SMPT2czWH|=#kHc z&M)r-);D)tR&_2)Pz}=C;+)iR`cSGu%dJmm@8zyPydwLmZB2@DVzk05gSw8DPmi6> z&<lJMWa(mPD!Z>c<%v;I{XuuNb^0nURb85&e<ka*ct5jh|0?CUpNr=)-*36tBW9Dl zi(XvX*)3r_(YEYHZ-7nlEuTpzj?}5~DY{N9Z}t?g>Az|1V(Tmuu*vYwcFt{w{-!DI zV11G`Yl?cuuhet08~60g+$%3$(zKyF^_I=H8(YtI8e3a^UODyFZi(QMo?{Z#+wF=Y zE+iVCe!lZ!xKhGY**9Kmv`?H1{W>RsF?G_a>}#4u(>b(Lzki&PyCFTlu&F{!qf_wy zlAls8cfDdQp3VuWn%l?c`@odxsr-}f%bM3&Ul>f<z2Eb7FGpnh^eVA;HL(TSb@`i* zC&b^geY=lEvL;JX;oJ6aX9WVYn`{!>IJkJk?>2Ra{)=&$yE-c8-IUokW}ERdi@1gD z6`uU0_vrb~P3nKjPsx=t7PGPTM}Pc1#cH-j>T{RI&79xmb+W56=G{3^FlkNkwaU=q zxT`<@|6Y}LInHi<>bdNhpQDa_{r1%{Yv!Js##3tVKfRdezh~o9W5p@&PO~1}?NnaN zvgcwCyTOXvfxFaqW-*>q`Z3FNhR&TS%DRsW)$G-cZuMW_iE>W8dtgqN&)1L&W3>gz zOWI4dMdx2w``hnxMSt&Qzx17JcFkJJ%{u94g=S2z^K`FeFQRPU-&=d-b=`}Q^2q6I z8aFRXB>JRD2bM|%#`8Ie#HF{y-JBV5u_WSRz^myCqudIgav9``idH=R_P}-T({{&O zj%{f&Q=TV_F5fb9PtQ88b2HtlCL6Du$rzBNZ>-Q?5`1%4VP#fn=BmA}OLUccN;dZz zTry}9f2d=9cG~aAW4mUyUorCEnY2^7$oN$+zhJhjlKPHDHMQUiN38RHZq4G@uBB{} z{Zsgb#Nq1|Y&(^&M2jy9-ccs$IWO-=dEzul@lEY6Q><bp`<-GloH(O;$`Rdo-TJA# zCKqKE<!XmF2-rnEp5>q|Kf6$%#QH>q(Lv?Rw!0F6^Zeu+9|o^0__yllec3fCH)Rg( zmGAA}%WU=BTyRN5guXU=UuQ#ibJ44_hK;i~h)#U^v;0xQsf_D;x-MBx=~K4q4k%kR zBW2zp)6R8gF8XWBOZeZMb#UQpJ)0S;n7{4yziTdk^;9ui=Ckx=N3~vEDVhAT-TDdd z>QByBHfb%*`|GD$v@Tn3C!f&OO0PW+uhe!kA2iUNf5yl+a*yBPuFabRZ*MWy;o$MV zV)i4q)Xl>7<2rTw6LZf*l{)CW_L}?S*Qq;uuBNe<H`-Jc)LSeMdJ*81KV4bl*Cpu_ zQ_K>&?@W&D<+#7cQeF3don{XEU9TIuQ=}623xqTWEfn0gy0uIq?DXFaF|SIs_#Y%p z42<Es{Q4JTOyULCvds0zuj{>g{8aaBt6+DxPy8A2tq*@a+81-PrtrO>!rb3mPc4Yd zEz4?6+iI)9ve$3%g*Vr%TI1G6zvr#GyWygY&4<0pynMH=<^{Yq=h|AdJ(?p+HMMSW z<O=qwUe+tNOaEQ)S3x^D;i+bA%07ngS-ZFW$J!#QT-cDago%M6h>d}P7uq5!smw_Q zH-(mj=4RhE5cp?zqh8?GyHjP4bvU=3zH}wv#jyplZ&oLr?pzVqqmq8^*uUR?WpOfA z+b-61`aQo~eEx26(52aaTw9Wl6|FUriEWiI41LwL+P7`Lo#;Ux<<~1#t+>#YT5(=y z-^u&?z1QC6{<b6ajABE8+2QEtvse|k-7)ffJ2h`clm1q-Nme&z@owx*+_LH41Q8X! z!<rMPXxk|0r2Mp3m^AD8Hcl^H=~q8yaliX!E9SN-=1%?;vl88gEOFnA&RrQ<Nmphj zPrCP8ssG8+i(%K_aTRo~%Z}SLz2td{Uq+=H)069puAH2YyKh;4_FBSem~(t@jfH>q z+BvRE^QJNzHm=A&_$cz_`&A$2?<wv$75tm|?Ft6}7SDOpqB5FgJlN;Hw(asVbGZ|A z@Os<t4YR!TzBOBa>x+BqJm<-U>h~)1jwfy`=-@6)60K%>a3XlZdBeqAHx@Mi-IA}c zx5-mF;n$9OmwcYP?uABH4!>F|cU@c7bkgqo;}@rb4`#8u@ZQh)`>KR_fA(9M74w&i zE|*@r;=@uN1$E|AnllCZ>wfLODt;rjH$<EJ<-659hiyOkTFeSKaZu(%TkmoC#k@E2 z+gxL7JNY}5Z;3DQe>w5G_2Kz{OGOPV**Ui4?JS7-+h~vwc6TT1!tet>S6?@OK84jP zaO13PhivECrTxgMPZG^ft?Bx8sQlJf0ZF^t{BbKYUKMXRXB#1*9}$>c(N=#?*Zxd+ zxvR{XLr-;P#B5fXp?*4af=}M`)uD#`k3W37{(`aM@YnbESKG99Z{5Ho$62iLE#T4P zo6{d&Oj~gOyv7B&;`zrfe@GT;bu5&1JDB2?elVwME{m(s(bX?gDyNH_?5O^9DqTq> z(8R`KUd^V07^8z%16MD0O4jdSxxHV+vCnJ5=lPERq#3Y`Gb&GbZkWu%z`!odz#suh z&Y=EqaYkxtNwGed0yin=%!tl^Wg*aTKm68zfv2)U*5VvYeOyz5GbV;TfAg-IgKg2I z8NQchhyA`^-sJpfrUK*gtfi0gZ9bk!7e4gic6yP)J=bZ+jbvx8$)4Tjp}e#6^4$}E zUZ1aAo-b7?xNP<Zg<wOY{VsWR(*OV6{@|Rp{%2grp$48zJEQNeg)$Lga&KLQp2b#* zXiivrX3~a+={L`v>YvGBzTETV!AINPMte#uUv|piMBYio?YAw2j3xJ~D(Qwy@{u{j zbZ!3o$H{f)rnzii;(h4N*S9x1Q;J$dUjGmfk6*rI*+iG-qk2;#ci1`HoqvW``LS_x zJYW5}pGM0+UYk0};#7*>c2{wu=&ZAEKQU`>IJDy2m39A@nVi{b^igg))9jEx*QXuX zV<nzH$#VaUGwUZ$eS3;6|6^Lpztfhp&r58!Ha6d2y)JwA+>>7}zW!ls_El~UyZ(cN z_0KAHbRH~-oI0`d?TMO%=Fo(w#}70A|B>zPkNJIH<Iq?CLV-ZV2U2HDPQBeZ?bBAC zm;)1dB|ir{%ryMZ{(Q@;C65z7|NqJK`Lf(UWn;4>>3y<Y@}C}i9@$;X82)L&rlM>f zvv}i)?i<t}M#a9nsy(;W)j}@q(_x1F8#errn6|u?H{#}$?%?BxSMby|WSp3G_|*O5 z-!4?OZ*>)&{h}w)?5gDPoMp!Bo89N`oN>1FS>m_Ybh|WvtJjI!E=0<2UV4t{%)G$2 zOAPfnCY@U&kQ0^MIPaEV&L3+T$C9@l6Wja*RAlEWc|=(`Px$gh(DBk0%cdo(?+dEr z);?O^>lU>wV6n}El)tCDAD;guYEim)Sw-yqa@%*epUY3G3~k+*Ct&c5?fLv=5>L4M zF3Rpx*!AtG=UQP|qgULHDCo51PbK!6g>JIf)Mpp2WYGL6`L+31Q1H56s}ALTd3x_D z_af=|CD%BcmfTy(C;7Ei0fC;z*~Di}`EJP>ANDYI)0gxRlbcKQvw|{vzQ2j9J>>B? zEL1|`_3ym?`Th~X+E$4L{ktExIF=;$=+reSm@Yd!&5&g!0=<948NB!mzeo6V(F{pv z*Wl!=OQFf3H+ql#+1&%uw56;`A@glWyKXW^5CZ)>YP`%%_PW~pI&aoD(+opQA`f)W zVmVgDS3di;@XCVF)~WBiWb(?x-zDvZ81p3o)9#J;z^0s66pzxnVAj^P?AVnhtB<YP zAl*Ba|Je5WY3(m=&2Y9q^Lj#Xa)#Q?1IDsOSv-zVw5I-J&&t4axu2%(4_T1QBkJ3` z(lfj_G|^m&`<)Tn#a_c-C!O1MlQYy%JiPJ2Bgx>!J-5G1-piDmydopZ$9GrO8m`@G zKDI5Z)1N2r5MFI?`)FpK%#m+j8hDS}2KzrK+ivsgQ-odF_XmEvr@1Ol{=C|@=Ap`i z$Xo9YvEN-g<>*h=nu!Tl;}d)TvtuimL{~KTcrh|CR5LL!2r_UmrsO9F6y+Bb>nG<I zrGh%S3X51qhjbYj7<wkfdmlCsIeMRUzw^n>xs&`toNhmq|7PM?u;_=_q}_TqmZ+Y5 z^X;wg?X_Xc9wZl?`}=f`_oknI;TP2-mS;^?o+TjCD7ZCb*_n%{vjp!iI8m9WvObI1 z@>a9+=SP2Zgn5OxwyH;)C+uYsXPs2&V`6!$;=V!8+#3zg^f>v1kCf(`et($ZeW&2# zLGJ^ais2E>&nH#)Sts1-aVc@|GE|zqc5PLIh=kUKggu8%7XG;X`e4@g*ADA_=0yFh z6}FOn<aaZO=h2IMmZnm-xB1?RlvjST{KwnEpLfE0r@kyxWe}Yy;uG;;-?pQZW*4nq z)s=4=<FftO^-~UQ>V0{$Ma%yg>eQ`uxcBzIl$B}Q^j}Gbixn0NOyxb=ck1<<e~DWi zT)l#XCcmC>L3C+PfSL09*gpBsv0olK$zS?2f5xUSMNT$MT|Dz^ra$7wmH-nyIR!cy z85jha85jhR6JTON!D#wJ%%9Ae<d}ckfT#66^DlXxyV90-19%$OBzc5?*^tOMX<NGQ zvgC!ER65p7d~>~j+r=w4t6ud!sjm3t;(1rbD*nrai$?=GpDq$%Ws%Mn|2DPa#@*OD zQPxSETR$2)sP;8Ha*MACnk^%oQq;ie&a%eEdrDZ)qTGeXI}$86Tsm?`@{!s5)Euew zhi12?^D&)Vy=l+10tF7!E7M<!Xe>Jt>Mzf`cCqY_4~6AEJZr9QpR#9j+mC6#mT$I9 z%31NFZo`AAbGo};ZcB*Yw0N=J)H&O;vU}T2*<T;4U{Sr#_Db%|hAq8?l3QP8UTt1F zgS(`@A~$@|k6Zz^IbL7Xtc5!^vfaMYv(?`t(*Ee59$kiolX7OXC0l+!m;K#U-<?_f z`7y-=ro@Lbh0jkg|I0E{yKH^rwB^SW1|R(_Za3yz6`wPYo9X{cy;^p6%Atv^!Cw!z zrq_7vuE^$yyIyG*a{F4!il3~0ukCoR*ybDkR!q4&YeW05tDpB9*!+9^<?z=}Re^PS zKV#F4Rqy}X^bekV8JTn$aINe>L>PpHWt~TWH>v?!QRbi_+92d~W(EexOq*U&YECh_ zM$~yZh!zOBhYdVn3DFB`Xrj#0p__v~SpzWxM84+$n}ciux@PpDYmhbwmg50y#u&&( zHvxU56`~tNZsY}<03LBg8h1rE0KKmW(h9)_qF@7x;XOxmt?2D?h&B-UUmUCz<aKcK z9J!T_ZVGB83epY3Gh`STFsf8^?Wjc{Oe=_%lt<DIEf52|S=m5xTnt<c4NMFSHxxlU E0RC=5@&Et; literal 0 HcmV?d00001 diff --git a/data/raw/muxu.xls b/data/raw/muxu.xls new file mode 100644 index 0000000000000000000000000000000000000000..92daaf7fa2270456fb02803ef8013c0c52c04fe5 GIT binary patch literal 27648 zcmca`Uhu)fjZzO8(10BSGsD0CoD6J8;*1Oo3??wX00YCn|NsAkxG>t7fq~)wC>T;9 zz{w%Nz`(}Ju+8f%`#}aK1~v``hKCGH3>#QLJO+kG3=9l03<V6L3=BSrdFiE@IXQ`? zxv4q|j(H{dnR%%SGy+awHzYDLF)%P3U}RvhWn^MtWSGOi#30BZ$p8`+0+YgEay?kx z9x!RkAOuR33=IVb^)0|GMurF`2e7(cFsTG4`4~VZZU^hR1r{-6kYl*Rz`(%4Faea* znHYrF8J!st8FCmh8Il-^7%~|Kj|we@3`Pb9Gq_86(Ojy);KPu~kj{|7Py%+f4kIIj z8N(h>z~S&03qcik7zmjG3Ozx%n{b7m2tgG#1Xb7(P{GJxkJq;#znBnE0V?hArvyd@ zPJ(7|5pXFJgE#>dj0_wEZQ;aeh8&9w11p0#1A~%^lCqAnvO&D2b&Lj#6Qe1|qR7C; zAcG<w9h909i>gS8ft^7Xp-9gF)f_zo1CUYb3>*wf=&I06gDN#=&|)xRV2B4fMN0wZ zGAK7j11u1uX$=)n(p8EFaU9|`<yiC>G#NBOT9Nf>DIjaoQb5sR&!EF#1=69Uq6F0h z@d4C$s8+DkkgT+~M_6jcpv_<iG6GFM++LVwu+<2i4;VzirO?;^3Sb&S98ee)2ZaRn z!-0_jDV&)g=Cd*|Fo45<3&>6e1_qY-;PA&3Uynn)6NmUL9OAQah_AvSz6OUlF855s zp?)<Eaa`u(Qoj+0Ika}qPO!bA81c0ahxk?;;@feEZ^j`G@h_&m5chCnxMwZc9DbPi z6hXo1+|#)kSRnBt2p3OF+b_Rgo`D789##fzNILuhO@|Dy^r?wMTnC4^7FKa#n7v<6 z&F6xN!_z+`9kRp3uP_8KFfhn7K;6TEDlP^S=R}ArFfcSoV2GpH%L`Kv4<AVS5rBzb zVMt(LV32~D!vhnCyAv&a&Jh$8;}+v)V1eX2KA8F`f`X86hJ-&W11Hw-0rh6U<p7#H zK_xU;{0c)N1A`qr96&8eusD+X8V(VL8c+bif|$XMAulXuN-c*jLoJFpNC6WAFM|jJ zGXnzys7VQ`B(38$wD=fU8JI9NfRvD=0n(f!!3K~+NV0*2fq{V}4WJ^GBn_ZAAxQ%$ z(UPPA6e%QW03~yhG=TCKNg6-}3P~D3IfEn(psYud22friNdu_xBT0icscA`vR1LbM zYS1H9gB7V7tWh+8@*i>`z*x)Sh*oMCGB7YO!%Gcx4NM%+;slg2t>ZNu;x!c%Fby#x z#*qK^_zeM#=MZ#>kuj<v=!GC?zzC5_Fua55bOX54L8Ti*BEx^=0Apfd5CM1AkG((l zAGiNCPz;&Fz=qwB|HtteVvcG^I1ZQme?14uB@@x2)0BaMfgK#3An(}Qm*H^9nVE1y z*r8smudn|P8(slLJIDZJF-|0Ppt=Txxv+@Ua-ciP63tQe*d6uh(<cl^+1uM=I0{(| z<|vRD$Uz_sa}=@|rlT}b9QCUfM{r|v6mqCg7E<WB(F`RGU^oh9+5ZMcct~OTObhNa zQ2Gaj2-YOMbm>yKA+R(63NrLGfGkFQaHD5TTX?|)N>~3G(1IJ&7ofp3NJ<9z0@PN) z>WlyX&tUjs)22-ri3V8=<_nM*D0n~^mS~X0Fq=&JD82xt0~ANqa)hGgM;lP@51b#d z`T`X0APM9`4HU^RF<4w+mRm~jlmb!<@&SrVK;;%l0@+|pLqH8<M3IX%jF}ly)9@Q2 zhT;+&hMe)iZ-@*rhM*Vz=($H3#Solf0%|kjF3UlkDI&rIzaio%F2NZl_zjUI#t<W8 z{D!EY7=klQK+R&@VFK!y5D_N$4Us@`3C=LVZ-^W*h8P*+H$)Z15S(EGYWd?16Hw=u zh%mu#h$M<jaE1wfL*$7u#K;)GA!;aw;0zN`=LUC}fI7BBgb98_q)=RfGfeOs0vePf z=n^Ai{D!EbxCCdIfckH^!vr+QL(nCuY4{D1MsW$wFu`w#A~A*-86z7qnMsI2fPt5R z!N@>WL07>sIXN}2#BdUaFT*5I(1C&kG}gc(_n()+h{1qCl|g|)mqCHSks+BOnIV-S z4?G_P>R~W4@G!8mq^8|`_~}0@hz;@t1H*q+(AWgX8bNexj4-WH!eb4j4+6G^H8t(e zlWW+m5kj}d7}FXxJk~&ZDPU{ZQq$hPeT&^1VRUOuFs;$TV-2Vu!^{A-hCMay|EDw9 ztr0=D#uU>UJv`Qcx(iTiI8xK@zW$Eg8c}p>%rLDn!eb4jSqt_JXKLD`_qVWHBL=d@ z1b<4C|If={LPSbC<Fn=1bL`fLqg#U|r77dF205jj@%eFjEp}@p(5=Ce($w)-gPhXN z_}spI8@n}<=+<CKY1(+KK~8CBeEvV1iQO70bZfArG<`hQAg8o5K6|gf!)}c<x;0o* znlT=0kW<<jpJNZUV7EpFWQ`gAl&0{Xm%)sPlxAeyw-QI($f8?=C8eq0u?9J%85w`s z(1P7Ja_H7zNog8*tU*p`M#d*kp2TjAJi0YlQko7PYmif#k@5eVY1pk%K(_`<N;AM? z4RT5|GM;_t8Fp(F(XGLf(oFDJgPhWgj8|Uh!)6T+185?f0bXx9qSc$S|JiZYl?)6n zMMofpLRUIL>q;I5Wd?QzsG&xaIM5AML^BjQ6)`Zptu@4Ms0zYRV@yL;(F{dSGYkx% zu>^3fj_y)bgrO#whH9c2ikw0i82%q%!0u8tgrTOGhU%gjiku!87@}ui#crrN!ca3z zLk-akMUH(2hJv;3*bL=h&;ZTFI3=b`;;=-Gc}D5~%nVKpi3};=Mj5DTWCab196tv( z2*d^rBSK?bfI*Xin}NYOF(<h+C$S_mKW`F8BdUQ+V*j}rAj^l68A=&)7!nyu7&001 z8S=oRK`;wI!&#t+0$BhWUIJUd#=yq_5&(^7Ffy>C@HkL-oG84>O#BR53|tHh&Kar6 z*$U37IXRO!>e1XK{-2A%nIVHAl_8lSn?ZrWnIV-Sham^WRalb`%vGTACpNgNAUv3> zAUv3>AiT*;EDYKW%>O5GD4;rpmH9s_gEK=uLoP!uXyT25fdQmer-AMNBn}NUwQT=6 zk<=<ML^BvL#Dd$qAU(PbEdM8QsG;d$`Om@N%uvcu#83oo`zA9~!gc61u>YULp@*h} z{XY+e4ph5D8T1(h7#LhD3UU(j5=-)nDiuOfD@rDDxT6}yWcpu#!Ihzcp#U8BdEofZ zXDDK*WKduTVMt}DU?>3(Ucq7?6xra|pUlL`V8Fn_z~G*rpE8NV8r57zssAht?hN@1 z`DjTMG$IZ&3p8vFHH(kIkb#qd!6P*>B{MHw!4Sit%mV*889W$L!D&B}A&()QK>@VV zg@Ks?WE?v)1A|LQ7NlPQN{Z|(u!3+h6CZ;Svf)OUh6`db98&(_F#Ny$q8$(avoe4T zH%2zx7}IbeEQUkMK^%rdng&c@!%dJ4H^C?nn1!(z4k{0sFoR_>6BmOi11kfAXI?>R z$s~?a)F@*T`OnJW$&klTz)%W~barq@0+hr5pMf;*Ky1+D065ZM84Z*oNXz~L3}&Em z$tN=}J2eHG|Gm*1Bk`Xbyf`qAA)6tUAqAxbfJ8aSG4Qeo6y+cR&=5Bp!(=8N26Irs z?3Y?vQk0m3Q7|*g{AXwIV@L)2v<SR<64u)UoAP=NqC5?U_cmD=7#bM<PvS5}bt@yo ze>Mg`22id66%e2@3L5&TomEZ-3s8RX%P&ct#F2z*1{2qR7O)v545<tZ46KNt|9|E` zETln$%!rVN@L)w6ga<3qAiT*;+zghW#nJwyCEyq@N3&G)KO2KTI1Di39J$B`m5?ZL z4iZ4P1j2*41j2*41i}LqepU^v|0i*%pgMt-^*=j907EK65xnCMD*Hf5)f$vkLo!Qp zQYUd3qUvJi|If-0!jQ>O!jJ<FFKBop6#|SPBW%D%<d=X_0NU6Cvn1Gve1;Ne?6W{J z404o#!W~wQKzs}1fs&XYgDnFO14DRXQ68wsf)uazs19Y~{?Ee@&XCAZ#E{1TE48o{ ztDs689KwAZ5e$7ugCr9e7#IW@1Q|jY5*d=fz71w5VW<SNe8D**lcAI$m%)f3fFYBi zi~-~!gbL*4wPp+n;AO<E415eFpa}*BHiqDg)YKBgSsW4!vykkBgaKqR$YKrwhQ&y7 zpxEhQU}0cnkY_l^$iZ-sk%7T;Hxt7j1|LRW1_lPuXb}?wgQp8S14CM3QEq%@3I_v2 zT4riaN_=JtI|D;<eraAwQ6&chLqSn~S!P~xDhmTca%M>-h$_l2U}In?PE9P%&l6-| zNH0pv%!{uCX;#Tj*UL!cff`zzS(U1ik;u)!ke8U78V|B4wMdeIAtygCJ+q`VB~>LQ zH90dkF(*DHHN7Y`wOE3I0n`G*CdI_SP+`c(z~IQlz~E-c&A{N7T9T1kl#`g3Qq0A` z;GUY30^xCiB0D`56x*qJtPBjfiJ4WYTnr3e`5Ac%!MT|w86cyKK!h<P1A`Mc14D3P zPEMtQSAJe<F-X`1M3^!%FgSBDFgWEWDFhef=cT8DM9n~iImiiM11$bCXfbdwtk-1# zr7?!Dx(p0K3=9m}dJGIF7#J9~=`k<}Gcqu|(_>)pW@KPU<`82@Mv5iS<O(RJm>8yU zaPm$=lHu)PVhHeNW)WdvV*m*&GQ7OUzaO+iLWYTf0W>-t?VO)ilA2c%A5vM6S{$oa zk(;w-;%UFb1_G|%J^nO5)%BXQ$$>-lWz+G#<BMtzoS3O1cJjui<)Od7`&<ffTA?78 zqjS^v`#kH%MSJDhw;rA`XSvt<Pfpq@uch5%m)<w`zqvN7vr$UICUGhMj4!+P?a?rA zZ}9A#6|}VaSwXtbCfmLVMe-AtT>53fblS8`t>JG2-`Uc@I})!uCt5r{>3sbu^R^AL z5w`E1{+yxmXu_6V_GW%-yqr0jHy@aE<;{wpZYRZ@^iJ(m|Ejk{yK~{$$U`ANB{#f? zOEtY{y5gR~C+;4h!y9-y<IkrnNa{Y@Z#}Q`&FyQEIeYH*ZR)Eri#y|Y@V`jHSNj*j zm(SEk*E4~(iZG*v*V1dtpC4dgU@&830M%j)@kOaQ#rk?6vZeog)?ovl_V=xOn=jS| zUOFtHTBg5rg0ueOT84)j^%+Xix20d-_CL<XwVR`5$@}jsZU3$7{`qjqa|N+ktJn@d za<)I6@vzKxe|^)*)+NhTIif2H+7?&e-WtCr;NoJNvxd@3J+>K472@x>>Axpvom0xi zlLD8#TyL8ssnjo@SQvj@e#Sh>2WCE(zlsH2H&0mCzvATo&4&%n%=153zh395D@&gF z6rao|Nefr#KMvkIVMXeyD{>dM?)Z7_FC#XOW>-!VYi3|z=we_1t-NL^$w<vj)kh+I z6Y~<&Q;R?;gy)Pe*I@+#hljNb%O~74Pu{KJnzZTky4~Lxoaab69$#>2{_X2eA2*wy zyDxHewl{C2&K!2$uGF)79os+D=q#6Bamz#Is7}tq<D$=AR!E%O(kp!5^F(oAt@70g zt!ua5Rk*fQ`>Nu5ox5>6RkCcm-NRp8{;v7R^|iJd7n22c#~&0I*w@I;z;H}~fdP~o zkem(if+5J+Zz9ijOS_Bh*MCx<@k{UZ>S+1q)W!b(ZzOL=Y;x{14f2eg_ew7?bk;_L zwuJfLHf@-|yOX8pw4#o*hl1MlKZb0xRerm_S^X-kd}_$1bhgbY(<KjHi2b!Ue1FyZ ztJVIY=K1#BE6dw<D(JlYaaCIM{^65T&U$Qm@uOdTanR}SEiNZ{0;fIb+kfoP{@T}* z_up}0H#%ha&B0EPeZ^z<AU*z5sfMkIDtQ}J{T}SLd~7)B!zuBnlJ3tJm@j)8we*t4 z@eY-J(-%lwv72%Jq)4^!(o6fd2HGw8Fw0{h+p;PBQ?qhBGv2(}vPvLH_t%u_^p3Kc z8R^H)ssEVy#kQ-ZMc=B<_5aVO(dMULC<req>R+R1eyM(DjGBS<+Lx9;AF0eY;W)J5 z;@Pj+3{!XSNG!e~VOCTt)vzo$BJ<SIYd6<?ZRW{txa8jc&apUPeN=^WeQkTOd5Aps zddHRj64>pfz8>rni)#+5+0o2j{r290YwH~~_ib#>58KteAoJz*1s#)~l(5HgN`L*A zw$(D_+>3?6_xPFvH&4^l4-nw3ELo}fOlRWnBwaUuNqI&GM$JhoAD>ipKF{C^yyHG) zPQfGDiLNY-+g&(?4PsV?x=pqE6F%R;KC^!ja}Q7EQSKQXa}|#s7Y_EDw(0SG*0uW> zD}Vp`I(xeP-pZe!XNRx9x93l|+4iv1ld_9a5>Ng8c=!JQzh}koy|=IXeSG?VkLCA{ zaSQ){H^092`|0)XgZZ`fd_I0#Er0Ltm&3Q$|NnOTd%V?_a!V`2hr&Y15APm-eotBK zFq7<`kDUAG98qyPGRZ;kMcE3uQzw}2#Vfx$^mRk-A$6HeJ=_)PU#~o}>igj(bo|Ws zMK_q_jNPhxta%Ik&!$FHZ1NI&ck<_{BeqU2&X|_Vt4?cfU#+?%{87-&{td356<s@B zxaTCiT>1Wn=CQu3wSFC`*OQH&`fS+w#ZEnrUvRmn+LI$(OPpRzIPzfn)^3Fzw>F*q zD7^moitMY*HEGI;(F(5=><?ZsdMvwTl1ACeISXdYVzYNLDw-&>{^yk=ysa$$`Ua`@ z3ffu=PTD-!5O?_HcZI_{?$-HS-`M+P(wto-a*D@%o;1w;rYxc#d&AIEN%;3li&g>G zd(LXHAClAF3w-6Pb2u4SZm)Qw=zF{H!DNqhYD<+Iu9$1T`|wou%$gsv>tD(whEIF> z>Bgybv3_$dUX0dVbHVP=j77yJhi_E>S=sE^ud6#-e93#a388!&wKu#|vJSgi-Nd@Z zbJhBrz7ySDA{4EUac^rs|FKH@<C;Jr<$eC2Yd`GD{Hz#anXXmquzb1N2fM~^{Ev=i zPGM&%kvO<F{Pkgu$gSRcxy$#g+xc{V_Z!0-)BSa&#BWSjXx7(d?z}sta(%8^N0hJ# zqp{<KgdLZ3J0jlcyM)AEP10U|=gz$?t<nn&H~pGBLq$HPZriPSANF7VW>I$dcxJ_& zuC-H2Pllz(ds*JeD){#BAN%cDj8Urmh9Q&g&MCcnHKyG3-~Zb1&H4KA-;I7-^}UWe z)-LyW*YuNqVRnA{!CUk%++1<F@5>bT`9F_T_t!jYxwJ7RxJKp6%KnCUlg)xWT9&nU zIzKBPNM-3?b><+8Sl+>j(>_{Eo-ZibtGuOU@xoQ>r|7sWt5V#fDqN7Xq<!u#(Ru40 zC7XHsTdfW)T{SZ-tTDwfj`MS&#nMTdyDsYZy)XJY>#fuLO?ey|H&0J6P}}TzMOJ0j zY{yiU+GX5s#S<=Q|JKmC#V4k_wC?dLSuqFQ?_7VLe0$Jdb)27BfKkz+r}<mWvOiOH zRXvV<d#3;8%i3dGlo+Dk__B!1%hO(0J$-rD^tM~9Hn|fG4xLq7sJmF&M(`m2LmjKD zFI+QXu3dk7XsMWODwnN%OPJ&0n^p4NtA9y7*;dT#d&b96`2C*8=C2f6&WPw*|C90S zd1x5t^oHZ>YVM1yF}r<K{muV`8l9fRZRw!o#k+2@-z?rFCByF;o!0xJ>fW)<dLgrD zXK3~Wfqz?`+?o*fFXzdDm)j<Ptm#{Rsbsg|BAvPWoZ9@0TR%tZ)|GCUl%iem-D1|< zYW0$%@`@ZAH$;ZA%@Jf2XTN%7mNAogVx^Myqx{tdshQXJbX_u>(x<$uCm<~FOv=1N zrCR4-zSw?;?O61-G=mk1lhz2#(+O!b?ad0kyeDgk%(t++{M?$SB|SH<m>Cup7<zf` z(KV80sR^g2g=>FeTix5KxO}Flo$uY-n%noDERC*pic>xCVCP?%(C<vF2N#6-qy+}| zlwaO(>gLTAvDRkXe;BrQi*PExHaE-K{$|>Bj^`x?6-!U?7u^ozcK+-ezfyPY`ZH&v zbIU%7Oghf6>yGGDr`<y5$}F4OGS*a2GRZw4R;jjmEu;UvuE~A>_Xw3W*Xh2QuC=xx zg6oyz(kooMzXqA}yuO+kv6iQ5t9U>}ib(k3FT3hk!X--P#@>4Uac=kTiem5L<WrA~ zE{lIo^^J+Y6Q2EB|D7$9=kv<NW?9#7?{!-l`@YZX;DYN~TYvY)WnOq;&c4{uK4+`D ze$9KI=H|S;@+NI-gBIU<KfTG#^<w^3@3RZSUbCIe%a$mKPs!z3W8*77Q}@80S=ZNs z8l3<Cqc^R1FHV@hje&tdnvsD4G>eX0X+xS>U=gHt2ejGc%XP>=q~&{4IY)l}<y`_3 z)$Vp&SbK%J<%ub;nY~Kjo435{bU5_d6Zg%`+&z8nN#SIpdxjaSq&TczPA!~uFn7nB zK8GMNzs{T1DH{@+`tr@+il$HW*6>wR3DVFroKeK5?lpPNg|3jF4eKlx_lG@ETD;8h z#O@0o-%D#3e4Nc#b5vz-l0fxU2FYmwv;J@XYa!K{t}ibZ`!CS3_+~4s#-d|q9w^UV z<->9+de^Qa_rZhRAg?nr=`!GMhlASh2#lpw9^j2?0D8j|q1k|u0lArqt`WVVhtMd= z#DKTihi(#js|#V$HEbq<+hFKspeF)^8Qol{uECQe0=!v4i+LDW8CV%&85tPbKx^3< zW^)KI%m(#(pk{(Px;-2M3_VC<ASuvFdB~n)0hB$*+)SXQDj*dMpkX<NE?WkM9v21% zUhpnm9tI|c6b61UTM)ebSAc<o;m^vCmwb}~?Bl`WT3~T)FxdnaX#jV_AZDsEftI`J zFg7wUF&tz8*(J)rz|ag<-2x`n7&sWxz~GrZXd5G#nfA<H18&L-drc(vLeNG>2GG7* zMg~R(76wrF&ln6Y+k+|N%l7>H=WwVo%s~qSh>IYk4QRL$%nW5<0-4SPT89q_1CZ|- z7&w?fivmGx9Z;Y$Ffj6i#XuZh22?T7N=|e!(4s?hF;G~bi-A_Hp^Jec8eI&uY6)Eo z<UwSyzo0n?1_lPu76AqZ2#=A0AG`+!!eau*CQOc*fgdz4169ewzz-VDgYsAz_(5Y$ zP#zltKWNwu%428X2aQESc^nM<pb&)eI2riCDG6d97Xv@oO%NV813$Qf3E}ZD@Pmf} zKs->W@`ENK!KQ#>mY)F>BM>=|t3iH(@IbBx#SMf9ay4kFB7_HWHE59@ga>jpXaHa@ zg9HO;xC>;)2YY@7h8xQqAZ$*keIR#&QYSML9|I!;$eqt(zcy5^5OaWtfZQs+{UXD? zf1VB?vq3S=589m%)(_gzj&Ls%1G0OW7?9n|#DMHx&>$Z_c;7t4Ol0>mF@Qpufq?<! z(t9gFB*O=Lh<lkBklo9K;a(;NboYW%BR{y?2(k&Z(uE(iHyP}DW)$}_qqvtD#l4_m zYJ_{48Iaw}40SI!e5D}a`vdA;X0UrDKr?(yd<@KB_ZkPiUpZ$PBz&2{?(N*@v)=_8 zzM!1K&j4B#2y!oIa1P;K78LigptzR>#l4^rP=tF~P~6J`buT!43ub}C7v^3T6!)@V zxR(XPy`Wso586-%4pGoxH$MYtts2PntSIhfMR6}HihDt2JHoxJDDGv2x)&V2lU9M< z`v4lgtYG(omku-WF|dN&Yj#3KIejA}d_l7^{0!4x=xpZr4@tkE5{4hNu^jAPHU>~B z0dp@KihJ2m+{=dIUN#2g{KkgjUeHV>D35`|chPDT_p+h5mkq<cY~b)kb}y*(<7WUZ zdIg0jXdVX<zU*N4f);;5<dE|lJBoYRQQXUp;$C*-@U>cq;$C(X_p)QSmmR~sppuy% zydxUqCJvPF<v?*S2TJ&IptzR<#l0LT?&W~G7aYD9CZV{O1I4`@81Cgjaqo2oF@{%c z3=9hxwlQo#fp$zn4BJ3Sg~0(_S28eg{AXb3<`7`$26+%F#=yY!Ulv-EfEGc5kMdyP zN#_t}NJmn}#J~tTqJ`Oo0bC=%HIGQfzyF|60M#O>-Uro#4_fDc`stq`uzDf~gSPtS zo1mRppy63i9stRM;sR7YdO!l2A%lSdT!(ZpK!(;AFo0Su3=C@+Aj97~z|-9f3{O}X zz&Qw14r4u3hJiSFP~GdBnOu}#oS#;r;98NKngdB64?TWP^S{Ojav#H0jo4NG*BC+d z4yq4Fd7~jP8UmvsFd71*Aut*OqaiRF0;3@?8UmvsFd71*Auw`60AK%~>FU|5bM&MI zCQs*J(AN364%GiY!T_0@0QLVtM}~n$)*=`fz+<?eofDwRAkZ>hP~RN1yd5;^UBdub zDAWQS59oo856nj!4*-oNfI=S9R~G<}(1Z9OGqKTf0+5B2pz#REa!Upt0nl+(kl_Q6 zw($I->|)SL%wq8T0?5@MlSc7q2#kinXb6mkz-S1JhQMeDjE2By2#kinXb6mkz-S1J ztPlY8h(WXepguRK{|#C{1X^eT>QjT(On}aw0<GBub*4dQ5QB~t2Cat!_4`46e9($F z&^k9zKOeMi4YaBfv?&O5-X3UvAGC4`bbc6UO%G^YkSug=AGAsabR?J}0|R&+CFl+z z(8+k9li)xnpMlmWYcMc?u6bYp9iIu3*I{5_0G;p)I<!}xfq?-u9sxQn7qp!mB>(sS z|NkHww7O#y6BhzO;DZE87!(*>!DoXNK~G2^HW!F7u!2|9fNTS$BLxl+$-n?wC&IwM z;D*f{P#nR;!@--ovl)`W?#PA-6HN;-Fv0Byxq(=XOs)`>tPH{6O}?P*<siR<cIl^q z-2@Ur9M}O-jY(=SFfoGi5Gel;Yd`2zOwbV$Ak$bGTp02hKnH~6f={R@A<;b+WcVL+ I4mZqx0L?}_Z~y=R literal 0 HcmV?d00001 diff --git a/scripts/0_build_project.R b/scripts/0_build_project.R index 7a9935e..8abc29b 100644 --- a/scripts/0_build_project.R +++ b/scripts/0_build_project.R @@ -3,7 +3,7 @@ ### Install required packages if necessary -required_packages <- c("tidyverse", "readxl", "openxlsx", "emmeans") +required_packages <- c("ggplot2", "readxl", "openxlsx", "emmeans") for (package in required_packages) { if(package %in% rownames(installed.packages()) == FALSE){ diff --git a/scripts/1_extract_metadata.R b/scripts/1_extract_metadata.R new file mode 100644 index 0000000..712adc9 --- /dev/null +++ b/scripts/1_extract_metadata.R @@ -0,0 +1,120 @@ + +library(openxlsx) +library(readxl) + +field <- c("Data ID", "Official title of the dataset", "Project name", + "Description of project", "Author", "Author ID(ORCID)", + "Contributor(s)", "Subject matter of research/Vocabulary", "Data origin", "Funder(s) or sponsor(s) of project", + "creation date (m/d/yyyy)", "Embargo end date", "Citation", + "keywords (AGROVOC)", "Country(ies) covered", "Point longitude coord. in Dec. Degrees", + "Agro-Ecological Zone(s)(FAO) covered", "Years covered by data", "Crops covered by data", + "Animals covered by data", "Start date of data collection", "End date of data collection", + "License (default=CC-BY)", "Permission given by email", "Rights", "Contact email") + +field_name <- c("data.id", "data.title", "project.name", "project.description", + "author", "orcid", "contributors", "subject.research", + "data.origin", "donor", "date.creation", "date.embargo", + "citation", "keywords.agrovoc", "countries", "longitude", + "aez", "years", "crops", "animals", + "date.collect.start", "date.collect.end", "licence", "permission", + "rights", "contact.mail") + +meta_data <- data.frame(field = field, + field_name = field_name, + values = NA, + stringsAsFactors = FALSE) + +raw_data_files <- list.files("./data/raw", full.names = TRUE) + +for (file in raw_data_files) { + + # file = raw_data_files[3] + + file_extension <- regmatches(file, regexpr("(?<=\\.)[a-z]+$", file, perl = TRUE)) + + + if(file_extension == "csv"){ + file_name <- gsub("\\.csv$", "", basename(file)) + sheets <- "data" + } + + if(file_extension %in% c("xls", "xlsx")){ + file_name <- gsub("\\.xls$|\\.xlsx$", "", basename(file)) + sheets <- excel_sheets(file) + } + + + meta_wb_name <- paste0(file_name, "_metadata") + meta_wb <- createWorkbook(meta_wb_name) + + variable_col <- c() + unit_col <- c() + sheet_col <- c() + + for (sheet in sheets) { + + if(file_extension == "csv"){ + dat <- read.csv(file) + } + + if(file_extension %in% c("xls", "xlsx")){ + dat <- read_excel(file, sheet = sheet) + } + + if(is.null(dat)) next + + header <- strsplit(names(dat), "\\$") + variables <- sapply(header, `[`, 1) + units <- sapply(header, `[`, 2) + + + variable_col <- c(variable_col, variables) + unit_col <- c(unit_col, units) + sheet_col <- c(sheet_col, rep(sheet, length(variables))) + + addWorksheet(meta_wb, sheet) + writeData(meta_wb, sheet, dat) + + } + + var_definitions <- data.frame("workbook" = file_name, + "sheet" = sheet_col, + "variable"= variable_col, + "unit"= unit_col, + "definition" = NA, + "unique identifier" = 0, + "personal information" = 0, + stringsAsFactors = FALSE) + + file_meta_data <- file.path("./data/processed/", paste0(meta_wb_name, ".xlsx")) + + if(file.exists(file_meta_data)){ + + current_var_definitions <- read_excel(file_meta_data, sheet = "variable definitions") + current_meta_data <- read_excel(file_meta_data, sheet = "meta data") + + var_definitions <- merge(current_var_definitions, var_definitions, all.x = TRUE) + meta_data <- merge(current_meta_data, meta_data, all.x = TRUE) + + var_definitions <- var_definitions[match(variable_col, var_definitions$variable), ] + meta_data <- meta_data[match(field, meta_data$field), ] + + } + + + ##now add metadata to metadata workbook + addWorksheet(meta_wb, "meta data") + addWorksheet(meta_wb, "variable definitions") + + writeData(meta_wb, "meta data", meta_data) + writeData(meta_wb, "variable definitions", var_definitions) + + ##now save workbook + saveWorkbook(wb = meta_wb, + file = file_meta_data, + overwrite = TRUE) ##later do not overwrite but make sure merged/aggregated + +} + + + diff --git a/scripts/1_data_cleaning.R b/scripts/2_data_cleaning.R similarity index 95% rename from scripts/1_data_cleaning.R rename to scripts/2_data_cleaning.R index c09b940..0da0f4d 100644 --- a/scripts/1_data_cleaning.R +++ b/scripts/2_data_cleaning.R @@ -6,7 +6,7 @@ library(readxl) library(openxlsx) # Load project functions -source("./scripts/MiRAE_funcs.R") +source("./scripts/funcs.R") ##### Inputs --------------------------------------------------------------------------------------- @@ -72,6 +72,7 @@ sheet_data <- list(d, metadata, variable_definition) walk(sheet_names, ~ addWorksheet(wb, sheetName = .x)) walk2(sheet_names, sheet_data, ~ writeData(wb, sheet = .x, x = .y)) +# mapply(function(name, data) writeData(wb, name, sheet), sheet_names, sheet_data) saveWorkbook(wb = wb, file = "./data/processed/fertilizer_trial_WUR_cleaned.xlsx", diff --git a/scripts/2_analysis.R b/scripts/3_analysis.R similarity index 96% rename from scripts/2_analysis.R rename to scripts/3_analysis.R index 8a6effa..c4cda70 100644 --- a/scripts/2_analysis.R +++ b/scripts/3_analysis.R @@ -1,7 +1,7 @@ ##### Minimal Reproducible Analysis Example ##### 3 - analysis -library(tidyverse) +library(ggplot2) library(readxl) library(emmeans) @@ -36,7 +36,7 @@ ggsave(filename = "./results/figures/2.summary_plot.png", fit <- lm(yield_tha ~ farm + fertilizer, data = d) ## Get an ANOVA table -av=anova(fit) +av <- anova(fit) ## Calculate Estimated Marginal Means, aka adjusted means: @@ -45,7 +45,7 @@ av=anova(fit) emms <- emmeans(fit, ~ farm + fertilizer) ##open text file to write results to -sink(file="results/raw/2.various_stats.txt") +sink(file ="results/raw/2.various_stats.txt") print("anova table:") print(av) print("em means table:") diff --git a/scripts/Master_Script.R b/scripts/Master_Script.R deleted file mode 100644 index 1d653e6..0000000 --- a/scripts/Master_Script.R +++ /dev/null @@ -1,271 +0,0 @@ -##install all required packages - -# Install required packages if necessary -required_packages <- c("openxlsx","car", "emmeans","readr") - -for (package in required_packages) { - if(package %in% rownames(installed.packages()) == FALSE){ - install.packages(package, dependencies = TRUE) - } -} - - - - -##required packages -library(openxlsx) - -###add ontology read -##add final metadata, implement automated inference -##make sure metdata and variable definitions are not overwritten if present!!!!! - -### -###set working directory -wd=getwd() -wd=gsub("scripts","", wd) -setwd(wd) - - -##download files if not present -file.list.wd.initial= list.files(wd) - -if(sum(grepl("example_data", file.list.wd.initial))==0) download.file("https://git.wur.nl/langu001/PPS_data_management/-/raw/master/example_data.csv", destfile="example_data.csv", method = "curl") -if(sum(grepl("Documentation", file.list.wd.initial))==0) download.file("https://git.wur.nl/langu001/PPS_data_management/-/raw/master/Documentation.docx?inline=false","Documentation.docx", method = "curl") -if(sum(grepl("Documentation", file.list.wd.initial))==0) download.file("https://git.wur.nl/langu001/PPS_data_management/-/raw/master/Documentation.pdf?inline=false","Documentation.pdf", method = "curl") -if(sum(grepl("Readme", file.list.wd.initial))==0) download.file("https://git.wur.nl/langu001/PPS_data_management/-/raw/master/README.txt","README.txt", method = "curl") -if(sum(grepl("0_build_project", file.list.wd.initial))==0) download.file("https://git.wur.nl/langu001/PPS_data_management/-/raw/master/0_build_project.R", destfile="0_build_project.R", method = "curl") -if(sum(grepl("1_data_cleaning", file.list.wd.initial))==0) download.file("https://git.wur.nl/langu001/PPS_data_management/-/raw/master/1_data_cleaning.R", destfile="1_data_cleaning.R", method = "curl") -if(sum(grepl("2_analysis.R", file.list.wd.initial))==0) download.file("https://git.wur.nl/langu001/PPS_data_management/-/raw/master/2_analysis.R", destfile="2_analysis.R", method = "curl") -if(sum(grepl("MiRAE_funcs", file.list.wd.initial))==0) download.file("https://git.wur.nl/langu001/PPS_data_management/-/raw/master/MiRAE_funcs.R", destfile="MiRAE_funcs.R", method = "curl") -if(sum(grepl("RawDataConvertScript", file.list.wd.initial))==0) download.file("https://git.wur.nl/langu001/PPS_data_management/-/raw/master/RawDataConvertScript.R", destfile="RawDataConvertScript.R", method = "curl") -if(sum(grepl("Meststof proef WUR.csv", file.list.wd.initial))==0) download.file("https://git.wur.nl/langu001/PPS_data_management/-/raw/master/Meststof%20proef%20WUR.csv", destfile="Meststof proef WUR.csv", method = "curl") -#if(sum(grepl("Meststof proef WUR_metadata.xlsx", file.list.wd.initial))==0 download.file("https://github.com/ALanguillaume/MiRAE/blob/master/data/raw/Meststof%20proef%20WUR_metadata.xlsx?raw=true", destfile="Meststof proef WUR_metadata.xlsx") - - - - - -####make directories for data -path.data.raw=paste(getwd(),"/data/raw/",sep="") -path.data.proc=paste(getwd(),"/data/processed/",sep="") -path.data.op=paste(getwd(),"/data/definitions_protocols/",sep="") - -path.writing=paste(getwd(),"/writing/",sep="") -path.results.raw=paste(getwd(),"/results/raw",sep="") -path.results.tab=paste(getwd(),"/results/tables",sep="") -path.results.im=paste(getwd(),"/results/figures",sep="") -path.results.rep=paste(getwd(),"/results/reports",sep="") - -path.scripts=paste(getwd(),"/scripts/",sep="") - - -dir.create(path.data.raw, showWarnings=F, recursive=T) -dir.create(path.data.proc, showWarnings=F, recursive=T) -dir.create(path.data.op, showWarnings=F, recursive=T) -dir.create(path.writing, showWarnings=F, recursive=T) -dir.create(path.results.raw, showWarnings=F, recursive=T) -dir.create(path.results.tab, showWarnings=F, recursive=T) -dir.create(path.results.im, showWarnings=F, recursive=T) -dir.create(path.results.rep, showWarnings=F, recursive=T) -dir.create(path.scripts, showWarnings=F, recursive=T) - - -####check presence of data files and list -file.list.wd= list.files(wd) -file.list.wd = file.list.wd[grep("metadata", file.list.wd,invert=T)] -file.list.wd = file.list.wd[grep("readme", file.list.wd,invert=T)] -script.list.wd= file.list.wd[grep("\\.R", file.list.wd)] -writing.list.wd= file.list.wd[unique(c(grep("\\.doc|\\.docx|\\.pdf|\\.txt|\\.rtf", file.list.wd)))] -data.list.wd= file.list.wd[unique(c(grep(".csv", file.list.wd),grep(".xls", file.list.wd)))] - - -###list files already in proper place -data.list.raw= list.files(path.data.raw) -writing.list= list.files(path.writing) -script.list= list.files(path.scripts) - -##remove metedata -data.list.raw= data.list.raw[grep("metadata",data.list.raw,invert=T)] -data.list.proc= list.files(path.data.proc) - -##if raw folder is empty, move data from main folder to raw - -##copy data to correct location -fc=file.copy(data.list.wd, path.data.raw, overwrite=F) -file.remove(data.list.wd[fc]) - -##copy writing to correct location - -fc=file.copy(writing.list.wd, path.writing, overwrite=F) -file.remove(writing.list.wd)[fc] - -##copy scripts to correct location - -fc=file.copy(script.list.wd, path.scripts, overwrite=F) -file.remove(script.list.wd)[fc] - -##list raw files for prcessing -data.list.raw= list.files(path.data.raw) -##remove metedata -data.list.raw= data.list.raw[grep("metadata",data.list.raw,invert=T)] - -##now read all excel workbooks and sheets within workbooks - -for(wb in data.list.raw){ - - -if(grepl(".xlsx",wb)){ - -wb.name=gsub(".xlsx","", wb) -file=paste(path.data.raw, wb,sep="/") -sheets=getSheetNames(file) - -} - -if(grepl(".csv",wb)){ - -wb.name=gsub(".csv","", wb) -file=paste(path.data.raw, wb,sep="/") -sheets="data" - -} - -##create workbook to store data and metadata -meta.wb.name=paste(wb.name,"_metadata",sep="") -meta.wb <- createWorkbook(meta.wb.name) - - -##store variable names -var.names=c() -unit.vec=c() -sheet.names=c() - -for(st in sheets){ - -if(grepl(".xlsx",wb)){ -t1<-read.xlsx(file,detectDates=T,sheet=st, colNames=F) -} - -if(grepl(".csv",wb)){ -t1<-read.csv(file, header =F,as.is =T) -} - -##skip empty worksheets -if(is.null(t1)) next - -##count NA to detect header in excel sheet -#na count function that sets "" to NA (for csv) -na.count.fun=function(x){ -x[which(x=="")]=NA -out=sum(!is.na(x)) -return(out) - } - -na.count=apply(t1,1, na.count.fun) -ncol=max(na.count) -head.row=which(na.count==ncol)[1] - -##make dataframe from sheet with colum names -colnames=as.character(t1[head.row,]) - -###remove NA from names -miss.name=which(is.na(colnames)|colnames=="NA") -colnames[miss.name]=paste("X",1:length(miss.name),sep="") - -###fish units from names using $ ##when units flag that specification of methods and defenitions of measurements (e.g. reported vs measured, dr vs fresh weight.) -colnames=lapply(colnames,function(x) unlist(strsplit(x,split="\\$"))) - -contains.unit=unlist(lapply(colnames,function(x) length(x)>1 )) - -units=rep("",length(colnames)) - -units[contains.unit]=unlist(lapply(colnames[contains.unit],function(x) x[2])) - -colnames=unlist(lapply(colnames,function(x) x[1])) - -###now re-read files starting with first data row (to allow proper data type) -if(grepl(".xlsx",wb)){ -new.frame <-read.xlsx(file,detectDates=T,sheet=st, colNames=F, startRow=(head.row+1)) -} - -if(grepl(".csv",wb)){ -new.frame<-read.csv(file, header =F,as.is =T, skip=(head.row)) -} - -colnames(new.frame)= colnames - - -var.names=c(var.names, colnames) -unit.vec=c(unit.vec, units) -sheet.names=c(sheet.names,rep(st,length(colnames))) - - -addWorksheet(meta.wb, st) -writeData(meta.wb, st, new.frame) - - -} - -##make data frame with workbook name, sheet names and variables and add column for definitions - -var.frame=data.frame("workbook"= wb.name,"sheet"= sheet.names,"variable"= var.names,"unit"=unit.vec,"definition"=NA,"unique identifier"=0,"personal information"=0, stringsAsFactors = F) - -#set up metadata -#mid.names=c("ID","Country","Name region","Name site","Minimum latitude","Maximum latitude", "Minimum longitude","Maximum longitude","Experiment/survey","Type of experiment","Type of survey","On-farm/on-station","Crops","Animals","Soil type") -#meta.data.frame=read.xlsx(paste(path.data.op,"Required_Metadata.xlsx",sep="/")) - -field.vec=c("Data ID", "Official title of the dataset", "Project name", "Description of project", "Author", "Author ID(ORCID)", "Contributor(s)", "Subject matter of research/Vocabulary", "Data origin", "Funder(s) or sponsor(s) of project", "creation date (m/d/yyyy)", "Embargo end date", "Citation", "keywords (AGROVOC)", "Country(ies) covered", "Point geographic coordinates in Dec. Degrees ", "Agro-Ecological Zone(s)(FAO) covered", "Years covered by data", "Crops covered by data", "Animals covered by data", "Start date of data collection ", "End date of data collection ", "License (default=CC-BY)", "Permission given by email", "Rights", "Contact email") - -field_name.vec=c("data.id", "data.title", "project.name", "project.description", "author", "orcid", "contributors", "subject.research", "data.origin", "donor", "date.creation", "date.embargo", "citation", "keywords.agrovoc", "countries", "longitude.latitude", "aez", "years", "crops", "animals", "date.collect.start", "date.collect.end", "licence", "permission", "rights", "contact.mail") - -meta.data.frame=data.frame(field= field.vec,field_name= field_name.vec,values=NA, stringsAsFactors = F) - - -###check if workbook with variable definitions and metadata already present in processed and make sure not overwritten -exist.wb= data.list.proc[which(data.list.proc==paste(wb.name,"_metadata.xlsx",sep=""))] - -if(length(exist.wb)>0){ -##read.data and extract -exist.file=paste(path.data.proc,exist.wb,sep="/") -existing.varframe=read.xlsx(exist.file,detectDates=T,sheet= "variable definitions") -existing.meta.data.frame=read.xlsx(exist.file,detectDates=T,sheet= "meta data") - -existing.meta.data.frame.id=apply(existing.meta.data.frame[,1:2],1,paste,collapse=";") -meta.data.frame.id=apply(meta.data.frame[,1:2],1,paste,collapse=";") - - -existing.varframe.id=apply(existing.varframe[,1:3],1,paste,collapse=";") -var.frame.id=apply(var.frame[,1:3],1,paste,collapse=";") - - -##overwrite new frames with existing values -meta.data.frame[na.omit(match(existing.meta.data.frame.id,meta.data.frame.id)),]<-existing.meta.data.frame[na.omit(match(meta.data.frame.id,existing.meta.data.frame.id)),] -var.frame[na.omit(match(existing.varframe.id,var.frame.id)),]<-existing.varframe[na.omit(match( var.frame.id,existing.varframe.id)),] - -##rbind additional rows -meta.data.frame=rbind(meta.data.frame, existing.meta.data.frame[which(!existing.meta.data.frame.id%in% meta.data.frame.id),]) -var.frame =rbind(var.frame, existing.varframe[which(!existing.varframe.id%in%var.frame.id),]) - - -} - - -##now add metadata to metadata workbook -addWorksheet(meta.wb, "meta data") -addWorksheet(meta.wb, "variable definitions") - -writeData(meta.wb, "meta data", meta.data.frame) -writeData(meta.wb, "variable definitions", var.frame) - -##now save workbook -saveWorkbook(meta.wb,file = paste(path.data.proc,meta.wb.name,".xlsx",sep=""), overwrite = T) ##later do not overrwrite but make sure merged/aggregated - - - -} - - - -###now run analysis scripts -source("./scripts/0_build_project.R") - diff --git a/scripts/RawDataConvertScript.R b/scripts/RawDataConvertScript.R deleted file mode 100644 index 657e06b..0000000 --- a/scripts/RawDataConvertScript.R +++ /dev/null @@ -1,222 +0,0 @@ - -##install all required packages - -# Install required packages if necessary -required_packages <- c("openxlsx","car", "emmeans","readr") - -for (package in required_packages) { - if(package %in% rownames(installed.packages()) == FALSE){ - install.packages(package, dependencies = TRUE) - } -} - -##required packages -library(openxlsx) - -###add ontology read -##add final metadata, implement automated inference -##make sure metdata and variable definitions are not overwritten if present!!!!! - -### -###set working directory -wd=getwd() -wd=gsub("scripts","", wd) -setwd(wd) - - -##download files if not present -file.list.wd.initial= list.files(wd) - - -####make directories for data -path.data.raw=paste(getwd(),"/data/raw/",sep="") -path.data.proc=paste(getwd(),"/data/processed/",sep="") -path.data.op=paste(getwd(),"/data/definitions_protocols/",sep="") - -path.writing=paste(getwd(),"/writing/",sep="") -path.results.raw=paste(getwd(),"/results/raw",sep="") -path.results.tab=paste(getwd(),"/results/tables",sep="") -path.results.im=paste(getwd(),"/results/figures",sep="") -path.results.rep=paste(getwd(),"/results/reports",sep="") - -path.scripts=paste(getwd(),"/scripts/",sep="") - - -####check presence of data files and list -file.list.wd= list.files(wd) -file.list.wd = file.list.wd[grep("metadata", file.list.wd,invert=T)] -file.list.wd = file.list.wd[grep("readme", file.list.wd,invert=T)] -script.list.wd= file.list.wd[grep("\\.R", file.list.wd)] -writing.list.wd= file.list.wd[unique(c(grep(".doc", file.list.wd),grep(".docx", file.list.wd),grep(".txt", file.list.wd),grep(".rtf", file.list.wd)))] -data.list.wd= file.list.wd[unique(c(grep(".csv", file.list.wd),grep(".xls", file.list.wd)))] - - -###list files already in proper place -data.list.raw= list.files(path.data.raw) -writing.list= list.files(path.writing) -script.list= list.files(path.scripts) - -##remove metedata -data.list.raw= data.list.raw[grep("metadata",data.list.raw,invert=T)] -data.list.proc= list.files(path.data.proc) -###add meta_data - - -##if raw folder is empty, move data from main folder to raw - -##copy data to correct location -fc=file.copy(data.list.wd, path.data.raw, overwrite=F) -file.remove(data.list.wd[fc]) - - -##list raw files for prcessing -data.list.raw= list.files(path.data.raw) -##remove metedata -data.list.raw= data.list.raw[grep("metadata",data.list.raw,invert=T)] - -##now read all excel workbooks and sheets within workbooks - -for(wb in data.list.raw){ - - -if(grepl(".xlsx",wb)){ - -wb.name=gsub(".xlsx","", wb) -file=paste(path.data.raw, wb,sep="/") -sheets=getSheetNames(file) - -} - -if(grepl(".csv",wb)){ - -wb.name=gsub(".csv","", wb) -file=paste(path.data.raw, wb,sep="/") -sheets="data" - -} - -##create workbook to store data and metadata -meta.wb.name=paste(wb.name,"_metadata",sep="") -meta.wb <- createWorkbook(meta.wb.name) - - -##store variable names -var.names=c() -unit.vec=c() -sheet.names=c() - -for(st in sheets){ - -if(grepl(".xlsx",wb)){ -t1<-read.xlsx(file,detectDates=T,sheet=st, colNames=F) -} - -if(grepl(".csv",wb)){ -t1<-read.csv(file, header =F,as.is =T) -} - -##skip empty worksheets -if(is.null(t1)) next - -##count NA to detect header in excel sheet -#na count function that sets "" to NA (for csv) -na.count.fun=function(x){ -x[which(x=="")]=NA -out=sum(!is.na(x)) -return(out) - } - -na.count=apply(t1,1, na.count.fun) -ncol=max(na.count) -head.row=which(na.count==ncol)[1] - -##make dataframe from sheet with colum names -colnames=as.character(t1[head.row,]) - -###remove NA from names -miss.name=which(is.na(colnames)|colnames=="NA") -colnames[miss.name]=paste("X",1:length(miss.name),sep="") - -###fish units from names using $ ##when units flag that specification of methods and defenitions of measurements (e.g. reported vs measured, dr vs fresh weight.) -colnames=lapply(colnames,function(x) unlist(strsplit(x,split="\\$"))) - -contains.unit=unlist(lapply(colnames,function(x) length(x)>1 )) - -units=rep("",length(colnames)) - -units[contains.unit]=unlist(lapply(colnames[contains.unit],function(x) x[2])) - -colnames=unlist(lapply(colnames,function(x) x[1])) - -new.frame= t1[(head.row+1):nrow(t1),1:length(colnames)] -colnames(new.frame)= colnames - - -var.names=c(var.names, colnames) -unit.vec=c(unit.vec, units) -sheet.names=c(sheet.names,rep(st,length(colnames))) - - -addWorksheet(meta.wb, st) -writeData(meta.wb, st, new.frame) - - -} - -##make data frame with workbook name, sheet names and variables and add column for definitions - -var.frame=data.frame("workbook"= wb.name,"sheet"= sheet.names,"variable"= var.names,"unit"=unit.vec,"definition"=NA,"unique identifier"=0,"personal information"=0, stringsAsFactors = F) - -#set up metadata -#mid.names=c("ID","Country","Name region","Name site","Minimum latitude","Maximum latitude", "Minimum longitude","Maximum longitude","Experiment/survey","Type of experiment","Type of survey","On-farm/on-station","Crops","Animals","Soil type") -#meta.data.frame=read.xlsx(paste(path.data.op,"Required_Metadata.xlsx",sep="/")) - -field.vec=c("Data ID", "Official title of the dataset", "Project name", "Description of project", "Author", "Author ID(ORCID)", "Contributor(s)", "Subject matter of research/Vocabulary", "Data origin", "Funder(s) or sponsor(s) of project", "creation date (m/d/yyyy)", "Embargo end date", "Citation", "keywords (AGROVOC)", "Country(ies) covered", "Point longitude coord. in Dec. Degrees ", "Agro-Ecological Zone(s)(FAO) covered", "Years covered by data", "Crops covered by data", "Animals covered by data", "Start date of data collection ", "End date of data collection ", "License (default=CC-BY)", "Permission given by email", "Rights", "Contact email") - -field_name.vec=c("data.id", "data.title", "project.name", "project.description", "author", "orcid", "contributors", "subject.research", "data.origin", "donor", "date.creation", "date.embargo", "citation", "keywords.agrovoc", "countries", "longitude", "aez", "years", "crops", "animals", "date.collect.start", "date.collect.end", "licence", "permission", "rights", "contact.mail") - -meta.data.frame=data.frame(field= field.vec,field_name= field_name.vec,values=NA, stringsAsFactors = F) - - -###check if workbook with variable definitions and metadata already present in processed and make sure not overwritten -exist.wb= data.list.proc[which(data.list.proc==paste(wb.name,"_metadata.xlsx",sep=""))] - -if(length(exist.wb)>0){ -##read.data and extract -exist.file=paste(path.data.proc,exist.wb,sep="/") -existing.varframe=read.xlsx(exist.file,detectDates=T,sheet= "variable definitions") -existing.meta.data.frame=read.xlsx(exist.file,detectDates=T,sheet= "meta data") - -existing.meta.data.frame.id=apply(existing.meta.data.frame[,1:2],1,paste,collapse=";") -meta.data.frame.id=apply(meta.data.frame[,1:2],1,paste,collapse=";") - - -existing.varframe.id=apply(existing.varframe[,1:3],1,paste,collapse=";") -var.frame.id=apply(var.frame[,1:3],1,paste,collapse=";") - - -##overwrite new frames with existing values -meta.data.frame[na.omit(match(existing.meta.data.frame.id,meta.data.frame.id)),]<-existing.meta.data.frame[na.omit(match(meta.data.frame.id,existing.meta.data.frame.id)),] -var.frame[na.omit(match(existing.varframe.id,var.frame.id)),]<-existing.varframe[na.omit(match( var.frame.id,existing.varframe.id)),] - -##rbind additional rows -meta.data.frame=rbind(meta.data.frame, existing.meta.data.frame[which(!existing.meta.data.frame.id%in% meta.data.frame.id),]) -var.frame =rbind(var.frame, existing.varframe[which(!existing.varframe.id%in%var.frame.id),]) - - -} - - -##now add metadata to metadata workbook -addWorksheet(meta.wb, "meta data") -addWorksheet(meta.wb, "variable definitions") - -writeData(meta.wb, "meta data", meta.data.frame) -writeData(meta.wb, "variable definitions", var.frame) - -##now save workbook -saveWorkbook(meta.wb,file = paste(path.data.proc,meta.wb.name,".xlsx",sep=""), overwrite = T) ##later do not overrwrite but make sure merged/aggregated - - - -} \ No newline at end of file diff --git a/scripts/extract_metadata_prev.R b/scripts/extract_metadata_prev.R new file mode 100644 index 0000000..67c511b --- /dev/null +++ b/scripts/extract_metadata_prev.R @@ -0,0 +1,183 @@ + +library(openxlsx) + +path.data.raw <- "./data/raw" +data.list.raw <- list.files(path.data.raw) +path.data.proc <- "./data/processed/" +data.list.proc <- list.files(path.data.proc) + +# wb = data.list.raw[2] + +for(wb in data.list.raw){ + + + if(grepl(".xlsx",wb)){ + + wb.name=gsub(".xlsx","", wb) + file=paste(path.data.raw, wb,sep="/") + sheets=getSheetNames(file) + + } + + if(grepl(".csv",wb)){ + + wb.name=gsub(".csv","", wb) + file=paste(path.data.raw, wb,sep="/") + sheets="data" + + } + + ##create workbook to store data and metadata + meta.wb.name=paste(wb.name,"_metadata",sep="") + meta.wb <- createWorkbook(meta.wb.name) + + + ##store variable names + var.names=c() + unit.vec=c() + sheet.names=c() + + # for(st in sheets){ + + st = sheets[1] + + if(grepl(".xlsx",wb)){ + t1<-read.xlsx(file,detectDates=T,sheet=st, colNames=F) + } + + if(grepl(".csv",wb)){ + t1<-read.csv(file, header =F,as.is =T) + } + + ##skip empty worksheets + if(is.null(t1)) next + + ##count NA to detect header in excel sheet + #na count function that sets "" to NA (for csv) + na.count.fun=function(x){ + x[which(x=="")]=NA + out=sum(!is.na(x)) + return(out) + } + + na.count=apply(t1,1, na.count.fun) + ncol=max(na.count) + head.row=which(na.count==ncol)[1] + + ##make dataframe from sheet with colum names + colnames=as.character(t1[head.row,]) + + ###remove NA from names + miss.name=which(is.na(colnames)|colnames=="NA") + colnames[miss.name]=paste("X",1:length(miss.name),sep="") + + ###fish units from names using $ ##when units flag that specification of methods and defenitions of measurements (e.g. reported vs measured, dr vs fresh weight.) + colnames=lapply(colnames,function(x) unlist(strsplit(x,split="\\$"))) + + contains.unit=unlist(lapply(colnames,function(x) length(x)>1 )) + + units=rep("",length(colnames)) + + units[contains.unit]=unlist(lapply(colnames[contains.unit],function(x) x[2])) + + colnames=unlist(lapply(colnames,function(x) x[1])) + + new.frame= t1[(head.row+1):nrow(t1),1:length(colnames)] + colnames(new.frame)= colnames + + + var.names=c(var.names, colnames) + unit.vec=c(unit.vec, units) + sheet.names=c(sheet.names,rep(st,length(colnames))) + + + addWorksheet(meta.wb, st) + writeData(meta.wb, st, new.frame) + + + # } + + ##make data frame with workbook name, sheet names and variables and add column for definitions + + var.frame=data.frame("workbook"= wb.name, + "sheet"= sheet.names, + "variable"= var.names, + "unit"=unit.vec, + "definition"=NA, + "unique identifier"=0, + "personal information"=0, + stringsAsFactors = F) + + #set up metadata + #mid.names=c("ID","Country","Name region","Name site","Minimum latitude","Maximum latitude", "Minimum longitude","Maximum longitude","Experiment/survey","Type of experiment","Type of survey","On-farm/on-station","Crops","Animals","Soil type") + #meta.data.frame=read.xlsx(paste(path.data.op,"Required_Metadata.xlsx",sep="/")) + + field.vec=c("Data ID", "Official title of the dataset", "Project name", + "Description of project", "Author", "Author ID(ORCID)", + "Contributor(s)", "Subject matter of research/Vocabulary", "Data origin", "Funder(s) or sponsor(s) of project", + "creation date (m/d/yyyy)", "Embargo end date", "Citation", + "keywords (AGROVOC)", "Country(ies) covered", "Point longitude coord. in Dec. Degrees ", + "Agro-Ecological Zone(s)(FAO) covered", "Years covered by data", "Crops covered by data", + "Animals covered by data", "Start date of data collection ", "End date of data collection ", + "License (default=CC-BY)", "Permission given by email", "Rights", "Contact email") + + field_name.vec=c("data.id", "data.title", "project.name", "project.description", + "author", "orcid", "contributors", "subject.research", + "data.origin", "donor", "date.creation", "date.embargo", + "citation", "keywords.agrovoc", "countries", "longitude", + "aez", "years", "crops", "animals", + "date.collect.start", "date.collect.end", "licence", "permission", + "rights", "contact.mail") + + meta.data.frame=data.frame(field = field.vec, + field_name = field_name.vec, + values = NA, + stringsAsFactors = F) + + + ###check if workbook with variable definitions and metadata already present in processed and make sure not overwritten + exist.wb= data.list.proc[which(data.list.proc==paste(wb.name,"_metadata.xlsx",sep=""))] + + if(length(exist.wb)>0){ + ##read.data and extract + exist.file=paste(path.data.proc,exist.wb,sep="/") + existing.varframe=read.xlsx(exist.file,detectDates=T,sheet= "variable definitions") + existing.meta.data.frame=read.xlsx(exist.file,detectDates=T,sheet= "meta data") + + existing.meta.data.frame.id=apply(existing.meta.data.frame[,1:2],1,paste,collapse=";") + meta.data.frame.id=apply(meta.data.frame[,1:2],1,paste,collapse=";") + + + existing.varframe.id=apply(existing.varframe[,1:3],1,paste,collapse=";") + var.frame.id=apply(var.frame[,1:3],1,paste,collapse=";") + + + ##overwrite new frames with existing values + meta.data.frame[na.omit(match(existing.meta.data.frame.id,meta.data.frame.id)),]<-existing.meta.data.frame[na.omit(match(meta.data.frame.id,existing.meta.data.frame.id)),] + var.frame[na.omit(match(existing.varframe.id,var.frame.id)),]<-existing.varframe[na.omit(match( var.frame.id,existing.varframe.id)),] + + ##rbind additional rows + meta.data.frame=rbind(meta.data.frame, existing.meta.data.frame[which(!existing.meta.data.frame.id%in% meta.data.frame.id),]) + var.frame =rbind(var.frame, existing.varframe[which(!existing.varframe.id%in%var.frame.id),]) + + + } + + + ##now add metadata to metadata workbook + addWorksheet(meta.wb, "meta data") + addWorksheet(meta.wb, "variable definitions") + + writeData(meta.wb, "meta data", meta.data.frame) + writeData(meta.wb, "variable definitions", var.frame) + + ##now save workbook + saveWorkbook(meta.wb,file = paste(path.data.proc,meta.wb.name,".xlsx",sep=""), overwrite = T) ##later do not overrwrite but make sure merged/aggregated + + + +} + + + + diff --git a/scripts/MiRAE_funcs.R b/scripts/funcs.R similarity index 100% rename from scripts/MiRAE_funcs.R rename to scripts/funcs.R -- GitLab