Skip to content
Snippets Groups Projects
Commit c76cf796 authored by hjmegens's avatar hjmegens
Browse files

pop genomics

parent da293f53
Branches near-real-time
No related tags found
No related merge requests found
## Path data:
/home/formacion/COMUNES/IAMZ/data/CIHEAM/sessions/Session5.7_PCA_LD/
### STEP 1. SNP calling using ANGSD (output in plink format) and basic filtering:
angsd -bam list_bam_leon.txt -minMapQ 30 -minQ 20 -minInd 13 -baq 1 -doCounts 1 -setMinDepth 30 -setMaxDepth 450 -GL 1 -out data_raw -ref UMD_5_genome_nospaces_60.fa -doMajorMinor 4 -doMaf 2 -SNP_pval 1e-6 -doGeno 4 -doPost 1 -doPlink 2 -P 10
angsd -bam /home/formacion/COMUNES/IAMZ/data/CIHEAM/sessions/Session5.7_PCA_LD/list_bam_leon.txt -minMapQ 30 -minQ 20 -minInd 13 -baq 1 -doCounts 1 -setMinDepth 30 -setMaxDepth 450 -GL 1 -out data_raw -ref /home/formacion/COMUNES/IAMZ/data/CIHEAM/sessions/Session5.7_PCA_LD/reference/UMD_5_genome_nospaces_60.fa -doMajorMinor 4 -doMaf 2 -SNP_pval 1e-6 -doGeno 4 -doPost 1 -doPlink 2 -P 10
# Edit tfam file (population should be in the first column):
cat data_raw.tfam | awk 'print {$2,$1,$3,$4,$5,$6}' > data_raw.tfam2
mv data_raw.tfam2 data_raw.tfam
# cat data_raw.tfam | awk '{print $2,$1,$3,$4,$5,$6}' > data_raw.tfam2
# mv data_raw.tfam2 data_raw.tfam
# vim data_raw.tfam (pop1: id1-5; pop2: ids6-10; pop3: id11-15
# copy (cp) files tfam and tped into your working directory.
mkdir PCA_LD_session
cp /home/formacion/COMUNES/IAMZ/data/CIHEAM/sessions/Session5.7_PCA_LD/data_raw.t* /your directory/
### STEP 2. Population stratification analysis: PCA, genomic distance matrix and cluster as implemented in Plink
# Filter data using plink:
plink --tfile data_raw --geno 0.3 --maf 0.05 --hwe 1e-5 --make-bed --out data_clean
plink --tfile data_raw --geno 0.1 --maf 0.05 --hwe 1e-5 --make-bed --out data_clean
# PCA:
plink --bfile data_clean --pca 3 header --out data.struc1
......@@ -20,8 +28,9 @@ plink --bfile data_clean --pca 3 header --out data.struc1
PCA <- read.table("data.struc1.eigenvec", header=T, quote="\"")
head (PCA)
library (ggplot2)
c <- ggplot(PCA, aes(y=PC1, x=PC2, colour=factor(FID)))
c + geom_point(size = 3)
c <- ggplot(PCA, aes(y=PC1, x=PC2, colour=factor(FID))) + geom_point(size = 3)
ggsave(c, file="PCA_PC1-PC2.pdf")
####################################################
# Other structure analysis:
......@@ -29,8 +38,6 @@ plink --bfile data_clean --cluster -K 3 --out data.struc2
plink --bfile data_clean --distance triangle 1-ibs --out data.struc3
### STEP 3. LD:
plink --bfile data_clean --r2 --ld-snp Chr1_10002364 --ld-window-kb 1000 --ld-window 500 --ld-window-r2 0.1 --out data.LD
......@@ -38,9 +45,9 @@ plink --bfile data_clean --r2 --ld-snp Chr1_10002364 --ld-window-kb 1000 --ld-wi
LD <- read.table("data.LD.ld", header=T, quote="\"")
head (LD)
LD$dist <- LD$BP_B - LD$BP_A
p <- ggplot(LD, aes(x=dist, y=R2))
p + stat_smooth(span = 1, se=TRUE)
library (ggplot2)
p <- ggplot(LD, aes(x=dist, y=R2)) + stat_smooth(span = 1, se=TRUE)
ggsave(p, file="LD_plot.pdf")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment