Skip to content
Snippets Groups Projects
Commit da293f53 authored by hjmegens's avatar hjmegens
Browse files

pop genomics

parent 4f713d0c
No related branches found
No related tags found
No related merge requests found
File added
### STEP 1. SNP calling using ANGSD (output in plink format) and basic filtering:
# Inputs: list_bam_leon.txt (BAM list) and the reference FASTA given to -ref.
# Output prefix: data_raw (-doPlink 2 produces the data_raw.tped / data_raw.tfam pair used below).
angsd -bam list_bam_leon.txt -minMapQ 30 -minQ 20 -minInd 13 -baq 1 -doCounts 1 -setMinDepth 30 -setMaxDepth 450 -GL 1 -out data_raw -ref UMD_5_genome_nospaces_60.fa -doMajorMinor 4 -doMaf 2 -SNP_pval 1e-6 -doGeno 4 -doPost 1 -doPlink 2 -P 10
# Edit tfam file (population should be in the first column): swap columns 1 and 2.
# The awk action must be written as '{print ...}'; the replacement file is only moved
# into place when awk succeeds, so a failed run cannot overwrite data_raw.tfam.
awk '{print $2,$1,$3,$4,$5,$6}' data_raw.tfam > data_raw.tfam2 &&
  mv data_raw.tfam2 data_raw.tfam
### STEP 2. Population stratification analysis: PCA, genomic distance matrix and cluster as implemented in Plink
# Filter the transposed fileset with plink and write it out as the binary fileset data_clean:
plink \
  --tfile data_raw \
  --geno 0.3 \
  --maf 0.05 \
  --hwe 1e-5 \
  --make-bed \
  --out data_clean
# PCA on the filtered data (3 components requested, header line included in the output):
plink \
  --bfile data_clean \
  --pca 3 header \
  --out data.struc1
#### Plot PCA using R (library ggplot2): ##########
library(ggplot2)

# Eigenvector table produced by the `plink --pca 3 header` step; the header row
# supplies the column names used below (FID, PC1, PC2).
PCA <- read.table("data.struc1.eigenvec", header = TRUE, quote = "\"")
head(PCA)

# One point per sample, coloured by the first (FID) column.
# NOTE(review): PC1 is mapped to the y-axis and PC2 to the x-axis, exactly as
# before; swap the two aesthetics if the conventional orientation is wanted.
pca_plot <- ggplot(PCA, aes(y = PC1, x = PC2, colour = factor(FID))) +
  geom_point(size = 3)
# Explicit print() so the figure is also drawn when the script is source()d.
print(pca_plot)
####################################################
# Other structure analysis:
# Clustering with a cluster-count setting of 3 (option written as --K, matching
# the double-dash spelling used by every other plink option in this file):
plink --bfile data_clean --cluster --K 3 --out data.struc2
# Pairwise distance matrix (triangle layout, 1-ibs units):
plink --bfile data_clean --distance triangle 1-ibs --out data.struc3
### STEP 3. LD:
# r2 for the index SNP Chr1_10002364, using the window settings below; results go to the data.LD prefix.
plink \
  --bfile data_clean \
  --r2 \
  --ld-snp Chr1_10002364 \
  --ld-window-kb 1000 \
  --ld-window 500 \
  --ld-window-r2 0.1 \
  --out data.LD
# Plot pattern of LD as the genomic distance increases in R (library ggplot2):
library(ggplot2)

LD <- read.table("data.LD.ld", header = TRUE, quote = "\"")
head(LD)

# Distance between the two variants of each pair. abs() keeps it non-negative:
# with --ld-snp the partner variant can sit on either side of the index SNP,
# so the raw difference BP_B - BP_A may be negative.
LD$dist <- abs(LD$BP_B - LD$BP_A)

# Smoothed R2 as a function of distance, with its confidence band.
p <- ggplot(LD, aes(x = dist, y = R2))
# Explicit print() so the figure is also drawn when the script is source()d.
print(p + stat_smooth(span = 1, se = TRUE))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment