Skip to content
Snippets Groups Projects
Commit 9944bcc2 authored by Voorrips, Roeland's avatar Voorrips, Roeland
Browse files

inferHaplotypes uses new criterion to select haplotype combinations (least nr...

inferHaplotypes uses new criterion to select haplotype combinations (least nr of new haplotypes, not only highest dosage of known haplotypes)
parent ae4e5739
No related branches found
No related tags found
No related merge requests found
......@@ -653,18 +653,36 @@ single_cycle_infer <- function(allhap, mrkdidsTable, ploidy, knownHap, progress)
# combinations
hclist <- list()
for (i in seq_along(mrkdidsTable)) {
# for the current mrkdid, gets dosages of all markers
# for the current mrkdid, get the matrix with all possible
# haplotype combinations:
ahc <- allHaploComb(did=names(mrkdidsTable)[i], #mrkDosage may be missing!
allhap=allhap, ploidy=ploidy, progress=progress)
# ... and get all possible haplotype combis that produce this mrkdid
# (could also be passed as parameter to avoid recalculating)
if (length(knownHap) > 0) {
#if there are haplotypes known to occur, select the haplotype combination
#that has the highest total dosage of these known haplotypes
# *****************************************************************
# versions before 20171006:
# select the haplotype combination(s) with the highest total dosage
# of known haplotypes:
sel_ahc <- ahc[knownHap,, drop=FALSE]
sel_dos <- colSums(sel_ahc) #for each haplotype comb the total dosage of selected haplotypes
sel_col <- sel_dos == max(sel_dos) #the combination(s) with the maximum dosage
ahc <- ahc[, sel_col, drop=FALSE] #keep only that/those combination(s)
# *****************************************************************
# version 20171006:
# select the haplotype combination(s) with the smallest number of
# new haplotypes, and among those select the combis
# with the highest total dosage of known haplotypes:
ahc_pres <- ahc > 0
nwhapcount <- colSums(ahc_pres[-knownHap,, drop=FALSE])
ahc <- ahc[, nwhapcount==min(nwhapcount), drop=FALSE] #least nr of new haplotypes
knownhapdos <- colSums(ahc[knownHap,, drop=FALSE])
ahc <- ahc[, knownhapdos==max(knownhapdos), drop=FALSE] #max dosage of known haplotypes
# (a different approach could be: for each possible set of haplotypes in
# addition to the known ones, from small to large, see how many
# individuals have one or more solutions, and stop adding haplotypes at
# some point (when extra haplotypes would probably only be used to
# fit errors in the SNP dosages);
# but then, no selection is made between solutions for a specific did)
# *****************************************************************
}
hclist[[length(hclist) + 1]] <- ahc
mima <- haplofrqMinMax(haplocomb=ahc)
......@@ -790,9 +808,9 @@ inferHaps_noF1s <- function(mrkDosage, indiv=NULL, ploidy,
knownNew <- sort(union(origHap, which(res$nPresent >= minfrac[1] * nind)))
haploLost <- setdiff(knownHap, knownNew)
if (length(haploLost) > 0) {
#The initial expectation was that within these cycles haplotypes can only
#be gained, not lost (except the origHap which need not be supported by
#the current data, but these have been added back to knownNew).
#The initial expectation was that within these cycles known haplotypes can
#only be gained, not lost (except the origHap which need not be supported
#by the current data, but these have been added back to knownNew).
#Therefore the following warning was generated if haplotypes were lost:
#warning(paste0("inferHaps_noF1s: lost in cycle ", cycle, ": ",
# paste(haploLost, collapse=" ")))
......@@ -1631,7 +1649,7 @@ hapOneBlock <- function(mrkDosage, hbname, ploidy, parents, F1,
#'using F1 population(s) (with parents) if present, and infer haplotypes for
#'non-F1 material as well
#'@usage inferHaplotypes(mrkDosage, indiv=NULL, ploidy, haploblock,
#'parents, F1, minfrac=c(0.1, 0.01), errfrac=0.03, DRrate=0.04, maxmrk=5,
#'parents=NULL, F1=NULL, minfrac=c(0.1, 0.01), errfrac=0.03, DRrate=0.04, maxmrk=5,
#'dropUnused=TRUE, maxparcombs=150000, minPseg=1e-8, knownHap=integer(0),
#'progress=TRUE)
#'@param mrkDosage matrix or data.frame. markers are in rows, individuals in
......@@ -1652,7 +1670,7 @@ hapOneBlock <- function(mrkDosage, hbname, ploidy, parents, F1,
#'haploblock name; in that case a haploblock list with one item containing
#'all marker names is generated implicitly.
#'@param parents a matrix with 2 columns and one row for each F1 population,
#'with the names of the mother and father of each population
#'with the names of the mother and father of each population.
#'@param F1 a list of character vectors. Each character vector has the names of
#'the individuals of one F1 population
#'@param minfrac vector of two fractions, default 0.1 and 0.01. A haplotype is
......@@ -1699,7 +1717,7 @@ hapOneBlock <- function(mrkDosage, hbname, ploidy, parents, F1,
#'inferHaps_noF1s (see there).
#'@return a list with for each haploblock one item that itself is a list
#'with items:\cr
#'message; if this is "" the the haploblock is processed and further
#'message; if this is "" the haploblock is processed and further
#' elements are present; else this message says why the haploblock was
#' skipped (currently only if it contains too many markers)\cr
#'hapdos: a matrix with the dosages of each haplotype (in rows) for each
......@@ -1730,7 +1748,7 @@ hapOneBlock <- function(mrkDosage, hbname, ploidy, parents, F1,
#' because of a lack of fit\cr
#'@export
inferHaplotypes <- function(mrkDosage, indiv=NULL, ploidy, haploblock,
parents, F1,
parents=NULL, F1=NULL,
minfrac=c(0.1, 0.01), errfrac=0.03, DRrate=0.04,
maxmrk=5,
dropUnused=TRUE, maxparcombs=150000,
......@@ -2662,6 +2680,11 @@ overviewByF1 <- function(haploblock, parents, F1, hapresults) {
#' parents, assuming polysomic inheritance, allowing Double Reduction\cr
#'- withDR.FALSE: individuals whose haplotype combination doesn't match that of
#' its parents, assuming polysomic inheritance, allowing Double Reduction\cr
#'The total number of individuals is \cr
#'match.NA + noDR.TRUE + noDR.FALSE == match.NA + withDR.TRUE + withDR.FALSE;\cr
#'the number of individuals with missing marker data or without assigned
#'haplotype dosages can be calculated by subtracting mrk or hap from that
#'total.\cr
#'F1stats (only if ovwF1 specified): a data.frame with for each F1 population,
#'"rest" and "all" one row, with columns\cr
#'- pop: the F1 population numbers, "rest" (all individuals not belonging to the
......
......@@ -268,7 +268,7 @@ calcMrkHaptable(ovwF1=ovwF1)
In this small example all haploblocks have 4 markers, so the table contains
only one row.
#Remarks
# Remarks
PolyHaplotypes does not behave exactly as a package should, because it creates
two or three variables in the global environment (.GlobalEnv): ahcinfo, ahclist
and possibly ahccompletelist. These variables are used to manage calculated
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment