diff --git a/R/PolyHaplotyper.R b/R/PolyHaplotyper.R index d01fe55c84d932eb12ffd7aa5c7f8022df3b3792..cf9eede1497665df3c70375abdfc3696565ce7c4 100644 --- a/R/PolyHaplotyper.R +++ b/R/PolyHaplotyper.R @@ -11,17 +11,19 @@ # ploidy level. # PolyHaplotyper should not be confused with PediHaplotyper, which is aimed -# at diploid individuals that are all part of a pedigree. In contrast, -# PolyHaplotyper can handle any ploidy level and unreletaed individuals. -# It can make use of F1 populations, but (so far) does not use segregation -# ratios. +# at diploid individuals that are all part of a pedigree, and where the +# (bi- or multi-allelic) marker alleles have been phased before. +# In contrast, PolyHaplotyper can handle any ploidy level and uses unphased +# bi-allelic marker genotypes (i.e. marker allele dosages). It can work with +# unrelated individuals and/or F1 populations, but (so far) does not use +# segregation ratios. #'@importFrom utils combn #the following line serves to stop devtools::check complaining about #some identifiers that do exist but it doesn't find because they are #loaded from RData files and/or created in the global evironment: -utils::globalVariables(names=c("ahcploidy", "ahclist"), +utils::globalVariables(names=c("ahcploidy", "ahclist", "ahcunsaved"), add=TRUE) # General approach (not using segregation ratios) @@ -87,16 +89,25 @@ loadAllHaploCombList <- function(ploidy) { # Checks variable ahcploidy in GlobalEnv. If that doesn't exist or # is not equal to ploidy, attempts to load ahclist into the GlobalEnv # from a file 'ahclist_nx.RData' in the current working directory (where n is - # the ploidy). If that is succesful, ploidy is stored in variable ahcploidy + # the ploidy). If that is successful, ploidy is stored in variable ahcploidy # in the GlobalEnv, else neither ahcploidy not ahclist is created or changed. # Return value is TRUE if the ahclist was already available or is loaded # succesfully, else FALSE. - listloaded <- exists("ahcploidy", envir=.GlobalEnv, inherits==FALSE) && - (ahcploidy == ploidy) && - exists("ahclist", envir=.GlobalEnv, inherits==FALSE) + listloaded <- FALSE + if (exists("ahcploidy", envir=.GlobalEnv, inherits==FALSE) && + exists("ahclist", envir=.GlobalEnv, inherits==FALSE)) { + listloaded <- ahcploidy == ploidy + if (!listloaded && exists("ahcunsaved", envir=.GlobalEnv, inherits==FALSE) + && ahcunsaved) { + #the currently loaded ahclist is for a different ploidy and unsaved; + #we save it before loading the ahclist for the current ploidy: + save(ahclist, file=paste0("ahclist_", ahcploidy,"x.RData")) + } + } if (!listloaded && file.exists(paste0("ahclist_", ploidy, "x.RData"))) { load(paste0("ahclist_", ploidy,"x.RData"), envir=.GlobalEnv) assign("ahcploidy", value=ploidy, envir=.GlobalEnv, inherits=FALSE) + assign("ahcunsaved", value=FALSE, envir=.GlobalEnv, inherits=FALSE) listloaded <- TRUE } listloaded @@ -104,7 +115,8 @@ loadAllHaploCombList <- function(ploidy) { #'@title get all haplotype combinations that result in the given SNP dosages #'@description get all haplotype combinations that result in the given SNP dosages -#'@usage allHaploComb(SNPdosages, did, allhap, ploidy, progress=FALSE) +#'@usage allHaploComb(SNPdosages=NULL, did=NULL, allhap, ploidy, +#'writeFile=TRUE, progress=FALSE) #'@param SNPdosages an integer vector with the dosages of each SNP in one #'individual, with the SNPs in the same order as in the columns of allhap. #'Either SNPdosages or did must be specified, if both are specified they @@ -116,6 +128,11 @@ loadAllHaploCombList <- function(ploidy) { #'same order as SNPdosages #'@param ploidy the ploidy of the individual; SNPdosages should all be in #'0:ploidy +#'@param writeFile The calculated haplotype combis are stored in list +#'ahclist in GlobalEnv. Default writeFile TRUE ensures that this list is written +#'to file each time a new SNPdid is calculated. With writeFile FALSE the list is +#'not written to file but a variable ahcunsaved with value FALSE is +#'created in GlobalEnv. #'@param progress whether to print a message when starting calculations #'for a did; default FALSE #'@details Each column of the return value represents one combination of @@ -124,7 +141,8 @@ loadAllHaploCombList <- function(ploidy) { #'@return an integer matrix with one row per haplotype (corresponding to allhap) #'and one column per combination of haplotypes, with the number of copies #'of each haplotype in each combination (the dosages of the haplotypes) -allHaploComb <- function(SNPdosages, did, allhap, ploidy, progress=FALSE) { +allHaploComb <- function(SNPdosages=NULL, did=NULL, allhap, ploidy, + writeFile=TRUE, progress=FALSE) { # TODO: implement parallel computing of combinations for new did combinations, # https://artax.karlin.mff.cuni.cz/r-help/library/Rdsm/html/Rdsm-package.html # seems useful (using shared memory) but may need to delay saving ahclist @@ -134,11 +152,11 @@ allHaploComb <- function(SNPdosages, did, allhap, ploidy, progress=FALSE) { #initial checks: nSNP <- ncol(allhap) - if (!missing(SNPdosages) && + if (!missing(SNPdosages) && !is.null(SNPdosages) && (length(SNPdosages) != nSNP || !all(SNPdosages %in% 0:ploidy))) stop("allHaploComb: invalid SNPdosages") - if (missing(did)) { - if (missing(SNPdosages)) { + if (missing(did) || is.null(did)) { + if (missing(SNPdosages) || is.null(SNPdosages)) { stop("allHaploComb: SNPdosages or did must be specified") } else did <- SNPdidfun(SNPdosages, ploidy=ploidy) } @@ -158,7 +176,7 @@ allHaploComb <- function(SNPdosages, did, allhap, ploidy, progress=FALSE) { #we create or copy the list to a local tmplist, modify that, #store it in .GlobalEnv and save it, and return the matrix with haplotype #combinations. - if (progress) cat(paste0("allHaploComb: calculation for did=", did, "\n")) + if (progress) cat(paste0("allHaploComb: calculation for SNPdid=", did, "\n")) if (listloaded) tmplist <- ahclist else tmplist <- list() if (length(tmplist) < nSNP) tmplist[[nSNP]] <- list() @@ -203,10 +221,128 @@ allHaploComb <- function(SNPdosages, did, allhap, ploidy, progress=FALSE) { tmplist[[nSNP]][[didix]] <- allmat names(tmplist[[nSNP]])[didix] <- did assign("ahclist", value=tmplist, envir=.GlobalEnv, inherits=FALSE) - save(ahclist, file=paste0("ahclist_", ploidy,"x.RData")) + if (writeFile) { + save(ahclist, file=paste0("ahclist_", ploidy,"x.RData")) + } else assign("ahcunsaved", value=TRUE, envir=.GlobalEnv, inherits=FALSE) allmat } #allHaploComb +getHapcombCount_1SNP <- function(SNPdosage, ploidy, nhap) { + #nhap = total nr of different haplotypes for nSNP SNPs = 2^nSNP + #returns the number of haplotype combinations for a given dosage d at one SNP + #currently not used + d <- SNPdosage + h <- nhap %/% 2 #as.integer(2 ^ (nSNP-1) + 0.001) + if (d == ploidy) C0 <- 1 else + C0 <- cumprod(seq(h, h+ploidy-d-1))[ploidy-d] / cumprod(seq_len(ploidy-d))[ploidy-d] + if (d == 0) C1 <- 1 else + C1 <- cumprod(seq(h, h+d-1))[d] / cumprod(seq_len(d))[d] + C0 * C1 +} #getHapcombCount_1SNP + +totHapcombCount <- function(ploidy, nhap) { + #nhap = total nr of different haplotypes for nSNP SNPs = 2^nSNP + #returns the total number of haplotype combinations + #currently not used + cumprod(seq(nhap, nhap+ploidy-1))[ploidy] / + cumprod(seq_len(ploidy))[ploidy] +} #totHapcombCount + +#'@title generate all haplotype combinations +#'@description generate a list which contains for each SNP dosage combination +#'at a given ploidy all matching haplotytpe combinations +#'@usage completeAllHaploComb(ploidy, nSNP, savesec=1800, printsec=60, +#'fname=NULL) +#'@param ploidy ploidy (may be even or odd) +#'@param nSNP number of SNPs in the haploblock +#'@param savesec number of seconds between successive saves of the intermediate +#'results +#'@param printsec number of seconds between printout of the currect set of +#'haplotypes (the last two are always equal at the time of printing) +#'@param fname filename (to which the extension .RData will be added) to store +#'the results. Intermediate saves will go to fname + the current set of +#'haplotypes; these intermediate files are temporary and will be deleted.\cr +#'If NULL (default), fname will be set to e.g. ahc_4x_nSNP6, where 4 is the +#'ploidy and nSNP = 6 +#'@return a list with for each SNPdid (each combination of SNP dosages) a +#'matrix with one row per haplotype and one column per haplotype combination, +#'containing the dosages of the haplotypes in each haplotype combination +#'@export +completeAllHaploComb <- function(ploidy, nSNP, savesec=1800, printsec=60, + fname=NULL) { + # this will generate a complete sub-list of an ahclist (see allHaploComb) + # for the given ploidy and number of SNPs + # and save it to a file called eg ahc_4x_nSNP7.RData + # every savesec seconds a temporary file will be saved with the total nr of + # SNPdids done so far appended to the filename + + if (is.null(fname)) fname <- paste0("ahc_", ploidy, "x_nSNP", nSNP) + + ndids <- as.integer((ploidy + 1) ^ nSNP + 0.001) + ahc <- list() + for (did in ndids:1) ahc[[did]] <- matrix(NA_integer_, nrow=ploidy, ncol=1) + lasthap <- rep(0, ndids) # last stored haplotype column for each did + allhap <- allHaplotypes(paste0("m", 1:nSNP)) + print(allhap) + lastfile <- "" + lastsave <- proc.time() + lastprint <- lastsave - printsec - 1 + #new approach: + #instead of taking all SNPdosages in turn and checking which hapcomb + #matches them, generate all hapcombs in turn and calculate their SNPdids + nhap <- nrow(allhap) + allset <- rep(nhap, ploidy) #current haplotype set: a full set has length ploidy + while(TRUE) { + SNPdos <- colSums(allhap[allset,]) + did <- SNPdidfun(SNPdos, ploidy=ploidy) + lasthap[did] <- lasthap[did] + 1 + if ((nc<-ncol(ahc[[did]])) < lasthap[did]) { + #we don't want to reallocate for each new column but increase size + #when needed by 50% + extmat <- matrix(NA_integer_, nrow=ploidy, ncol= nc %/% 2 + 2) + ahc[[did]] <- cbind(ahc[[did]], extmat) + } + ahc[[did]][,lasthap[did]] <- allset + # get the next allset: + len <- ploidy + allset[len] <- allset[len] - 1 + while (len > 0 && allset[len] <= 0) { + len <- len - 1 + if (len > 0) allset[len] <- allset[len] - 1 + } + if (len <= 0) break + if (len < ploidy) { + allset[(len+1):ploidy] <- allset[len] #nhap + #progress and save: only check when len < ploidy + if ((proc.time()-lastprint)[3] > printsec) { + cat(paste0("ploidy: ", ploidy, " nSNP: ", nSNP, + " allset: ", paste(allset, collapse=" "), "\n")) + lastprint <- proc.time() + if ((lastsave-lastprint)[3] > savesec) { + save(ahc, file=paste0(fname, "_", paste(allset, collapse="-"), + ".RData")) + if (file.exists(lastfile)) file.remove(lastfile) + lastfile <- fname + lastsave <- proc.time() + } + } + } # len < ploidy + } + # all elements of ahc are now matrices with one solution per column, + # in the form of sets like 1-1-2-4 (i.e. all occurring haplotypes listed). + # now we convert that to columns with the dosages of each haplotype: + # + for (did in seq_along(ahc)) { + #ncomb <- ncol(ahc[[did]]) + allmat <- matrix(NA_integer_, nrow=nhap, ncol=lasthap[did]) + for (col in seq_len(lasthap[did])) + allmat[,col] <- tabulate(ahc[[did]][,col], nbins=nhap) + ahc[[did]] <- allmat + } + save(ahc, file=paste0(fname, ".RData")) + if (file.exists(lastfile)) file.remove(lastfile) + invisible(ahc) +} #completeAllHaploComb #'@title calculate the SNP dosages resulting from haplotype combinations #'@description calculate the SNP dosages resulting from haplotype combinations @@ -711,15 +847,21 @@ haplotypeDosages <- function(haploblockname, ihl, #'names, SNP names are the row names or (if a data.frame) in a column named #'MarkerNames. All SNP dosages must be in 0:ploidy or NA. If a data.frame, #'additional columns may be present. -#'@param indiv NULL (default) or a character vector with names of individuals -#'to be selected. If NULL, all columns are selected; -#'if SNPdosages is a data.frame, that is probably not what is intended. +#'@param indiv NULL (default) or a character vector with names of all individuals +#'to be considered. If NULL, all columns are selected; +#'if SNPdosages is a data.frame, that is probably not what is intended.\cr +#'All indivs that are not in any of the P1, P2 or F1 vectors are considered +#'unrelated, i.e. we have no implementation for pedigrees (yet). #'@param ploidy all SNP dosages should be in 0:ploidy or NA #'@param haploblockname prefix to which the (zero-padded) haplotype numbers #'are added. Any separator (like '_') should be part of haploblockname -#'@param P1 character vector with sample name(s) for P1 (parent 1) -#'@param P2 character vector with sample name(s) for P2 (parent 2) -#'@param F1 character vector of sample names for F1 +#'@param P1 character vector with sample name(s) for P1 (parent 1), or a list +#'of such vectors in case there are multiple F1s. Note that there may be +#'multiple samples of a parent from which a consensus marker dosage combination +#'is calculated. +#'@param P2 as P1, for parent 2 +#'@param F1 character vector of sample names for F1, or a list +#'of such vectors in case there are multiple F1s #'@param minfrac vector of two fractions, default 0.1 and 0.01. A haplotype is #'considered to be certainly present if it occurs in at least a fraction #'minfrac[1] of all individuals; in the final stage for the "other" @@ -755,15 +897,46 @@ haplotypeDosages <- function(haploblockname, ihl, #'If dropUnused is TRUE the matrix has 0 rows if no unique haplotype #'combinations can be inferred for any individual #'@export -haplotypeDosagesF1 <- function(SNPdosages, indiv, ploidy, haploblockname, +haplotypeDosagesF1 <- function(SNPdosages, indiv=NULL, ploidy, haploblockname, P1, P2, F1, minfrac=c(0.1, 0.01), F1frac=0.05, dropUnused=TRUE, maxparcombs=150000) { dosmat <- checkSNPdosages(SNPdosages, indiv, ploidy) if (is.character(dosmat)) stop(dosmat) + if (!is.list(F1)) F1 <- list(F1) + #F1 is now a list with one vector of sample names for each F1 population + if (!all(do.call(c, F1) %in% colnames(dosmat))) + stop("Some F1 samples not present") + + P <- list(P1, P2) # a list of lists + for (p in 1:2) if (!is.list(P[[p]])) P[[p]] <- list(P[[p]]) + msg <- checkAllParentSets(P, npop=length(F1), indiv=colnames(dosmat)) + if (msg != "") stop(msg) + PF1 <- matrix(NA, nrow=length(F1), ncol=2, + dimnames=list(paste0("pop", seq_along(F1)), c("P1", "P2"))) + #PF1 will contain one sample name per population for each parent; + #dosmat will be changed so that all other parental samples are deleted and + #the sample in PF1 has the consensus SNP dosages per parent + for (p in 1:2) { + for (pop in seq_along(F1)) { + #it can be that this same parent has already been checked for an earlier + #population and then only one sample remains, with the consensus dosage: + P[[p]][[pop]] <- intersect(P[[p]][[pop]], colnames(dosmat)) + if (length(P[[p]][[pop]]) > 1) { + P[[p]][[pop]] <- sort(P[[p]][[pop]]) + Pcons <- getConsensusSNPdosages(dosmat=dosmat, indiv=P[[p]][[pop]]) + delsamp <- P[[p]][[pop]][-1] #only not the first sample + dosmat <- dosmat[, -(colnames(dosmat) %in% delsamp)] + dosmat[,colnames(dosmat) == P[[p]][[pop]][1]] <- Pcons + } + PF1[pop, p] <- P[[p]][[pop]][1] + } + } indiv <- colnames(dosmat) - P1 <- as.character(P1); P2<- as.character(P2); F1 <- as.character(F1) - if (!all(c(P1, P2) %in% indiv)) stop("Some parental samples missing") - if (!all(F1 %in% indiv)) stop("Some F1 samples missing") + # now PF1 has the names of the first samples of P1 and P2 for each pop, + # and dosmat has for each parent only one column with the consensus SNP dosages, + # with as colnames the sample names in PF1 + # Note that if the same parent is used multiple times all samples should + # be given each time (order may be different) ihl <- inferHaplotypes(SNPdosages=dosmat, indiv=indiv, ploidy=ploidy, minfrac=minfrac[1]) #without final inference @@ -812,9 +985,43 @@ haplotypeDosagesF1 <- function(SNPdosages, indiv, ploidy, haploblockname, list(P1combs=P1combs, P2combs=P2combs, parcombok=parcombok) } #calcParcombok within haplotypeDosagesF1 + for (pop in seq_along(F1)) { + #todo: analysis per F1; in which order: + #large to small F1, or small to large nr of parental combs, or ...? + #among F1's larger than (eg) 6 * ploidy: order by increasing nr + #of parental combinations based on initial inferHaplotypes; + #- when no solution found: calculate nr of parental combs based on all + # possibilities and reorder pops + #- when solution found: re-do inferHaplotypes with the known parental + # haplotypes present and re-order the remaining pops that have not been + # tried yet on the new nr of parental combs + #- after all larger pops have been done, do the small F1's in decreasing + # order of size (first based on inferHaplotypes, then if needed based + # on all possible combs) + #- after the last F1, a last inferHaplotypes with all parental combs given + # to score the "other" samples + } + + F1dat <- data.frame( + size = integer(length(F1)), #fixed: nr of F1 indiv with non-NA SNPdid + parcombcount = integer(length(F1)), #will change several times + nextcombs = rep(1, length(F1)), #next to try: + # 1= parcombs based on last inferHaplotypes, + # 2= all possible parcombs except tried earlier, + # 3= nothing (ready), + solved = rep(FALSE, length(F1)) + ) + + for (f in seq_along(F1)) { + dids <- ihl$SNPdids[names(ihl$SNPdids) %in% F1[[f]]] + F1dat$size[f] <- sum(is.na(dids)) + } + largeF1s <- which(F1$size > 6 * ploidy) + + F1SNPdidsTable <- table(ihl$SNPdids[names(ihl$SNPdids) %in% F1]) - P1combs <- getParentalCombs(parsamp=P1, ihl=ihl, useIH=TRUE) - P2combs <- getParentalCombs(parsamp=P2, ihl=ihl, useIH=TRUE) + P1combs <- getParentalCombs(parsamp=P[1], ihl=ihl, useIH=TRUE) + P2combs <- getParentalCombs(parsamp=P[2], ihl=ihl, useIH=TRUE) message <- "" if (ncol(P1combs) * ncol(P2combs) > maxparcombs) { message <- paste("more than", maxparcombs, "parental combinations; skipped,", @@ -946,15 +1153,17 @@ haplotypeDosagesF1 <- function(SNPdosages, indiv, ploidy, haploblockname, had[, colnames(had) %in% SNPdidind] <- SNPdidcomb } } - other <- setdiff(indiv, c(P1, P2, F1)) - parhaplo <- which(rowSums(parcomb) > 0) - iho <- inferHaplotypes(SNPdosages=dosmat, indiv=other, ploidy=ploidy, - minfrac=minfrac, sel_hap=parhaplo) - for (oSNPdid in names(iho$SNPdidsTable)) { - ocombs <- iho$hclist[[oSNPdid]] - if (ncol(ocombs) == 1) { - oSNPdidind <- names(iho$SNPdids)[iho$SNPdids == oSNPdid] - had[, colnames(had) %in% oSNPdidind] <- ocombs + other <- setdiff(indiv, c(do.call(c, F1), PF1)) #all except F1s and F1 parents + if (length(other) > 0) { + parhaplo <- which(rowSums(parcomb) > 0) + iho <- inferHaplotypes(SNPdosages=dosmat, indiv=other, ploidy=ploidy, + minfrac=minfrac, sel_hap=parhaplo) + for (oSNPdid in names(iho$SNPdidsTable)) { + ocombs <- iho$hclist[[oSNPdid]] + if (ncol(ocombs) == 1) { + oSNPdidind <- names(iho$SNPdids)[iho$SNPdids == oSNPdid] + had[, colnames(had) %in% oSNPdidind] <- ocombs + } } } #for all indiv with a unique combination of haplotypes we now have @@ -976,19 +1185,94 @@ haplotypeDosagesF1 <- function(SNPdosages, indiv, ploidy, haploblockname, # - Does the F1 have an advantage for scoring the other samples? # Compare this for all contigs +#'@title get consensus SNP dosages from one or more samples +#'@description get consensus SNP dosages from one or more samples +#'@usage getConsensusSNPdosages(dosmat, indiv, solveConflicts=TRUE) +#'@param dosmat a dosage matrix with SNPs in rows and individuals in columns. +#'row names are SNP names, column names are individual names. +#'@param indiv character vector with the names of the samples from which to +#'obtain consensus scores +#'@param solveConflicts if TRUE (default) and there are conflicting dosage +#'assignments between the samples for the same SNP, the one with highest +#'frequency is used. If there are more dosages with the maximum frequency or if +#'solveConflicts is FALSE and there are conflicting dosages, the consensus for +#'that SNP will be NA +#'@return a vector with one consensus dosage for each row of dosmat, and the +#'row names of dosmat as names +#'@details a consensus dosage is calculated if at least one of the dosages is +#'not NA, and if all non-NA dosages are equal. Any SNPs for which this is not +#'the case will have NA as consensus dosage. +getConsensusSNPdosages <- function(dosmat, indiv, solveConflicts=TRUE) { + if (is.vector(dosmat)) dosmat <- matrix(dosmat, ncol=1) + if (!all(indiv %in% colnames(dosmat))) + stop("not all indiv in dosmat") + dosmat <- dosmat[,colnames(dosmat) %in% indiv, drop=FALSE] + suppressWarnings({ #suppress warnings about all dosages NA + mindos <- apply(dosmat, MARGIN=1, FUN=min, na.rm=TRUE) + maxdos <- apply(dosmat, MARGIN=1, FUN=max, na.rm=TRUE) + }) + if (solveConflicts) { + for (i in which(mindos != maxdos)) { + tb <- table(dosmat[i,]) # length 0 if all NA + suppressWarnings({ maxtb <- which(tb == max(tb)) }) + if (length(maxtb) == 1) { + mindos[i] <- as.integer(names(tb)[maxtb]) + } else mindos[i] <- NA + } + } else { + #not solve conflicts, set any SNP with conflicting dosages to NA: + mindos[mindos != maxdos] <- NA #includes rows with all NA because -Inf != Inf + } + names(mindos) <- row.names(dosmat) + mindos +} #getConsensusSNPdosages + +#'@title check all parental sample sets +#'@description check that for P1 and P2 the correct number of sample sets are +#'present, that these contain no duplicates and that that they overlap +#'completely or not at all +#'@usage checkAllParentSets(P, npop, indiv) +#'@param P a list of 2 elements, each of which is a list of character vectors +#'containing the sample names for parents 1 and parents 2 +#'@param npop the number of F1 populations +#'@param indiv a character vector with all sample names, the parental samples +#'should all be in indiv +#'@return an error message, "" if all is well +checkAllParentSets <- function(P, npop, indiv) { + if (length(P) !=2 || length(P[[1]] != npop) || length(P[[2]]) != npop) + return("for each F1 population there should be one set of samples for each parent") + # check that no parental set has duplicate samples and that all samples are in indiv: + P <- c(P[[1]], P[[2]]) + for (i in seq_len(2*npop)) { + if (length(P[[i]]) != length(unique(P[[i]]))) + return("some parental sample sets contain duplicate samples") + if (!all(P[[i]] %in% indiv)) + stop("Some parental samples are missing from indiv") + } + #check that if two parents are the same, all their samples are the same: + for (i in 1:(length(P)-1)) for (j in (i+1):length(P)) { + if (length(intersect(P[[i]], P[[j]])) > 0) { + if (!setequal(P[[i]], P[[j]])) + return("some parental sample sets overlap partially") + } + } + return("") +} #checkAllParentSets #'@title get the most likely haplotype combinations for a (parental) individual #'@description get the most likely haplotype combinations for a (parental) #'individual based on all its samples #'@usage getParentalCombs(parsamp, ihl, useIH) #'@param parsamp character vector with all sample names for the individual -#'@param ihl a list as returned by inferHaplotypes +#'@param ihl a list as returned by inferHaplotypes, of which SNPdid, hclist and +#'allhap are used #'@param useIH TRUE if only the inferred haplotype combinations must be #'returned, FALSE for all possible haplotype combinations #'@return a matrix with one row for each haplotype and one column for each #'of the most likely combinations of haplotypes, with each element being #'the haplotype dosage in that combination getParentalCombs <- function(parsamp, ihl, useIH) { + #TODO: see if we can fill in missing SNP dosages by combining the samples #which SNPdids occur over all samples in parsamp? parSNPdidsTable <- table(ihl$SNPdids[names(ihl$SNPdids) %in% parsamp])