.packageName <- "GOstats"
 combGOGraph = function(g1, g2) .Defunct("join", package = "GOstats")

##given a set of LOCUSLINK IDs obtain the GO graph that has all
##GO ids that those genes are annotated at, and
makeGOGraph <- function (x, Ontology = "MF", removeRoot=TRUE)
{
    require(GO) || stop("no GO library")
    match.arg(Ontology, c("MF", "BP", "CC"))
    wh <- paste("GO", Ontology, "PARENTS", sep = "")
    dataenv = get(wh, mode="environment")
    newNodes <- mget(x, env = GOLOCUSID2GO, ifnotfound=NA)
    if( length(newNodes) == 1)
       bd = is.na(newNodes[[1]])
    else
       bd = is.na(newNodes)
    newNodes <- newNodes[!bd]

    newNodes <- lapply(newNodes, function(x) x[sapply(x,
                         function(x) {if (is.na(x$Ontology) )
                                          return(FALSE)
                                      else
                                          x$Ontology == Ontology})])
    oldEdges <- vector("list", length = 0)
    oldNodes <- vector("character", length = 0)
    for (i in 1:length(newNodes)) {
        newN <- unique(sapply(newNodes[[i]], function(x) x$GOID))
        done <- FALSE
        while (!done) {
            newN <- newN[!(newN %in% oldNodes)]
            if (length(newN) == 0)
                done <- TRUE
            else {
                oldNodes <- c(oldNodes, newN)
                numE <- length(newN)
                nedges <- mget(newN, env = dataenv, ifnotfound=NA)
                nedges <- nedges[!is.na(nedges)]
                oldEdges <- c(oldEdges, nedges)
                if (length(nedges) > 0)
                  newN <- sort(unique(unlist(nedges)))
                else newN <- NULL
            }
        }
    }
    rE <- vector("list", length = length(oldNodes))
    names(rE) <- oldNodes
    rE[names(oldEdges)] <- oldEdges
    rE <- lapply(rE, function(x) x <- which(oldNodes %in% x))
    names(rE) <- oldNodes

    rval = new("graphNEL", nodes = oldNodes, edgeL = lapply(rE,
        function(x) list(edges = x)), edgemode = "directed")

    if (removeRoot) {
        ## For compatibility with old GO packages, check for "GO:0003673" if
        ## "all" is not found.
        if (is.element("all", nodes(rval)))
          rval = removeNode("all", rval)
        else if (is.element("GO:0003673", nodes(rval)))
          rval = removeNode("GO:0003673", rval)
    }
    rval
}


 ## helper function, determines if there is a GO annotation for the
 ## desired mode
 hasGOannote <- function(x, which="MF") {
     if( is(x, "GOTerms") ) {
         cat = Ontology(x)
         if( !is.na(cat) && cat == which )
            return(TRUE) else return(FALSE)
     }
     if( is.list(x) ) {
         gT = sapply(x, function(y) is(y, "GOTerms"))
         if( any(gT) ) {
             if( all(gT) ) {
                 cats = sapply(x, Ontology)
                 return(cats == which)
             }
             else
                 stop("mixed arguments not allowed")
         }
     }
     if( !is.character(x) )
         stop("wrong argument")
     tm <- getGOOntology(x)
     return(tm == which)
 }


##start with one specific term and find it's more general terms
## if A has edges to B, C and D
##then at step 2, the nodes are A,B,C,D; new nodes B,C,D
##we need to find edges for each of them
##we're going to build a graphNEL
GOGraph = function(x, dataenv) {
    require(GO) || stop("no GO library")
    if (!is.environment(dataenv))
        stop("second argument must be an environment")
    ##this is the old oneGOGraph code - but it just works for
    ##multiple inputs
    oldEdges <- vector("list", length=0)
    oldNodes <- vector("character", length=0)
    newN <- x
    done <- FALSE
    while( !done ) {
        newN <- newN[!(newN %in% oldNodes)]
        if( length(newN) == 0 )
            done <- TRUE
        else {
            oldNodes <- c(oldNodes, newN)
            numE <- length(newN)
            nedges <- mget(newN, env=dataenv, ifnotfound=NA)
            nedges <- nedges[!is.na(nedges)]
            oldEdges <- c(oldEdges, nedges)
            if( length(nedges) > 0 )
                newN <- sort(unique(unlist(nedges)))
            else
                newN <- NULL
        }
    }
    rE <- vector("list", length=length(oldNodes))
    names(rE) <- oldNodes
    ##DON'T CHANGE: we must have the indices in the edgeL slot be
    ##   integer offsets  to the node list
    rE[names(oldEdges)] <- oldEdges
    rE <- lapply(rE, function(x) match(x, oldNodes))
    names(oldNodes) = oldNodes
    return(new("graphNEL", nodes=oldNodes, edgeL = lapply(rE,
               function(x) list(edges=x)),edgemode="directed" ))
}

##to be deprecated
oneGOGraph <- function(x, dataenv) {
    require(GO) || stop("no GO library")
    if( length(x) > 1 )
        stop("wrong number of GO terms")
    if (!is.environment(dataenv))
        stop("second argument must be an environment")
    GOGraph(x, dataenv)
}
 ##GOleaves: the leaves of the GO graph are those nodes that have no
 ##inedges
  GOLeaves <- function(inG) {
      nG <- nodes(inG)
      iE <- inEdges(nG, inG)
      nG[sapply(iE, length) == 0]
  }

 ##similarity functions - based on graphs
 simUI = function(g1, g2) {
     if(!is(g1, "graph") || !is(g2, "graph") )
         stop("only works for graphs")
     n1 = nodes(g1); n2 = nodes(g2)
    length(intersect(n1, n2))/length(union(n1, n2))
 }

 simLP = function(g1, g2) {
    if(!is(g1, "graph") || !is(g2, "graph") )
         stop("only works for graphs")
    require("RBGL") || stop("need RBGL for this similarity")
    ig <- intersection(g1, g2)
    lfi <- GOLeaves(ig)
    degs = degree(ig)
    root = names(degs$outDegree)[degs$outDegree == 0]
    paths = sp.between(ig, lfi, root)
    plens = sapply(paths, function(x) x$length)
    max(plens)
}

 ##a helper function to get the right GOIDs
 .getWHEC = function(llid, wh, eCodes) {
     x = GOLOCUSID2GO[[llid]]
     if( !is.null(eCodes) )
         x = dropECode(x, eCodes)
     unique(unlist(getOntology(x, wh)))
  }

 simLL = function(ll1, ll2, Ontology="MF", measure = "LP",
                      dropCodes=NULL) {
    wh = match.arg(Ontology, c("MF", "BP", "CC"))
    ll1GO = .getWHEC(ll1, wh, dropCodes)
    ll2GO = .getWHEC(ll2, wh, dropCodes)
    dataenv = get(paste("GO", wh, "PARENTS", sep=""),
                    mode="environment")
    g1 = GOGraph(ll1GO, dataenv)
    g2 = GOGraph(ll2GO, dataenv)
    if( length(g1) == 0 || length(g2) == 0 )
      return(NA)
    sm = match.arg(measure, c("LP", "UI"))
    sim = switch(sm,
           LP = simLP(g1, g2),
           UI = simUI(g1, g2))
    return(list(sim=sim, measure=measure, g1 = g1, g2 =g2))
  }


##three functions to get all the GO information for a set of GO terms
##FIXME: these need to be renovated - probably removed even..
 getGOOntology <- function(x) {
     if( !is.character(x) )
         stop("need a character argument")
     if(length(x) == 0 )
         return( character(0))
     wh <- mget(x, env=GOTERM, ifnotfound=NA)
     return( sapply(wh, Ontology) )
 }

 getGOParents <- function(x) {
     if( !is.character(x) )
         stop("need a character argument")
     if(length(x) == 0 )
         return(list())
     hasMF <- mget(x, env=GOMFPARENTS, ifnotfound=NA)
     hasBP <- mget(x, env=GOBPPARENTS, ifnotfound=NA)
     hasCC <- mget(x, env=GOCCPARENTS, ifnotfound=NA)
     lenx <- length(x)
     rval <- vector("list", length=lenx)
     names(rval) <- x
     rval <- vector("list", length=lenx)
     names(rval) <- x
     for(i in 1:lenx) {
         if( (length(hasMF[[i]]) > 1 ) || !is.na(hasMF[[i]]) )
             rval[[i]] <- list(Ontology="MF", Parents=hasMF[[i]])
         else if( (length(hasMF[[i]]) > 1 ) || !is.na(hasBP[[i]]) )
             rval[[i]] <- list(Ontology="BP", Parents=hasBP[[i]])
         else if( (length(hasMF[[i]]) > 1 ) || !is.na(hasCC[[i]]) )
             rval[[i]] <- list(Ontology="CC", Parents=hasCC[[i]])
         else
             stop(paste(x[i], "is not a member of any ontology"))
     }
     return(rval)
 }

 getGOChildren <- function(x) {
     if( !is.character(x) )
         stop("need a character argument")
     if(length(x) == 0 )
         return(list())
     hasMF <- mget(x, env=GOMFCHILDREN, ifnotfound=NA)
     hasBP <- mget(x, env=GOBPCHILDREN, ifnotfound=NA)
     hasCC <- mget(x, env=GOCCCHILDREN, ifnotfound=NA)
     lenx <- length(x)
     rval <- vector("list", length=lenx)
     names(rval) <- x
     for(i in 1:lenx) {
         if( (length(hasMF[[i]]) > 1 ) || !is.na(hasMF[[i]]) )
             rval[[i]] <- list(Ontology="MF", Children=hasMF[[i]])
         else if( (length(hasMF[[i]]) > 1 ) || !is.na(hasBP[[i]]) )
             rval[[i]] <- list(Ontology="BP", Children=hasBP[[i]])
         else if( (length(hasMF[[i]]) > 1 ) || !is.na(hasCC[[i]]) )
             rval[[i]] <- list(Ontology="CC", Children=hasCC[[i]])
         else
             rval[[i]] <- list()
     }
     return(rval)
 }

 getGOTerm <- function(x) {
     if( !is.character(x) )
         stop("need a character argument")
     if(length(x) == 0 )
         return(list())
     terms <- mget(x, env=GOTERM, ifnotfound=NA)
     ##cannot use is.na, because GOTerms objects are 0 length lists
     isNA = sapply(terms, function(x) !(is(x, "GOTerms")))
     if( any(isNA) )
         terms = terms[!isNA]

     ontology <- sapply(terms, Ontology)
     terms = sapply(terms, Term)
     return(split(terms, ontology))
 }
##now we just sample K from the LLs and generate the GO graph

GOHyperG <- function(x, lib="hgu95av2", what="MF") {

   ##a helper function
    getDataEnv <- function(name, lib) {
        get(paste(lib, name, sep=""), mode="environment")
    }

    ##first some checking

    require(lib, character.only=TRUE) || stop("need data package", lib)

    if( any(duplicated(x)) )
        stop("input IDs must be unique")

    match.arg(what, c("MF", "BP", "CC"))

    ##get the unique LocusLink IDs - GO is mapped to LL not to probe
    cLLs <- unlist(as.list(getDataEnv("LOCUSID", lib)))

    ##which ones do we have - don't worry about duplicates just yet
    ourLLs <- cLLs[match(x, cLLs)]

    ##map to the unique LLs for each GO term
    goV <- as.list(getDataEnv("GO2ALLPROBES", lib))

    ##we need to reduce this, so first we only use the terms our
    ##symbols map to
    ##FIXME: this should work but the mappings are broken

    #a2GO <- mget(x, getDataEnv("GO", lib), ifnotfound=NA)
    #a2GOsz <- mget(unlist(sapply(a2GO, names)),
    #               env=hgu95av2GO2ALLPROBES, ifnotfound=NA)
    #aa <- sapply(a2GOsz, length)
    #a2GO <- unique(unlist(sapply(a2GO, names)))

    whWeHave <- sapply(goV, function(y) {
        if( is.na(y) || length(y) == 0 )
            return(FALSE)
        lls = unique(unlist(mget(y, getDataEnv("LOCUSID", lib),
                        ifnotfound=NA)))
        any(x %in% lls) })

    goV <- goV[whWeHave]

    ##determine which of the GO terms are in the category we are
    ##working on
    goCat <- unlist(getGOOntology(names(goV)))
    goodGO <- goCat == what
    ##not every thing in goV has a category
    mm <- match(names(goCat), names(goV))
    mm <- mm[goodGO]
    goV <- goV[mm]


    ##this is the slow part
    goVcts = sapply(goV, function(x) {
        if(length(x) == 0 || is.na(x) ) return(NA)
        lls <- unique(unlist(mget(x, getDataEnv("LOCUSID", lib))))
        lls<-lls[!is.na(lls)]
        lls
    })
    bad <- sapply(goVcts, function(x) (length(x) == 1 && is.na(x)))
    goVcts = goVcts[!bad]
    goV = goV[!bad] ##we are going to output this...

    ##find out how many map which is m+n in phyper-speak
    cLLs <- unique(unlist(goVcts))
    nLL <- length(cLLs)

    goCounts <- sapply(goVcts, length)


    ourLLs <- unique(ourLLs[!is.na(ourLLs)])
    ours <-ourLLs[!duplicated(ourLLs)]
    ##need to get the number of interesting genes - these
    ##are the ones supplied that map to some GO term
    whGood <- ours[ours %in% cLLs]


    nInt = length(whGood)
    if( nInt == 0 )
       warning("no interesting genes found")

    useCts <- sapply(goVcts, function(x) sum(whGood %in% x))

    ##need the -1 because we are asking for evidence as extreme or
    ##more extreme than what we say - and we are using the upper tail
    pvs <- phyper(useCts-1, nInt, nLL-nInt, goCounts, lower.tail=FALSE)

    ord <- order(pvs)
    return(list(pvalues=pvs[ord], goCounts=goCounts[ord], chip=lib,
                go2Affy=goV, intCounts=useCts[ord], numLL=nLL,
                numInt = nInt, intLLs=x))
}

    ##
##Copyright R. Gentleman, 2004
##simple functions to get Evidence codes

##get then GO term names for a particular (sub)ontology
getOntology = function(inlist, ontology=c("MF", "BP", "CC")) {
   which = match.arg(ontology)
   onts = sapply(inlist, function(z) z$Ontology)
   onts = onts[!is.na(onts)]
   unique(names(inlist[onts %in% which]))
}


##get GO evidence codes
getEvidence = function(inlist) 
     sapply(inlist, function(z) z$Evidence)

##drop a specified set of evidence codes
dropECode = function(inlist, code = "IEA") {
    hasCode = sapply(inlist, function(z) z$Evidence)
    badVals = hasCode %in% code
    inlist[!badVals]
}
##Copyright R. Gentleman 2004, all rights reserved
##some functions to do the shortest path analysis

shortestPath <- function(g, GOnode)
{
    if( !is(g, "graph") )
        stop("first argument must be a graph")

    if( edgemode(g) != "undirected" )
        stop("only undirected graphs for now, sorry")

    if( !is.character(GOnode) || length(GOnode) > 1 )
        stop("bad GO node description")

    require("GO", character.only=TRUE) ||
                      stop("GO library not available")

    ##obtain the LLIDs at the GO term

    LLs <- unique(get(GOnode, GOLOCUSID))

    m1 <- match(LLs, nodes(g))
    notthere <- LLs[is.na(m1)]
    there <- LLs[!is.na(m1)]
    if( length(there) <= 1 )
        stop("no nodes correspond to the specified term")

    nthere <- length(there)
    rval <- vector("list", (nthere*(nthere-1))/2)
    nms <- rep("", (nthere*(nthere-1))/2)
    k <- 1
    there <- as.character(there)
    for(i in 1:(nthere-1))
        for(j in (i+1):nthere) {
            nms[k] <- paste(there[i], there[j], sep=":")
            rval[[k]] <- sp.between(g,there[i], there[j])
            k <- k+1
        }
    names(rval) <- nms
    return(list(shortestpaths=rval, nodesUsed=there,
                nodesNotUsed=notthere))
}

compGdist <- function(g, whNodes, verbose=FALSE) {
    require("RBGL") || stop("need RBGL for compGdist")
    rval <- NULL
    nNodes = length(whNodes)
    for(i in 1:nNodes) {
        vvX <- dijkstra.sp(g, whNodes[i])
        rval[[i]] <- vvX$distance[whNodes[-i]]
        if( verbose)
            print(paste("Processing node", i, "of", nNodes))
    }
    tfmatrix <- matrix(0, nr= nNodes, nc=nNodes)
    dimnames(tfmatrix) <- list(whNodes, whNodes)
    for(i in 1:nNodes){
        tfmatrix[i,names(rval[[i]])] <- rval[[i]]
    }
    return(tfmatrix)
}

compCorrGraph <- function(eSet, k=1, tau=0.6) {
    if( tau < 0 || tau > 1 ) stop("bad tau value")
    cB <- abs(cor(t(exprs(eSet))))
    cB[cB < tau] <- 0
    whE = cB>0
    ##FIXME: any two genes whose exprssion values are perfectly
    ##correlated will get dropped - seems pretty unlikely
    cB[whE] <- ((1-cB)[whE])^k
    ##set diag to zero, since we do not want self-edges
    diag(cB) = 0
    require("SparseM", quietly=TRUE) || stop("need SparseM package")
    v1<-as.matrix.csr(cB, nr=dim(cB)[1], nc=dim(cB)[2])
    rv <- sparseM2Graph(v1, geneNames(eSet))
}

 ##given a matrix and a logical vector return two lists
 ##the row names and the column names for each selected entry

 idx2dimnames = function(x, idx) {
     if(length(idx) != length(x) )
         stop("x and idx different lengths")
     rowV = row(x)[idx]
     colV = col(x)[idx]
     return(list(rowNames = dimnames(x)[[1]][rowV],
                 colNames = dimnames(x)[[2]][colV]))
 }

##take as input a distance matrix
##if the number of infs is 1 less than the number of rows
##then that nod is not connected
 notConn = function(dists) {
   allInf = (dists==Inf)
   dns = idx2dimnames(dists, allInf)
   tab1 = table(dns[[1]])
   whAll = tab1[tab1 == (nrow(dists)-1)]
   if( any(tab1 != (nrow(dists)-1) & tab1 != length(whAll)) )
      warning("inf dists seem to be wrong")
   return(names(whAll))
}

 reduce2Degreek <- function(graph, k) {
    if(edgemode(graph) != "undirected")
       stop("only undirected graphs for now")
    done <- FALSE
    while(!done) {
        degs <- degree(graph)
        dgsub <- degs[degs> (k-1)]
        if( length(dgsub) == 0 )
           stop("no nodes left :-(")
        if( length(degs) == length(dgsub))
           done <- TRUE
        else
           graph <- subGraph(names(dgsub), graph)
   }
   return(graph)
  }

 isTriad <- function(x, y, z, elz, ely) {
   if( all(c(x,y) %in% elz ) && all(c(x,z) %in%  ely) )
     return(TRUE)
   return(FALSE)
 }

 enumPairs <- function(iVec) {
   leni <- length(iVec)
   if(leni < 2) return(vector(mode(iVec), length=0))
   eP <- vector("list", length=choose(leni, 2)/2)
   k<-1
   for( i in 1:(leni-1) ) {
     for(j in (i+1):leni) {
       eP[[k]] <- c(iVec[i], iVec[j])
       k <- k+1
     }
   }
   return(eP)
  }


 triadCensus <- function(graph) {
   g1 <- reduce2Degreek(graph, 2) ##all members have to have degree 2
   triads <- NULL
   k <- 1
   el <- edges(g1)
   for( n1 in nodes(g1) ) {
      for(j in enumPairs(el[[n1]])) {
        if( isTriad(n1, j[1], j[2], el[[j[2]]], el[[j[1]]]) ) {
            cand <- sort(c(n1,j))
            dupd <- sapply(triads, function(x) all(x==cand))
            if( length(triads)>1 && any(dupd) )
                next
            triads[[k]] <- cand
            k<- k+1
        }
      }
   }
   return(triads)
 }



.First.lib <- function(libname, pkgname, where) {
    ## require's version specificity doesn't allow for patterns.
    ## These will simulate it, in a hackish manner
    installed <- installed.packages()

    ## Make sure the metadata is at least 1.6.x
    ## Metadata is only 'Suggests' tho, so ok if they
    ## don't have it.
    cur <- match("hgu95av2", installed[,"Package"])
    if (!is.na(cur)) {
        if (compareVersion(installed[cur,"Version"], "1.6.0") < 0)
            warning("Your version of hgu95av2 must be at least 1.6.x")
        else
            require("hgu95av2")
    }

    ## The other packages are dependencies, must have them.  Make
    ## sure the BioC packages are the devel versions (as of 30/06/04)

    cur <- match("GO", installed[,"Package"])
    if ((!is.na(cur))&&(compareVersion(installed[cur,"Version"], "1.6.0")
                      < 0))
        warning("You must have package GO version >= 1.6.0")
    else
        require("GO")

    cur <- match("graph", installed[,"Package"])
    if ((!is.na(cur))&&(compareVersion(installed[cur,"Version"], "1.4.1")
                      < 0))
        warning("You must have package graph version >= 1.4.1")
    else
        require("graph")

    cur <- match("annotate", installed[,"Package"])
    if ((!is.na(cur))&&(compareVersion(installed[cur,"Version"], "1.4.0")
                      < 0))
        warning("You must have package annotate version >= 1.4.0")
    else
        require("annotate")

    cur <- match("Biobase", installed[,"Package"])
    if ((!is.na(cur))&&(compareVersion(installed[cur,"Version"], "1.4.15")
                      < 0))
        warning("You must have package Biobase version >= 1.4.15")
    else
        require("Biobase")

    cur <- match("RBGL", installed[,"Package"])
    if ((!is.na(cur))&&(compareVersion(installed[cur,"Version"], "1.2.2")
                      < 0))
        warning("You must have package RBGL version >= 1.2.2")
    else
        require("RBGL")

    cur <- match("genefilter", installed[,"Package"])
    if ((!is.na(cur))&&(compareVersion(installed[cur,"Version"], "1.4.0")
                      < 0))
        warning("You must have package genefilter version >= 1.4.0")
    else
        require("genefilter")

    cur <- match("multtest", installed[,"Package"])
    if ((!is.na(cur))&&(compareVersion(installed[cur,"Version"], "1.4.1")
                      < 0))
        warning("You must have package multtest version >= 1.4.1")
    else
        require("multtest")


    if(.Platform$OS.type == "windows" && require(Biobase) && interactive()
        && .Platform$GUI ==  "Rgui"){
        addVigs2WinMenu("GOstats")
    }


}
