import { fetchSetSizes } from "./fetchSingleSet.js";
import { fetchSetsForGene, effectiveNumberOfGenes } from "./fetchSetsForGene.js";
import * as enrich from "./testEnrichment.js";
/**
* @param {string} species - The taxonomy ID of the species of interest, e.g., `"9606"` for human.
* @param {Array} genes - Array of unique integers containing user-supplied gene IDs, see {@linkcode fetchAllGenes} for details.
* @param {object} [options={}] - Optional parameters.
* @param {boolean} [options.includeSize=true] - Whether to include the size of each set in the output.
* @param {boolean} [options.testEnrichment=true] - Whether to compute the enrichment p-value for each set with {@linkcode testEnrichment}.
* The list and universe sizes will only count genes that are involved in at least one set, by checking {@linkcode fetchSetsForGene} and {@linkcode effectiveNumberOfGenes} respectively.
* @param {boolean} [options.forceDownload=false] - See {@linkcode fetchSetsForGene}.
*
* @return {Array} An array of objects, where each object corresponds to a set that has non-zero overlaps with `genes`.
* Each object contains:
*
* - `id`: the ID of the set in {@linkcode fetchAllSets}.
* - `count`: the number of genes in the set that overlap with genes in `genes`.
* - `size`: the size of each set.
* Only included if `includeSize = true`.
* - `pvalue`: the enrichment p-value.
* Only included if `testEnrichment = true`.
*
* @async
*/
export async function findOverlappingSets(species, genes, { includeSize = true, testEnrichment = true, forceDownload = false } = {}) {
await fetchSetsForGene(species, null, { forceDownload });
let promises = [];
let queried = new Set;
for (const g of genes) {
if (!queried.has(g)) {
promises.push(fetchSetsForGene(species, g));
queried.add(g);
}
}
let collected = await Promise.all(promises);
let output = countSetOverlaps(collected);
let sets_sizes = (includeSize || testEnrichment ? await fetchSetSizes(species) : null);
if (includeSize) {
for (const details of output) {
details.size = sets_sizes[details.id];
}
}
if (testEnrichment) {
let effective_list = 0; // only considering genes involved in at least one set.
for (const v of collected) {
effective_list += (v.length > 0);
}
let universe = await effectiveNumberOfGenes(species);
for (const details of output) {
details.pvalue = enrich.testEnrichment(details.count, effective_list, sets_sizes[details.id], universe);
}
}
return output;
}
/**
* This is a utility function that is called internally by {@linkcode findOverlappingSets}.
* However, it can be used directly to obtain overlap counts if the gene-to-set mappings are manually obtained.
*
* @param {Array} setsForSomeGenes - Array where each entry corresponds to a gene and contains an array of the set IDs containing that gene.
* Each inner array is typically the result of calling {@linkcode fetchSetsForGene}.
*
* @return {Array} An array of objects, where each object corresponds to a set that is present in at least one entry of `setsForSomeGenes`.
* Each object contains:
*
* - `id`: the ID of the set in {@linkcode fetchAllSets}.
* - `count`: the number of genes in the set that overlap with genes in `genes`.
*/
export function countSetOverlaps(setsForSomeGenes) {
var set_count = new Map;
for (const found of setsForSomeGenes) {
for (const set of found) {
let current = set_count.get(set);
if (typeof current == "undefined") {
set_count.set(set, 1);
} else {
set_count.set(set, current + 1);
}
}
}
let output = [];
for (const [id, count] of set_count) {
let id0 = Number(id);
let details = {
"id": id0,
"count": count
};
output.push(details);
}
return output;
}