fetchGenesForSomeSets.js

import { rangesToBlocks, consolidateRanges } from "./consolidateRanges.js";
import { extractRanges } from "./extractRanges.js";
import { fetchGenesForAllSets } from "./fetchGenesForAllSets.js";
import * as utils from "./utils.js";

/**
 * Fetch the gene membership of some sets in the Gesel database.
 * This can be more efficient than {@linkcode fetchGenesForAllSets} if only a few sets are of interest.
 *
 * Every time this function is called, information from the requested `sets` will be added to an in-memory cache.
 * Subsequent calls to this function will re-use as many of the cached sets as possible before making new requests to the Gesel database.
 * 
 * If {@linkcode fetchGenesForAllSets} was previously called, its cached data will be directly used by `fetchGenesForSomeSets` to avoid performing extra requests to the database.
 * If `sets` is large, it may be more efficient to call {@linkcode fetchGenesForAllSets} to prepare the cache before calling this function.
 * 
 * @param {string} species - The taxonomy ID of the species of interest, e.g., `"9606"` for human.
 * @param {Array} sets - Array of set IDs.
 * Each ID is a row index in the array returned by {@linkcode fetchAllSets}. 
 * @param {object} config - Configuration object, see {@linkcode newConfig}.
 *
 * @return {Array} Array of length equal to `sets`.
 * Each entry is a Uint32Array containing the IDs for all genes belonging to the corresponding set in `sets`.
 * Gene IDs refer to indices in {@linkcode fetchAllGenes}.
 *
 * @async
 */
export async function fetchGenesForSomeSets(species, sets, config) {
    if ("fetchGenesForAllSets" in config.cache) {
        const everything = await fetchGenesForAllSets(species, config);
        let output = [];
        for (const s of sets) {
            output.push(everything[s]);
        }
        return output;
    }

    let cache;
    if ("fetchGenesForSomeSets" in config.cache) {
        cache = config.cache.fetchGenesForSomeSets;
    } else {
        cache = new Map;
        config.cache.fetchGenesForSomeSets = cache;
    }
    let modified = false;

    const fname = species + "_set2gene.tsv";
    let spfound = cache.get(species);
    if (typeof spfound == "undefined") {
        const intervals = await utils.retrieveRanges(config, fname);
        spfound = {
            intervals: intervals,
            blocked: rangesToBlocks(intervals, config.consolidateBlockSize),
            prior: new Map
        };
        cache.set(species, spfound);
    }

    let needed = utils.setdiff(sets, spfound.prior); 
    if (needed.length > 0) {
        const consolidated = consolidateRanges(spfound.intervals, spfound.blocked, needed);
        const consolidated_parts = await config.fetchRanges(fname, consolidated.start, consolidated.end);

        extractRanges(
            consolidated_parts,
            consolidated.start,
            consolidated.end,
            spfound.intervals,
            consolidated.requested,
            (ii, sliced) => { spfound.prior.set(ii, utils.decodeIndicesFromBuffer(sliced)); }
        );
    }

    let output = [];
    for (const s of sets) {
        output.push(spfound.prior.get(s));
    }
    return output;
}