import { rangesToBlocks, consolidateRanges } from "./consolidateRanges.js";
import { extractRanges } from "./extractRanges.js";
import { fetchAllCollections } from "./fetchAllCollections.js";
import * as utils from "./utils.js";
/**
 * Look up (or lazily build) the per-species bookkeeping used for partial collection fetches.
 *
 * The bookkeeping lives in a `Map` stored at `config.cache.fetchSomeCollections`, keyed by species.
 * On a cache miss, the byte ranges and per-collection extras for `<species>_collections.tsv`
 * are obtained via `utils.retrieveRangesWithExtras` and stored for later calls.
 *
 * @param {string} species - The taxonomy ID of the species of interest, e.g., `"9606"` for human.
 * @param {object} config - Configuration object; `config.cache` and `config.consolidateBlockSize` are read here.
 * @return {object} Object with `fname` (name of the collections file) and `spfound` (the cached record).
 * The record holds `intervals`, `blocked`, `sizes`, `starts` (running total of `sizes`, one offset per collection)
 * and `prior` (a `Map` of already-parsed collections, initially empty).
 * @async
 */
async function initialize(species, config) {
    // Create the shared species -> record cache on first use.
    if (!("fetchSomeCollections" in config.cache)) {
        config.cache.fetchSomeCollections = new Map();
    }
    const perSpecies = config.cache.fetchSomeCollections;

    const fname = `${species}_collections.tsv`;

    let spfound = perSpecies.get(species);
    if (spfound === undefined) {
        const { ranges, extra } = await utils.retrieveRangesWithExtras(config, fname);
        const blocked = rangesToBlocks(ranges, config.consolidateBlockSize);

        // Each collection's starting offset is the sum of the sizes before it.
        const starts = [];
        let offset = 0;
        for (const count of extra) {
            starts.push(offset);
            offset += count;
        }

        spfound = {
            intervals: ranges,
            blocked,
            sizes: extra,
            starts,
            prior: new Map(),
        };
        perSpecies.set(species, spfound);
    }

    return { fname, spfound };
}
/**
 * Report how many gene sets each collection contains.
 *
 * If the {@linkcode fetchAllCollections} cache is present in `config.cache`, the sizes are taken from its results;
 * otherwise they come from the lightweight per-species record used by {@linkcode fetchSomeCollections}.
 *
 * @param {string} species - The taxonomy ID of the species of interest, e.g., `"9606"` for human.
 * @param {object} config - Configuration object, see {@linkcode newConfig}.
 * @return {Array} Number of sets in each collection.
 * Each value corresponds to a collection in {@linkcode fetchAllCollections}.
 * @async
 */
export async function fetchCollectionSizes(species, config) {
    if (!("fetchAllCollections" in config.cache)) {
        const { spfound } = await initialize(species, config);
        return spfound.sizes;
    }

    // Full details are (or will be) cached anyway, so read the sizes from them.
    const allCollections = await fetchAllCollections(species, config);
    return Array.from(allCollections, (collection) => collection.size);
}
/**
 * Count the gene set collections available for a species.
 *
 * If the {@linkcode fetchAllCollections} cache is present in `config.cache`, the count is the length of its results;
 * otherwise it is the length of the per-collection sizes held in the partial-fetch record.
 *
 * @param {string} species - The taxonomy ID of the species of interest, e.g., `"9606"` for human.
 * @param {object} config - Configuration object, see {@linkcode newConfig}.
 * @return {number} Total number of collections for this species.
 * @async
 */
export async function numberOfCollections(species, config) {
    if (!("fetchAllCollections" in config.cache)) {
        const { spfound } = await initialize(species, config);
        return spfound.sizes.length;
    }

    const allCollections = await fetchAllCollections(species, config);
    return allCollections.length;
}
/**
 * Fetch the details of selected gene set collections from the Gesel database.
 * When only a handful of collections are needed, this can be cheaper than {@linkcode fetchAllCollections}.
 *
 * Each call adds the details of any newly requested `collections` to an in-memory cache.
 * Later calls reuse whatever is already cached and only request the collections that are still missing.
 *
 * If the {@linkcode fetchAllCollections} cache is present, its data is used instead so that no range requests are made.
 * For a large `collections`, it may be more efficient to call {@linkcode fetchAllCollections} first to prepare that cache.
 *
 * @param {string} species - The taxonomy ID of the species of interest, e.g., `"9606"` for human.
 * @param {Array} collections - Array of collection IDs.
 * Each entry is a row index into the array returned by {@linkcode fetchAllCollections}.
 * @param {object} config - Configuration object, see {@linkcode newConfig}.
 *
 * @return {Array} Array of length equal to `collections`.
 * Each entry is an object containing details about the corresponding collection in `collections`.
 *
 * @async
 */
export async function fetchSomeCollections(species, collections, config) {
    // The complete table is available, so index straight into it.
    if ("fetchAllCollections" in config.cache) {
        const allCollections = await fetchAllCollections(species, config);
        return Array.from(collections, (id) => allCollections[id]);
    }

    const { fname, spfound } = await initialize(species, config);
    const { intervals, blocked, sizes, starts, prior } = spfound;

    // Only collections that have not been parsed before need a request.
    const missing = utils.setdiff(collections, prior);
    if (missing.length > 0) {
        const plan = consolidateRanges(intervals, blocked, missing);
        const payload = await config.fetchRanges(fname, plan.start, plan.end);
        const decoder = new TextDecoder();

        // Each row is tab-separated: title, description, species, maintainer, source.
        const remember = (index, bytes) => {
            const [title, description, taxonomy, maintainer, source] = decoder.decode(bytes).split("\t");
            prior.set(index, {
                title,
                description,
                species: taxonomy,
                maintainer,
                source,
                start: starts[index],
                size: sizes[index],
            });
        };

        extractRanges(payload, plan.start, plan.end, intervals, plan.requested, remember);
    }

    return Array.from(collections, (id) => prior.get(id));
}