gesel
Validating Gesel database files
Loading...
Searching...
No Matches
validate_genes.hpp
Go to the documentation of this file.
1#ifndef GESEL_VALIDATE_GENES_HPP
2#define GESEL_VALIDATE_GENES_HPP
3
4#include "check_genes.hpp"
5
6#include <cstdint>
7#include <string>
8#include <vector>
9#include <stdexcept>
10#include <filesystem>
11
17namespace gesel {
18
30inline uint64_t validate_genes(const std::string& prefix, const std::vector<std::string>& types) {
31 bool first = true;
32 uint64_t num_genes = 0;
33 for (auto t : types) {
34 auto candidate = internal::check_genes(prefix + t + ".tsv.gz");
35 if (first) {
36 num_genes = candidate;
37 first = false;
38 } else if (candidate != num_genes) {
39 throw std::runtime_error("inconsistent number of genes between types (" + std::to_string(num_genes) + " for " + types.front() + ", " + std::to_string(candidate) + " for " + t + ")");
40 }
41 }
42
43 if (first) {
44 throw std::runtime_error("at least one gene name type should be present");
45 }
46
47 return num_genes;
48}
49
59inline uint64_t validate_genes(const std::string& prefix) {
60 std::vector<std::string> types;
61
62 std::filesystem::path path(prefix);
63 auto dir = path.parent_path();
64 auto raw_prefix = path.filename().string();
65
66 for (const auto& entry : std::filesystem::directory_iterator(dir)) {
67 std::string name = entry.path().filename().string();
68 if (name.rfind(raw_prefix, 0) != 0) {
69 continue;
70 }
71 if (name.size() < 6) {
72 continue;
73 }
74 size_t ext_loc = name.size() - 7;
75 if (name.rfind(".tsv.gz", ext_loc) != ext_loc) {
76 continue;
77 }
78 types.push_back(name.substr(raw_prefix.size(), ext_loc - raw_prefix.size()));
79 }
80
81 return validate_genes(prefix, types);
82}
83
84}
85
86#endif
Validate Gesel database and gene files.
Definition validate_database.hpp:20
uint64_t validate_genes(const std::string &prefix, const std::vector< std::string > &types)
Definition validate_genes.hpp:30